author		Stephen Rothwell <sfr@canb.auug.org.au>	2018-05-17 12:10:29 +1000
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2018-05-17 12:10:35 +1000
commit		7fbe2021c0b8b0a2365bcc42d42141037b8e3d6c (patch)
tree		686642942e98955752cd6196bfa06a6bebcea8f1
parent		0bc559a5459ade36ea3eb91dada54cf278e5242f (diff)
parent		95d2c3e15da613afd53b4b8f2cdb352dc7d12221 (diff)
Merge remote-tracking branch 'drm/drm-next'
681 files changed, 48362 insertions, 14682 deletions
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt index 0047b1394c70..2c887536258c 100644 --- a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt +++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt @@ -14,7 +14,13 @@ Required properties: "adi,adv7513" "adi,adv7533" -- reg: I2C slave address +- reg: I2C slave addresses + The ADV7511 internal registers are split into four pages exposed through + different I2C addresses, creating four register maps. Each map has its own + I2C address and acts as a standard slave device on the I2C bus. The main + address is mandatory, others are optional and revert to defaults if not + specified. + The ADV7511 supports a large number of input data formats that differ by their color depth, color format, clock mode, bit justification and random @@ -70,6 +76,9 @@ Optional properties: rather than generate its own timings for HDMI output. - clocks: from common clock binding: reference to the CEC clock. - clock-names: from common clock binding: must be "cec". +- reg-names: Names of maps with programmable addresses. + It can contain any map needing a non-default address. + Possible map names are: "main", "edid", "cec", "packet" Required nodes: @@ -88,7 +97,12 @@ Example adv7511w: hdmi@39 { compatible = "adi,adv7511w"; - reg = <39>; + /* + * The EDID page will be accessible on address 0x66 on the I2C + * bus. All other maps continue to use their default addresses. + */ + reg = <0x39>, <0x66>; + reg-names = "main", "edid"; interrupt-parent = <&gpio3>; interrupts = <29 IRQ_TYPE_EDGE_FALLING>; clocks = <&cec_clock>;
diff --git a/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt b/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt new file mode 100644 index 000000000000..f5725bb6c61c --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt @@ -0,0 +1,133 @@ +Cadence DSI bridge +================== + +The Cadence DSI bridge is a DPI to DSI bridge supporting up to 4 DSI lanes. + +Required properties: +- compatible: should be set to "cdns,dsi". +- reg: physical base address and length of the controller's registers. +- interrupts: interrupt line connected to the DSI bridge. +- clocks: DSI bridge clocks. +- clock-names: must contain "dsi_p_clk" and "dsi_sys_clk". +- phys: phandle link to the MIPI D-PHY controller. +- phy-names: must contain "dphy". +- #address-cells: must be set to 1. +- #size-cells: must be set to 0. + +Optional properties: +- resets: DSI reset lines. +- reset-names: can contain "dsi_p_rst". + +Required subnodes: +- ports: Ports as described in Documentation/devicetree/bindings/graph.txt. + 2 ports are available: + * port 0: this port is only needed if some of your DSI devices are + controlled through an external bus like I2C or SPI. Can have at + most 4 endpoints. The endpoint number directly encodes the + DSI virtual channel used by this device. + * port 1: represents the DPI input. + Other ports will be added later to support new kinds of inputs. + +- one subnode per DSI device connected on the DSI bus. Each DSI device should + contain a reg property encoding its virtual channel. + +Cadence DPHY +============ + +Cadence DPHY block. + +Required properties: +- compatible: should be set to "cdns,dphy". +- reg: physical base address and length of the DPHY registers. +- clocks: DPHY reference clocks. +- clock-names: must contain "psm" and "pll_ref".
+- #phy-cells: must be set to 0. + + +Example: + dphy0: dphy@fd0e0000{ + compatible = "cdns,dphy"; + reg = <0x0 0xfd0e0000 0x0 0x1000>; + clocks = <&psm_clk>, <&pll_ref_clk>; + clock-names = "psm", "pll_ref"; + #phy-cells = <0>; + }; + + dsi0: dsi@fd0c0000 { + compatible = "cdns,dsi"; + reg = <0x0 0xfd0c0000 0x0 0x1000>; + clocks = <&pclk>, <&sysclk>; + clock-names = "dsi_p_clk", "dsi_sys_clk"; + interrupts = <1>; + phys = <&dphy0>; + phy-names = "dphy"; + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@1 { + reg = <1>; + dsi0_dpi_input: endpoint { + remote-endpoint = <&xxx_dpi_output>; + }; + }; + }; + + panel: dsi-dev@0 { + compatible = "<vendor,panel>"; + reg = <0>; + }; + }; + +or + + dsi0: dsi@fd0c0000 { + compatible = "cdns,dsi"; + reg = <0x0 0xfd0c0000 0x0 0x1000>; + clocks = <&pclk>, <&sysclk>; + clock-names = "dsi_p_clk", "dsi_sys_clk"; + interrupts = <1>; + phys = <&dphy1>; + phy-names = "dphy"; + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + dsi0_output: endpoint@0 { + reg = <0>; + remote-endpoint = <&dsi_panel_input>; + }; + }; + + port@1 { + reg = <1>; + dsi0_dpi_input: endpoint { + remote-endpoint = <&xxx_dpi_output>; + }; + }; + }; + }; + + i2c@xxx { + panel: panel@59 { + compatible = "<vendor,panel>"; + reg = <0x59>; + + port { + dsi_panel_input: endpoint { + remote-endpoint = <&dsi0_output>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt index 3a72a103a18a..a41d280c3f9f 100644 --- a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt +++ b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt @@ -14,6 +14,7 @@ Required properties: - compatible : Shall contain one or more of - "renesas,r8a7795-hdmi" for R8A7795 (R-Car H3) compatible HDMI TX - "renesas,r8a7796-hdmi" for R8A7796 (R-Car M3-W) compatible HDMI TX + - "renesas,r8a77965-hdmi" for R8A77965 (R-Car M3-N) compatible HDMI TX - "renesas,rcar-gen3-hdmi" for the generic R-Car Gen3 compatible HDMI TX When compatible with generic versions, nodes must list the SoC-specific diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt new file mode 100644 index 000000000000..37f0c04d5a28 --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt @@ -0,0 +1,60 @@ +Thine Electronics THC63LVD1024 LVDS decoder +------------------------------------------- + +The THC63LVD1024 is a dual link LVDS receiver designed to convert LVDS streams +to parallel data outputs. The chip supports single/dual input/output modes, +handling up to two LVDS input streams and up to two digital CMOS/TTL outputs. + +Single or dual operation mode, output data mapping and DDR output modes are +configured through input signals and the chip does not expose any control bus. + +Required properties: +- compatible: Shall be "thine,thc63lvd1024" +- vcc-supply: Power supply for TTL output, TTL CLOCKOUT signal, LVDS input, + PPL and digital circuitry + +Optional properties: +- powerdown-gpios: Power down GPIO signal, pin name "/PDWN". Active low +- oe-gpios: Output enable GPIO signal, pin name "OE". 
Active high + +The THC63LVD1024 video port connections are modeled according +to OF graph bindings specified by Documentation/devicetree/bindings/graph.txt + +Required video port nodes: +- port@0: First LVDS input port +- port@2: First digital CMOS/TTL parallel output + +Optional video port nodes: +- port@1: Second LVDS input port +- port@3: Second digital CMOS/TTL parallel output + +Example: +-------- + + thc63lvd1024: lvds-decoder { + compatible = "thine,thc63lvd1024"; + + vcc-supply = <®_lvds_vcc>; + powerdown-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lvds_dec_in_0: endpoint { + remote-endpoint = <&lvds_out>; + }; + }; + + port@2{ + reg = <2>; + + lvds_dec_out_2: endpoint { + remote-endpoint = <&adv7511_in>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/renesas,du.txt b/Documentation/devicetree/bindings/display/renesas,du.txt index c9cd17f99702..7c6854bd0a04 100644 --- a/Documentation/devicetree/bindings/display/renesas,du.txt +++ b/Documentation/devicetree/bindings/display/renesas,du.txt @@ -13,6 +13,7 @@ Required Properties: - "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU - "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU - "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU + - "renesas,du-r8a77965" for R8A77965 (R-Car M3-N) compatible DU - "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU - "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU @@ -47,20 +48,21 @@ bindings specified in Documentation/devicetree/bindings/graph.txt. The following table lists for each supported model the port number corresponding to each DU output. - Port0 Port1 Port2 Port3 + Port0 Port1 Port2 Port3 ----------------------------------------------------------------------------- - R8A7743 (RZ/G1M) DPAD 0 LVDS 0 - - - R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - - - R8A7779 (R-Car H1) DPAD 0 DPAD 1 - - - R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 - - R8A7791 (R-Car M2-W) DPAD 0 LVDS 0 - - - R8A7792 (R-Car V2H) DPAD 0 DPAD 1 - - - R8A7793 (R-Car M2-N) DPAD 0 LVDS 0 - - - R8A7794 (R-Car E2) DPAD 0 DPAD 1 - - - R8A7795 (R-Car H3) DPAD 0 HDMI 0 HDMI 1 LVDS 0 - R8A7796 (R-Car M3-W) DPAD 0 HDMI 0 LVDS 0 - - R8A77970 (R-Car V3M) DPAD 0 LVDS 0 - - - R8A77995 (R-Car D3) DPAD 0 LVDS 0 LVDS 1 - + R8A7743 (RZ/G1M) DPAD 0 LVDS 0 - - + R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - - + R8A7779 (R-Car H1) DPAD 0 DPAD 1 - - + R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 - + R8A7791 (R-Car M2-W) DPAD 0 LVDS 0 - - + R8A7792 (R-Car V2H) DPAD 0 DPAD 1 - - + R8A7793 (R-Car M2-N) DPAD 0 LVDS 0 - - + R8A7794 (R-Car E2) DPAD 0 DPAD 1 - - + R8A7795 (R-Car H3) DPAD 0 HDMI 0 HDMI 1 LVDS 0 + R8A7796 (R-Car M3-W) DPAD 0 HDMI 0 LVDS 0 - + R8A77965 (R-Car M3-N) DPAD 0 HDMI 0 LVDS 0 - + R8A77970 (R-Car V3M) DPAD 0 LVDS 0 - - + R8A77995 (R-Car D3) DPAD 0 LVDS 0 LVDS 1 - Example: R8A7795 (R-Car H3) ES2.0 DU diff --git a/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt b/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt new file mode 100644 index 000000000000..6a6cf5de08b0 --- /dev/null +++ b/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt @@ -0,0 +1,93 @@ +Allwinner A31 DSI Encoder +========================= + +The DSI pipeline consists of two separate blocks: the DSI controller +itself, and its associated D-PHY. + +DSI Encoder +----------- + +The DSI Encoder generates the DSI signal from the TCON's. 
+ +Required properties: + - compatible: value must be one of: + * allwinner,sun6i-a31-mipi-dsi + - reg: base address and size of memory-mapped region + - interrupts: interrupt associated to this IP + - clocks: phandles to the clocks feeding the DSI encoder + * bus: the DSI interface clock + * mod: the DSI module clock + - clock-names: the clock names mentioned above + - phys: phandle to the D-PHY + - phy-names: must be "dphy" + - resets: phandle to the reset controller driving the encoder + + - ports: A ports node with endpoint definitions as defined in + Documentation/devicetree/bindings/media/video-interfaces.txt. The + first port should be the input endpoint, usually coming from the + associated TCON. + +Any MIPI-DSI device attached to this should be described according to +the bindings defined in ../mipi-dsi-bus.txt + +D-PHY +----- + +Required properties: + - compatible: value must be one of: + * allwinner,sun6i-a31-mipi-dphy + - reg: base address and size of memory-mapped region + - clocks: phandles to the clocks feeding the DSI encoder + * bus: the DSI interface clock + * mod: the DSI module clock + - clock-names: the clock names mentioned above + - resets: phandle to the reset controller driving the encoder + +Example: + +dsi0: dsi@1ca0000 { + compatible = "allwinner,sun6i-a31-mipi-dsi"; + reg = <0x01ca0000 0x1000>; + interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&ccu CLK_BUS_MIPI_DSI>, + <&ccu CLK_DSI_SCLK>; + clock-names = "bus", "mod"; + resets = <&ccu RST_BUS_MIPI_DSI>; + phys = <&dphy0>; + phy-names = "dphy"; + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "bananapi,lhr050h41", "ilitek,ili9881c"; + reg = <0>; + power-gpios = <&pio 1 7 GPIO_ACTIVE_HIGH>; /* PB07 */ + reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */ + backlight = <&pwm_bl>; + }; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + dsi0_in_tcon0: endpoint { + remote-endpoint = <&tcon0_out_dsi0>; + }; + }; + }; +}; + +dphy0: d-phy@1ca1000 { + compatible = "allwinner,sun6i-a31-mipi-dphy"; + reg = <0x01ca1000 0x1000>; + clocks = <&ccu CLK_BUS_MIPI_DSI>, + <&ccu CLK_DSI_DPHY>; + clock-names = "bus", "mod"; + resets = <&ccu RST_BUS_MIPI_DSI>; + #phy-cells = <0>; +}; diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt new file mode 100644 index 000000000000..c907aa8dd755 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt @@ -0,0 +1,28 @@ +Broadcom V3D GPU + +Only the Broadcom V3D 3.x and newer GPUs are covered by this binding. +For V3D 2.x, see brcm,bcm-vc4.txt. + +Required properties: +- compatible: Should be "brcm,7268-v3d" or "brcm,7278-v3d" +- reg: Physical base addresses and lengths of the register areas +- reg-names: Names for the register areas. The "hub", "bridge", and "core0" + register areas are always required. The "gca" register area + is required if the GCA cache controller is present. +- interrupts: The interrupt numbers. The first interrupt is for the hub, + while the following interrupts are for the cores. 
+ See bindings/interrupt-controller/interrupts.txt + +Optional properties: +- clocks: The core clock the unit runs on + +v3d { + compatible = "brcm,7268-v3d"; + reg = <0xf1204000 0x100>, + <0xf1200000 0x4000>, + <0xf1208000 0x4000>, + <0xf1204100 0x100>; + reg-names = "bridge", "hub", "core0", "gca"; + interrupts = <0 78 4>, + <0 77 4>; +}; diff --git a/Documentation/devicetree/bindings/gpu/samsung-scaler.txt b/Documentation/devicetree/bindings/gpu/samsung-scaler.txt new file mode 100644 index 000000000000..9c3d98105dfd --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/samsung-scaler.txt @@ -0,0 +1,27 @@ +* Samsung Exynos Image Scaler + +Required properties: + - compatible : value should be one of the following: + (a) "samsung,exynos5420-scaler" for Scaler IP in Exynos5420 + (b) "samsung,exynos5433-scaler" for Scaler IP in Exynos5433 + + - reg : Physical base address of the IP registers and length of memory + mapped region. + + - interrupts : Interrupt specifier for scaler interrupt, according to format + specific to interrupt parent. + + - clocks : Clock specifier for scaler clock, according to generic clock + bindings. (See Documentation/devicetree/bindings/clock/exynos*.txt) + + - clock-names : Names of clocks. For exynos scaler, it should be "mscl" + on 5420 and "pclk", "aclk" and "aclk_xiu" on 5433. + +Example: + scaler@12800000 { + compatible = "samsung,exynos5420-scaler"; + reg = <0x12800000 0x1294>; + interrupts = <0 220 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clock CLK_MSCL0>; + clock-names = "mscl"; + }; diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index e8c84419a2a1..f982558fc25d 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -10,8 +10,10 @@ GPU Driver Documentation tegra tinydrm tve200 + v3d vc4 bridge/dw-hdmi + xen-front .. only:: subproject and html diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 41dc881b00dc..055df45596c1 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -58,6 +58,12 @@ Intel GVT-g Host Support(vGPU device model) .. kernel-doc:: drivers/gpu/drm/i915/intel_gvt.c :internal: +Workarounds +----------- + +.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c + :doc: Hardware workarounds + Display Hardware Handling ========================= @@ -249,6 +255,103 @@ Memory Management and Command Submission This sections covers all things related to the GEM implementation in the i915 driver. +Intel GPU Basics +---------------- + +An Intel GPU has multiple engines. There are several engine types. + +- RCS engine is for rendering 3D and performing compute, this is named + `I915_EXEC_RENDER` in user space. +- BCS is a blitting (copy) engine, this is named `I915_EXEC_BLT` in user + space. +- VCS is a video encode and decode engine, this is named `I915_EXEC_BSD` + in user space +- VECS is video enhancement engine, this is named `I915_EXEC_VEBOX` in user + space. +- The enumeration `I915_EXEC_DEFAULT` does not refer to specific engine; + instead it is to be used by user space to specify a default rendering + engine (for 3D) that may or may not be the same as RCS. + +The Intel GPU family is a family of integrated GPU's using Unified +Memory Access. For having the GPU "do work", user space will feed the +GPU batch buffers via one of the ioctls `DRM_IOCTL_I915_GEM_EXECBUFFER2` +or `DRM_IOCTL_I915_GEM_EXECBUFFER2_WR`. 
Most such batchbuffers will +instruct the GPU to perform work (for example rendering) and that work +needs memory from which to read and memory to which to write. All memory +is encapsulated within GEM buffer objects (usually created with the ioctl +`DRM_IOCTL_I915_GEM_CREATE`). An ioctl providing a batchbuffer for the GPU +to execute will also list all GEM buffer objects that the batchbuffer reads +and/or writes. For implementation details of memory management see +`GEM BO Management Implementation Details`_. + +The i915 driver allows user space to create a context via the ioctl +`DRM_IOCTL_I915_GEM_CONTEXT_CREATE` which is identified by a 32-bit +integer. Such a context should be viewed by user space as loosely +analogous to the idea of a CPU process of an operating system. The i915 +driver guarantees that commands issued to a fixed context are +executed so that writes of a previously issued command are seen by +reads of following commands. Actions issued between different contexts +(even if from the same file descriptor) are NOT given that guarantee +and the only way to synchronize across contexts (even from the same +file descriptor) is through the use of fences. At least as far back as +Gen4, a context also carries with it a GPU HW context; +the HW context is essentially (most of it, at least) the state of the GPU. +In addition to the ordering guarantees, the kernel will restore GPU +state via the HW context when commands are issued to a context; this saves +user space the need to restore (most of it, at least) the GPU state at the +start of each batchbuffer. The non-deprecated ioctls to submit batchbuffer +work can pass that ID (in the lower bits of drm_i915_gem_execbuffer2::rsvd1) +to identify what context to use with the command. + +The GPU has its own memory management and address space. The kernel +driver maintains the memory translation table for the GPU. For older +GPUs (i.e. those before Gen8), there is a single global such translation +table, a global Graphics Translation Table (GTT). For newer generation +GPUs, each context has its own translation table, called Per-Process +Graphics Translation Table (PPGTT). Of important note is that although +PPGTT is named per-process, it is actually per-context. When user space +submits a batchbuffer, the kernel walks the list of GEM buffer objects +used by the batchbuffer and guarantees that not only is the memory of +each such GEM buffer object resident but it is also present in the +(PP)GTT. If the GEM buffer object is not yet placed in the (PP)GTT, +then it is given an address. Two consequences of this are: the kernel +needs to edit the batchbuffer submitted to write the correct value of +the GPU address when a GEM BO is assigned a GPU address, and the kernel +might evict a different GEM BO from the (PP)GTT to make room +for another GEM BO. Consequently, the ioctls submitting a batchbuffer +for execution also include a list of all locations within buffers that +refer to GPU addresses so that the kernel can edit the buffer correctly. +This process is dubbed relocation. (A hedged user-space sketch of the +submission and relocation flow follows the eviction overview below.) + +GEM BO Management Implementation Details +---------------------------------------- + +.. kernel-doc:: drivers/gpu/drm/i915/i915_vma.h + :doc: Virtual Memory Address + +Buffer Object Eviction +---------------------- + +This section documents the interface functions for evicting buffer +objects to make space available in the virtual gpu address spaces.
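+
+As promised above, here is a minimal user-space sketch of the submission
+and relocation flow. It is an illustration only: ``submit_batch`` and its
+parameters are hypothetical names, the batch contents and all error
+handling are omitted, and a real client (Mesa, libdrm) would also fill in
+``presumed_offset`` hints so the kernel can skip patching buffers whose
+addresses have not changed::
+
+    #include <stdint.h>
+    #include <sys/ioctl.h>
+    #include <drm/i915_drm.h>
+
+    /* fd: open DRM device node; ctx_id: from DRM_IOCTL_I915_GEM_CONTEXT_CREATE;
+     * batch/target: GEM handles from DRM_IOCTL_I915_GEM_CREATE; reloc_offset:
+     * byte offset inside the batch that stores target's GPU address. */
+    static int submit_batch(int fd, uint32_t ctx_id, uint32_t batch,
+                            uint32_t target, uint64_t reloc_offset)
+    {
+        struct drm_i915_gem_relocation_entry reloc = {
+            .target_handle = target,
+            .offset = reloc_offset,     /* location the kernel may patch */
+            .read_domains = I915_GEM_DOMAIN_RENDER,
+        };
+        struct drm_i915_gem_exec_object2 objs[2] = {
+            { .handle = target },       /* list every BO the batch touches */
+            { .handle = batch,          /* the last entry is the batch */
+              .relocation_count = 1,
+              .relocs_ptr = (uintptr_t)&reloc },
+        };
+        struct drm_i915_gem_execbuffer2 execbuf = {
+            .buffers_ptr = (uintptr_t)objs,
+            .buffer_count = 2,
+            .batch_len = 4096,          /* assumes a 4 KiB batch */
+            .flags = I915_EXEC_RENDER,  /* the RCS engine, see above */
+            .rsvd1 = ctx_id,            /* context ID in the lower bits */
+        };
+        return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
+    }
+
+Returning to eviction: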
Note +that this is mostly orthogonal to shrinking buffer objects caches, which +has the goal to make main memory (shared with the gpu through the +unified memory architecture) available. + +.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c + :internal: + +Buffer Object Memory Shrinking +------------------------------ + +This section documents the interface function for shrinking memory usage +of buffer object caches. Shrinking is used to make main memory +available. Note that this is mostly orthogonal to evicting buffer +objects, which has the goal to make space in gpu virtual address spaces. + +.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c + :internal: + Batchbuffer Parsing ------------------- @@ -267,6 +370,12 @@ Batchbuffer Pools .. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c :internal: +User Batchbuffer Execution +-------------------------- + +.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_execbuffer.c + :doc: User command execution + Logical Rings, Logical Ring Contexts and Execlists -------------------------------------------------- @@ -312,28 +421,14 @@ Object Tiling IOCTLs .. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c :doc: buffer object tiling -Buffer Object Eviction ----------------------- - -This section documents the interface functions for evicting buffer -objects to make space available in the virtual gpu address spaces. Note -that this is mostly orthogonal to shrinking buffer objects caches, which -has the goal to make main memory (shared with the gpu through the -unified memory architecture) available. - -.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c - :internal: - -Buffer Object Memory Shrinking ------------------------------- +WOPCM +===== -This section documents the interface function for shrinking memory usage -of buffer object caches. Shrinking is used to make main memory -available. Note that this is mostly orthogonal to evicting buffer -objects, which has the goal to make space in gpu virtual address spaces. +WOPCM Layout +------------ -.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c - :internal: +.. kernel-doc:: drivers/gpu/drm/i915/intel_wopcm.c + :doc: WOPCM Layout GuC === @@ -359,6 +454,12 @@ GuC Firmware Layout .. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fwif.h :doc: GuC Firmware Layout +GuC Address Space +----------------- + +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc.c + :doc: GuC Address Space + Tracing ======= diff --git a/Documentation/gpu/kms-properties.csv b/Documentation/gpu/kms-properties.csv index 6b28b014cb7d..07ed22ea3bd6 100644 --- a/Documentation/gpu/kms-properties.csv +++ b/Documentation/gpu/kms-properties.csv @@ -98,5 +98,4 @@ radeon,DVI-I,“coherent”,RANGE,"Min=0, Max=1",Connector,TBD ,,"""underscan vborder""",RANGE,"Min=0, Max=128",Connector,TBD ,Audio,“audio”,ENUM,"{ ""off"", ""on"", ""auto"" }",Connector,TBD ,FMT Dithering,“dither”,ENUM,"{ ""off"", ""on"" }",Connector,TBD -rcar-du,Generic,"""alpha""",RANGE,"Min=0, Max=255",Plane,TBD ,,"""colorkey""",RANGE,"Min=0, Max=0x01ffffff",Plane,TBD diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index f4d0b3476d9c..a7c150d6b63f 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -212,6 +212,24 @@ probably use drm_fb_helper_fbdev_teardown(). Contact: Maintainer of the driver you plan to convert +Clean up mmap forwarding +------------------------ + +A lot of drivers forward gem mmap calls to dma-buf mmap for imported buffers. +And also a lot of them forward dma-buf mmap to the gem mmap implementations. 
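+The first half of that pattern, condensed and with hypothetical names
+(``example_gem_mmap_obj`` is not any driver's real function), looks
+roughly like::
+
+    #include <linux/dma-buf.h>
+    #include <linux/mm.h>
+    #include <drm/drm_gem.h>
+
+    static int example_gem_mmap_obj(struct drm_gem_object *obj,
+                                    struct vm_area_struct *vma)
+    {
+            if (obj->import_attach)
+                    /* imported via PRIME: hand the mapping to the exporter */
+                    return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+            /* ... driver-specific path for natively allocated BOs ... */
+            return -ENOSYS;
+    }
+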
+Would be great to refactor this all into a set of small common helpers. + +Contact: Daniel Vetter + +Put a reservation_object into drm_gem_object +-------------------------------------------- + +This would remove the need for the ->gem_prime_res_obj callback. It would also +allow us to implement generic helpers for waiting for a bo, allowing for quite a +bit of refactoring in the various wait ioctl implementations. + +Contact: Daniel Vetter + idr_init_base() --------------- diff --git a/Documentation/gpu/xen-front.rst b/Documentation/gpu/xen-front.rst new file mode 100644 index 000000000000..d988da7d1983 --- /dev/null +++ b/Documentation/gpu/xen-front.rst @@ -0,0 +1,31 @@ +==================================================== + drm/xen-front Xen para-virtualized frontend driver +==================================================== + +This frontend driver implements Xen para-virtualized display +according to the display protocol described at +include/xen/interface/io/displif.h + +Driver modes of operation in terms of display buffers used +========================================================== + +.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h + :doc: Driver modes of operation in terms of display buffers used + +Buffers allocated by the frontend driver +---------------------------------------- + +.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h + :doc: Buffers allocated by the frontend driver + +Buffers allocated by the backend +-------------------------------- + +.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h + :doc: Buffers allocated by the backend + +Driver limitations +================== + +.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h + :doc: Driver limitations diff --git a/MAINTAINERS b/MAINTAINERS index 3c92d6214e47..f933ecd938d4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -767,12 +767,14 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c F: drivers/gpu/drm/amd/amdkfd/ F: drivers/gpu/drm/amd/include/cik_structs.h F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h F: drivers/gpu/drm/amd/include/vi_structs.h +F: drivers/gpu/drm/amd/include/v9_structs.h F: include/uapi/linux/kfd_ioctl.h AMD SEATTLE DEVICE TREE SUPPORT @@ -4794,6 +4796,14 @@ S: Maintained F: drivers/gpu/drm/omapdrm/ F: Documentation/devicetree/bindings/display/ti/ +DRM DRIVERS FOR V3D +M: Eric Anholt <eric@anholt.net> +S: Supported +F: drivers/gpu/drm/v3d/ +F: include/uapi/drm/v3d_drm.h +F: Documentation/devicetree/bindings/display/brcm,bcm-v3d.txt +T: git git://anongit.freedesktop.org/drm/drm-misc + DRM DRIVERS FOR VC4 M: Eric Anholt <eric@anholt.net> T: git git://github.com/anholt/linux @@ -4840,6 +4850,15 @@ S: Maintained F: drivers/gpu/drm/tinydrm/ F: include/drm/tinydrm/ +DRM DRIVERS FOR XEN +M: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> +T: git git://anongit.freedesktop.org/drm/drm-misc +L: dri-devel@lists.freedesktop.org +L: xen-devel@lists.xen.org +S: Supported +F: drivers/gpu/drm/xen/ +F: Documentation/gpu/xen-front.rst + DRM TTM SUBSYSTEM M: Christian Koenig <christian.koenig@amd.com> M: Roger He <Hongbo.He@amd.com> diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index d615a89f774c..05e33f937ad0 100644 --- a/drivers/dma-buf/sync_debug.h +++ 
b/drivers/dma-buf/sync_debug.h @@ -62,8 +62,6 @@ struct sync_pt { struct rb_node node; }; -#ifdef CONFIG_SW_SYNC - extern const struct file_operations sw_sync_debugfs_fops; void sync_timeline_debug_add(struct sync_timeline *obj); @@ -72,12 +70,4 @@ void sync_file_debug_add(struct sync_file *fence); void sync_file_debug_remove(struct sync_file *fence); void sync_dump(void); -#else -# define sync_timeline_debug_add(obj) -# define sync_timeline_debug_remove(obj) -# define sync_file_debug_add(fence) -# define sync_file_debug_remove(fence) -# define sync_dump() -#endif - #endif /* _LINUX_SYNC_H */ diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index deeefa7a1773..2a72d2feb76d 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -49,16 +49,17 @@ config DRM_DEBUG_MM If in doubt, say "N". -config DRM_DEBUG_MM_SELFTEST - tristate "kselftests for DRM range manager (struct drm_mm)" +config DRM_DEBUG_SELFTEST + tristate "kselftests for DRM" depends on DRM depends on DEBUG_KERNEL select PRIME_NUMBERS select DRM_LIB_RANDOM + select DRM_KMS_HELPER default n help - This option provides a kernel module that can be used to test - the DRM range manager (drm_mm) and its API. This option is not + This option provides kernel modules that can be used to run + various selftests on parts of the DRM api. This option is not useful for distributions or general kernels, but only for kernel developers working on DRM and associated drivers. @@ -267,6 +268,8 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig" source "drivers/gpu/drm/imx/Kconfig" +source "drivers/gpu/drm/v3d/Kconfig" + source "drivers/gpu/drm/vc4/Kconfig" source "drivers/gpu/drm/etnaviv/Kconfig" @@ -289,6 +292,8 @@ source "drivers/gpu/drm/pl111/Kconfig" source "drivers/gpu/drm/tve200/Kconfig" +source "drivers/gpu/drm/xen/Kconfig" + # Keep legacy drivers last menuconfig DRM_LEGACY diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 50093ff4479b..ef9f3dab287f 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -43,7 +43,7 @@ drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o -obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += selftests/ +obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/ obj-$(CONFIG_DRM) += drm.o obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o @@ -61,6 +61,7 @@ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ obj-$(CONFIG_DRM_I915) += i915/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ +obj-$(CONFIG_DRM_V3D) += v3d/ obj-$(CONFIG_DRM_VC4) += vc4/ obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ obj-$(CONFIG_DRM_SIS) += sis/ @@ -103,3 +104,4 @@ obj-$(CONFIG_DRM_MXSFB) += mxsfb/ obj-$(CONFIG_DRM_TINYDRM) += tinydrm/ obj-$(CONFIG_DRM_PL111) += pl111/ obj-$(CONFIG_DRM_TVE200) += tve200/ +obj-$(CONFIG_DRM_XEN) += xen/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2ca2b5154d52..68e9f584c570 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -64,6 +64,10 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o +# add DF block +amdgpu-y += \ + df_v1_7.o + # add GMC block amdgpu-y += \ gmc_v7_0.o \ @@ -130,7 +134,8 @@ amdgpu-y += \ amdgpu_amdkfd.o \ amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gpuvm.o \ - amdgpu_amdkfd_gfx_v8.o + amdgpu_amdkfd_gfx_v8.o \ + 
amdgpu_amdkfd_gfx_v9.o # add cgs amdgpu-y += amdgpu_cgs.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c8b605f3dc05..03a2c0be0bf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -129,6 +129,7 @@ extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; extern int amdgpu_emu_mode; +extern uint amdgpu_smu_memory_pool_size; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -137,6 +138,7 @@ extern int amdgpu_si_support; extern int amdgpu_cik_support; #endif +#define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ @@ -222,10 +224,10 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_LAST }; -int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state); -int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state); void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, @@ -681,6 +683,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); @@ -771,9 +775,18 @@ struct amdgpu_rlc { u32 starting_offsets_start; u32 reg_list_format_size_bytes; u32 reg_list_size_bytes; + u32 reg_list_format_direct_reg_list_length; + u32 save_restore_list_cntl_size_bytes; + u32 save_restore_list_gpm_size_bytes; + u32 save_restore_list_srm_size_bytes; u32 *register_list_format; u32 *register_restore; + u8 *save_restore_list_cntl; + u8 *save_restore_list_gpm; + u8 *save_restore_list_srm; + + bool is_rlc_v2_1; }; #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES @@ -867,6 +880,8 @@ struct amdgpu_gfx_config { /* gfx configure feature */ uint32_t double_offchip_lds_buf; + /* cached value of DB_DEBUG2 */ + uint32_t db_debug2; }; struct amdgpu_cu_info { @@ -938,6 +953,12 @@ struct amdgpu_gfx { uint32_t ce_feature_version; uint32_t pfp_feature_version; uint32_t rlc_feature_version; + uint32_t rlc_srlc_fw_version; + uint32_t rlc_srlc_feature_version; + uint32_t rlc_srlg_fw_version; + uint32_t rlc_srlg_feature_version; + uint32_t rlc_srls_fw_version; + uint32_t rlc_srls_feature_version; uint32_t mec_feature_version; uint32_t mec2_feature_version; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; @@ -1204,6 +1225,8 @@ struct amdgpu_asic_funcs { /* invalidate hdp read cache */ void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + /* check if the asic needs a full reset of if soft reset will work */ + bool (*need_full_reset)(struct amdgpu_device *adev); }; /* @@ -1368,7 +1391,17 @@ struct amdgpu_nbio_funcs { void (*detect_hw_virt)(struct amdgpu_device *adev); }; - +struct amdgpu_df_funcs { + void (*init)(struct amdgpu_device *adev); + void (*enable_broadcast_mode)(struct amdgpu_device *adev, + bool enable); + u32 (*get_fb_channel_number)(struct amdgpu_device *adev); + u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); + void 
(*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); +}; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { GC_HWIP = 1, @@ -1398,6 +1431,7 @@ enum amd_hw_ip_block_type { struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; + uint32_t pp_feature; }; #define AMDGPU_RESET_MAGIC_NUM 64 @@ -1590,6 +1624,7 @@ struct amdgpu_device { uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; const struct amdgpu_nbio_funcs *nbio_funcs; + const struct amdgpu_df_funcs *df_funcs; /* delayed work_func for deferring clockgating during resume */ struct delayed_work late_init_work; @@ -1764,6 +1799,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) +#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) @@ -1790,6 +1826,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) +#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index a29362f9ef41..03ee36739efe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -290,12 +290,11 @@ static int acp_hw_init(void *handle) else if (r) return r; - r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO, - 0x5289, 0, &acp_base); - if (r == -ENODEV) - return 0; - else if (r) - return r; + if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) + return -EINVAL; + + acp_base = adev->rmmio_base; + if (adev->asic_type != CHIP_STONEY) { adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); if (adev->acp.acp_genpd == NULL) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 4d36203ffb11..bd36ee9f7e6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -92,6 +92,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); + break; default: dev_dbg(adev->dev, "kfd not supported on this ASIC\n"); return; @@ -175,6 +179,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_physical_address, 
&gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); + if (adev->asic_type >= CHIP_VEGA10) { + /* On SOC15 the BIF is involved in routing + * doorbells using the low 12 bits of the + * address. Communicate the assignments to + * KFD. KFD uses two doorbell pages per + * process in case of 64-bit doorbells so we + * can use each doorbell assignment twice. + */ + gpu_resources.sdma_doorbell[0][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE0; + gpu_resources.sdma_doorbell[0][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; + gpu_resources.sdma_doorbell[1][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE1; + gpu_resources.sdma_doorbell[1][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; + /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for + * SDMA, IH and VCN. So don't use them for the CP. + */ + gpu_resources.reserved_doorbell_mask = 0x1f0; + gpu_resources.reserved_doorbell_val = 0x0f0; + } kgd2kfd->device_init(adev->kfd, &gpu_resources); } @@ -217,13 +243,19 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; + struct amdgpu_bo_param bp; int r; uint64_t gpu_addr_tmp = 0; void *cpu_ptr_tmp = NULL; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel, - NULL, &bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index c2c2bea731e0..12367a9951e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,6 +28,7 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/mmu_context.h> +#include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> #include "amdgpu_sync.h" @@ -59,7 +60,9 @@ struct kgd_mem { uint32_t mapping_flags; + atomic_t invalid; struct amdkfd_process_info *process_info; + struct page **user_pages; struct amdgpu_sync sync; @@ -84,6 +87,9 @@ struct amdkfd_process_info { struct list_head vm_list_head; /* List head for all KFD BOs that belong to a KFD process. 
*/ struct list_head kfd_bo_list; + /* List of userptr BOs that are valid or invalid */ + struct list_head userptr_valid_list; + struct list_head userptr_inval_list; /* Lock to protect kfd_bo_list */ struct mutex lock; @@ -91,6 +97,11 @@ struct amdkfd_process_info { unsigned int n_vms; /* Eviction Fence */ struct amdgpu_amdkfd_fence *eviction_fence; + + /* MMU-notifier related fields */ + atomic_t evicted_bos; + struct delayed_work restore_userptr_work; + struct pid *pid; }; int amdgpu_amdkfd_init(void); @@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea54e53172b9..0ff36d45a597 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 89264c9a5e9f..6ef9762b4b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -141,7 +139,6 @@ static const struct 
kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c new file mode 100644 index 000000000000..8f37991df61b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -0,0 +1,1043 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "soc15_hw_ip.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "vega10_enum.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma0/sdma0_4_0_sh_mask.h" +#include "sdma1/sdma1_4_0_offset.h" +#include "sdma1/sdma1_4_0_sh_mask.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" +#include "soc15_common.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" + +/* HACK: MMHUB and GC both have VM-related register with the same + * names but different offsets. Define the MMHUB register we need here + * with a prefix. A proper solution would be to move the functions + * programming these registers into gfx_v9_0.c and mmhub_v1_0.c + * respectively. 
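+ * (Note: the GC instances of these registers are already available from
+ * gc_9_0_offset.h included above; only their MMHUB twins need the local
+ * definitions below.)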
+ */ +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 + +#define V9_PIPE_PER_MEC (4) +#define V9_QUEUES_PER_PIPE_MEC (8) + +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static uint16_t get_fw_version(struct 
kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_local_mem_info = get_local_mem_info, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = amdgpu_amdkfd_get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) 
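+/* Select the given MEC/pipe/queue/VMID in the GRBM while holding
+ * srbm_mutex, so that the register accesses which follow target that
+ * specific hardware queue; unlock_srbm() restores the selection to 0
+ * and drops the mutex.
+ */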
+{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, mec, pipe, queue, vmid); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static uint32_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) +{ + unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id) & 31; + + return ((uint32_t)1) << bit; +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + /* APE1 no longer exists on GFX9 */ + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + /* + * need to do this twice, once for gfx and once for mmhub + * for ATC add 16 to VMID for mmhub, for IH different registers. + * ATC_VMID0..15 registers are separate from ATC_VMID16..31. 
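+ * (Concretely: the gfx copy of the mapping is written at
+ * ATC_VMID0_PASID_MAPPING + vmid and the mmhub copy at
+ * ATC_VMID16_PASID_MAPPING + vmid, each followed by a wait on its
+ * UPDATE_STATUS bit, as done below.)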
+ */ + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, + pasid_mapping); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << (vmid + 16)))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << (vmid + 16)); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, + pasid_mapping); + return 0; +} + +/* TODO - RING0 form of field is obsolete, seems to date back to SI + * but still works + */ + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t base[2] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL + }; + uint32_t retval; + + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); + + return retval; +} + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, data; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + + for (reg = hqd_base; + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + WREG32(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. 
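+ * The enable bit is set on the doorbell control value saved in the
+ * MQD, so the rest of the doorbell configuration is preserved.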
*/ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + get_queue_mask(adev, pipe_id, queue_id)); + } + + /* Start the EOP fetcher */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + sdmax_gfx_context_cntl = m->sdma_engine_id ? 
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_base_addr + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && + high == RREG32(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + enum hqd_dequeue_request_type type; + unsigned long end_jiffies; + uint32_t temp; + struct v9_mqd *m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + if (m->cp_hqd_vmid == 0) + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } + usleep_range(500, 1000); + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) 
+{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + uint32_t req = (1 << vmid) | + (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; + + mutex_lock(&adev->srbm_mutex); + + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), + req); + + while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + mutex_unlock(&adev->srbm_mutex); + +} + +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + if (ring->ready) + return invalidate_tlbs_with_kiq(adev, pasid); + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + write_vmid_invalidate_request(kgd, vmid); + break; + } + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return 0; + } + + write_vmid_invalidate_request(kgd, vmid); + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + 
return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + /* No longer needed on GFXv9. The scratch base address is + * passed to the shader by the CP. It's the user mode driver's + * responsibility. + */ +} + +/* FIXME: Does this need to be ASIC-specific code? */ +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | + AMDGPU_PTE_VALID; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + /* TODO: take advantage of per-process address space size. For + * now, all processes share the same address space size, like + * on GFX8 and older. 
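+ * The start/end/base registers are written twice below, once for the
+ * MMHUB copy and once for the GC copy, so both hubs translate this
+ * VMID's address space identically.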
+ */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1d6e1479da38..ff8fd75f7ca5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -23,6 +23,8 @@ #define pr_fmt(fmt) "kfd2kgd: " fmt #include <linux/list.h> +#include <linux/pagemap.h> +#include <linux/sched/mm.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -33,10 +35,20 @@ */ #define VI_BO_SIZE_ALIGN (0x8000) +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) + +/* Userptr restore delay, just long enough to allow consecutive VM + * changes to accumulate + */ +#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; + uint64_t max_userptr_mem_limit; int64_t system_mem_used; + int64_t userptr_mem_used; spinlock_t mem_limit_lock; } kfd_mem_limit; @@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = { #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) @@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, /* Set memory usage limits. 
Current, limits are * System (kernel) memory - 3/8th System RAM + * Userptr memory - 3/4th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) { @@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) spin_lock_init(&kfd_mem_limit.mem_limit_lock); kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); - pr_debug("Kernel memory limit %lluM\n", - (kfd_mem_limit.max_system_mem_limit >> 20)); + kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); + pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20), + (kfd_mem_limit.max_userptr_mem_limit >> 20)); } static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, @@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, goto err_no_mem; } kfd_mem_limit.system_mem_used += (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + if ((kfd_mem_limit.system_mem_used + acc_size > + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.userptr_mem_used + (size + acc_size) > + kfd_mem_limit.max_userptr_mem_limit)) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += acc_size; + kfd_mem_limit.userptr_mem_used += size; } err_no_mem: spin_unlock(&kfd_mem_limit.mem_limit_lock); @@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, sizeof(struct amdgpu_bo)); spin_lock(&kfd_mem_limit.mem_limit_lock); - if (domain == AMDGPU_GEM_DOMAIN_GTT) + if (domain == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + kfd_mem_limit.system_mem_used -= acc_size; + kfd_mem_limit.userptr_mem_used -= size; + } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, + "kfd userptr memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) { spin_lock(&kfd_mem_limit.mem_limit_lock); - if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; + kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); + } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (bo->tbo.acc_size + amdgpu_bo_size(bo)); } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, + "kfd userptr memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev, } static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, - struct amdkfd_process_info *process_info) + struct amdkfd_process_info *process_info, + bool userptr) { struct ttm_validate_buffer *entry = &mem->validate_list; struct amdgpu_bo *bo = mem->bo; @@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, entry->shared = true; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); - list_add_tail(&entry->head, &process_info->kfd_bo_list); + if (userptr) + list_add_tail(&entry->head, &process_info->userptr_valid_list); + else + list_add_tail(&entry->head, &process_info->kfd_bo_list); mutex_unlock(&process_info->lock); } +/* Initializes user pages. It registers the MMU notifier and validates + * the userptr BO in the GTT domain. 
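+ * GTT is the BO's target domain here: userptr BOs are created in the
+ * CPU domain and move to GTT only after their pages have been looked
+ * up.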
+ * + * The BO must already be on the userptr_valid_list. Otherwise an + * eviction and restore may happen that leaves the new BO unmapped + * with the user mode queues running. + * + * Takes the process_info->lock to protect against concurrent restore + * workers. + * + * Returns 0 for success, negative errno for errors. + */ +static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, + uint64_t user_addr) +{ + struct amdkfd_process_info *process_info = mem->process_info; + struct amdgpu_bo *bo = mem->bo; + struct ttm_operation_ctx ctx = { true, false }; + int ret = 0; + + mutex_lock(&process_info->lock); + + ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); + if (ret) { + pr_err("%s: Failed to set userptr: %d\n", __func__, ret); + goto out; + } + + ret = amdgpu_mn_register(bo, user_addr); + if (ret) { + pr_err("%s: Failed to register MMU notifier: %d\n", + __func__, ret); + goto out; + } + + /* If no restore worker is running concurrently, user_pages + * should not be allocated + */ + WARN(mem->user_pages, "Leaking user_pages array"); + + mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", __func__); + ret = -ENOMEM; + goto unregister_out; + } + + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + if (ret) { + pr_err("%s: Failed to get user pages: %d\n", __func__, ret); + goto free_out; + } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("%s: Failed to reserve BO\n", __func__); + goto release_out; + } + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + pr_err("%s: failed to validate BO\n", __func__); + amdgpu_bo_unreserve(bo); + +release_out: + if (ret) + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); +free_out: + kvfree(mem->user_pages); + mem->user_pages = NULL; +unregister_out: + if (ret) + amdgpu_mn_unregister(bo); +out: + mutex_unlock(&process_info->lock); + return ret; +} + /* Reserving a BO and its page table BOs must happen atomically to * avoid deadlocks. Some operations update multiple VMs at once. Track * all the reservation info in a context structure. 
Optionally a sync @@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, } static int map_bo_to_gpuvm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, + bool no_update_pte) { int ret; @@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, return ret; } + if (no_update_pte) + return 0; + ret = update_gpuvm_pte(adev, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); @@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, mutex_init(&info->lock); INIT_LIST_HEAD(&info->vm_list_head); INIT_LIST_HEAD(&info->kfd_bo_list); + INIT_LIST_HEAD(&info->userptr_valid_list); + INIT_LIST_HEAD(&info->userptr_inval_list); info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), @@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, goto create_evict_fence_fail; } + info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); + atomic_set(&info->evicted_bos, 0); + INIT_DELAYED_WORK(&info->restore_userptr_work, + amdgpu_amdkfd_restore_userptr_worker); + *process_info = info; *ef = dma_fence_get(&info->eviction_fence->base); } @@ -872,6 +1006,7 @@ reserve_pd_fail: dma_fence_put(*ef); *ef = NULL; *process_info = NULL; + put_pid(info->pid); create_evict_fence_fail: mutex_destroy(&info->lock); kfree(info); @@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, /* Release per-process resources when last compute VM is destroyed */ if (!process_info->n_vms) { WARN_ON(!list_empty(&process_info->kfd_bo_list)); + WARN_ON(!list_empty(&process_info->userptr_valid_list)); + WARN_ON(!list_empty(&process_info->userptr_inval_list)); dma_fence_put(&process_info->eviction_fence->base); + cancel_delayed_work_sync(&process_info->restore_userptr_work); + put_pid(process_info->pid); mutex_destroy(&process_info->lock); kfree(process_info); } @@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + uint64_t user_addr = 0; struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int byte_align; - u32 alloc_domain; + u32 domain, alloc_domain; u64 alloc_flags; uint32_t mapping_flags; int ret; @@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( * Check on which domain to allocate BO */ if (flags & ALLOC_MEM_FLAGS_VRAM) { - alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; } else if (flags & ALLOC_MEM_FLAGS_GTT) { - alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; alloc_flags = 0; + if (!offset || !*offset) + return -EINVAL; + user_addr = *offset; } else { return -EINVAL; } @@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", va, size, domain_string(alloc_domain)); - ret = amdgpu_bo_create(adev, size, byte_align, - alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = byte_align; + bp.domain = alloc_domain; + bp.flags = alloc_flags; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", domain_string(alloc_domain), ret); @@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->kfd_bo = *mem; (*mem)->bo = bo; + if (user_addr) + bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; (*mem)->va = va; - (*mem)->domain = alloc_domain; + (*mem)->domain = domain; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info); + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); + + if (user_addr) { + ret = init_user_pages(*mem, current->mm, user_addr); + if (ret) { + mutex_lock(&avm->process_info->lock); + list_del(&(*mem)->validate_list.head); + mutex_unlock(&avm->process_info->lock); + goto allocate_init_user_pages_failed; + } + } if (offset) *offset = amdgpu_bo_mmap_offset(bo); return 0; +allocate_init_user_pages_failed: + amdgpu_bo_unref(&bo); + /* Don't unreserve system mem limit twice */ + goto err_reserve_system_mem; err_bo_create: unreserve_system_mem_limit(adev, size, alloc_domain); err_reserve_system_mem: @@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( * be freed anyway */ + /* No more MMU notifiers */ + amdgpu_mn_unregister(mem->bo); + /* Make sure restore workers don't access the BO any more */ bo_list_entry = &mem->validate_list; mutex_lock(&process_info->lock); list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); + /* Free user pages if necessary */ + if (mem->user_pages) { + pr_debug("%s: Freeing user_pages array\n", __func__); + if (mem->user_pages[0]) + release_pages(mem->user_pages, + mem->bo->tbo.ttm->num_pages); + kvfree(mem->user_pages); + } + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kfd_bo_va_list *bo_va_entry = NULL; struct kfd_bo_va_list *bo_va_entry_aql = NULL; unsigned long bo_size; - - /* Make sure restore is not running concurrently. - */ - mutex_lock(&mem->process_info->lock); - - mutex_lock(&mem->lock); + bool is_invalid_userptr = false; bo = mem->bo; - if (!bo) { pr_err("Invalid BO when mapping memory to GPU\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } + /* Make sure restore is not running concurrently. Since we + * don't map invalid userptr BOs, we rely on the next restore + * worker to do the mapping + */ + mutex_lock(&mem->process_info->lock); + + /* Lock mmap-sem. 
If we find an invalid userptr BO, we can be + * sure that the MMU notifier is no longer running + * concurrently and the queues are actually stopped + */ + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + down_write(&current->mm->mmap_sem); + is_invalid_userptr = atomic_read(&mem->invalid); + up_write(&current->mm->mmap_sem); + } + + mutex_lock(&mem->lock); + domain = mem->domain; bo_size = bo->tbo.mem.size; @@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( if (unlikely(ret)) goto out; + /* Userptr can be marked as "not invalid", but not actually be + * validated yet (still in the system domain). In that case + * the queues are still stopped and we can leave mapping for + * the next restore worker + */ + if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + is_invalid_userptr = true; + if (check_if_add_bo_to_vm(avm, mem)) { ret = add_bo_to_vm(adev, mem, avm, false, &bo_va_entry); @@ -1217,7 +1418,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( goto add_bo_to_vm_failed; } - if (mem->mapped_to_gpu_memory == 0) { + if (mem->mapped_to_gpu_memory == 0 && + !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { /* Validate BO only once. The eviction fence gets added to BO * the first time it is mapped. Validate will wait for all * background evictions to complete. @@ -1235,7 +1437,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); - ret = map_bo_to_gpuvm(adev, entry, ctx.sync); + ret = map_bo_to_gpuvm(adev, entry, ctx.sync, + is_invalid_userptr); if (ret) { pr_err("Failed to map radeon bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; @@ -1418,6 +1621,337 @@ bo_reserve_failed: return ret; } +/* Evict a userptr BO by stopping the queues if necessary + * + * Runs in MMU notifier, may be in RECLAIM_FS context. This means it + * cannot do any memory allocations, and cannot take any locks that + * are held elsewhere while allocating memory. Therefore this is as + * simple as possible, using atomic counters. + * + * It doesn't do anything to the BO itself. The real work happens in + * restore, where we get updated page addresses. This function only + * ensures that GPU access to the BO is stopped. + */ +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, + struct mm_struct *mm) +{ + struct amdkfd_process_info *process_info = mem->process_info; + int invalid, evicted_bos; + int r = 0; + + invalid = atomic_inc_return(&mem->invalid); + evicted_bos = atomic_inc_return(&process_info->evicted_bos); + if (evicted_bos == 1) { + /* First eviction, stop the queues */ + r = kgd2kfd->quiesce_mm(mm); + if (r) + pr_err("Failed to quiesce KFD\n"); + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + } + + return r; +} + +/* Update invalid userptr BOs + * + * Moves invalidated (evicted) userptr BOs from userptr_valid_list to + * userptr_inval_list and updates user pages for all BOs that have + * been invalidated since their last update.
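+ *
+ * This works in two passes below: invalidated BOs are first validated
+ * back into the CPU domain, which releases their stale pages, and
+ * then fresh pages are looked up with amdgpu_ttm_tt_get_user_pages().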
+ */ +static int update_invalid_user_pages(struct amdkfd_process_info *process_info, + struct mm_struct *mm) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int invalid, ret; + + /* Move all invalidated BOs to the userptr_inval_list and + * release their user pages by migration to the CPU domain + */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_valid_list, + validate_list.head) { + if (!atomic_read(&mem->invalid)) + continue; /* BO is still valid */ + + bo = mem->bo; + + if (amdgpu_bo_reserve(bo, true)) + return -EAGAIN; + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + amdgpu_bo_unreserve(bo); + if (ret) { + pr_err("%s: Failed to invalidate userptr BO\n", + __func__); + return -EAGAIN; + } + + list_move_tail(&mem->validate_list.head, + &process_info->userptr_inval_list); + } + + if (list_empty(&process_info->userptr_inval_list)) + return 0; /* All evicted userptr BOs were freed */ + + /* Go through userptr_inval_list and update any invalid user_pages */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + invalid = atomic_read(&mem->invalid); + if (!invalid) + /* BO hasn't been invalidated since the last + * revalidation attempt. Keep its BO list. + */ + continue; + + bo = mem->bo; + + if (!mem->user_pages) { + mem->user_pages = + kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", + __func__); + return -ENOMEM; + } + } else if (mem->user_pages[0]) { + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); + } + + /* Get updated user pages */ + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, + mem->user_pages); + if (ret) { + mem->user_pages[0] = NULL; + pr_info("%s: Failed to get user pages: %d\n", + __func__, ret); + /* Pretend it succeeded. It will fail later + * with a VM fault if the GPU tries to access + * it. Better than hanging indefinitely with + * stalled user mode queues. + */ + } + + /* Mark the BO as valid unless it was invalidated + * again concurrently + */ + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) + return -EAGAIN; + } + + return 0; +} + +/* Validate invalid userptr BOs + * + * Validates BOs on the userptr_inval_list, and moves them back to the + * userptr_valid_list. Also updates GPUVM page tables with new page + * addresses and waits for the page table updates to complete. 
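+ *
+ * All page directory BOs and the invalidated userptr BOs are reserved
+ * in a single ttm_eu transaction, and the KFD eviction fence is
+ * removed from the page directories first so that unmapping the
+ * invalid BOs cannot trigger yet another eviction.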
+ */ +static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct amdgpu_bo_list_entry *pd_bo_list_entries; + struct list_head resv_list, duplicates; + struct ww_acquire_ctx ticket; + struct amdgpu_sync sync; + + struct amdgpu_vm *peer_vm; + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int i, ret; + + pd_bo_list_entries = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list_entries) { + pr_err("%s: Failed to allocate PD BO list entries\n", __func__); + return -ENOMEM; + } + + INIT_LIST_HEAD(&resv_list); + INIT_LIST_HEAD(&duplicates); + + /* Get all the page directory BOs that need to be reserved */ + i = 0; + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(peer_vm, &resv_list, + &pd_bo_list_entries[i++]); + /* Add the userptr_inval_list entries to resv_list */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + list_add_tail(&mem->resv_list.head, &resv_list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + /* Reserve all BOs and page tables for validation */ + ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); + WARN(!list_empty(&duplicates), "Duplicates should be empty"); + if (ret) + goto out; + + amdgpu_sync_create(&sync); + + /* Avoid triggering eviction fences when unmapping invalid + * userptr BOs (waits for all fences, doesn't use + * FENCE_OWNER_VM) + */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, + process_info->eviction_fence, + NULL, NULL); + + ret = process_validate_vms(process_info); + if (ret) + goto unreserve_out; + + /* Validate BOs and update GPUVM page tables */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + struct kfd_bo_va_list *bo_va_entry; + + bo = mem->bo; + + /* Copy pages array and validate the BO if we got user pages */ + if (mem->user_pages[0]) { + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, + mem->user_pages); + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) { + pr_err("%s: failed to validate BO\n", __func__); + goto unreserve_out; + } + } + + /* Validate succeeded, now the BO owns the pages, free + * our copy of the pointer array. Put this BO back on + * the userptr_valid_list. If we need to revalidate + * it, we need to start from scratch. + */ + kvfree(mem->user_pages); + mem->user_pages = NULL; + list_move_tail(&mem->validate_list.head, + &process_info->userptr_valid_list); + + /* Update mapping. If the BO was not validated + * (because we couldn't get user pages), this will + * clear the page table entries, which will result in + * VM faults if the GPU tries to access the invalid + * memory. 
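+ * A VM fault is preferable to user mode queues hanging indefinitely
+ * on pages that can never be resolved.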
+ */ + list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { + if (!bo_va_entry->is_mapped) + continue; + + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, &sync); + if (ret) { + pr_err("%s: update PTE failed\n", __func__); + /* make sure this gets validated again */ + atomic_inc(&mem->invalid); + goto unreserve_out; + } + } + } + + /* Update page directories */ + ret = process_update_pds(process_info, &sync); + +unreserve_out: + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_bo_fence(peer_vm->root.base.bo, + &process_info->eviction_fence->base, true); + ttm_eu_backoff_reservation(&ticket, &resv_list); + amdgpu_sync_wait(&sync, false); + amdgpu_sync_free(&sync); +out: + kfree(pd_bo_list_entries); + + return ret; +} + +/* Worker callback to restore evicted userptr BOs + * + * Tries to update and validate all userptr BOs. If successful and no + * concurrent evictions happened, the queues are restarted. Otherwise, + * reschedule for another attempt later. + */ +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct amdkfd_process_info *process_info = + container_of(dwork, struct amdkfd_process_info, + restore_userptr_work); + struct task_struct *usertask; + struct mm_struct *mm; + int evicted_bos; + + evicted_bos = atomic_read(&process_info->evicted_bos); + if (!evicted_bos) + return; + + /* Reference task and mm in case of concurrent process termination */ + usertask = get_pid_task(process_info->pid, PIDTYPE_PID); + if (!usertask) + return; + mm = get_task_mm(usertask); + if (!mm) { + put_task_struct(usertask); + return; + } + + mutex_lock(&process_info->lock); + + if (update_invalid_user_pages(process_info, mm)) + goto unlock_out; + /* userptr_inval_list can be empty if all evicted userptr BOs + * have been freed. In that case there is nothing to validate + * and we can just restart the queues. + */ + if (!list_empty(&process_info->userptr_inval_list)) { + if (atomic_read(&process_info->evicted_bos) != evicted_bos) + goto unlock_out; /* Concurrent eviction, try again */ + + if (validate_invalid_user_pages(process_info)) + goto unlock_out; + } + /* Final check for concurrent evicton and atomic update. If + * another eviction happens after successful update, it will + * be a first eviction that calls quiesce_mm. The eviction + * reference counting inside KFD will handle this case. + */ + if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != + evicted_bos) + goto unlock_out; + evicted_bos = 0; + if (kgd2kfd->resume_mm(mm)) { + pr_err("%s: Failed to resume KFD\n", __func__); + /* No recovery from this failure. Probably the CP is + * hanging. No point trying again. 
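+ * Leaving evicted_bos at zero also skips the rescheduling below, so
+ * the worker gives up instead of retrying.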
+ */ + } +unlock_out: + mutex_unlock(&process_info->lock); + mmput(mm); + put_task_struct(usertask); + + /* If validation failed, reschedule another attempt */ + if (evicted_bos) + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); +} + /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given * KFD process identified by process_info * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 1ae5ae8c45a4..1bcb2b247335 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -550,7 +550,7 @@ static int amdgpu_atpx_init(void) * look up whether we are the integrated or discrete GPU (all asics). * Returns the client id. */ -static int amdgpu_atpx_get_client_id(struct pci_dev *pdev) +static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev) { if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev)) return VGA_SWITCHEROO_IGD; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 02b849be083b..19cfff31f2e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -75,13 +75,20 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, { struct amdgpu_bo *dobj = NULL; struct amdgpu_bo *sobj = NULL; + struct amdgpu_bo_param bp; uint64_t saddr, daddr; int r, n; int time; + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = sdomain; + bp.flags = 0; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; n = AMDGPU_BENCHMARK_ITERATIONS; - r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0, - ttm_bo_type_kernel, NULL, &sobj); + r = amdgpu_bo_create(adev, &bp, &sobj); if (r) { goto out_cleanup; } @@ -93,8 +100,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, if (r) { goto out_cleanup; } - r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0, - ttm_bo_type_kernel, NULL, &dobj); + bp.domain = ddomain; + r = amdgpu_bo_create(adev, &bp, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 71a57b2f7f04..5b3d3bf5b599 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -23,7 +23,6 @@ */ #include <linux/list.h> #include <linux/slab.h> -#include <linux/pci.h> #include <drm/drmP.h> #include <linux/firmware.h> #include <drm/amdgpu_drm.h> @@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, WARN(1, "Invalid indirect register space"); } -static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, - enum cgs_resource_type resource_type, - uint64_t size, - uint64_t offset, - uint64_t *resource_base) -{ - CGS_FUNC_ADEV; - - if (resource_base == NULL) - return -EINVAL; - - switch (resource_type) { - case CGS_RESOURCE_TYPE_MMIO: - if (adev->rmmio_size == 0) - return -ENOENT; - if ((offset + size) > adev->rmmio_size) - return -EINVAL; - *resource_base = adev->rmmio_base; - return 0; - case CGS_RESOURCE_TYPE_DOORBELL: - if (adev->doorbell.size == 0) - return -ENOENT; - if ((offset + size) > adev->doorbell.size) - return -EINVAL; - *resource_base = adev->doorbell.base; - return 0; - case CGS_RESOURCE_TYPE_FB: - case CGS_RESOURCE_TYPE_IO: - case CGS_RESOURCE_TYPE_ROM: - default: - return -EINVAL; - } -} - 
-static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device, - unsigned table, uint16_t *size, - uint8_t *frev, uint8_t *crev) -{ - CGS_FUNC_ADEV; - uint16_t data_start; - - if (amdgpu_atom_parse_data_header( - adev->mode_info.atom_context, table, size, - frev, crev, &data_start)) - return (uint8_t*)adev->mode_info.atom_context->bios + - data_start; - - return NULL; -} - -static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table, - uint8_t *frev, uint8_t *crev) -{ - CGS_FUNC_ADEV; - - if (amdgpu_atom_parse_cmd_header( - adev->mode_info.atom_context, table, - frev, crev)) - return 0; - - return -EINVAL; -} - -static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table, - void *args) -{ - CGS_FUNC_ADEV; - - return amdgpu_atom_execute_table( - adev->mode_info.atom_context, table, args); -} - -static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state) -{ - CGS_FUNC_ADEV; - int i, r = -1; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - - if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->set_clockgating_state( - (void *)adev, - state); - break; - } - } - return r; -} - -static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_powergating_state state) -{ - CGS_FUNC_ADEV; - int i, r = -1; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - - if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->set_powergating_state( - (void *)adev, - state); - break; - } - } - return r; -} - - static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) { CGS_FUNC_ADEV; @@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) return result; } -static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type) -{ - CGS_FUNC_ADEV; - if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) { - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - return 0; - } - /* cannot release other firmware because they are not created by cgs */ - return -EINVAL; -} - static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, enum cgs_ucode_id type) { @@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, return fw_version; } -static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device, - bool en) -{ - CGS_FUNC_ADEV; - - if (adev->gfx.rlc.funcs->enter_safe_mode == NULL || - adev->gfx.rlc.funcs->exit_safe_mode == NULL) - return 0; - - if (en) - adev->gfx.rlc.funcs->enter_safe_mode(adev); - else - adev->gfx.rlc.funcs->exit_safe_mode(adev); - - return 0; -} - -static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device, - bool lock) -{ - CGS_FUNC_ADEV; - - if (lock) - mutex_lock(&adev->grbm_idx_mutex); - else - mutex_unlock(&adev->grbm_idx_mutex); -} - static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) @@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, case CHIP_POLARIS12: strcpy(fw_name, "amdgpu/polaris12_smc.bin"); break; + case CHIP_VEGAM: + strcpy(fw_name, "amdgpu/vegam_smc.bin"); + break; case CHIP_VEGA10: if 
((adev->pdev->device == 0x687f) && ((adev->pdev->revision == 0xc0) || @@ -598,97 +445,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return 0; } -static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device) -{ - CGS_FUNC_ADEV; - return amdgpu_sriov_vf(adev); -} - -static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, - struct cgs_display_info *info) -{ - CGS_FUNC_ADEV; - struct cgs_mode_info *mode_info; - - if (info == NULL) - return -EINVAL; - - mode_info = info->mode_info; - if (mode_info) - /* if the displays are off, vblank time is max */ - mode_info->vblank_time_us = 0xffffffff; - - if (!amdgpu_device_has_dc_support(adev)) { - struct amdgpu_crtc *amdgpu_crtc; - struct drm_device *ddev = adev->ddev; - struct drm_crtc *crtc; - uint32_t line_time_us, vblank_lines; - - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (crtc->enabled) { - info->active_display_mask |= (1 << amdgpu_crtc->crtc_id); - info->display_count++; - } - if (mode_info != NULL && - crtc->enabled && amdgpu_crtc->enabled && - amdgpu_crtc->hw_mode.clock) { - line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) / - amdgpu_crtc->hw_mode.clock; - vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end - - amdgpu_crtc->hw_mode.crtc_vdisplay + - (amdgpu_crtc->v_border * 2); - mode_info->vblank_time_us = vblank_lines * line_time_us; - mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); - /* we have issues with mclk switching with refresh rates - * over 120 hz on the non-DC code. - */ - if (mode_info->refresh_rate > 120) - mode_info->vblank_time_us = 0; - mode_info = NULL; - } - } - } - } else { - info->display_count = adev->pm.pm_display_cfg.num_display; - if (mode_info != NULL) { - mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time; - mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh; - } - } - return 0; -} - - -static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled) -{ - CGS_FUNC_ADEV; - - adev->pm.dpm_enabled = enabled; - - return 0; -} - static const struct cgs_ops amdgpu_cgs_ops = { .read_register = amdgpu_cgs_read_register, .write_register = amdgpu_cgs_write_register, .read_ind_register = amdgpu_cgs_read_ind_register, .write_ind_register = amdgpu_cgs_write_ind_register, - .get_pci_resource = amdgpu_cgs_get_pci_resource, - .atom_get_data_table = amdgpu_cgs_atom_get_data_table, - .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs, - .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table, .get_firmware_info = amdgpu_cgs_get_firmware_info, - .rel_firmware = amdgpu_cgs_rel_firmware, - .set_powergating_state = amdgpu_cgs_set_powergating_state, - .set_clockgating_state = amdgpu_cgs_set_clockgating_state, - .get_active_displays_info = amdgpu_cgs_get_active_displays_info, - .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled, - .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, - .enter_safe_mode = amdgpu_cgs_enter_safe_mode, - .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, }; struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 96501ff0e55b..8e66851eb427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -691,7 +691,7 @@ static int 
amdgpu_connector_lvds_get_modes(struct drm_connector *connector) return ret; } -static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); @@ -843,7 +843,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) return ret; } -static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1172,7 +1172,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector) amdgpu_connector->use_digital = true; } -static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1448,7 +1448,7 @@ out: return ret; } -static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dc34b50e6b29..9c1d491d742e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -382,8 +382,7 @@ retry: p->bytes_moved += ctx.bytes_moved; if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) + amdgpu_bo_in_cpu_visible_vram(bo)) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { @@ -411,7 +410,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo *bo = candidate->robj; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved, bytes_moved; bool update_bytes_moved_vis; uint32_t other; @@ -435,18 +433,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, continue; /* Good we can try to move this BO somewhere else */ - amdgpu_ttm_placement_from_domain(bo, other); update_bytes_moved_vis = adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT; - initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); + amdgpu_bo_in_cpu_visible_vram(bo); + amdgpu_ttm_placement_from_domain(bo, other); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - bytes_moved = atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; - p->bytes_moved += bytes_moved; + p->bytes_moved += ctx.bytes_moved; if (update_bytes_moved_vis) - p->bytes_moved_vis += bytes_moved; + p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r)) break; @@ -536,7 +530,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->bo_list) { amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev); + p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); } INIT_LIST_HEAD(&duplicates); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 3fabf9f97022..a8e531d604fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -91,7 +91,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, continue; r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, - rq, amdgpu_sched_jobs, &ctx->guilty); + rq, &ctx->guilty); if (r) goto failed; } @@ -111,8 +111,9 @@ failed: return r; } -static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) +static void amdgpu_ctx_fini(struct kref *ref) { + struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); struct amdgpu_device *adev = ctx->adev; unsigned i, j; @@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) kfree(ctx->fences); ctx->fences = NULL; - for (i = 0; i < adev->num_rings; i++) - drm_sched_entity_fini(&adev->rings[i]->sched, - &ctx->rings[i].entity); - amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); mutex_destroy(&ctx->lock); + + kfree(ctx); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -170,12 +169,15 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + u32 i; ctx = container_of(ref, struct amdgpu_ctx, refcount); - amdgpu_ctx_fini(ctx); + for (i = 0; i < ctx->adev->num_rings; i++) + drm_sched_entity_fini(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); - kfree(ctx); + amdgpu_ctx_fini(ref); } static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) @@ -437,16 +439,62 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) idr_init(&mgr->ctx_handles); } +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id, i; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) { + + if (!ctx->adev) + return; + + for (i = 0; i < ctx->adev->num_rings; i++) + if (kref_read(&ctx->refcount) == 1) + drm_sched_entity_do_release(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + else + DRM_ERROR("ctx %p is still alive\n", ctx); + } +} + +void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id, i; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) { + + if (!ctx->adev) + return; + + for (i = 0; i < ctx->adev->num_rings; i++) + if (kref_read(&ctx->refcount) == 1) + drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + else + DRM_ERROR("ctx %p is still alive\n", ctx); + } +} + void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) { struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id; + amdgpu_ctx_mgr_entity_cleanup(mgr); + idp = &mgr->ctx_handles; idr_for_each_entry(idp, ctx, id) { - if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) + if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) DRM_ERROR("ctx %p is still alive\n", ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 448d69fe3756..f5fb93795a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -28,8 +28,13 @@ #include <linux/debugfs.h> #include "amdgpu.h" -/* - * Debugfs +/** + * amdgpu_debugfs_add_files - Add simple debugfs entries + * + * @adev: Device to attach debugfs entries to + * @files: Array of function callbacks that respond to reads + * @nfiles: Number of callbacks to register + * */ int amdgpu_debugfs_add_files(struct 
amdgpu_device *adev, const struct drm_info_list *files, @@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, #if defined(CONFIG_DEBUG_FS) - +/** + * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes + * + * @read: True if reading + * @f: open file handle + * @buf: User buffer to write/read to + * @size: Number of bytes to write/read + * @pos: Offset to seek to + * + * This debugfs entry has special meaning on the offset being sought. + * Various bits have different meanings: + * + * Bit 62: Indicates a GRBM bank switch is needed + * Bit 61: Indicates an SRBM bank switch is needed (implies bit 62 is + * zero) + * Bits 24..33: The SE or ME selector if needed + * Bits 34..43: The SH (or SA) or PIPE selector if needed + * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed + * + * Bit 23: Indicates that the PM power gating lock should be held + * This is necessary to read registers that might be + * unreliable during a power gating transition. + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -164,19 +195,37 @@ end: return result; } - +/** + * amdgpu_debugfs_regs_read - Callback for reading MMIO registers + */ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos); } +/** + * amdgpu_debugfs_regs_write - Callback for writing MMIO registers + */ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); } + +/** + * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user return result; } +/** + * amdgpu_debugfs_regs_didt_read - Read from a DIDT register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that.
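+ *
+ * As an illustrative sketch only (not part of the original patch;
+ * headers and error handling omitted), a userspace reader could fetch
+ * two consecutive DIDT registers in one call. The DRM minor number
+ * and the 0x100 byte offset here are hypothetical:
+ *
+ *   int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs_didt", O_RDONLY);
+ *   uint32_t vals[2];
+ *   if (fd >= 0 && pread(fd, vals, sizeof(vals), 0x100) == sizeof(vals))
+ *           printf("0x100=0x%08x 0x104=0x%08x\n", vals[0], vals[1]);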
+ */ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_didt_write - Write to a DIDT register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user return result; } +/** + * amdgpu_debugfs_regs_smc_read - Read from an SMC register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_smc_write - Write to an SMC register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * return result; } +/** + * amdgpu_debugfs_gca_config_read - Read from gfx config data + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * This file is used to access configuration data in a somewhat + * stable fashion. The format is a series of DWORDs with the first + * indicating which revision it is. New content is appended to the + * end so that older software can still read the data. + */ + static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -418,6 +541,19 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_sensor_read - Read from the powerplay sensors + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset is treated as the BYTE address of one of the sensors + * enumerated in amd/include/kgd_pp_interface.h under the + * 'amd_pp_sensors' enumeration. For instance, to read the UVD VCLK + * you would use the offset 3 * 4 = 12.
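+ *
+ * A minimal sketch of that example (illustrative only, not part of
+ * the original patch; assumes the device is DRM minor 0 and omits
+ * headers and error handling):
+ *
+ *   int fd = open("/sys/kernel/debug/dri/0/amdgpu_sensors", O_RDONLY);
+ *   uint32_t vclk;
+ *   if (fd >= 0 && pread(fd, &vclk, sizeof(vclk), 12) == sizeof(vclk))
+ *           printf("UVD VCLK: %u\n", vclk);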
+ */ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, if (size & 3 || *pos & 0x3) return -EINVAL; - if (amdgpu_dpm == 0) + if (!adev->pm.dpm_enabled) return -EINVAL; /* convert offset to sensor number */ @@ -457,6 +593,27 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, return !r ? outsize : r; } +/** amdgpu_debugfs_wave_read - Read WAVE STATUS data + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset being sought changes which wave the status data + * will be returned for. The bits are used as follows: + * + * Bits 0..6: Byte offset into data + * Bits 7..14: SE selector + * Bits 15..22: SH/SA selector + * Bits 23..30: CU/{WGP+SIMD} selector + * Bits 31..36: WAVE ID selector + * Bits 37..44: SIMD ID selector + * + * The returned data begins with one DWORD of version information, + * followed by WAVE STATUS registers relevant to the GFX IP version + * being used. See gfx_v8_0_read_wave_data() for an example output. + */ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, return result; } +/** amdgpu_debugfs_gpr_read - Read wave gprs + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset being sought changes which wave the status data + * will be returned for. The bits are used as follows: + * + * Bits 0..11: Byte offset into data + * Bits 12..19: SE selector + * Bits 20..27: SH/SA selector + * Bits 28..35: CU/{WGP+SIMD} selector + * Bits 36..43: WAVE ID selector + * Bits 44..51: SIMD ID selector + * Bits 52..59: Thread selector + * Bits 60..61: Bank selector (VGPR=0,SGPR=1) + * + * The returned data comes from the SGPR or VGPR register bank for + * the selected operational unit. + */ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = { "amdgpu_gpr", }; +/** + * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide + * register access. + * + * @adev: The device to attach the debugfs entries to + */ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { struct drm_minor *minor = adev->ddev->primary; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 34af664b9f93..9fb20a53d5b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -83,6 +83,7 @@ static const char *amdgpu_asic_name[] = { "POLARIS10", "POLARIS11", "POLARIS12", + "VEGAM", "VEGA10", "VEGA12", "RAVEN", @@ -690,6 +691,8 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev, { u64 size_af, size_bf; + mc->gart_size += adev->pm.smu_prv_buffer_size; + size_af = adev->gmc.mc_mask - mc->vram_end; size_bf = mc->vram_start; if (size_bf > size_af) { @@ -907,6 +910,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) } } +static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) +{ + struct sysinfo si; + bool is_os_64 = (sizeof(void *) == 8) ?
true : false; + uint64_t total_memory; + uint64_t dram_size_seven_GB = 0x1B8000000; + uint64_t dram_size_three_GB = 0xB8000000; + + if (amdgpu_smu_memory_pool_size == 0) + return; + + if (!is_os_64) { + DRM_WARN("Not 64-bit OS, feature not supported\n"); + goto def_value; + } + si_meminfo(&si); + total_memory = (uint64_t)si.totalram * si.mem_unit; + + if ((amdgpu_smu_memory_pool_size == 1) || + (amdgpu_smu_memory_pool_size == 2)) { + if (total_memory < dram_size_three_GB) + goto def_value1; + } else if ((amdgpu_smu_memory_pool_size == 4) || + (amdgpu_smu_memory_pool_size == 8)) { + if (total_memory < dram_size_seven_GB) + goto def_value1; + } else { + DRM_WARN("Smu memory pool size not supported\n"); + goto def_value; + } + adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; + + return; + +def_value1: + DRM_WARN("Not enough system memory\n"); +def_value: + adev->pm.smu_prv_buffer_size = 0; +} + /** * amdgpu_device_check_arguments - validate module params * @@ -948,6 +991,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_vm_fragment_size = -1; } + amdgpu_device_check_smu_prv_buffer_size(adev); + amdgpu_device_check_vm_size(adev); amdgpu_device_check_block_size(adev); @@ -1039,10 +1084,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { * the hardware IP specified. * Returns the error code from the last instance. */ -int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state) { + struct amdgpu_device *adev = dev; int i, r = 0; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1072,10 +1118,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, * the hardware IP specified. * Returns the error code from the last instance.
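 *
 * Illustrative call (a sketch, not part of the original patch): with
 * the handle-based signature introduced below, a caller that only
 * holds a void * handle to the device can gate a block directly, e.g.:
 *
 *   int r = amdgpu_device_ip_set_powergating_state(handle,
 *                                                  AMD_IP_BLOCK_TYPE_UVD,
 *                                                  AMD_PG_STATE_GATE);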
*/ -int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state) { + struct amdgpu_device *adev = dev; int i, r = 0; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1320,9 +1367,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_CARRIZO: case CHIP_STONEY: #ifdef CONFIG_DRM_AMDGPU_SI @@ -1428,9 +1476,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_CARRIZO: case CHIP_STONEY: if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) @@ -1499,6 +1548,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return -EAGAIN; } + adev->powerplay.pp_feature = amdgpu_pp_feature_mask; + for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_ERROR("disabled ip block: %d <%s>\n", @@ -1654,6 +1705,10 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) if (amdgpu_emu_mode == 1) return 0; + r = amdgpu_ib_ring_tests(adev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -1704,8 +1759,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) } } - mod_delayed_work(system_wq, &adev->late_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); + queue_delayed_work(system_wq, &adev->late_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); amdgpu_device_fill_reset_magic(adev); @@ -1850,6 +1905,12 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) amdgpu_virt_request_full_gpu(adev, false); + /* ungate SMC block powergating */ + if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_SMC, + AMD_CG_STATE_UNGATE); + /* ungate SMC block first */ r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, AMD_CG_STATE_UNGATE); @@ -2086,14 +2147,12 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_MULLINS: case CHIP_CARRIZO: case CHIP_STONEY: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_TONGA: case CHIP_FIJI: -#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA) - return amdgpu_dc != 0; -#endif case CHIP_VEGA10: case CHIP_VEGA12: #if defined(CONFIG_DRM_AMD_DC_DCN1_0) @@ -2375,10 +2434,6 @@ fence_driver_init: goto failed; } - r = amdgpu_ib_ring_tests(adev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - if (amdgpu_sriov_vf(adev)) amdgpu_virt_init_data_exchange(adev); @@ -2539,7 +2594,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb); + struct drm_framebuffer *fb = crtc->primary->fb; struct amdgpu_bo *robj; if (amdgpu_crtc->cursor_bo) { @@ -2551,10 +2606,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) } } - if 
(rfb == NULL || rfb->obj == NULL) { + if (fb == NULL || fb->obj[0] == NULL) { continue; } - robj = gem_to_amdgpu_bo(rfb->obj); + robj = gem_to_amdgpu_bo(fb->obj[0]); /* don't unpin kernel fb objects */ if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { r = amdgpu_bo_reserve(robj, true); @@ -2640,11 +2695,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } amdgpu_fence_driver_resume(adev); - if (resume) { - r = amdgpu_ib_ring_tests(adev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - } r = amdgpu_device_ip_late_init(adev); if (r) @@ -2736,6 +2786,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return true; + if (amdgpu_asic_need_full_reset(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -2792,6 +2845,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) { int i; + if (amdgpu_asic_need_full_reset(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -3087,20 +3143,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, /* now we are okay to resume SMC/CP/SDMA */ r = amdgpu_device_ip_reinit_late_sriov(adev); - amdgpu_virt_release_full_gpu(adev, true); if (r) goto error; amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); +error: + amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { atomic_inc(&adev->vram_lost_counter); r = amdgpu_device_handle_vram_lost(adev); } -error: - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 93f700ab1bfb..76ee8e04ff11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> static void amdgpu_display_flip_callback(struct dma_fence *f, @@ -151,8 +152,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_framebuffer *old_amdgpu_fb; - struct amdgpu_framebuffer *new_amdgpu_fb; struct drm_gem_object *obj; struct amdgpu_flip_work *work; struct amdgpu_bo *new_abo; @@ -174,15 +173,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; /* schedule unpin of the old buffer */ - old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - obj = old_amdgpu_fb->obj; + obj = crtc->primary->fb->obj[0]; /* take a reference to the old object */ work->old_abo = gem_to_amdgpu_bo(obj); amdgpu_bo_ref(work->old_abo); - new_amdgpu_fb = to_amdgpu_framebuffer(fb); - obj = new_amdgpu_fb->obj; + obj = fb->obj[0]; new_abo = gem_to_amdgpu_bo(obj); /* pin the new buffer */ @@ -192,7 +189,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto cleanup; } - r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base); + r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; @@ -482,31 +479,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector 
*amdgpu_connector, return true; } -static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb) -{ - struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - - drm_gem_object_put_unlocked(amdgpu_fb->obj); - drm_framebuffer_cleanup(fb); - kfree(amdgpu_fb); -} - -static int amdgpu_display_user_framebuffer_create_handle( - struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) -{ - struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - - return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle); -} - static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { - .destroy = amdgpu_display_user_framebuffer_destroy, - .create_handle = amdgpu_display_user_framebuffer_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; -uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev) +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev) { uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; @@ -526,11 +504,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, struct drm_gem_object *obj) { int ret; - rfb->obj = obj; + rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); if (ret) { - rfb->obj = NULL; + rfb->base.obj[0] = NULL; return ret; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 2b11d808f297..f66e3e3fef0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,7 +23,7 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ -uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev); +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index e997ebbe43ea..def1010ac05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -115,6 +115,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, pr_cont("\n"); } +void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev) +{ + struct drm_device *ddev = adev->ddev; + struct drm_crtc *crtc; + struct amdgpu_crtc *amdgpu_crtc; + + adev->pm.dpm.new_active_crtcs = 0; + adev->pm.dpm.new_active_crtc_count = 0; + if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, + &ddev->mode_config.crtc_list, head) { + amdgpu_crtc = to_amdgpu_crtc(crtc); + if (amdgpu_crtc->enabled) { + adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); + adev->pm.dpm.new_active_crtc_count++; + } + } + } +} + u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 643d008410c6..dd6203a0a6b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src { AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 }; -#define SCLK_DEEP_SLEEP_MASK 0x8 - struct amdgpu_ps { u32 caps; /* vbios flags */ u32 class; /* vbios flags */ @@ -349,12 +347,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) -#define 
amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \ - virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \ - ((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \ - (adev)->powerplay.pp_handle, virtual_addr_low, \ - virtual_addr_hi, mc_addr_low, mc_addr_hi, size) - #define amdgpu_dpm_get_power_profile_mode(adev, buf) \ ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ (adev)->powerplay.pp_handle, buf)) @@ -445,6 +437,8 @@ struct amdgpu_pm { uint32_t pcie_gen_mask; uint32_t pcie_mlw_mask; struct amd_pp_display_configuration pm_display_cfg;/* set by dc */ + uint32_t smu_prv_buffer_size; + struct amdgpu_bo *smu_prv_buffer; }; #define R600_SSTU_DFLT 0 @@ -482,6 +476,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, struct amdgpu_ps *rps); u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); +void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); bool amdgpu_is_uvd_state(u32 class, u32 class2); void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, u32 *p, u32 *u); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0b19482b36b8..739e7e09c8b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -75,9 +75,10 @@ * - 3.23.0 - Add query for VRAM lost counter * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). + * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 25 +#define KMS_DRIVER_MINOR 26 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -121,7 +122,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -uint amdgpu_pp_feature_mask = 0xffffbfff; +uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */ int amdgpu_ngg = 0; int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; @@ -132,6 +133,7 @@ int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ int amdgpu_emu_mode = 0; +uint amdgpu_smu_memory_pool_size = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -316,6 +318,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled) module_param_named(cik_support, amdgpu_cik_support, int, 0444); #endif +MODULE_PARM_DESC(smu_memory_pool_size, + "reserve gtt for smu debug usage, 0 = disable," + "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); +module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -534,6 +541,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, + /* VEGAM */ + {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, + {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, /* Vega 10 */ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 12063019751b..bc5fd8ebab5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); - domain = amdgpu_display_framebuffer_domains(adev); + domain = amdgpu_display_supported_domains(adev); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; @@ -292,9 +292,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb drm_fb_helper_unregister_fbi(&rfbdev->helper); - if (rfb->obj) { - amdgpufb_destroy_pinned_object(rfb->obj); - rfb->obj = NULL; + if (rfb->base.obj[0]) { + amdgpufb_destroy_pinned_object(rfb->base.obj[0]); + rfb->base.obj[0] = NULL; drm_framebuffer_unregister_private(&rfb->base); drm_framebuffer_cleanup(&rfb->base); } @@ -377,7 +377,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev) if (!adev->mode_info.rfbdev) return 0; - robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj); + robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]); size += amdgpu_bo_size(robj); return size; } @@ -386,7 +386,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) { if (!adev->mode_info.rfbdev) return false; - if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj)) + if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0])) return true; return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 97449e06a242..d09fcab2398f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. 
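 *
 * Illustrative use (a sketch, not part of the original patch): with
 * the flags parameter added below, a caller holding a ring can request
 * a TC write-back without invalidation when emitting the fence, as the
 * IB submission path in this series does, e.g.:
 *
 *   struct dma_fence *fence;
 *   int r = amdgpu_fence_emit(ring, &fence, AMDGPU_FENCE_FLAG_TC_WB_ONLY);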
*/ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + unsigned flags) { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; @@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) adev->fence_context + ring->idx, seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - seq, AMDGPU_FENCE_FLAG_INT); + seq, flags | AMDGPU_FENCE_FLAG_INT); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; /* This function can't be called concurrently anyway, otherwise diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index cf0f186c6092..17d6b9fb6d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) int r; if (adev->gart.robj == NULL) { - r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, - &adev->gart.robj); + struct amdgpu_bo_param bp; + + memset(&bp, 0, sizeof(bp)); + bp.size = adev->gart.table_size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &adev->gart.robj); if (r) { return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 46b9ea4e6103..2c8e27370284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -48,17 +48,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, struct drm_gem_object **obj) { struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int r; + memset(&bp, 0, sizeof(bp)); *obj = NULL; /* At least align on page size */ if (alignment < PAGE_SIZE) { alignment = PAGE_SIZE; } + bp.size = size; + bp.byte_align = alignment; + bp.type = type; + bp.resv = resv; + bp.preferred_domain = initial_domain; retry: - r = amdgpu_bo_create(adev, size, alignment, initial_domain, - flags, type, resv, &bo); + bp.flags = flags; + bp.domain = initial_domain; + r = amdgpu_bo_create(adev, &bp, &bo); if (r) { if (r != -ERESTARTSYS) { if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { @@ -221,12 +229,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; /* reject invalid gem domains */ - if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) + if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) return -EINVAL; /* create a gem object to contain this object in */ @@ -771,16 +774,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, } #if defined(CONFIG_DEBUG_FS) + +#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \ + if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \ + seq_printf((m), " " #flag); \ + } + static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) { struct drm_gem_object *gobj = ptr; struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); struct seq_file *m = data; + struct dma_buf_attachment *attachment; + struct dma_buf *dma_buf; unsigned domain; const char *placement; unsigned pin_count; - uint64_t offset; domain = 
amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); switch (domain) { @@ -798,13 +808,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) seq_printf(m, "\t0x%08x: %12ld byte %s", id, amdgpu_bo_size(bo), placement); - offset = READ_ONCE(bo->tbo.mem.start); - if (offset != AMDGPU_BO_INVALID_OFFSET) - seq_printf(m, " @ 0x%010Lx", offset); - pin_count = READ_ONCE(bo->pin_count); if (pin_count) seq_printf(m, " pin count %d", pin_count); + + dma_buf = READ_ONCE(bo->gem_base.dma_buf); + attachment = READ_ONCE(bo->gem_base.import_attach); + + if (attachment) + seq_printf(m, " imported from %p", dma_buf); + else if (dma_buf) + seq_printf(m, " exported as %p", dma_buf); + + amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); + amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS); + amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED); + amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID); + amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC); + seq_printf(m, "\n"); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 311589e02d17..f70eeed9ed76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; + unsigned fence_flags = 0; unsigned i; int r = 0; @@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, #endif amdgpu_asic_invalidate_hdp(adev, ring); - r = amdgpu_fence_emit(ring, f); + if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) + fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; + + r = amdgpu_fence_emit(ring, f, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) @@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* wrap the last IB with fence */ if (job && job->uf_addr) { amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, - AMDGPU_FENCE_FLAG_64BIT); + fence_flags | AMDGPU_FENCE_FLAG_64BIT); } if (patch_offset != ~0 && ring->funcs->patch_cond_exec) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4b7824d30e73..eb4785e51573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -31,6 +31,7 @@ #include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" +#include "atom.h" #include <linux/vga_switcheroo.h> #include <linux/slab.h> @@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->gfx.rlc_fw_version; fw_info->feature = adev->gfx.rlc_feature_version; break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL: + fw_info->ver = adev->gfx.rlc_srlc_fw_version; + fw_info->feature = adev->gfx.rlc_srlc_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM: + fw_info->ver = adev->gfx.rlc_srlg_fw_version; + fw_info->feature = adev->gfx.rlc_srlg_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM: + fw_info->ver = adev->gfx.rlc_srls_fw_version; + fw_info->feature = adev->gfx.rlc_srls_feature_version; + break; case AMDGPU_INFO_FW_GFX_MEC: if (query_fw->index == 0) { fw_info->ver = adev->gfx.mec_fw_version; @@ 
-279,6 +292,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (!info->return_size || !info->return_pointer) return -EINVAL; + /* Ensure IB tests are run on ring */ + flush_delayed_work(&adev->late_init_work); + switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: ui32 = adev->accel_working; @@ -701,10 +717,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } } case AMDGPU_INFO_SENSOR: { - struct pp_gpu_power query = {0}; - int query_size = sizeof(query); - - if (amdgpu_dpm == 0) + if (!adev->pm.dpm_enabled) return -ENOENT; switch (info->sensor_info.type) { @@ -746,10 +759,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file /* get average GPU power */ if (amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, - (void *)&query, &query_size)) { + (void *)&ui32, &ui32_size)) { return -EINVAL; } - ui32 = query.average_gpu_power >> 8; + ui32 >>= 8; break; case AMDGPU_INFO_SENSOR_VDDNB: /* get VDDNB in millivolts */ @@ -913,8 +926,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, return; pm_runtime_get_sync(dev->dev); - - amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr); if (adev->asic_type != CHIP_RAVEN) { amdgpu_uvd_free_handles(adev, file_priv); @@ -935,6 +947,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); amdgpu_vm_fini(adev, &fpriv->vm); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + if (pasid) amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); amdgpu_bo_unref(&pd); @@ -1088,6 +1102,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_info_firmware fw_info; struct drm_amdgpu_query_fw query_fw; + struct atom_context *ctx = adev->mode_info.atom_context; int ret, i; /* VCE */ @@ -1146,6 +1161,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* RLC SAVE RESTORE LIST CNTL */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLC SAVE RESTORE LIST GPM MEM */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLC SAVE RESTORE LIST SRM MEM */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* MEC */ query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; query_fw.index = 0; @@ -1210,6 +1249,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index bd67f4cb8e6c..83e344fbb50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -36,12 
+36,14 @@ #include <drm/drm.h> #include "amdgpu.h" +#include "amdgpu_amdkfd.h" struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; struct mmu_notifier mn; + enum amdgpu_mn_type type; /* only used on destruction */ struct work_struct work; @@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } /** - * amdgpu_mn_invalidate_range_start - callback to notify about mm change + * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change * * @mn: our notifier * @mn: the mm this callback is about @@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * We block for all BOs between start and end to be idle and * unmap them by move them into system domain again. */ -static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) { struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; @@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, } /** + * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * + * @mn: our notifier + * @mm: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We temporarily evict all BOs between start and end. This + * necessitates evicting all user-mode queues of the process. The BOs + * are restored in amdgpu_mn_invalidate_range_end_hsa. + */ +static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + + amdgpu_mn_read_lock(rmn); + + it = interval_tree_iter_first(&rmn->objects, start, end); + while (it) { + struct amdgpu_mn_node *node; + struct amdgpu_bo *bo; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, start, end); + + list_for_each_entry(bo, &node->bos, mn_list) { + struct kgd_mem *mem = bo->kfd_bo; + + if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, + start, end)) + amdgpu_amdkfd_evict_userptr(mem, mm); + } + } +} + +/** * amdgpu_mn_invalidate_range_end - callback to notify about mm change * * @mn: our notifier @@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, amdgpu_mn_read_unlock(rmn); } -static const struct mmu_notifier_ops amdgpu_mn_ops = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, +static const struct mmu_notifier_ops amdgpu_mn_ops[] = { + [AMDGPU_MN_TYPE_GFX] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, + [AMDGPU_MN_TYPE_HSA] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, }; +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type.
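+ *
+ * Worked example (illustrative, with a made-up pointer value): for
+ * mm == 0xffff880012345680, AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_GFX)
+ * evaluates to 0xffff880012345680 and AMDGPU_MN_KEY(mm,
+ * AMDGPU_MN_TYPE_HSA) to 0xffff880012345681, so the GFX and HSA
+ * notifier contexts for the same mm get distinct hash keys.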
+ */ +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) + /** * amdgpu_mn_get - create notifier context * * @adev: amdgpu device pointer + * @type: type of MMU notifier context * * Creates a notifier context for current->mm. */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { struct mm_struct *mm = current->mm; struct amdgpu_mn *rmn; + unsigned long key = AMDGPU_MN_KEY(mm, type); int r; mutex_lock(&adev->mn_lock); @@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) return ERR_PTR(-EINTR); } - hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) - if (rmn->mm == mm) + hash_for_each_possible(adev->mn_hash, rmn, node, key) + if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key) goto release_locks; rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); @@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) rmn->adev = adev; rmn->mm = mm; - rmn->mn.ops = &amdgpu_mn_ops; init_rwsem(&rmn->lock); + rmn->type = type; + rmn->mn.ops = &amdgpu_mn_ops[type]; rmn->objects = RB_ROOT_CACHED; mutex_init(&rmn->read_lock); atomic_set(&rmn->recursion, 0); @@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) if (r) goto free_rmn; - hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); + hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type)); release_locks: up_write(&mm->mmap_sem); @@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { unsigned long end = addr + amdgpu_bo_size(bo) - 1; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + enum amdgpu_mn_type type = + bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; struct amdgpu_mn *rmn; - struct amdgpu_mn_node *node = NULL; + struct amdgpu_mn_node *node = NULL, *new_node; struct list_head bos; struct interval_tree_node *it; - rmn = amdgpu_mn_get(adev); + rmn = amdgpu_mn_get(adev, type); if (IS_ERR(rmn)) return PTR_ERR(rmn); + new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + INIT_LIST_HEAD(&bos); down_write(&rmn->lock); @@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) list_splice(&node->bos, &bos); } - if (!node) { - node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); - if (!node) { - up_write(&rmn->lock); - return -ENOMEM; - } - } + if (!node) + node = new_node; + else + kfree(new_node); bo->mn = rmn; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index d0095a3793b8..eb0f432f78fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -29,16 +29,23 @@ */ struct amdgpu_mn; +enum amdgpu_mn_type { + AMDGPU_MN_TYPE_GFX, + AMDGPU_MN_TYPE_HSA, +}; + #if defined(CONFIG_MMU_NOTIFIER) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { return NULL; } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d6416ee52e32..b9e9e8b02fb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -308,7 +308,6 @@ struct amdgpu_display_funcs { struct amdgpu_framebuffer { struct drm_framebuffer base; - struct drm_gem_object *obj; /* caching for later use */ uint64_t address; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6d08cde8443c..6a9e46ae7f0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -191,14 +191,21 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr) { + struct amdgpu_bo_param bp; bool free = false; int r; + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = align; + bp.domain = domain; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + if (!*bo_ptr) { - r = amdgpu_bo_create(adev, size, align, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, bo_ptr); + r = amdgpu_bo_create(adev, &bp, bo_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); @@ -341,27 +348,25 @@ fail: return false; } -static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +static int amdgpu_bo_do_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { struct ttm_operation_ctx ctx = { - .interruptible = (type != ttm_bo_type_kernel), + .interruptible = (bp->type != ttm_bo_type_kernel), .no_wait_gpu = false, - .resv = resv, + .resv = bp->resv, .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT }; struct amdgpu_bo *bo; - unsigned long page_align; + unsigned long page_align, size = bp->size; size_t acc_size; int r; - page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; + page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; size = ALIGN(size, PAGE_SIZE); - if (!amdgpu_bo_validate_size(adev, size, domain)) + if (!amdgpu_bo_validate_size(adev, size, bp->domain)) return -ENOMEM; *bo_ptr = NULL; @@ -375,18 +380,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA); + bo->preferred_domains = bp->preferred_domain ? 
bp->preferred_domain : + bp->domain; bo->allowed_domains = bo->preferred_domains; - if (type != ttm_bo_type_kernel && + if (bp->type != ttm_bo_type_kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; - bo->flags = flags; + bo->flags = bp->flags; #ifdef CONFIG_X86_32 /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit @@ -417,11 +418,13 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, #endif bo->tbo.bdev = &adev->mman.bdev; - amdgpu_ttm_placement_from_domain(bo, domain); + amdgpu_ttm_placement_from_domain(bo, bp->domain); + if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 1; - r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, + r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type, &bo->placement, page_align, &ctx, acc_size, - NULL, resv, &amdgpu_ttm_bo_destroy); + NULL, bp->resv, &amdgpu_ttm_bo_destroy); if (unlikely(r != 0)) return r; @@ -433,10 +436,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, else amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); - if (type == ttm_bo_type_kernel) - bo->tbo.priority = 1; - - if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && + if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; @@ -449,20 +449,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, bo->tbo.moving = dma_fence_get(fence); dma_fence_put(fence); } - if (!resv) + if (!bp->resv) amdgpu_bo_unreserve(bo); *bo_ptr = bo; trace_amdgpu_bo_create(bo); /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ - if (type == ttm_bo_type_device) + if (bp->type == ttm_bo_type_device) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; return 0; fail_unreserve: - if (!resv) + if (!bp->resv) ww_mutex_unlock(&bo->tbo.resv->lock); amdgpu_bo_unref(&bo); return r; @@ -472,16 +472,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, unsigned long size, int byte_align, struct amdgpu_bo *bo) { + struct amdgpu_bo_param bp; int r; if (bo->shadow) return 0; - r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW, - ttm_bo_type_kernel, - bo->tbo.resv, &bo->shadow); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = byte_align; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW; + bp.type = ttm_bo_type_kernel; + bp.resv = bo->tbo.resv; + + r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -492,28 +498,26 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } -int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +int amdgpu_bo_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { - uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; + u64 flags = bp->flags; int r; - r = amdgpu_bo_do_create(adev, size, byte_align, domain, - parent_flags, type, resv, bo_ptr); + bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; + r = amdgpu_bo_do_create(adev, bp, bo_ptr); if (r) return r; if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { - if (!resv) + if (!bp->resv) 
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, NULL)); - r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); + r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr)); - if (!resv) + if (!bp->resv) reservation_object_unlock((*bo_ptr)->tbo.resv); if (r) @@ -689,8 +693,21 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; /* A shared bo cannot be migrated to VRAM */ - if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM)) - return -EINVAL; + if (bo->prime_shared_count) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) + domain = AMDGPU_GEM_DOMAIN_GTT; + else + return -EINVAL; + } + + /* This assumes only APU display buffers are pinned with (VRAM|GTT). + * See function amdgpu_display_supported_domains() + */ + if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { + domain = AMDGPU_GEM_DOMAIN_VRAM; + if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD) + domain = AMDGPU_GEM_DOMAIN_GTT; + } if (bo->pin_count) { uint32_t mem_type = bo->tbo.mem.mem_type; @@ -838,6 +855,13 @@ int amdgpu_bo_init(struct amdgpu_device *adev) return amdgpu_ttm_init(adev); } +int amdgpu_bo_late_init(struct amdgpu_device *adev) +{ + amdgpu_ttm_late_init(adev); + + return 0; +} + void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 546f77cb7882..540e03fa159f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -33,6 +33,16 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX +struct amdgpu_bo_param { + unsigned long size; + int byte_align; + u32 domain; + u32 preferred_domain; + u64 flags; + enum ttm_bo_type type; + struct reservation_object *resv; +}; + /* bo virtual addresses in a vm */ struct amdgpu_bo_va_mapping { struct amdgpu_bo_va *bo_va; @@ -196,6 +206,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } /** + * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM + */ +static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; + struct drm_mm_node *node = bo->tbo.mem.mm_node; + unsigned long pages_left; + + if (bo->tbo.mem.mem_type != TTM_PL_VRAM) + return false; + + for (pages_left = bo->tbo.mem.num_pages; pages_left; + pages_left -= node->size, node++) + if (node->start < fpfn) + return true; + + return false; +} + +/** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) @@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; } -int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +int amdgpu_bo_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, @@ -230,6 +259,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, int amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_evict_vram(struct amdgpu_device *adev); int amdgpu_bo_init(struct amdgpu_device *adev); +int amdgpu_bo_late_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); int 
amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 361975cf45a9..b455da487782 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -77,6 +77,37 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) } } +/** + * DOC: power_dpm_state + * + * This is a legacy interface and is only provided for backwards compatibility. + * The amdgpu driver provides a sysfs API for adjusting certain power + * related parameters. The file power_dpm_state is used for this. + * It accepts the following arguments: + * - battery + * - balanced + * - performance + * + * battery + * + * On older GPUs, the vbios provided a special power state for battery + * operation. Selecting battery switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + * balanced + * + * On older GPUs, the vbios provided a special power state for balanced + * operation. Selecting balanced switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + * performance + * + * On older GPUs, the vbios provided a special power state for performance + * operation. Selecting performance switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + */ + static ssize_t amdgpu_get_dpm_state(struct device *dev, struct device_attribute *attr, char *buf) @@ -131,6 +162,59 @@ fail: return count; } + +/** + * DOC: power_dpm_force_performance_level + * + * The amdgpu driver provides a sysfs API for adjusting certain power + * related parameters. The file power_dpm_force_performance_level is + * used for this. It accepts the following arguments: + * - auto + * - low + * - high + * - manual + * - profile_standard + * - profile_min_sclk + * - profile_min_mclk + * - profile_peak + * + * auto + * + * When auto is selected, the driver will attempt to dynamically select + * the optimal power profile for current conditions in the driver. + * + * low + * + * When low is selected, the clocks are forced to the lowest power state. + * + * high + * + * When high is selected, the clocks are forced to the highest power state. + * + * manual + * + * When manual is selected, the user can manually adjust which power states + * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk, + * and pp_dpm_pcie files and adjust the power state transition heuristics + * via the pp_power_profile_mode sysfs file. + * + * profile_standard + * profile_min_sclk + * profile_min_mclk + * profile_peak + * + * When the profiling modes are selected, clock and power gating are + * disabled and the clocks are set for different profiling cases. This + * mode is recommended for profiling specific workloads where you do + * not want clock or power gating for clock fluctuation to interfere + * with your results. profile_standard sets the clocks to a fixed clock + * level which varies from asic to asic. profile_min_sclk forces the sclk + * to the lowest level. profile_min_mclk forces the mclk to the lowest level. + * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels.
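+ * + * For example, an illustrative shell usage (the card0 sysfs path is an + * assumption and varies per system): + * # echo manual > /sys/class/drm/card0/device/power_dpm_force_performance_level + * # cat /sys/class/drm/card0/device/power_dpm_force_performance_level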
+ * + */ + static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct device_attribute *attr, char *buf) @@ -324,6 +408,17 @@ fail: return count; } +/** + * DOC: pp_table + * + * The amdgpu driver provides a sysfs API for uploading new powerplay + * tables. The file pp_table is used for this. Reading the file + * will dump the current powerplay table. Writing to the file + * will attempt to upload a new powerplay table and re-initialize + * powerplay using that new table. + * + */ + static ssize_t amdgpu_get_pp_table(struct device *dev, struct device_attribute *attr, char *buf) @@ -360,6 +455,29 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, return count; } +/** + * DOC: pp_od_clk_voltage + * + * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages + * in each power level within a power state. The file pp_od_clk_voltage is used + * for this. + * + * Reading the file will display: + * - a list of engine clock levels and voltages labeled OD_SCLK + * - a list of memory clock levels and voltages labeled OD_MCLK + * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE + * + * To manually adjust these settings, first select manual using + * power_dpm_force_performance_level. Enter a new value for each + * level by writing a string that contains "s/m level clock voltage" to + * the file. E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz + * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at + * 810 mV. When you have edited all of the states as needed, write + * "c" (commit) to the file to commit your changes. If you want to reset to the + * default power levels, write "r" (reset) to the file to reset them. + * + */ + static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, struct device_attribute *attr, const char *buf, @@ -437,6 +555,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); + size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); return size; } else { return snprintf(buf, PAGE_SIZE, "\n"); @@ -444,6 +563,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, } +/** + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie + * + * The amdgpu driver provides a sysfs API for adjusting what power levels + * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, + * and pp_dpm_pcie are used for this. + * + * Reading back the files will show you the available power levels within + * the power state and the clock information for those levels. + * + * To manually adjust these states, first select manual using + * power_dpm_force_performance_level. + * Secondly, enter a new value for each level by writing a space-separated + * list of level indices to the file, e.g. "echo 4 5 6 > pp_dpm_sclk" + * will enable sclk levels 4, 5, and 6.
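+ * + * (Illustrative note: each index in the list is folded into a bitmask via + * "mask |= 1 << level" before being handed to force_clock_level, so writing + * "4 5 6" results in the mask 0x70.)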
+ */ + static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, struct device_attribute *attr, char *buf) @@ -466,23 +602,27 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; - uint32_t i, mask = 0; - char sub_str[2]; + uint32_t mask = 0; + char *sub_str = NULL; + char *tmp; + char buf_cpy[count]; + const char delimiter[3] = {' ', '\n', '\0'}; - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + memcpy(buf_cpy, buf, count+1); + tmp = buf_cpy; + while (tmp[0]) { + sub_str = strsep(&tmp, delimiter); + if (strlen(sub_str)) { + ret = kstrtol(sub_str, 0, &level); - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; + } else + break; } - if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); @@ -512,21 +652,26 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; - uint32_t i, mask = 0; - char sub_str[2]; + uint32_t mask = 0; + char *sub_str = NULL; + char *tmp; + char buf_cpy[count]; + const char delimiter[3] = {' ', '\n', '\0'}; - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + memcpy(buf_cpy, buf, count+1); + tmp = buf_cpy; + while (tmp[0]) { + sub_str = strsep(&tmp, delimiter); + if (strlen(sub_str)) { + ret = kstrtol(sub_str, 0, &level); - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; + } else + break; } if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); @@ -557,21 +702,27 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; - uint32_t i, mask = 0; - char sub_str[2]; + uint32_t mask = 0; + char *sub_str = NULL; + char *tmp; + char buf_cpy[count]; + const char delimiter[3] = {' ', '\n', '\0'}; - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + memcpy(buf_cpy, buf, count+1); + tmp = buf_cpy; - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; + while (tmp[0]) { + sub_str = strsep(&tmp, delimiter); + if (strlen(sub_str)) { + ret = kstrtol(sub_str, 0, &level); + + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; + } else + break; } if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); @@ -668,6 +819,26 @@ fail: return count; } +/** + * DOC: pp_power_profile_mode + * + * The amdgpu driver provides a sysfs API for adjusting the heuristics + * related to switching between power levels in a power state. The file + * pp_power_profile_mode is used for this. + * + * Reading this file outputs a list of all of the predefined power profiles + * and the relevant heuristics settings for that profile. + * + * To select a profile or create a custom profile, first select manual using + * power_dpm_force_performance_level. Writing the number of a predefined + * profile to pp_power_profile_mode will enable those heuristics. 
To + * create a custom set of heuristics, write a string of numbers to the file + * starting with the number of the custom profile along with a setting + * for each heuristic parameter. Due to differences across asic families + * the heuristic parameters vary from family to family. + * + */ + static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, struct device_attribute *attr, char *buf) @@ -1020,8 +1191,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; - struct pp_gpu_power query = {0}; - int r, size = sizeof(query); + u32 query = 0; + int r, size = sizeof(u32); unsigned uw; /* Can't get power when the card is off */ @@ -1041,7 +1212,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, return r; /* convert to microwatts */ - uw = (query.average_gpu_power >> 8) * 1000000; + uw = (query >> 8) * 1000000 + (query & 0xff) * 1000; return snprintf(buf, PAGE_SIZE, "%u\n", uw); } @@ -1109,6 +1280,46 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return count; } + +/** + * DOC: hwmon + * + * The amdgpu driver exposes the following sensor interfaces: + * - GPU temperature (via the on-die sensor) + * - GPU voltage + * - Northbridge voltage (APUs only) + * - GPU power + * - GPU fan + * + * hwmon interfaces for GPU temperature: + * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp1_crit: temperature critical max value in millidegrees Celsius + * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * + * hwmon interfaces for GPU voltage: + * - in0_input: the voltage on the GPU in millivolts + * - in1_input: the voltage on the Northbridge in millivolts + * + * hwmon interfaces for GPU power: + * - power1_average: average power used by the GPU in microWatts + * - power1_cap_min: minimum cap supported in microWatts + * - power1_cap_max: maximum cap supported in microWatts + * - power1_cap: selected power cap in microWatts + * + * hwmon interfaces for GPU fan: + * - pwm1: pulse width modulation fan level (0-255) + * - pwm1_enable: pulse width modulation fan control method + * 0: no fan speed control + * 1: manual fan speed control using pwm interface + * 2: automatic fan speed control + * - pwm1_min: pulse width modulation fan control minimum level (0) + * - pwm1_max: pulse width modulation fan control maximum level (255) + * - fan1_input: fan speed in RPM + * + * You can use hwmon tools like sensors to view this information on your system. 
+ * + */ + static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); @@ -1153,19 +1364,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; - /* handle non-powerplay limitations */ - if (!adev->powerplay.pp_handle) { - /* Skip fan attributes if fan is not present */ - if (adev->pm.no_fan && - (attr == &sensor_dev_attr_pwm1.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) - return 0; - /* requires powerplay */ - if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) - return 0; - } + + /* Skip fan attributes if fan is not present */ + if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_input.dev_attr.attr)) + return 0; /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && @@ -1658,9 +1864,6 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) { - struct drm_device *ddev = adev->ddev; - struct drm_crtc *crtc; - struct amdgpu_crtc *amdgpu_crtc; int i = 0; if (!adev->pm.dpm_enabled) @@ -1676,21 +1879,25 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) } if (adev->powerplay.pp_funcs->dispatch_tasks) { + if (!amdgpu_device_has_dc_support(adev)) { + mutex_lock(&adev->pm.mutex); + amdgpu_dpm_get_active_displays(adev); + adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs; + adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); + adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); + /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. 
*/ + if (adev->pm.pm_display_cfg.vrefresh > 120) + adev->pm.pm_display_cfg.min_vblank_time = 0; + if (adev->powerplay.pp_funcs->display_configuration_change) + adev->powerplay.pp_funcs->display_configuration_change( + adev->powerplay.pp_handle, + &adev->pm.pm_display_cfg); + mutex_unlock(&adev->pm.mutex); + } amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); } else { mutex_lock(&adev->pm.mutex); - adev->pm.dpm.new_active_crtcs = 0; - adev->pm.dpm.new_active_crtc_count = 0; - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (amdgpu_crtc->enabled) { - adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); - adev->pm.dpm.new_active_crtc_count++; - } - } - } + amdgpu_dpm_get_active_displays(adev); /* update battery/ac status */ if (power_supply_is_system_supplied() > 0) adev->pm.dpm.ac_power = true; @@ -1711,7 +1918,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) { uint32_t value; - struct pp_gpu_power query = {0}; + uint32_t query = 0; int size; /* sanity check PP is enabled */ @@ -1734,17 +1941,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a seq_printf(m, "\t%u mV (VDDGFX)\n", value); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDNB)\n", value); - size = sizeof(query); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) { - seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8, - query.vddc_power & 0xff); - seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8, - query.vddci_power & 0xff); - seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8, - query.max_gpu_power & 0xff); - seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8, - query.average_gpu_power & 0xff); - } + size = sizeof(uint32_t); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) + seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff); size = sizeof(value); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 4b584cb75bf4..4683626b065f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -102,12 +102,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct reservation_object *resv = attach->dmabuf->resv; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int ret; + memset(&bp, 0, sizeof(bp)); + bp.size = attach->dmabuf->size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.flags = 0; + bp.type = ttm_bo_type_sg; + bp.resv = resv; ww_mutex_lock(&resv->lock, NULL); - ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg, - resv, &bo); + ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) goto error; @@ -209,7 +215,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { true, false }; - u32 domain = amdgpu_display_framebuffer_domains(adev); + u32 domain = amdgpu_display_supported_domains(adev); int ret; bool reads = (direction == 
DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d5f526f38e50..49cad08b5c16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -459,6 +459,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) spin_unlock(&adev->ring_lru_list_lock); } +/** + * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper + * + * @adev: amdgpu_device pointer + * @reg0: register to write + * @reg1: register to wait on + * @ref: reference value to write/wait on + * @mask: mask to wait on + * + * Helper for rings that don't support write and wait in a + * single oneshot packet. + */ +void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + /* * Debugfs info */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1a5911882657..4f8dac2d36a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -42,6 +42,7 @@ #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) +#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) enum amdgpu_ring_type { AMDGPU_RING_TYPE_GFX, @@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, + unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); @@ -154,6 +156,9 @@ struct amdgpu_ring_funcs { void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); + void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, @@ -228,6 +233,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, int num_blacklist, bool lru_pipe_order, struct amdgpu_ring **ring); void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); +void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t val0, + uint32_t reg1, uint32_t val1); + static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 2dbe87591f81..d167e8ab76d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -33,6 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_bo *vram_obj = NULL; struct amdgpu_bo **gtt_obj = NULL; + struct amdgpu_bo_param bp; uint64_t gart_addr, vram_addr; unsigned n, size; int i, r; @@ -58,9 +59,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r 
= 1; goto out_cleanup; } - - r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0, - ttm_bo_type_kernel, NULL, &vram_obj); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = 0; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -79,9 +86,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) void **vram_start, **vram_end; struct dma_fence *fence = NULL; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, 0, - ttm_bo_type_kernel, NULL, gtt_obj + i); + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + r = amdgpu_bo_create(adev, &bp, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 532263ab6e16..e96e26d3f3b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, ), TP_fast_assign( - __entry->bo = bo_va->base.bo; + __entry->bo = bo_va ? bo_va->base.bo : NULL; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 205da3ff9cd0..69a2b25b3696 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -111,7 +111,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) ring = adev->mman.buffer_funcs_ring; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move run queue.\n"); goto error_entity; @@ -223,20 +223,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, if (!adev->mman.buffer_funcs_enabled) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { - unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; - struct drm_mm_node *node = bo->mem.mm_node; - unsigned long pages_left; - - for (pages_left = bo->mem.num_pages; - pages_left; - pages_left -= node->size, node++) { - if (node->start < fpfn) - break; - } - - if (!pages_left) - goto gtt; + !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && + amdgpu_bo_in_cpu_visible_vram(abo)) { /* Try evicting to the CPU inaccessible part of VRAM * first, but only set GTT as busy placement, so this @@ -245,12 +233,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, */ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - abo->placements[0].fpfn = fpfn; + abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; abo->placements[0].lpfn = 0; abo->placement.busy_placement = &abo->placements[1]; abo->placement.num_busy_placement = 1; } else { -gtt: amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); } break; @@ -695,7 +682,7 @@ struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; - struct mm_struct *usermm; + struct task_struct *usertask; uint32_t userflags; spinlock_t guptasklock; struct list_head guptasks; @@ -706,14 +693,18 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct 
ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct mm_struct *mm = gtt->usertask->mm; unsigned int flags = 0; unsigned pinned = 0; int r; + if (!mm) /* Happens during process shutdown */ + return -ESRCH; + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) flags |= FOLL_WRITE; - down_read(&current->mm->mmap_sem); + down_read(&mm->mmap_sem); if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { /* check that we only use anonymous memory @@ -721,9 +712,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; struct vm_area_struct *vma; - vma = find_vma(gtt->usermm, gtt->userptr); + vma = find_vma(mm, gtt->userptr); if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); return -EPERM; } } @@ -739,7 +730,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) list_add(&guptask.list, &gtt->guptasks); spin_unlock(&gtt->guptasklock); - r = get_user_pages(userptr, num_pages, flags, p, NULL); + if (mm == current->mm) + r = get_user_pages(userptr, num_pages, flags, p, NULL); + else + r = get_user_pages_remote(gtt->usertask, + mm, userptr, num_pages, + flags, p, NULL, NULL); spin_lock(&gtt->guptasklock); list_del(&guptask.list); @@ -752,12 +748,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) } while (pinned < ttm->num_pages); - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); return 0; release_pages: release_pages(pages, pinned); - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); return r; } @@ -847,6 +843,45 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); } +int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, + struct ttm_buffer_object *tbo, + uint64_t flags) +{ + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); + struct ttm_tt *ttm = tbo->ttm; + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int r; + + if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) { + uint64_t page_idx = 1; + + r = amdgpu_gart_bind(adev, gtt->offset, page_idx, + ttm->pages, gtt->ttm.dma_address, flags); + if (r) + goto gart_bind_fail; + + /* Patch mtype of the second part BO */ + flags &= ~AMDGPU_PTE_MTYPE_MASK; + flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC); + + r = amdgpu_gart_bind(adev, + gtt->offset + (page_idx << PAGE_SHIFT), + ttm->num_pages - page_idx, + &ttm->pages[page_idx], + &(gtt->ttm.dma_address[page_idx]), flags); + } else { + r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, + ttm->pages, gtt->ttm.dma_address, flags); + } + +gart_bind_fail: + if (r) + DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + ttm->num_pages, gtt->offset); + + return r; +} + static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { @@ -920,8 +955,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); gtt->offset = (u64)tmp.start << PAGE_SHIFT; - r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, - bo->ttm->pages, gtt->ttm.dma_address, flags); + r = amdgpu_ttm_gart_bind(adev, bo, flags); if (unlikely(r)) { ttm_bo_mem_put(bo, &tmp); return r; @@ -938,19 +972,15 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm; uint64_t flags; int r; - if (!gtt) + if (!tbo->ttm) return 0; - flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, 
&tbo->mem); - r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, - gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); - if (r) - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", - gtt->ttm.ttm.num_pages, gtt->offset); + flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem); + r = amdgpu_ttm_gart_bind(adev, tbo, flags); + return r; } @@ -978,6 +1008,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + if (gtt->usertask) + put_task_struct(gtt->usertask); + ttm_dma_tt_fini(&gtt->ttm); kfree(gtt); } @@ -1079,8 +1112,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, return -EINVAL; gtt->userptr = addr; - gtt->usermm = current->mm; gtt->userflags = flags; + + if (gtt->usertask) + put_task_struct(gtt->usertask); + gtt->usertask = current->group_leader; + get_task_struct(gtt->usertask); + spin_lock_init(&gtt->guptasklock); INIT_LIST_HEAD(&gtt->guptasks); atomic_set(&gtt->mmu_invalidations, 0); @@ -1096,7 +1134,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) if (gtt == NULL) return NULL; - return gtt->usermm; + if (gtt->usertask == NULL) + return NULL; + + return gtt->usertask->mm; } bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, @@ -1329,6 +1370,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) { struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_param bp; int r = 0; int i; u64 vram_size = adev->gmc.visible_vram_size; @@ -1336,17 +1378,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) u64 size = adev->fw_vram_usage.size; struct amdgpu_bo *bo; + memset(&bp, 0, sizeof(bp)); + bp.size = adev->fw_vram_usage.size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; adev->fw_vram_usage.va = NULL; adev->fw_vram_usage.reserved_bo = NULL; if (adev->fw_vram_usage.size > 0 && adev->fw_vram_usage.size <= vram_size) { - r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, + r = amdgpu_bo_create(adev, &bp, &adev->fw_vram_usage.reserved_bo); if (r) goto error_create; @@ -1454,12 +1500,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } - r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->stolen_vga_memory, - NULL, NULL); - if (r) - return r; + if (adev->gmc.stolen_size) { + r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->stolen_vga_memory, + NULL, NULL); + if (r) + return r; + } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1528,13 +1576,17 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return 0; } +void amdgpu_ttm_late_init(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); +} + void amdgpu_ttm_fini(struct amdgpu_device *adev) { if (!adev->mman.initialized) return; amdgpu_ttm_debugfs_fini(adev); - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6ea7de863041..e969c879d87e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -77,6 +77,7 @@ uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); int amdgpu_ttm_init(struct amdgpu_device *adev); +void amdgpu_ttm_late_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 5916cc25e28b..75592bd04d6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr) le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); DRM_DEBUG("reg_list_separate_size_bytes: %u\n", le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); - DRM_DEBUG("reg_list_separate_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); + DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes)); + if (version_minor == 1) { + const struct rlc_firmware_header_v2_1 *v2_1 = + container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0); + DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n", + le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length)); + DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver)); + DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver)); + DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes)); + DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes)); + DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver)); + DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver)); + DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes)); + DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes)); + DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver)); + DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_feature_ver)); + DRM_DEBUG("save_restore_list_srm_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_size_bytes)); + DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes)); + } } else { DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor); } @@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else @@ -307,7 +338,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && - ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) { + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT && + 
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + @@ -329,6 +363,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, le32_to_cpu(header->ucode_array_offset_bytes) + le32_to_cpu(cp_hdr->jt_offset) * 4), ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm, + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm, + ucode->ucode_size); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 30b5500dc152..08e38579af24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 { uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */ }; +/* version_major=2, version_minor=1 */ +struct rlc_firmware_header_v2_1 { + struct rlc_firmware_header_v2_0 v2_0; + uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */ + uint32_t save_restore_list_cntl_ucode_ver; + uint32_t save_restore_list_cntl_feature_ver; + uint32_t save_restore_list_cntl_size_bytes; + uint32_t save_restore_list_cntl_offset_bytes; + uint32_t save_restore_list_gpm_ucode_ver; + uint32_t save_restore_list_gpm_feature_ver; + uint32_t save_restore_list_gpm_size_bytes; + uint32_t save_restore_list_gpm_offset_bytes; + uint32_t save_restore_list_srm_ucode_ver; + uint32_t save_restore_list_srm_feature_ver; + uint32_t save_restore_list_srm_size_bytes; + uint32_t save_restore_list_srm_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -148,6 +166,7 @@ union amdgpu_firmware_header { struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; + struct rlc_firmware_header_v2_1 rlc_v2_1; struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; struct gpu_info_firmware_header_v1_0 gpu_info; @@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MEC2, AMDGPU_UCODE_ID_CP_MEC2_JT, AMDGPU_UCODE_ID_RLC_G, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, AMDGPU_UCODE_ID_STORAGE, AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_UVD, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 627542b22ae4..de4d77af02ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -66,6 +66,7 @@ #define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin" #define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" #define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" +#define FIRMWARE_VEGAM 
"amdgpu/vegam_uvd.bin" #define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" #define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin" @@ -109,6 +110,7 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGAM); MODULE_FIRMWARE(FIRMWARE_VEGA10); MODULE_FIRMWARE(FIRMWARE_VEGA12); @@ -172,6 +174,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) case CHIP_VEGA12: fw_name = FIRMWARE_VEGA12; break; + case CHIP_VEGAM: + fw_name = FIRMWARE_VEGAM; + break; default: return -EINVAL; } @@ -237,7 +242,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) ring = &adev->uvd.ring; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up UVD run queue.\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a33804bd3314..a86322f5164f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -51,8 +51,9 @@ #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" #define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" -#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" -#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" +#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin" #define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" #define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin" @@ -71,6 +72,7 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGAM); MODULE_FIRMWARE(FIRMWARE_VEGA10); MODULE_FIRMWARE(FIRMWARE_VEGA12); @@ -132,6 +134,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) case CHIP_POLARIS12: fw_name = FIRMWARE_POLARIS12; break; + case CHIP_VEGAM: + fw_name = FIRMWARE_VEGAM; + break; case CHIP_VEGA10: fw_name = FIRMWARE_VEGA10; break; @@ -181,7 +186,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) ring = &adev->vce.ring[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); return r; @@ -755,6 +760,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) if (r) goto out; break; + + case 0x0500000d: /* MV buffer */ + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, + idx + 2, 0, 0); + if (r) + goto out; + + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8, + idx + 7, 0, 0); + if (r) + goto out; + break; } idx += len / 4; @@ -860,6 +877,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) goto out; break; + case 0x0500000d: /* MV buffer */ + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, + idx + 2, *size, 0); + if (r) + goto out; + + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8, + idx + 7, *size / 12, 0); + if (r) + goto out; + break; + default: DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); r = -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 58e495330b38..e5d234cf804f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ 
-105,7 +105,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) ring = &adev->vcn.ring_dec; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCN dec run queue.\n"); return r; @@ -114,7 +114,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) ring = &adev->vcn.ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCN enc run queue.\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index da55a78d7380..1a8f4e0dd023 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -94,6 +94,36 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; +static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo) +{ + base->vm = vm; + base->bo = bo; + INIT_LIST_HEAD(&base->bo_list); + INIT_LIST_HEAD(&base->vm_status); + + if (!bo) + return; + list_add_tail(&base->bo_list, &bo->va); + + if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + return; + + if (bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) + return; + + /* + * we checked all the prerequisites, but it looks like this per vm bo + * is currently evicted. add the bo to the evicted list to make sure it + * is validated on next vm use to avoid fault. + * */ + spin_lock(&vm->status_lock); + list_move_tail(&base->vm_status, &vm->evicted); + spin_unlock(&vm->status_lock); +} + /** * amdgpu_vm_level_shift - return the addr shift for each level * @@ -412,11 +442,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_bo *pt; if (!entry->base.bo) { - r = amdgpu_bo_create(adev, - amdgpu_vm_bo_size(adev, level), - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, flags, - ttm_bo_type_kernel, resv, &pt); + struct amdgpu_bo_param bp; + + memset(&bp, 0, sizeof(bp)); + bp.size = amdgpu_vm_bo_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = flags; + bp.type = ttm_bo_type_kernel; + bp.resv = resv; + r = amdgpu_bo_create(adev, &bp, &pt); if (r) return r; @@ -441,11 +476,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, */ pt->parent = amdgpu_bo_ref(parent->base.bo); - entry->base.vm = vm; - entry->base.bo = pt; - list_add_tail(&entry->base.bo_list, &pt->va); + amdgpu_vm_bo_base_init(&entry->base, vm, pt); spin_lock(&vm->status_lock); - list_add(&entry->base.vm_status, &vm->relocated); + list_move(&entry->base.vm_status, &vm->relocated); spin_unlock(&vm->status_lock); } @@ -628,7 +661,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (vm_flush_needed || pasid_mapping_needed) { - r = amdgpu_fence_emit(ring, &fence); + r = amdgpu_fence_emit(ring, &fence, 0); if (r) return r; } @@ -1557,6 +1590,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_del_init(&bo_va->base.vm_status); + + /* If the BO is not in its preferred location add it back to + * the evicted list so that it gets validated again on the + * next command submission. 
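+ * (For instance, a per-VM BO that prefers VRAM but currently resides in GTT + * fails the preferred_domains check above and is re-queued here so the next + * submission validates it back into its preferred domain.)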
+ */ + if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && + !(bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))) + list_add_tail(&bo_va->base.vm_status, &vm->evicted); spin_unlock(&vm->status_lock); list_splice_init(&bo_va->invalids, &bo_va->valids); @@ -1827,36 +1869,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (bo_va == NULL) { return NULL; } - bo_va->base.vm = vm; - bo_va->base.bo = bo; - INIT_LIST_HEAD(&bo_va->base.bo_list); - INIT_LIST_HEAD(&bo_va->base.vm_status); + amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); bo_va->ref_count = 1; INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - if (!bo) - return bo_va; - - list_add_tail(&bo_va->base.bo_list, &bo->va); - - if (bo->tbo.resv != vm->root.base.bo->tbo.resv) - return bo_va; - - if (bo->preferred_domains & - amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) - return bo_va; - - /* - * We checked all the prerequisites, but it looks like this per VM BO - * is currently evicted. add the BO to the evicted list to make sure it - * is validated on next VM use to avoid fault. - * */ - spin_lock(&vm->status_lock); - list_move_tail(&bo_va->base.vm_status, &vm->evicted); - spin_unlock(&vm->status_lock); - return bo_va; } @@ -2234,6 +2252,10 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, { struct amdgpu_vm_bo_base *bo_base; + /* shadow bo doesn't have bo base, its validation needs its parent */ + if (bo->parent && bo->parent->shadow == bo) + bo = bo->parent; + list_for_each_entry(bo_base, &bo->va, bo_list) { struct amdgpu_vm *vm = bo_base->vm; @@ -2355,6 +2377,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid) { + struct amdgpu_bo_param bp; + struct amdgpu_bo *root; const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; @@ -2380,7 +2404,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ring = adev->vm_manager.vm_pte_rings[ring_instance]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; r = drm_sched_entity_init(&ring->sched, &vm->entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) return r; @@ -2409,24 +2433,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, flags |= AMDGPU_GEM_CREATE_SHADOW; size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, - ttm_bo_type_kernel, NULL, &vm->root.base.bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = align; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = flags; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &root); if (r) goto error_free_sched_entity; - r = amdgpu_bo_reserve(vm->root.base.bo, true); + r = amdgpu_bo_reserve(root, true); if (r) goto error_free_root; - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + r = amdgpu_vm_clear_bo(adev, vm, root, adev->vm_manager.root_level, vm->pte_support_ats); if (r) goto error_unreserve; - vm->root.base.vm = vm; - list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); - list_add_tail(&vm->root.base.vm_status, &vm->evicted); + amdgpu_vm_bo_base_init(&vm->root.base, vm, root); amdgpu_bo_unreserve(vm->root.base.bo); if (pasid) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 30f080364c97..4cf678684a12 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry; /* PDE Block Fragment Size for VEGA10 */ #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) -/* VEGA10 only */ + +/* For GFX9 */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) -/* For Raven */ +#define AMDGPU_MTYPE_NC 0 #define AMDGPU_MTYPE_CC 2 #define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 47ef3e6e7178..a266dcf5daed 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev) pi->pcie_dpm_key_disabled = 0; pi->thermal_sclk_dpm_enabled = 0; - if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) + if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) pi->caps_sclk_ds = true; else pi->caps_sclk_ds = false; @@ -6255,7 +6255,7 @@ static int ci_dpm_late_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_dpm) + if (!adev->pm.dpm_enabled) return 0; /* init the sysfs and debugfs files late */ diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 0df22030e713..8ff4c60d1b59 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev, } } +static bool cik_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we support soft reset */ + return true; +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_config_memsize = &cik_get_config_memsize, .flush_hdp = &cik_flush_hdp, .invalidate_hdp = &cik_invalidate_hdp, + .need_full_reset = &cik_need_full_reset, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 452f88ea46a2..ada241bfeee9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1823,7 +1823,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1842,18 +1841,15 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) @@ -2043,8 +2039,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2526,11 +2521,9 @@ static void 
dce_v10_0_crtc_disable(struct drm_crtc *crtc) dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index a7c1c584a191..a5b96eac3033 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -173,6 +173,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) ARRAY_SIZE(polaris11_golden_settings_a11)); break; case CHIP_POLARIS10: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, polaris10_golden_settings_a11, ARRAY_SIZE(polaris10_golden_settings_a11)); @@ -473,6 +474,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev) num_crtc = 2; break; case CHIP_POLARIS10: + case CHIP_VEGAM: num_crtc = 6; break; case CHIP_POLARIS11: @@ -1445,6 +1447,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev) adev->mode_info.audio.num_pins = 7; break; case CHIP_POLARIS10: + case CHIP_VEGAM: adev->mode_info.audio.num_pins = 8; break; case CHIP_POLARIS11: @@ -1862,7 +1865,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1881,18 +1883,15 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) @@ -2082,8 +2081,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2253,7 +2251,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(amdgpu_crtc->encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; @@ -2601,11 +2600,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -2673,7 +2670,8 @@ 
static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc, if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(amdgpu_crtc->encoder); int encoder_mode = @@ -2830,6 +2828,7 @@ static int dce_v11_0_early_init(void *handle) adev->mode_info.num_dig = 9; break; case CHIP_POLARIS10: + case CHIP_VEGAM: adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; break; @@ -2949,7 +2948,8 @@ static int dce_v11_0_hw_init(void *handle) amdgpu_atombios_encoder_init_dig(adev); if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); amdgpu_atombios_crtc_set_dce_clock(adev, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 9f67b7fd3487..394cc1e8fe20 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1798,18 +1797,15 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) @@ -1978,8 +1974,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc) dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index f55422cbd77a..c9b9ab8f1b05 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1773,18 +1772,15 @@ 
static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) @@ -1955,8 +1951,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc) dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index b51f05dc9582..de7be3de0f41 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -168,11 +168,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc) dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -329,7 +327,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector) return 0; } -static int dce_virtual_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { return MODE_OK; @@ -462,8 +460,9 @@ static int dce_virtual_hw_init(void *handle) break; case CHIP_CARRIZO: case CHIP_STONEY: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_VEGAM: dce_v11_0_disable_dce(adev); break; case CHIP_TOPAZ: diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c new file mode 100644 index 000000000000..4ffda996660f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -0,0 +1,112 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v1_7.h" + +#include "df/df_1_7_default.h" +#include "df/df_1_7_offset.h" +#include "df/df_1_7_sh_mask.h" + +static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; + +static void df_v1_7_init (struct amdgpu_device *adev) +{ +} + +static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + if (enable) { + tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl); + tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp); + } else + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, + mmFabricConfigAccessControl_DEFAULT); +} + +static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); + tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + + return tmp; +} + +static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) +{ + int fb_channel_number; + + fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + + return df_v1_7_channel_number[fb_channel_number]; +} + +static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + /* Put DF on broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, true); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } else { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V1_7_MGCG_DISABLE; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } + + /* Exit broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, false); +} + +static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + u32 tmp; + + /* AMD_CG_SUPPORT_DF_MGCG */ + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY) + *flags |= AMD_CG_SUPPORT_DF_MGCG; +} + +const struct amdgpu_df_funcs df_v1_7_funcs = { + .init = df_v1_7_init, + .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, + .get_fb_channel_number = df_v1_7_get_fb_channel_number, + .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, + .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating, + .get_clockgating_state = df_v1_7_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h index 214f370c5efd..74621104c487 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2018 Advanced Micro Devices, Inc.
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,33 +20,21 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#ifndef PP_SOC15_H -#define PP_SOC15_H -#include "soc15_hw_ip.h" -#include "vega10_ip_offset.h" +#ifndef __DF_V1_7_H__ +#define __DF_V1_7_H__ -inline static uint32_t soc15_get_register_offset( - uint32_t hw_id, - uint32_t inst, - uint32_t segment, - uint32_t offset) +#include "soc15_common.h" +enum DF_V1_7_MGCG { - uint32_t reg = 0; + DF_V1_7_MGCG_DISABLE = 0, + DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1, + DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2, + DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13, + DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14, + DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15 +}; - if (hw_id == THM_HWID) - reg = THM_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == NBIF_HWID) - reg = NBIF_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == MP1_HWID) - reg = MP1_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == DF_HWID) - reg = DF_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == GC_HWID) - reg = GC_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == SMUIO_HWID) - reg = SMUIO_BASE.instance[inst].segment[segment] + offset; - return reg; -} +extern const struct amdgpu_df_funcs df_v1_7_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index e14263fca1c9..818874b13c99 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -125,18 +125,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); - MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); @@ -149,6 +137,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); + MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); @@ -161,6 +161,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vegam_ce.bin"); +MODULE_FIRMWARE("amdgpu/vegam_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vegam_me.bin"); +MODULE_FIRMWARE("amdgpu/vegam_mec.bin"); 
+MODULE_FIRMWARE("amdgpu/vegam_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vegam_rlc.bin"); + static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = { {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, @@ -292,6 +299,37 @@ static const u32 tonga_mgcg_cgcg_init[] = mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, }; +static const u32 golden_settings_vegam_a11[] = +{ + mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000, + mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, + mmDB_DEBUG2, 0xf00fffff, 0x00000400, + mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, + mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a, + mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, + mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, + mmSQ_CONFIG, 0x07f80000, 0x01180000, + mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, + mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, + mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054, + mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, +}; + +static const u32 vegam_golden_common_all[] = +{ + mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, + mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, + mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, +}; + static const u32 golden_settings_polaris11_a11[] = { mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, @@ -712,6 +750,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) tonga_golden_common_all, ARRAY_SIZE(tonga_golden_common_all)); break; + case CHIP_VEGAM: + amdgpu_device_program_register_sequence(adev, + golden_settings_vegam_a11, + ARRAY_SIZE(golden_settings_vegam_a11)); + amdgpu_device_program_register_sequence(adev, + vegam_golden_common_all, + ARRAY_SIZE(vegam_golden_common_all)); + break; case CHIP_POLARIS11: case CHIP_POLARIS12: amdgpu_device_program_register_sequence(adev, @@ -918,17 +964,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; - case CHIP_POLARIS11: - chip_name = "polaris11"; + case CHIP_STONEY: + chip_name = "stoney"; break; case CHIP_POLARIS10: chip_name = "polaris10"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; case CHIP_POLARIS12: chip_name = "polaris12"; break; - case CHIP_STONEY: - chip_name = "stoney"; + case CHIP_VEGAM: + chip_name = "vegam"; break; default: BUG(); @@ -1770,6 +1819,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_POLARIS10: + case CHIP_VEGAM: ret = amdgpu_atombios_get_gfx_info(adev); if (ret) return ret; @@ -1957,12 +2007,13 @@ static int gfx_v8_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (adev->asic_type) { - case CHIP_FIJI: case CHIP_TONGA: + case CHIP_CARRIZO: + case CHIP_FIJI: + case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: - case CHIP_POLARIS10: - case CHIP_CARRIZO: + case CHIP_VEGAM: adev->gfx.mec.num_mec = 2; break; case CHIP_TOPAZ: @@ -2323,6 +2374,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) break; case CHIP_FIJI: + case CHIP_VEGAM: modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | @@ -3504,6 +3556,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) { switch (adev->asic_type) { case CHIP_FIJI: + case CHIP_VEGAM: *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) | PKR_YSEL(1) | @@ -4071,7 +4124,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev) gfx_v8_0_init_power_gating(adev); WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); } else if ((adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { gfx_v8_0_init_csb(adev); gfx_v8_0_init_save_restore_list(adev); gfx_v8_0_enable_save_restore_machine(adev); @@ -4146,7 +4200,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); if (adev->asic_type == CHIP_POLARIS11 || adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12) { + adev->asic_type == CHIP_POLARIS12 || + adev->asic_type == CHIP_VEGAM) { tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); tmp &= ~0x3; WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); @@ -5498,7 +5553,8 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade bool enable) { if ((adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) /* Send msg to SMU via Powerplay */ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_SMC, @@ -5588,6 +5644,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); else @@ -6154,6 +6211,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9d39fd5b1822..fc1911834ab5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -41,7 +41,6 @@ #define GFX9_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L -#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 #define mmPWR_MISC_CNTL_STATUS 0x0183 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 @@ -185,6 +184,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) }; +static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = +{ + mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, +}; + +static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = +{ + mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_3 - 
mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, +}; + #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 @@ -401,6 +424,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -412,6 +456,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) const struct rlc_firmware_header_v2_0 *rlc_hdr; unsigned int *tmp = NULL; unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; DRM_DEBUG("\n"); @@ -468,6 +514,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); adev->gfx.rlc.save_and_restore_offset = @@ -508,6 +560,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v9_0_init_rlc_ext_microcode(adev); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) @@ -566,6 +621,26 @@ static int gfx_v9_0_init_microcode(struct 
amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + if (adev->gfx.rlc.is_rlc_v2_1) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -1600,6 +1675,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) gfx_v9_0_setup_rb(adev); gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); + adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -1616,7 +1692,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - tmp = adev->gmc.shared_aperture_start >> 48; + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); } } @@ -1708,55 +1787,42 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.clear_state_size); } -static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, +static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, int indirect_offset, int list_size, int *unique_indirect_regs, int *unique_indirect_reg_count, - int max_indirect_reg_count, int *indirect_start_offsets, - int *indirect_start_offsets_count, - int max_indirect_start_offsets_count) + int *indirect_start_offsets_count) { int idx; - bool new_entry = true; for (; indirect_offset < list_size; indirect_offset++) { + indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; + *indirect_start_offsets_count = *indirect_start_offsets_count + 1; - if (new_entry) { - new_entry = false; - indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; - *indirect_start_offsets_count = *indirect_start_offsets_count + 1; - BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count); - } + while (register_list_format[indirect_offset] != 0xFFFFFFFF) { + indirect_offset += 2; - if (register_list_format[indirect_offset] == 0xFFFFFFFF) { - new_entry = true; - continue; - } + /* look for the matching indice */ + for (idx = 0; idx < *unique_indirect_reg_count; idx++) { + if (unique_indirect_regs[idx] == + register_list_format[indirect_offset] || + !unique_indirect_regs[idx]) + break; + } - indirect_offset += 2; + BUG_ON(idx >= *unique_indirect_reg_count); - /* look for the matching indice */ - for (idx = 0; idx < *unique_indirect_reg_count; idx++) { - if (unique_indirect_regs[idx] == - register_list_format[indirect_offset]) - break; - } + if 
(!unique_indirect_regs[idx]) + unique_indirect_regs[idx] = register_list_format[indirect_offset]; - if (idx >= *unique_indirect_reg_count) { - unique_indirect_regs[*unique_indirect_reg_count] = - register_list_format[indirect_offset]; - idx = *unique_indirect_reg_count; - *unique_indirect_reg_count = *unique_indirect_reg_count + 1; - BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count); + indirect_offset++; } - - register_list_format[indirect_offset] = idx; } } -static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) +static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) { int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; int unique_indirect_reg_count = 0; @@ -1765,7 +1831,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) int indirect_start_offsets_count = 0; int list_size = 0; - int i = 0; + int i = 0, j = 0; u32 tmp = 0; u32 *register_list_format = @@ -1776,15 +1842,14 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.reg_list_format_size_bytes); /* setup unique_indirect_regs array and indirect_start_offsets array */ - gfx_v9_0_parse_ind_reg_list(register_list_format, - GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, - adev->gfx.rlc.reg_list_format_size_bytes >> 2, - unique_indirect_regs, - &unique_indirect_reg_count, - ARRAY_SIZE(unique_indirect_regs), - indirect_start_offsets, - &indirect_start_offsets_count, - ARRAY_SIZE(indirect_start_offsets)); + unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); + gfx_v9_1_parse_ind_reg_list(register_list_format, + adev->gfx.rlc.reg_list_format_direct_reg_list_length, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indirect_regs, + &unique_indirect_reg_count, + indirect_start_offsets, + &indirect_start_offsets_count); /* enable auto inc in case it is disabled */ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); @@ -1798,19 +1863,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), adev->gfx.rlc.register_restore[i]); - /* load direct register */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0); - for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), - adev->gfx.rlc.register_restore[i]); - /* load indirect register */ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), adev->gfx.rlc.reg_list_format_start); - for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + + /* direct register portion */ + for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), register_list_format[i]); + /* indirect register portion */ + while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { + if (register_list_format[i] == 0xFFFFFFFF) { + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + continue; + } + + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + + for (j = 0; j < unique_indirect_reg_count; j++) { + if (register_list_format[i] == unique_indirect_regs[j]) { + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); + break; + } + } + + BUG_ON(j >= unique_indirect_reg_count); + + i++; + } + /* set save/restore list size */ list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; list_size = list_size >> 1; @@ -1823,14 +1906,19 @@ static int 
gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.starting_offsets_start); for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), - indirect_start_offsets[i]); + indirect_start_offsets[i]); /* load unique indirect regs*/ for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, - unique_indirect_regs[i] & 0x3FFFF); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, - unique_indirect_regs[i] >> 20); + if (unique_indirect_regs[i] != 0) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], + unique_indirect_regs[i] & 0x3FFFF); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], + unique_indirect_regs[i] >> 20); + } } kfree(register_list_format); @@ -2010,6 +2098,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad static void gfx_v9_0_init_pg(struct amdgpu_device *adev) { + if (!adev->gfx.rlc.is_rlc_v2_1) + return; + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_DMG | @@ -2017,27 +2108,12 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { gfx_v9_0_init_csb(adev); - gfx_v9_0_init_rlc_save_restore_list(adev); + gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); - if (adev->asic_type == CHIP_RAVEN) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); - gfx_v9_0_init_gfx_power_gating(adev); - - if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { - gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); - gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); - } else { - gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); - gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); - } - - if (adev->pg_flags & AMD_PG_SUPPORT_CP) - gfx_v9_0_enable_cp_power_gating(adev, true); - else - gfx_v9_0_enable_cp_power_gating(adev, false); - } + WREG32(mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); + gfx_v9_0_init_gfx_power_gating(adev); } } @@ -3061,6 +3137,9 @@ static int gfx_v9_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -3279,6 +3358,11 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_GATE); + if (r) + return r; + return 0; } @@ -3339,8 +3423,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - /* TODO: double check if we need to perform under safe mdoe */ - /* gfx_v9_0_enter_rlc_safe_mode(adev); */ + gfx_v9_0_enter_rlc_safe_mode(adev); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -3351,7 +3434,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - /* gfx_v9_0_exit_rlc_safe_mode(adev); */ + gfx_v9_0_exit_rlc_safe_mode(adev); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -3742,7 
+3825,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, header); -BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -3774,13 +3857,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); - amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | - EOP_TC_ACTION_EN | - EOP_TC_WB_ACTION_EN | - EOP_TC_MD_ACTION_EN | + amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | + EOP_TC_NC_ACTION_EN) : + (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN)) | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5))); amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -4137,6 +4223,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + + if (amdgpu_sriov_vf(ring->adev)) + gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -4458,6 +4558,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_tmz = gfx_v9_0_ring_emit_tmz, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -4492,6 +4593,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .set_priority = gfx_v9_0_ring_set_priority_compute, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { @@ -4522,6 +4624,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .emit_rreg = gfx_v9_0_ring_emit_rreg, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) @@ -4686,6 +4789,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 5617cf62c566..79f9ac29019b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned 
gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + static int gmc_v6_0_sw_init(void *handle) { int r; @@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.mc_mask = 0xffffffffffULL; - adev->gmc.stolen_size = 256 * 1024; - adev->need_dma32 = false; dma_bits = adev->need_dma32 ? 32 : 40; r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); @@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev); + r = amdgpu_bo_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 80054f36e487..7147bfe25a23 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -958,12 +958,33 @@ static int gmc_v7_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + static int gmc_v7_0_sw_init(void *handle) { int r; @@ -998,8 +1019,6 @@ static int gmc_v7_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - adev->gmc.stolen_size = 256 * 1024; - /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. 
* IGP - can handle 40-bits @@ -1030,6 +1049,8 @@ static int gmc_v7_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index d71d4cb68f9c..1edbe6b477b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -138,6 +138,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, golden_settings_polaris11_a11, ARRAY_SIZE(golden_settings_polaris11_a11)); @@ -231,6 +232,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: case CHIP_CARRIZO: case CHIP_STONEY: + case CHIP_VEGAM: return 0; default: BUG(); } @@ -567,9 +569,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) /* set the gart size */ if (amdgpu_gart_size == -1) { switch (adev->asic_type) { - case CHIP_POLARIS11: /* all engines support GPUVM */ case CHIP_POLARIS10: /* all engines support GPUVM */ + case CHIP_POLARIS11: /* all engines support GPUVM */ case CHIP_POLARIS12: /* all engines support GPUVM */ + case CHIP_VEGAM: /* all engines support GPUVM */ default: adev->gmc.gart_size = 256ULL << 20; break; @@ -1049,12 +1052,33 @@ static int gmc_v8_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + #define mmMC_SEQ_MISC0_FIJI 0xA71 static int gmc_v8_0_sw_init(void *handle) @@ -1068,7 +1092,8 @@ static int gmc_v8_0_sw_init(void *handle) } else { u32 tmp; - if (adev->asic_type == CHIP_FIJI) + if ((adev->asic_type == CHIP_FIJI) || + (adev->asic_type == CHIP_VEGAM)) tmp = RREG32(mmMC_SEQ_MISC0_FIJI); else tmp = RREG32(mmMC_SEQ_MISC0); @@ -1096,8 +1121,6 @@ static int gmc_v8_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - adev->gmc.stolen_size = 256 * 1024; - /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. 
* IGP - can handle 40-bits @@ -1128,6 +1151,8 @@ static int gmc_v8_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e687363900bb..6cccf0e0acd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -43,19 +43,13 @@ #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" -#define mmDF_CS_AON0_DramBaseAddress0 0x0044 -#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0 -//DF_CS_AON0_DramBaseAddress0 -#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 -#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 -#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4 -#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8 -#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc -#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L -#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L -#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L -#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L -#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L +/* add these here since we already include dce12 headers and these are for DCN */ +#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d +#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2 +#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0 +#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10 +#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL +#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L /* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ #define AMDGPU_NUM_OF_VMIDS 8 @@ -385,11 +379,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); - - /* wait for the invalidate to complete */ - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, - 1 << vmid, 1 << vmid); + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); return pd_addr; } @@ -556,8 +548,7 @@ static int gmc_v9_0_early_init(void *handle) adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = adev->gmc.shared_aperture_start + (4ULL << 30) - 1; - adev->gmc.private_aperture_start = - adev->gmc.shared_aperture_end + 1; + adev->gmc.private_aperture_start = 0x1000000000000000ULL; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; @@ -659,6 +650,11 @@ static int gmc_v9_0_late_init(void *handle) unsigned i; int r; + /* + * TODO - Uncomment once GART corruption issue is fixed. 
+ */ + /* amdgpu_bo_late_init(adev); */ + for(i = 0; i < adev->num_rings; ++i) { struct amdgpu_ring *ring = adev->rings[i]; unsigned vmhub = ring->funcs->vmhub; @@ -714,7 +710,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) { - u32 tmp; int chansize, numchan; int r; @@ -727,39 +722,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) else chansize = 128; - tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); - tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; - tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; - switch (tmp) { - case 0: - default: - numchan = 1; - break; - case 1: - numchan = 2; - break; - case 2: - numchan = 0; - break; - case 3: - numchan = 4; - break; - case 4: - numchan = 0; - break; - case 5: - numchan = 8; - break; - case 6: - numchan = 0; - break; - case 7: - numchan = 16; - break; - case 8: - numchan = 2; - break; - } + numchan = adev->df_funcs->get_hbm_channel_number(adev); adev->gmc.vram_width = numchan * chansize; } @@ -826,6 +789,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) return amdgpu_gart_table_vram_alloc(adev); } +static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ +#if 0 + u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); +#endif + unsigned size; + + /* + * TODO Remove once GART corruption is resolved + * Check related code in gmc_v9_0_sw_fini + * */ + size = 9 * 1024 * 1024; + +#if 0 + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport; + + switch (adev->asic_type) { + case CHIP_RAVEN: + viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); + size = (REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) * + 4); + break; + case CHIP_VEGA10: + case CHIP_VEGA12: + default: + viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + break; + } + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + +#endif + return size; +} + static int gmc_v9_0_sw_init(void *handle) { int r; @@ -877,12 +886,6 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* - * It needs to reserve 8M stolen memory for vega10 - * TODO: Figure out how to avoid that... - */ - adev->gmc.stolen_size = 8 * 1024 * 1024; - /* set DMA mask + need_dma32 flags. * PCIE - can handle 44-bits. * IGP - can handle 44-bits @@ -907,6 +910,8 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -950,6 +955,18 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); gmc_v9_0_gart_fini(adev); + + /* + * TODO: + * Currently there is a bug where some memory client outside + * of the driver writes to first 8M of VRAM on S3 resume, + * this overrides GART which by default gets placed in first 8M and + * causes VM_FAULTS once GTT is accessed. + * Keep the stolen memory reservation while this is not solved.
+ * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init + */ + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_bo_fini(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 26ba984ab2b7..17f7f074cedc 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -2817,7 +2817,7 @@ static int kv_dpm_init(struct amdgpu_device *adev) pi->caps_tcp_ramping = true; } - if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) + if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) pi->caps_sclk_ds = true; else pi->caps_sclk_ds = false; @@ -2974,7 +2974,7 @@ static int kv_dpm_late_init(void *handle) /* powerdown unused blocks for now */ struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_dpm) + if (!adev->pm.dpm_enabled) return 0; kv_dpm_powergate_acp(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 493348672475..078f70faedcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } while (timeout > 1); flr_done: - if (locked) + if (locked) { + adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); + } /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_lockup_timeout == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 8da6da90b1c9..0cf48d26c676 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */ GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */ GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialize the rings */ + GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ + GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ + GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ }; +/*----------------------------------------------------------------------------- + NOTE: All physical addresses used in this interface are actually + GPU Virtual Addresses. +*/ + + /* Control registers of the TEE Gfx interface. These are located in * SRBM-to-PSP mailbox registers (total 8 registers).
*/ @@ -55,8 +64,8 @@ struct psp_gfx_ctrl volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */ volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */ volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */ - volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */ - volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */ + volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual address of ring buffer (VMID=0) */ + volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual address of ring buffer (VMID=0) */ volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */ }; @@ -78,6 +87,8 @@ enum psp_gfx_cmd_id GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ + GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ + GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ }; @@ -85,11 +96,11 @@ enum psp_gfx_cmd_id /* Command to load Trusted Application binary into PSP OS. */ struct psp_gfx_cmd_load_ta { - uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */ - uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */ + uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */ + uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */ uint32_t app_len; /* length of the TA binary in bytes */ - uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */ - uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */ + uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */ uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */ /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided @@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta */ struct psp_gfx_buf_desc { - uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */ - uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */ + uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */ }; @@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd /* Command to setup TMR region.
*/ struct psp_gfx_cmd_setup_tmr { - uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */ - uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */ + uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ };
@@ -174,18 +185,32 @@ enum psp_gfx_fw_type GFX_FW_TYPE_ISP = 16, GFX_FW_TYPE_ACP = 17, GFX_FW_TYPE_SMU = 18, + GFX_FW_TYPE_MMSCH = 19, + GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, + GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, + GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22, + GFX_FW_TYPE_MAX = 23 }; /* Command to load HW IP FW. */ struct psp_gfx_cmd_load_ip_fw { - uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */ - uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */ + uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */ + uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */ uint32_t fw_size; /* FW buffer size in bytes */ enum psp_gfx_fw_type fw_type; /* FW type */ }; +/* Command to save/restore HW IP FW. */ +struct psp_gfx_cmd_save_restore_ip_fw +{ + uint32_t save_fw; /* if set, command is used for saving fw, otherwise for restoring */ + uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */ + uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */ + uint32_t buf_size; /* Size of the save/restore buffer in bytes */ + enum psp_gfx_fw_type fw_type; /* FW type */ +}; /* All GFX ring buffer commands. */ union psp_gfx_commands
@@ -195,7 +220,7 @@ union psp_gfx_commands struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd; struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; - + struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; };
@@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp /* These fields are used for RBI only.
They are all 0 in GPCOM commands */ - uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */ - uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */ + uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */ + uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */ uint32_t resp_offset; /* +20 offset within response buffer */ uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */
@@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp /* Structure of the Ring Buffer Frame */ struct psp_gfx_rb_frame { - uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */ - uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */ + uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */ uint32_t cmd_buf_size; /* +8 command buffer size in bytes */ - uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */ - uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */ + uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */ + uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */ uint32_t fence_value; /* +20 Fence value */ uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */ uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */ uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ uint8_t frame_type; /* +33 1: destroy context frame, 0: all other frames; used only for RBI frames */ uint8_t reserved1[2]; /* +34 reserved, must be 0 */ - uint32_t reserved2[7]; /* +40 reserved, must be 0 */ - /* total 64 bytes */ + uint32_t reserved2[7]; /* +36 reserved, must be 0 */ + /* total 64 bytes */ }; #endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 8873d833a7f7..0ff136d02d9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * case AMDGPU_UCODE_ID_RLC_G: *type = GFX_FW_TYPE_RLC_G; break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index be20a387d961..aa9ab299fd32 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -62,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin"); MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin"); MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin"); MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/vegam_sdma.bin"); +MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin"); static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -209,6 +211,7 @@ static void
sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, golden_settings_polaris11_a11, ARRAY_SIZE(golden_settings_polaris11_a11)); @@ -275,15 +278,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; - case CHIP_POLARIS11: - chip_name = "polaris11"; - break; case CHIP_POLARIS10: chip_name = "polaris10"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; case CHIP_POLARIS12: chip_name = "polaris12"; break; + case CHIP_VEGAM: + chip_name = "vegam"; + break; case CHIP_CARRIZO: chip_name = "carrizo"; break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 399f876f9cad..03a36cbe7557 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -360,6 +360,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, } +static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring, + int mem_space, int hdp, + uint32_t addr0, uint32_t addr1, + uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) | + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + if (mem_space) { + /* memory */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + } else { + /* registers */ + amdgpu_ring_write(ring, addr0 << 2); + amdgpu_ring_write(ring, addr1 << 2); + } + amdgpu_ring_write(ring, ref); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */ +} + /** * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring * @@ -378,15 +403,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) else ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + sdma_v4_0_wait_reg_mem(ring, 0, 1, + adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio_funcs->get_hdp_flush_req_offset(adev), + ref_and_mask, ref_and_mask, 10); } /** @@ -1114,16 +1134,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) uint64_t addr = ring->fence_drv.gpu_addr; /* wait for idle */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ - SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); - amdgpu_ring_write(ring, addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); - amdgpu_ring_write(ring, seq); /* reference */ - amdgpu_ring_write(ring, 0xffffffff); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll 
interval */ + sdma_v4_0_wait_reg_mem(ring, 1, 0, + addr & 0xfffffffc, + upper_32_bits(addr) & 0xffffffff, + seq, 0xffffffff, 4); } @@ -1154,15 +1168,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring, static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ - amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, val); /* reference */ - amdgpu_ring_write(ring, mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); + sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10); } static int sdma_v4_0_early_init(void *handle) @@ -1605,6 +1611,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .pad_ib = sdma_v4_0_ring_pad_ib, .emit_wreg = sdma_v4_0_ring_emit_wreg, .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index a675ec6d2811..c364ef94cc36 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev, } } +static bool si_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we support soft reset */ + return true; +} + static int si_get_pcie_lanes(struct amdgpu_device *adev) { u32 link_width_cntl; @@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .get_config_memsize = &si_get_config_memsize, .flush_hdp = &si_flush_hdp, .invalidate_hdp = &si_invalidate_hdp, + .need_full_reset = &si_need_full_reset, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 797d505bf9ee..b12d7c9d42a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -7580,7 +7580,7 @@ static int si_dpm_late_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_dpm) + if (!adev->pm.dpm_enabled) return 0; ret = si_set_temperature_range(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 51cf8a30f6c2..90065766fffb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -52,6 +52,7 @@ #include "gmc_v9_0.h" #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" +#include "df_v1_7.h" #include "vega10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" @@ -60,33 +61,6 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" -#define mmFabricConfigAccessControl 0x0410 -#define mmFabricConfigAccessControl_BASE_IDX 0 -#define mmFabricConfigAccessControl_DEFAULT 0x00000000 -//FabricConfigAccessControl -#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0 -#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1 -#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10 -#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L -#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L -#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L - - -#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc -#define 
mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 -//DF_PIE_AON0_DfGlobalClkGater -#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 -#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL - -enum { - DF_MGCG_DISABLE = 0, - DF_MGCG_ENABLE_00_CYCLE_DELAY =1, - DF_MGCG_ENABLE_01_CYCLE_DELAY =2, - DF_MGCG_ENABLE_15_CYCLE_DELAY =13, - DF_MGCG_ENABLE_31_CYCLE_DELAY =14, - DF_MGCG_ENABLE_63_CYCLE_DELAY =15 -}; - #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 #define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba @@ -313,6 +287,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, + { SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)}, }; static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, @@ -341,6 +316,8 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, } else { if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) return adev->gfx.config.gb_addr_config; + else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2)) + return adev->gfx.config.db_debug2; return RREG32(reg_offset); } } @@ -521,6 +498,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) else adev->nbio_funcs = &nbio_v6_1_funcs; + adev->df_funcs = &df_v1_7_funcs; adev->nbio_funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) @@ -593,6 +571,12 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev, HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); } +static bool soc15_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we implement soft reset */ + return true; +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -606,6 +590,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .get_config_memsize = &soc15_get_config_memsize, .flush_hdp = &soc15_flush_hdp, .invalidate_hdp = &soc15_invalidate_hdp, + .need_full_reset = &soc15_need_full_reset, }; static int soc15_common_early_init(void *handle) @@ -697,6 +682,11 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS; adev->pg_flags = AMD_PG_SUPPORT_SDMA; + if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; + adev->external_rev_id = 0x1; break; default: @@ -871,32 +861,6 @@ static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *ade WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); } -static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t data; - - /* Put DF on broadcast mode */ - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl)); - data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; - WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - data |= DF_MGCG_ENABLE_15_CYCLE_DELAY; - WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); - } else { - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - data |= DF_MGCG_DISABLE; - WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); - } - - WREG32(SOC15_REG_OFFSET(DF, 0, 
mmFabricConfigAccessControl), - mmFabricConfigAccessControl_DEFAULT); -} - static int soc15_common_set_clockgating_state(void *handle, enum amd_clockgating_state state) { @@ -920,7 +884,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? true : false); soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - soc15_update_df_medium_grain_clock_gating(adev, + adev->df_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: @@ -973,10 +937,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) *flags |= AMD_CG_SUPPORT_ROM_MGCG; - /* AMD_CG_SUPPORT_DF_MGCG */ - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY) - *flags |= AMD_CG_SUPPORT_DF_MGCG; + adev->df_funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 7f408f85fdb6..8dc29107228f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -159,6 +159,7 @@ #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ #define EOP_TCL1_ACTION_EN (1 << 16) #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ +#define EOP_TC_NC_ACTION_EN (1 << 19) #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ #define DATA_SEL(x) ((x) << 29) @@ -268,6 +269,11 @@ * x=1: tmz_end */ +#define PACKET3_INVALIDATE_TLBS 0x98 +# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) +# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) +# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) #define PACKET3_SET_RESOURCES 0xA0 /* 1. header * 2. 
CONTROL diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 948bb9437757..87cbb142dd0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -688,7 +688,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, if (state == AMD_PG_STATE_GATE) { uvd_v4_2_stop(adev); - if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { + if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) { if (!(RREG32_SMC(ixCURRENT_PG_STATUS) & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) { WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | @@ -699,7 +699,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, } return 0; } else { - if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { + if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) { if (RREG32_SMC(ixCURRENT_PG_STATUS) & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index f26f515db2fb..ca6ab56357b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -62,7 +62,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev) { return ((adev->asic_type >= CHIP_POLARIS10) && - (adev->asic_type <= CHIP_POLARIS12) && + (adev->asic_type <= CHIP_VEGAM) && (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16)); } @@ -429,7 +429,7 @@ static int uvd_v6_0_sw_init(void *handle) ring = &adev->uvd.ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); return r; @@ -964,6 +964,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } /** + * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing + * + * @ring: amdgpu_ring pointer + */ +static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* The firmware doesn't seem to like touching registers at this point. 
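+ * Providing an empty callback here keeps the IB submission path from falling back to the ASIC-level, register-based HDP flush, so nothing is emitted on this ring; the emit_frame_size accounting below drops the 6 dwords the flush used to take.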
*/ +} + +/** * uvd_v6_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -1528,12 +1538,13 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .set_wptr = uvd_v6_0_ring_set_wptr, .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */ .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ .emit_ib = uvd_v6_0_ring_emit_ib, .emit_fence = uvd_v6_0_ring_emit_fence, + .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, @@ -1552,7 +1563,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */ 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */ @@ -1561,6 +1572,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .emit_fence = uvd_v6_0_ring_emit_fence, .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush, .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync, + .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index eddc57f3b72a..0ca63d588670 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -418,7 +418,7 @@ static int uvd_v7_0_sw_init(void *handle) ring = &adev->uvd.ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); return r; @@ -1136,6 +1136,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } /** + * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing + * + * @ring: amdgpu_ring pointer + */ +static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* The firmware doesn't seem to like touching registers at this point. 
*/ +} + +/** * uvd_v7_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -1654,7 +1664,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 8 + /* uvd_v7_0_ring_emit_vm_flush */ @@ -1663,6 +1673,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .emit_ib = uvd_v7_0_ring_emit_ib, .emit_fence = uvd_v7_0_ring_emit_fence, .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, + .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush, .test_ring = uvd_v7_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = uvd_v7_0_ring_insert_nop, @@ -1671,6 +1682,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .end_use = amdgpu_uvd_ring_end_use, .emit_wreg = uvd_v7_0_ring_emit_wreg, .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { @@ -1702,6 +1714,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .end_use = amdgpu_uvd_ring_end_use, .emit_wreg = uvd_v7_0_enc_ring_emit_wreg, .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 428d1928e44e..0999c843f623 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -388,7 +388,8 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) default: if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) return AMDGPU_VCE_HARVEST_VCE1; return 0; @@ -467,8 +468,8 @@ static int vce_v3_0_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; vce_v3_0_override_vce_clock_gating(adev, true); - if (!(adev->flags & AMD_IS_APU)) - amdgpu_asic_set_vce_clocks(adev, 10000, 10000); + + amdgpu_asic_set_vce_clocks(adev, 10000, 10000); for (i = 0; i < adev->vce.num_rings; i++) adev->vce.ring[i].ready = false; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 73fd48d6c756..8fd1b742985a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1081,6 +1081,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .end_use = amdgpu_vce_ring_end_use, .emit_wreg = vce_v4_0_emit_wreg, .emit_reg_wait = vce_v4_0_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 8c132673bc79..0501746b6c2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1109,6 +1109,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static const 
struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { @@ -1139,6 +1140,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 126f1276d347..4ac1288ab7df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -305,9 +305,10 @@ static void vi_init_golden_registers(struct amdgpu_device *adev) stoney_mgcg_cgcg_init, ARRAY_SIZE(stoney_mgcg_cgcg_init)); break; - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: default: break; } @@ -728,33 +729,59 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock, return r; tmp = RREG32_SMC(cntl_reg); - tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | - CG_DCLK_CNTL__DCLK_DIVIDER_MASK); + + if (adev->flags & AMD_IS_APU) + tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK; + else + tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | + CG_DCLK_CNTL__DCLK_DIVIDER_MASK); tmp |= dividers.post_divider; WREG32_SMC(cntl_reg, tmp); for (i = 0; i < 100; i++) { - if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK) - break; + tmp = RREG32_SMC(status_reg); + if (adev->flags & AMD_IS_APU) { + if (tmp & 0x10000) + break; + } else { + if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK) + break; + } mdelay(10); } if (i == 100) return -ETIMEDOUT; - return 0; } +#define ixGNB_CLK1_DFS_CNTL 0xD82200F0 +#define ixGNB_CLK1_STATUS 0xD822010C +#define ixGNB_CLK2_DFS_CNTL 0xD8220110 +#define ixGNB_CLK2_STATUS 0xD822012C +#define ixGNB_CLK3_DFS_CNTL 0xD8220130 +#define ixGNB_CLK3_STATUS 0xD822014C + static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) { int r; - r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); - if (r) - return r; + if (adev->flags & AMD_IS_APU) { + r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS); + if (r) + return r; - r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); - if (r) - return r; + r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS); + if (r) + return r; + } else { + r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); + if (r) + return r; + + r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); + if (r) + return r; + } return 0; } @@ -764,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) int r, i; struct atom_clock_dividers dividers; u32 tmp; + u32 reg_ctrl; + u32 reg_status; + u32 status_mask; + u32 reg_mask; + + if (adev->flags & AMD_IS_APU) { + reg_ctrl = ixGNB_CLK3_DFS_CNTL; + reg_status = ixGNB_CLK3_STATUS; + status_mask = 0x00010000; + reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK; + } else { + reg_ctrl = ixCG_ECLK_CNTL; + reg_status = ixCG_ECLK_STATUS; + status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK; + reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK; + } r = amdgpu_atombios_get_clock_dividers(adev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, @@ -772,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) return r; for (i = 0; i < 100; i++) { - if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) + if (RREG32_SMC(reg_status) & 
status_mask) break; mdelay(10); } + if (i == 100) return -ETIMEDOUT; - tmp = RREG32_SMC(ixCG_ECLK_CNTL); - tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | - CG_ECLK_CNTL__ECLK_DIVIDER_MASK); + tmp = RREG32_SMC(reg_ctrl); + tmp &= ~reg_mask; tmp |= dividers.post_divider; - WREG32_SMC(ixCG_ECLK_CNTL, tmp); + WREG32_SMC(reg_ctrl, tmp); for (i = 0; i < 100; i++) { - if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) + if (RREG32_SMC(reg_status) & status_mask) break; mdelay(10); } + if (i == 100) return -ETIMEDOUT; @@ -876,6 +920,27 @@ static void vi_invalidate_hdp(struct amdgpu_device *adev, } } +static bool vi_need_full_reset(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + /* CZ has hang issues with full reset at the moment */ + return false; + case CHIP_FIJI: + case CHIP_TONGA: + /* XXX: soft reset should work on fiji and tonga */ + return true; + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_TOPAZ: + default: + /* change this when we support soft reset */ + return true; + } +} + static const struct amdgpu_asic_funcs vi_asic_funcs = { .read_disabled_bios = &vi_read_disabled_bios, @@ -889,6 +954,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_config_memsize = &vi_get_config_memsize, .flush_hdp = &vi_flush_hdp, .invalidate_hdp = &vi_invalidate_hdp, + .need_full_reset = &vi_need_full_reset, }; #define CZ_REV_BRISTOL(rev) \ @@ -1031,6 +1097,30 @@ static int vi_common_early_init(void *handle) adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x64; break; + case CHIP_VEGAM: + adev->cg_flags = 0; + /*AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_DRM_LS | + AMD_CG_SUPPORT_UVD_MGCG | + AMD_CG_SUPPORT_VCE_MGCG;*/ + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x6E; + break; case CHIP_CARRIZO: adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | AMD_CG_SUPPORT_GFX_MGCG | @@ -1422,6 +1512,7 @@ static int vi_common_set_clockgating_state(void *handle, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: vi_common_set_clockgating_state_by_smu(adev, state); default: break; @@ -1551,9 +1642,10 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); } break; - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 0d0242240c47..ffd096fffc1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \ + kfd_mqd_manager_v9.o \ kfd_kernel_queue.o kfd_kernel_queue_cik.o \ - kfd_kernel_queue_vi.o kfd_packet_manager.o \ - 
kfd_process_queue_manager.o kfd_device_queue_manager.o \ - kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ + kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \ + kfd_packet_manager.o kfd_process_queue_manager.o \ + kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \ + kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ - kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o + kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o ifneq ($(CONFIG_AMD_IOMMU_V2),) amdkfd-y += kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 3d5ccb3755d4..49df6c791cfc 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -27,18 +27,28 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { - unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + unsigned int vmid, pasid; + + /* Only handle interrupts from KFD VMIDs */ + vmid = (ihre->ring_id & 0x0000ff00) >> 8; + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + /* If there is no valid PASID, it's likely a firmware bug */ pasid = (ihre->ring_id & 0xffff0000) >> 16; + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; - /* Do not process in ISR, just request it to be forwarded to WQ. */ - return (pasid != 0) && - (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || - ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE; } static void cik_event_interrupt_wq(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 48769d12dd7b..37ce6dd65391 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -33,7 +33,8 @@ #define APE1_MTYPE(x) ((x) << 7) /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ -#define MTYPE_CACHED 0 +#define MTYPE_CACHED_NV 0 +#define MTYPE_CACHED 1 #define MTYPE_NONCACHED 3 #define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h new file mode 100644 index 000000000000..f68aef02fc1f --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -0,0 +1,560 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +static const uint32_t cwsr_trap_gfx8_hex[] = { + 0xbf820001, 0xbf820125, + 0xb8f4f802, 0x89748674, + 0xb8f5f803, 0x8675ff75, + 0x00000400, 0xbf850011, + 0xc00a1e37, 0x00000000, + 0xbf8c007f, 0x87777978, + 0xbf840002, 0xb974f802, + 0xbe801d78, 0xb8f5f803, + 0x8675ff75, 0x000001ff, + 0xbf850002, 0x80708470, + 0x82718071, 0x8671ff71, + 0x0000ffff, 0xb974f802, + 0xbe801f70, 0xb8f5f803, + 0x8675ff75, 0x00000100, + 0xbf840006, 0xbefa0080, + 0xb97a0203, 0x8671ff71, + 0x0000ffff, 0x80f08870, + 0x82f18071, 0xbefa0080, + 0xb97a0283, 0xbef60068, + 0xbef70069, 0xb8fa1c07, + 0x8e7a9c7a, 0x87717a71, + 0xb8fa03c7, 0x8e7a9b7a, + 0x87717a71, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbef2007e, + 0xbef3007f, 0xbefe0180, + 0xbf900004, 0x877a8474, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x877b7a7b, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x877b7a7b, 0xbeef007c, + 0xbeee0080, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cbc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611d3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xb8f5f803, + 0xbefe007c, 0xbefc006e, + 0xc0611d7c, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dbc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dfc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xb8eff801, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbef30080, + 0x8773737a, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8f51605, 0x80758175, + 0x8e758475, 0x8e7a8275, + 0xbefa00ff, 0x01000000, + 0xbef60178, 0x80786e78, + 0x82798079, 0xbefc0080, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003c, 0x00000000, + 0xc06b013c, 0x00000010, + 0xc06b023c, 0x00000020, + 0xc06b033c, 0x00000030, + 0x8078c078, 0x82798079, + 0x807c907c, 0xbf0a757c, + 0xbf85ffeb, 0xbef80176, + 0xbeee0080, 0xbefe00c1, + 0xbeff00c1, 0xbefa00ff, + 0x01000000, 0xe0724000, + 0x6e1e0000, 0xe0724100, + 0x6e1e0100, 0xe0724200, + 0x6e1e0200, 0xe0724300, + 0x6e1e0300, 0xbefe00c1, + 0xbeff00c1, 0xb8f54306, + 0x8675c175, 0xbf84002c, + 0xbf8a0000, 0x867aff73, + 0x04000000, 0xbf840028, + 0x8e758675, 0x8e758275, + 0xbefa0075, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 
0x806eff6e, 0x00000080, + 0xbefa00ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe80007b, + 0x867bff7b, 0xff7fffff, + 0x877bff7b, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8c007f, 0xe0765000, + 0x6e1e0002, 0x32040702, + 0xd0c9006a, 0x0000eb02, + 0xbf87fff7, 0xbefb0000, + 0xbeee00ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8f52a05, 0x80758175, + 0x8e758275, 0x8e7a8875, + 0xbefa00ff, 0x01000000, + 0xbefc0084, 0xbf0a757c, + 0xbf840015, 0xbf11017c, + 0x8075ff75, 0x00001000, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x6e1e0000, + 0xe0724100, 0x6e1e0100, + 0xe0724200, 0x6e1e0200, + 0xe0724300, 0x6e1e0300, + 0x807c847c, 0x806eff6e, + 0x00000400, 0xbf0a757c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200ca, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x8676ff7f, + 0x08000000, 0x8f768376, + 0x877b767b, 0x8676ff7f, + 0x70000000, 0x8f768176, + 0x877b767b, 0x8676ff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8f34306, 0x8673c173, + 0xbf840019, 0x8e738673, + 0x8e738273, 0xbefa0073, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0x8072ff72, + 0x00000080, 0xbefa00ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x721e0000, + 0xe0510100, 0x721e0000, + 0x807cff7c, 0x00000200, + 0x8072ff72, 0x00000200, + 0xbf0a737c, 0xbf85fff6, + 0xbef20080, 0xbefe00c1, + 0xbeff00c1, 0xb8f32a05, + 0x80738173, 0x8e738273, + 0x8e7a8873, 0xbefa00ff, + 0x01000000, 0xbef60072, + 0x8072ff72, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x8073ff73, 0x00008000, + 0xe0524000, 0x721e0000, + 0xe0524100, 0x721e0100, + 0xe0524200, 0x721e0200, + 0xe0524300, 0x721e0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8072ff72, 0x00000400, + 0xbf0a737c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x761e0000, 0xe0524100, + 0x761e0100, 0xe0524200, + 0x761e0200, 0xe0524300, + 0x761e0300, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0x80f2c072, 0xb8f31605, + 0x80738173, 0x8e738473, + 0x8e7a8273, 0xbefa00ff, + 0x01000000, 0xbefc0073, + 0xc031003c, 0x00000072, + 0x80f2c072, 0xbf8c007f, + 0x80fc907c, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff1, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xc0211cfc, + 0x00000072, 0x80728472, + 0xc0211c3c, 0x00000072, + 0x80728472, 0xc0211c7c, + 0x00000072, 0x80728472, + 0xc0211bbc, 0x00000072, + 0x80728472, 0xc0211bfc, + 0x00000072, 0x80728472, + 0xc0211d3c, 0x00000072, + 0x80728472, 0xc0211d7c, + 0x00000072, 0x80728472, + 0xc0211a3c, 0x00000072, + 0x80728472, 0xc0211a7c, + 0x00000072, 0x80728472, + 0xc0211dfc, 0x00000072, + 0x80728472, 0xc0211b3c, + 0x00000072, 0x80728472, + 0xc0211b7c, 0x00000072, + 0x80728472, 0xbf8c007f, + 0xbefc0073, 0xbefe006e, + 0xbeff006f, 0x867375ff, + 0x000003ff, 0xb9734803, + 0x867375ff, 0xfffff800, + 0x8f738b73, 0xb973a2c3, + 0xb977f801, 0x8673ff71, + 0xf0000000, 0x8f739c73, + 0x8e739073, 0xbef60080, + 0x87767376, 0x8673ff71, + 0x08000000, 0x8f739b73, + 0x8e738f73, 0x87767376, + 0x8673ff74, 0x00800000, + 0x8f739773, 0xb976f807, + 0x8671ff71, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0xb974f802, 0xbf8a0000, + 0x95807370, 0xbf810000, +}; + + +static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbf820001, 
0xbf82015a, + 0xb8f8f802, 0x89788678, + 0xb8f1f803, 0x866eff71, + 0x00000400, 0xbf850034, + 0x866eff71, 0x00000800, + 0xbf850003, 0x866eff71, + 0x00000100, 0xbf840008, + 0x866eff78, 0x00002000, + 0xbf840001, 0xbf810000, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xb8eef807, 0x866fff6e, + 0x001f8000, 0x8e6f8b6f, + 0x8977ff77, 0xfc000000, + 0x87776f77, 0x896eff6e, + 0x001f8000, 0xb96ef807, + 0xb8f0f812, 0xb8f1f813, + 0x8ef08870, 0xc0071bb8, + 0x00000000, 0xbf8cc07f, + 0xc0071c38, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0xb8f1f803, 0x8671ff71, + 0x000001ff, 0xbf850002, + 0x806c846c, 0x826d806d, + 0x866dff6d, 0x0000ffff, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x86fe7e7e, 0x86ea6a6a, + 0xb978f802, 0xbe801f6c, + 0x866dff6d, 0x0000ffff, + 0xbef00080, 0xb9700283, + 0xb8f02407, 0x8e709c70, + 0x876d706d, 0xb8f003c7, + 0x8e709b70, 0x876d706d, + 0xb8f0f807, 0x8670ff70, + 0x00007fff, 0xb970f807, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x87708478, 0xb970f802, + 0xbf8e0002, 0xbf88fffe, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8f11605, + 0x80718171, 0x8e718671, + 0x80707170, 0x80707e70, + 0x8271807f, 0x8671ff71, + 0x0000ffff, 0xc0471cb8, + 0x00000040, 0xbf8cc07f, + 0xc04b1d38, 0x00000048, + 0xbf8cc07f, 0xc0431e78, + 0x00000058, 0xbf8cc07f, + 0xc0471eb8, 0x0000005c, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x8670ff7f, + 0x08000000, 0x8f708370, + 0x87777077, 0x8670ff7f, + 0x70000000, 0x8f708170, + 0x87777077, 0xbefb007c, + 0xbefa0080, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f01605, 0x80708170, + 0x8e708670, 0x807a707a, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc007a, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8f1f803, + 0xbefe007c, 0xbefc007a, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8fbf801, + 0xbefe007c, 0xbefc007a, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0x8670ff7f, + 0x04000000, 0xbeef0080, + 0x876f6f70, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f11605, 0x80718171, + 0x8e718471, 0x8e768271, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747a74, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a717c, 0xbf85ffe7, + 0xbef40172, 0xbefa0080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0xe0724000, 0x7a1d0000, + 0xe0724100, 0x7a1d0100, + 0xe0724200, 0x7a1d0200, + 0xe0724300, 0x7a1d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8f14306, 0x8671c171, + 0xbf84002c, 0xbf8a0000, + 0x8670ff6f, 0x04000000, + 0xbf840028, 0x8e718671, + 
0x8e718271, 0xbef60071, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8f01605, + 0x80708170, 0x8e708670, + 0x807a707a, 0x807aff7a, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x7a1d0002, + 0x68040702, 0xd0c9006a, + 0x0000e302, 0xbf87fff7, + 0xbef70000, 0xbefa00ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8f12a05, + 0x80718171, 0x8e718271, + 0x8e768871, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a717c, 0xbf840015, + 0xbf11017c, 0x8071ff71, + 0x00001000, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0xe0724000, + 0x7a1d0000, 0xe0724100, + 0x7a1d0100, 0xe0724200, + 0x7a1d0200, 0xe0724300, + 0x7a1d0300, 0x807c847c, + 0x807aff7a, 0x00000400, + 0xbf0a717c, 0xbf85ffef, + 0xbf9c0000, 0xbf8200d9, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x08000000, + 0x8f6e836e, 0x87776e77, + 0x866eff7f, 0x70000000, + 0x8f6e816e, 0x87776e77, + 0x866eff7f, 0x04000000, + 0xbf84001e, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbef80080, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, + 0x00000078, 0x80788478, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, + 0x00000078, 0x80788478, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, + 0x00000078, 0x80788478, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, + 0x00000078, 0x80788478, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, + 0x00000078, 0x80788478, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe007a, + 0xbeff007b, 0x866f71ff, + 0x000003ff, 0xb96f4803, + 0x866f71ff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc0071cb7, 0x00000040, + 0xc00b1d37, 0x00000048, + 0xc0031e77, 0x00000058, + 0xc0071eb7, 0x0000005c, + 0xbf8cc07f, 0x866fff6d, + 0xf0000000, 0x8f6f9c6f, + 0x8e6f906f, 
0xbeee0080, + 0x876e6f6e, 0x866fff6d, + 0x08000000, 0x8f6f9b6f, + 0x8e6f8f6f, 0x876e6f6e, + 0x866fff70, 0x00800000, + 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0xb970f802, 0xbf8a0000, + 0x95806f6c, 0xbf810000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index 997a383dcb8b..a2a04bb64096 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -20,9 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#if 0 -HW (VI) source code for CWSR trap handler -#Version 18 + multiple trap handler +/* To compile this assembly code: + * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex + */ + +/* HW (VI) source code for CWSR trap handler */ +/* Version 18 + multiple trap handler */ // this performance-optimal version was originally from Seven Xu at SRDC @@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi /**************************************************************************/ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 @@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo var s_save_spi_init_hi = exec_hi //tba_lo and tba_hi need to be saved/restored -var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} var s_save_pc_hi = ttmp1 var s_save_exec_lo = ttmp2 var s_save_exec_hi = ttmp3 @@ -319,6 +323,10 @@ end s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC end + // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. 
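+ // SPI_PRIO is the two-bit field at STATUS[2:1] (see SQ_WAVE_STATUS_SPI_PRIO_SHIFT/_MASK above); OR-ing in (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) raises the priority field without disturbing the rest of STATUS.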
+ s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp + L_SLEEP: s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 @@ -1007,8 +1015,6 @@ end s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) @@ -1044,6 +1050,7 @@ end s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu @@ -1127,258 +1134,3 @@ end function get_hwreg_size_bytes return 128 //HWREG size 128 bytes end - - -#endif - -static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf820123, - 0xb8f4f802, 0x89748674, - 0xb8f5f803, 0x8675ff75, - 0x00000400, 0xbf850011, - 0xc00a1e37, 0x00000000, - 0xbf8c007f, 0x87777978, - 0xbf840002, 0xb974f802, - 0xbe801d78, 0xb8f5f803, - 0x8675ff75, 0x000001ff, - 0xbf850002, 0x80708470, - 0x82718071, 0x8671ff71, - 0x0000ffff, 0xb974f802, - 0xbe801f70, 0xb8f5f803, - 0x8675ff75, 0x00000100, - 0xbf840006, 0xbefa0080, - 0xb97a0203, 0x8671ff71, - 0x0000ffff, 0x80f08870, - 0x82f18071, 0xbefa0080, - 0xb97a0283, 0xbef60068, - 0xbef70069, 0xb8fa1c07, - 0x8e7a9c7a, 0x87717a71, - 0xb8fa03c7, 0x8e7a9b7a, - 0x87717a71, 0xb8faf807, - 0x867aff7a, 0x00007fff, - 0xb97af807, 0xbef2007e, - 0xbef3007f, 0xbefe0180, - 0xbf900004, 0xbf8e0002, - 0xbf88fffe, 0xbef8007e, - 0x8679ff7f, 0x0000ffff, - 0x8779ff79, 0x00040000, - 0xbefa0080, 0xbefb00ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x877b7a7b, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x877b7a7b, 0xbeef007c, - 0xbeee0080, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x806e7a6e, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xbefe007c, - 0xbefc006e, 0xc0611bfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611c3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611c7c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611cbc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611cfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611d3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xb8f5f803, - 0xbefe007c, 0xbefc006e, - 0xc0611d7c, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xbefe007c, 0xbefc006e, - 0xc0611dbc, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xbefe007c, 0xbefc006e, - 0xc0611dfc, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xb8eff801, 0xbefe007c, - 0xbefc006e, 0xc0611bfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611b3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 
0xc0611b7c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbef30080, - 0x8773737a, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8f51605, 0x80758175, - 0x8e758475, 0x8e7a8275, - 0xbefa00ff, 0x01000000, - 0xbef60178, 0x80786e78, - 0x82798079, 0xbefc0080, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003c, 0x00000000, - 0xc06b013c, 0x00000010, - 0xc06b023c, 0x00000020, - 0xc06b033c, 0x00000030, - 0x8078c078, 0x82798079, - 0x807c907c, 0xbf0a757c, - 0xbf85ffeb, 0xbef80176, - 0xbeee0080, 0xbefe00c1, - 0xbeff00c1, 0xbefa00ff, - 0x01000000, 0xe0724000, - 0x6e1e0000, 0xe0724100, - 0x6e1e0100, 0xe0724200, - 0x6e1e0200, 0xe0724300, - 0x6e1e0300, 0xbefe00c1, - 0xbeff00c1, 0xb8f54306, - 0x8675c175, 0xbf84002c, - 0xbf8a0000, 0x867aff73, - 0x04000000, 0xbf840028, - 0x8e758675, 0x8e758275, - 0xbefa0075, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x806e7a6e, - 0x806eff6e, 0x00000080, - 0xbefa00ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe80007b, - 0x867bff7b, 0xff7fffff, - 0x877bff7b, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8c007f, 0xe0765000, - 0x6e1e0002, 0x32040702, - 0xd0c9006a, 0x0000eb02, - 0xbf87fff7, 0xbefb0000, - 0xbeee00ff, 0x00000400, - 0xbefe00c1, 0xbeff00c1, - 0xb8f52a05, 0x80758175, - 0x8e758275, 0x8e7a8875, - 0xbefa00ff, 0x01000000, - 0xbefc0084, 0xbf0a757c, - 0xbf840015, 0xbf11017c, - 0x8075ff75, 0x00001000, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x6e1e0000, - 0xe0724100, 0x6e1e0100, - 0xe0724200, 0x6e1e0200, - 0xe0724300, 0x6e1e0300, - 0x807c847c, 0x806eff6e, - 0x00000400, 0xbf0a757c, - 0xbf85ffef, 0xbf9c0000, - 0xbf8200ca, 0xbef8007e, - 0x8679ff7f, 0x0000ffff, - 0x8779ff79, 0x00040000, - 0xbefa0080, 0xbefb00ff, - 0x00807fac, 0x8676ff7f, - 0x08000000, 0x8f768376, - 0x877b767b, 0x8676ff7f, - 0x70000000, 0x8f768176, - 0x877b767b, 0x8676ff7f, - 0x04000000, 0xbf84001e, - 0xbefe00c1, 0xbeff00c1, - 0xb8f34306, 0x8673c173, - 0xbf840019, 0x8e738673, - 0x8e738273, 0xbefa0073, - 0xb8f22a05, 0x80728172, - 0x8e728a72, 0xb8f61605, - 0x80768176, 0x8e768676, - 0x80727672, 0x8072ff72, - 0x00000080, 0xbefa00ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x721e0000, - 0xe0510100, 0x721e0000, - 0x807cff7c, 0x00000200, - 0x8072ff72, 0x00000200, - 0xbf0a737c, 0xbf85fff6, - 0xbef20080, 0xbefe00c1, - 0xbeff00c1, 0xb8f32a05, - 0x80738173, 0x8e738273, - 0x8e7a8873, 0xbefa00ff, - 0x01000000, 0xbef60072, - 0x8072ff72, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x8073ff73, 0x00008000, - 0xe0524000, 0x721e0000, - 0xe0524100, 0x721e0100, - 0xe0524200, 0x721e0200, - 0xe0524300, 0x721e0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8072ff72, 0x00000400, - 0xbf0a737c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x761e0000, 0xe0524100, - 0x761e0100, 0xe0524200, - 0x761e0200, 0xe0524300, - 0x761e0300, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0x80f2c072, 0xb8f31605, - 0x80738173, 0x8e738473, - 0x8e7a8273, 0xbefa00ff, - 0x01000000, 0xbefc0073, - 0xc031003c, 0x00000072, - 0x80f2c072, 0xbf8c007f, - 0x80fc907c, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff1, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xc0211cfc, - 
0x00000072, 0x80728472, - 0xc0211c3c, 0x00000072, - 0x80728472, 0xc0211c7c, - 0x00000072, 0x80728472, - 0xc0211bbc, 0x00000072, - 0x80728472, 0xc0211bfc, - 0x00000072, 0x80728472, - 0xc0211d3c, 0x00000072, - 0x80728472, 0xc0211d7c, - 0x00000072, 0x80728472, - 0xc0211a3c, 0x00000072, - 0x80728472, 0xc0211a7c, - 0x00000072, 0x80728472, - 0xc0211dfc, 0x00000072, - 0x80728472, 0xc0211b3c, - 0x00000072, 0x80728472, - 0xc0211b7c, 0x00000072, - 0x80728472, 0xbf8c007f, - 0x8671ff71, 0x0000ffff, - 0xbefc0073, 0xbefe006e, - 0xbeff006f, 0x867375ff, - 0x000003ff, 0xb9734803, - 0x867375ff, 0xfffff800, - 0x8f738b73, 0xb973a2c3, - 0xb977f801, 0x8673ff71, - 0xf0000000, 0x8f739c73, - 0x8e739073, 0xbef60080, - 0x87767376, 0x8673ff71, - 0x08000000, 0x8f739b73, - 0x8e738f73, 0x87767376, - 0x8673ff74, 0x00800000, - 0x8f739773, 0xb976f807, - 0x86fe7e7e, 0x86ea6a6a, - 0xb974f802, 0xbf8a0000, - 0x95807370, 0xbf810000, -}; - diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm new file mode 100644 index 000000000000..998be96be736 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -0,0 +1,1214 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* To compile this assembly code: + * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex + */ + +/* HW (GFX9) source code for CWSR trap handler */ +/* Version 18 + multiple trap handler */ + +// this performance-optimal version was originally from Seven Xu at SRDC + +// Revison #18 --... +/* Rev History +** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) +** #4. SR Memory Layout: +** 1. VGPR-SGPR-HWREG-{LDS} +** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. +** #5. Update: 1. Accurate g8sr_ts_save_d timestamp +** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) +** #7. Update: 1. don't barrier if noLDS +** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version +** 2. Fix SQ issue by s_sleep 2 +** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last +** 2. optimize s_buffer save by burst 16sgprs... +** #10. Update 1. 
Optimize restore sgpr by burst 16 sgprs.
+** #11. Update 1. Add 2 more timestamps for the debug version
+** #12. Update 1. Add VGPR SR using DWx4, some cases improve and some cases drop performance
+** #13. Integ 1. Always use MUBUF for PV trap shader...
+** #14. Update 1. s_buffer_store soft clause...
+** #15. Update 1. PERF - scalar write with glc:0/mtype0 to allow L2 combine. A big perf improvement.
+** #16. Update 1. PERF - UNROLL LDS_DMA got a 2500-cycle save in the IP tree
+** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part]
+**             2. PERF - Save LDS before save VGPR to cover LDS save long latency...
+** #18. Update 1. FUNC - Implicitly restore STATUS.VCCZ, which is not writable by s_setreg_b32
+**             2. FUNC - Handle non-CWSR traps
+*/
+
+var G8SR_WDMEM_HWREG_OFFSET = 0
+var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes
+
+// Keep these definitions the same as in the app shader; these 2 timestamps are part of the app shader... They should come before any save and after any restore.
+
+var G8SR_DEBUG_TIMESTAMP = 0
+var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset
+var s_g8sr_ts_save_s = s[34:35] // save start
+var s_g8sr_ts_sq_save_msg = s[36:37] // the save shader sends the SAVEWAVE msg to SPI
+var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI writes the SR address to SQ
+var s_g8sr_ts_save_d = s[40:41] // save end
+var s_g8sr_ts_restore_s = s[42:43] // restore start
+var s_g8sr_ts_restore_d = s[44:45] // restore end
+
+var G8SR_VGPR_SR_IN_DWX4 = 0
+var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4 bytes
+var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
+
+
+/*************************************************************************/
+/* control on how to run the shader */
+/*************************************************************************/
+//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run)
+var EMU_RUN_HACK = 0
+var EMU_RUN_HACK_RESTORE_NORMAL = 0
+var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
+var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
+var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+var SAVE_LDS = 1
+var WG_BASE_ADDR_LO = 0x9000a000
+var WG_BASE_ADDR_HI = 0x0
+var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem
+var CTX_SAVE_CONTROL = 0x0
+var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL
+var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run)
+var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
+var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0; overload the DFMT field to carry 4 more bits of stride for MUBUF opcodes
+var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
+var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
+
+/**************************************************************************/
+/* variables */
+/**************************************************************************/
+var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
+var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
+var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
+var SQ_WAVE_STATUS_HALT_MASK = 0x2000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
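These LDS_ALLOC/GPR_ALLOC bit fields drive all of the save-area sizing. As a reading aid, here is a minimal host-side C sketch (not part of the patch) of the same decoding; it mirrors the get_*_size_bytes() helpers defined at the end of this file, with the shift/size values taken from the constants around this point:

    #include <stdint.h>

    /* Decode hwreg(reg, shift, size) the way s_getreg_b32 does. */
    static inline uint32_t hwreg_field(uint32_t reg, uint32_t shift, uint32_t size)
    {
        return (reg >> shift) & ((1u << size) - 1);
    }

    /* LDS bytes = lds_size * 64 DW * 4 B (SQ granularity is 64 DW). */
    static uint32_t lds_size_bytes(uint32_t lds_alloc)
    {
        return hwreg_field(lds_alloc, 12, 9) << 8;
    }

    /* VGPR bytes = (vgpr_size + 1) * 4 regs * 64 lanes * 4 B. */
    static uint32_t vgpr_size_bytes(uint32_t gpr_alloc)
    {
        return (hwreg_field(gpr_alloc, 8, 6) + 1) << (2 + 8);
    }

    /* SGPR bytes = (sgpr_size + 1) * 16 regs * 4 B. */
    static uint32_t sgpr_size_bytes(uint32_t gpr_alloc)
    {
        return (hwreg_field(gpr_alloc, 24, 3) + 1) << 6;
    }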
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits + +var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 +var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask +var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 +var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 +var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 + +var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME +var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME + +var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 +var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + +var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data +var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 + +/* Save */ +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE + +var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_SAVE_SPI_INIT_ATC_SHIFT = 27 +var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi + +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_tmp = ttmp4 +var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_xnack_mask_lo = ttmp6 +var s_save_xnack_mask_hi = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 +var s_save_status = ttmp12 +var s_save_mem_offset = ttmp14 +var s_save_alloc_size = s_save_trapsts //conflict +var s_save_m0 = ttmp15 +var s_save_ttmps_lo = s_save_tmp //no conflict +var s_save_ttmps_hi = s_save_trapsts //no conflict + +/* Restore */ +var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 +var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 +var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT +var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK +var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT 
+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi + +var s_restore_mem_offset = ttmp12 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp2 +var s_restore_mem_offset_save = s_restore_tmp //no conflict + +var s_restore_m0 = s_restore_alloc_size //no conflict + +var s_restore_mode = ttmp7 + +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = ttmp14 +var s_restore_exec_hi = ttmp15 +var s_restore_status = ttmp4 +var s_restore_trapsts = ttmp5 +var s_restore_xnack_mask_lo = xnack_mask_lo +var s_restore_xnack_mask_hi = xnack_mask_hi +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 +var s_restore_ttmps_lo = s_restore_tmp //no conflict +var s_restore_ttmps_hi = s_restore_alloc_size //no conflict + +/**************************************************************************/ +/* trap handler entry points */ +/**************************************************************************/ +/* Shader Main*/ + +shader main + asic(GFX9) + type(CS) + + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore + //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC + s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. + s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE + //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE + s_branch L_SKIP_RESTORE //NOT restore, SAVE actually + else + s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save + end + +L_JUMP_TO_RESTORE: + s_branch L_RESTORE //restore + +L_SKIP_RESTORE: + + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE //this is the operation for save + + // ********* Handle non-CWSR traps ******************* +if (!EMU_RUN_HACK) + // Illegal instruction is a non-maskable exception which blocks context save. + // Halt the wavefront and return from the trap. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_HALT_WAVE + + // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. + // Instead, halt the wavefront and return from the trap. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK + s_cbranch_scc0 L_FETCH_2ND_TRAP + +L_HALT_WAVE: + // If STATUS.HALT is set then this fault must come from SQC instruction fetch. + // We cannot prevent further faults so just terminate the wavefront. + s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_ALREADY_HALTED + s_endpgm +L_NOT_ALREADY_HALTED: + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. The debugger compensates for this. 
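+	// The 8-byte rewind below is a 64-bit subtract done as a sub plus
+	// sub-with-borrow pair on {ttmp1, ttmp0} = PC[47:0]; in C terms:
+	//   pc = ((uint64_t)ttmp1 << 32) | ttmp0;  pc -= 8;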
+ s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + +L_FETCH_2ND_TRAP: + // Preserve and clear scalar XNACK state before issuing scalar reads. + // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. + s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) + s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK + s_or_b32 ttmp11, ttmp11, ttmp3 + + s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Read second-level TBA/TMA from first-level TMA and jump if available. + // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) + // ttmp12 holds SQ_WAVE_STATUS + s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) + s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) + s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_waitcnt lgkmcnt(0) + s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_waitcnt lgkmcnt(0) + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] + s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set + s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler + +L_NO_NEXT_TRAP: + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception + s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. + s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 + s_addc_u32 ttmp1, ttmp1, 0 +L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF + + // Restore SQ_WAVE_IB_STS. + s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Restore SQ_WAVE_STATUS. + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status + + s_rfe_b64 [ttmp0, ttmp1] +end + // ********* End handling of non-CWSR traps ******************* + +/**************************************************************************/ +/* save routine */ +/**************************************************************************/ + +L_SAVE: + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? 
+end
+
+	s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+
+	s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
+	s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+	s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
+	s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+	s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+	s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
+	s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+	s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+	s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
+	s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+
+	s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+
+	/* inform SPI the readiness and wait for SPI's go signal */
+	s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+	s_mov_b32 s_save_exec_hi, exec_hi
+	s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime s_g8sr_ts_sq_save_msg
+	s_waitcnt lgkmcnt(0)
+end
+
+	if (EMU_RUN_HACK)
+
+	else
+		s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+	end
+
+	// Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+	s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+	s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
+  L_SLEEP:
+	s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD and will cause an SQ hang, since the 7th and 8th waves could not get arbitration to execute an instruction while the other waves are stuck in the sleep loop waiting for wrexec!=0
+
+	if (EMU_RUN_HACK)
+
+	else
+		s_cbranch_execz L_SLEEP
+	end
+
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime s_g8sr_ts_spi_wrexec
+	s_waitcnt lgkmcnt(0)
+end
+
+	if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+		//calculate wd_addr using absolute thread id
+		v_readlane_b32 s_save_tmp, v9, 0
+		s_lshr_b32 s_save_tmp, s_save_tmp, 6
+		s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
+		s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+		s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+		s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+	else
+	end
+	if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+		s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+		s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+		s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+	else
+	end
+
+	// Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+	// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+	get_vgpr_size_bytes(s_save_ttmps_lo)
+	get_sgpr_size_bytes(s_save_ttmps_hi)
+	s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+	s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+	s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
+	s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
+	s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
+	ack_sqc_store_workaround()
+	s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
+	ack_sqc_store_workaround()
+	s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
+	ack_sqc_store_workaround()
+	s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
+	ack_sqc_store_workaround()
+
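Putting the offsets together, the per-wave save area this handler lays out can be summarized with a small C sketch (illustrative only, not part of the patch; the concrete sizes come from the wave's GPR/LDS allocation, and the ttmp block sits at +0x40 inside the 128-byte HWREG block, matching the stores above):

    #include <stdint.h>

    /* Per-wave save-area offsets, in bytes from the SPI-provided base. */
    struct wave_save_layout {
        uint32_t vgpr_off;   /* 0: all VGPRs, 4-GPR granularity             */
        uint32_t sgpr_off;   /* + vgpr_size_bytes: SGPRs in 16-reg bursts   */
        uint32_t hwreg_off;  /* + sgpr_size_bytes: 128-byte HWREG block     */
        uint32_t ttmp_off;   /* hwreg_off + 0x40: ttmp6-11 and ttmp13-15    */
        uint32_t lds_off;    /* hwreg_off + 128: LDS, first wave in TG only */
    };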
/* setup Resource Constants */
+	s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+	s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+	s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+	s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily initialized
+	s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+	s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+	s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+	s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
+	s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+	s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+	s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
+
+	//FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
+	s_mov_b32 s_save_m0, m0 //save M0
+
+	/* global mem offset */
+	s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0
+
+
+
+
+	/* save HW registers */
+	//////////////////////////////
+
+  L_SAVE_HWREG:
+	// HWREG SR memory offset : size(VGPR)+size(SGPR)
+	get_vgpr_size_bytes(s_save_mem_offset)
+	get_sgpr_size_bytes(s_save_tmp)
+	s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+	s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+	if (SWIZZLE_EN)
+		s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+	else
+		s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+	end
+
+
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0
+
+	if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
+		s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+		s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+	end
+
+	write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
+	write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC
+	write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS
+
+	//s_save_trapsts conflicts with s_save_alloc_size
+	s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+	write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS
+
+	write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO
+	write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI
+
+	//using s_save_tmp would introduce a conflict here between s_save_tmp and s_save_buf_rsrc2
+	s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+
+
+	/* the first wave in the threadgroup */
+	s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract first wave bit
+	s_mov_b32 s_save_exec_hi, 0x0
+	s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
+
+
+	/* save SGPRs */
+	// Save SGPRs before the LDS save, so s0 to s4 can be used during the LDS save...
+ ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 + //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 + s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0 + s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset + s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0 + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + s_nop 0x0 //Manually inserted wait states + L_SAVE_SGPR_LOOP: + // SGPR is allocated in 16 SGPR granularity + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] + + write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4 + s_add_u32 m0, m0, 16 //next sgpr index + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete? + // restore s_save_buf_rsrc0,1 + //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo + s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo + + + + + /* save first 4 VGPR, then LDS save could use */ + // each wave will alloc 4 vgprs at least... + ///////////////////////////////////////////////////////////////////////////////////// + + s_mov_b32 s_save_mem_offset, 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + s_mov_b32 xnack_mask_lo, 0x0 + s_mov_b32 xnack_mask_hi, 0x0 + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
+ else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 +end + + + + /* save LDS */ + ////////////////////////////// + + L_SAVE_LDS: + + // Change EXEC to all threads... + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + + s_barrier //LDS is used? wait for other waves in the same TG + s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_cbranch_scc0 L_SAVE_LDS_DONE + + // first wave do LDS save; + + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() + + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + +var LDS_DMA_ENABLE = 0 +var UNROLL = 0 +if UNROLL==0 && LDS_DMA_ENABLE==1 + s_mov_b32 s3, 256*2 + s_nop 0 + s_nop 0 + s_nop 0 + L_SAVE_LDS_LOOP: + //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? + if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + end + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? 
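+		// With ADD_TID_ENABLE set in the buffer rsrc, each buffer_store_lds_dword
+		// moves one dword per lane, i.e. 64 DW = 256 bytes per instruction; the two
+		// stores above therefore cover 512 bytes per iteration, matching s3 = 256*2.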
+
+elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROLL, has icache misses
+	// store from the highest LDS address to the lowest
+	s_mov_b32 s3, 256*2
+	s_sub_u32 m0, s_save_alloc_size, s3
+	s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
+	s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128-DW chunks...
+	s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from the highest addr to the lowest
+	s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block costs 6*4 bytes of instructions
+	s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2 is the below 2 inst... //s_addc and s_setpc
+	s_nop 0
+	s_nop 0
+	s_nop 0 //pad 3 dw to let LDS_DMA align to 64 bytes
+	s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved
+	s_add_u32 s0, s0, s_save_alloc_size
+	s_addc_u32 s1, s1, 0
+	s_setpc_b64 s[0:1]
+
+
+	for var i = 0; i < 128; i++
+		// be careful to keep this a 64-byte-aligned address, which could improve performance...
+		buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW
+		buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+
+		if i!=127
+			s_sub_u32 m0, m0, s3 // use an sgpr to shrink the 2-DW inst to a 1-DW inst to improve performance, i.e. pack more LDS_DMA insts into one cacheline
+			s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3
+		end
+	end
+
+else // BUFFER_STORE
+	v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+	v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid
+	v_mul_i32_i24 v2, v3, 8 // tid*8
+	v_mov_b32 v3, 256*2
+	s_mov_b32 m0, 0x10000
+	s_mov_b32 s0, s_save_buf_rsrc3
+	s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid
+	s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT
+
+L_SAVE_LDS_LOOP_VECTOR:
+	ds_read_b64 v[0:1], v2 //x = LDS[a], byte address
+	s_waitcnt lgkmcnt(0)
+	buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+// s_waitcnt vmcnt(0)
+// v_add_u32 v2, vcc[0:1], v2, v3
+	v_add_u32 v2, v2, v3
+	v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+	s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
+
+	// restore rsrc3
+	s_mov_b32 s_save_buf_rsrc3, s0
+
+end
+
+L_SAVE_LDS_DONE:
+
+
+	/* save VGPRs - save the rest of the VGPRs */
+	//////////////////////////////////////////////////////////////////////////////////////
+  L_SAVE_VGPR:
+	// VGPR SR memory offset: 0
+	// TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+	s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest of the VGPRs
+	s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+	s_mov_b32 exec_hi, 0xFFFFFFFF
+
+	s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vgpr_size
+	s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+	s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
+	s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+	if (SWIZZLE_EN)
+		s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, 4 // skip first 4 VGPRs + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs + + s_set_gpr_idx_on m0, 0x1 // This will change M0 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 +L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 // v0 = v[0+m0] + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + s_add_u32 m0, m0, 4 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +L_SAVE_VGPR_LOOP_END: + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =0 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_END + + + s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later + + L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 //v0 = v[0+m0] + v_mov_b32 v1, v1 //v0 = v[0+m0] + v_mov_b32 v2, v2 //v0 = v[0+m0] + v_mov_b32 v3, v3 //v0 = v[0+m0] + + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 + end + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +end + +L_SAVE_VGPR_END: + + + + + + + /* S_PGM_END_SAVED */ //FIXME graphics ONLY + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + s_rfe_b64 s_save_pc_lo //Return to the main shader program + else + end + +// Save Done timestamp +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_d + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // Need reset rsrc2?? 
+	s_mov_b32 m0, s_save_mem_offset
+	s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+	s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1
+end
+
+
+	s_branch L_END_PGM
+
+
+
+/**************************************************************************/
+/* restore routine */
+/**************************************************************************/
+
+L_RESTORE:
+	/* Setup Resource Constants */
+	if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+		//calculate wd_addr using absolute thread id
+		v_readlane_b32 s_restore_tmp, v9, 0
+		s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
+		s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
+		s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
+		s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
+		s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
+	else
+	end
+
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime s_g8sr_ts_restore_s
+	s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+	// tma_lo/hi are sgpr 110, 111, which will not be used for the 112-SGPR allocation case...
+	s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
+	s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //back up the ts to ttmp0/1, since exec will be restored at the end..
+end
+
+
+
+	s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+	s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+	s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+	s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+	s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+	s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+	s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+	s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
+	s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+	s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+	s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
+
+	/* global mem offset */
+// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
+
+	/* the first wave in the threadgroup */
+	s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+	s_cbranch_scc0 L_RESTORE_VGPR
+
+	/* restore LDS */
+	//////////////////////////////
+ L_RESTORE_LDS:
+
+	s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+	s_mov_b32 exec_hi, 0xFFFFFFFF
+
+	s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+	s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+	s_cbranch_scc0 L_RESTORE_VGPR //no lds used?
jump to L_RESTORE_VGPR + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? + + + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + L_RESTORE_LDS_LOOP: + if (SAVE_LDS) + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW + end + s_add_u32 m0, m0, 256*2 // 128 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? + + + /* restore VGPRs */ + ////////////////////////////// + L_RESTORE_VGPR: + // VGPR SR memory offset : 0 + s_mov_b32 s_restore_mem_offset, 0x0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + +if G8SR_VGPR_SR_IN_DWX4 + get_vgpr_size_bytes(s_restore_mem_offset) + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, s_restore_alloc_size + s_set_gpr_idx_on m0, 0x8 // Note.. 
This will change m0 + +L_RESTORE_VGPR_LOOP: + buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + s_waitcnt vmcnt(0) + s_sub_u32 m0, m0, 4 + v_mov_b32 v0, v0 // v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_cmp_eq_u32 m0, 0x8000 + s_cbranch_scc0 L_RESTORE_VGPR_LOOP + s_set_gpr_idx_off + + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes + +else + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 1 + s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later + + L_RESTORE_VGPR_LOOP: + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 + end + s_waitcnt vmcnt(0) //ensure data ready + v_mov_b32 v0, v0 //v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? 
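+	// v0 also serves as the address operand of the buffer loads in this loop,
+	// which is why the restore starts at v[4] and v0-v3 are reloaded last
+	// (below) from s_restore_mem_offset_save.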
+ s_set_gpr_idx_off + /* VGPR restore on v0 */ + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 + end + +end + + /* restore SGPRs */ + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPR from S[n] to S[0], by 16 sgprs group + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, s_restore_alloc_size + + L_RESTORE_SGPR_LOOP: + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made + s_waitcnt lgkmcnt(0) //ensure data ready + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + s_movreld_b64 s8, s8 + s_movreld_b64 s10, s10 + s_movreld_b64 s12, s12 + s_movreld_b64 s14, s14 + + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete? + + /* restore HW registers */ + ////////////////////////////// + L_RESTORE_HWREG: + + +if G8SR_DEBUG_TIMESTAMP + s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo + s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi +end + + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + + + s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
+ else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS + read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS + read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO + read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + + //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 + //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore + s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode + + // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_restore_ttmps_lo) + get_sgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 + s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 + s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF + s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 + s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 + s_waitcnt lgkmcnt(0) + + //reuse s_restore_m0 as a temp register + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, 
SQ_WAVE_IB_STS_RCNT_SHIFT
+	s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
+	s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+	s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+	s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+	s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+	s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+	s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+	s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+	s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+
+	s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+	s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+	s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+	s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which was changed by the previous SALU
+
+	s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime s_g8sr_ts_restore_d
+	s_waitcnt lgkmcnt(0)
+end
+
+// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+	s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
+
+
+/**************************************************************************/
+/* the END */
+/**************************************************************************/
+L_END_PGM:
+	s_endpgm
+
+end
+
+
+/**************************************************************************/
+/* the helper functions */
+/**************************************************************************/
+
+//Only for saving hwreg to mem
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+	s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
+	s_mov_b32 m0, s_mem_offset
+	s_buffer_store_dword s, s_rsrc, m0 glc:1
+	ack_sqc_store_workaround()
+	s_add_u32 s_mem_offset, s_mem_offset, 4
+	s_mov_b32 m0, exec_lo
+end
+
+
+// HWREGs are saved before SGPRs, so all HWREGs can be used.
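+// Note: write_16sgpr_to_mem below bursts 16 SGPRs as four s_buffer_store_dwordx4
+// ops (64 bytes) and then advances the rsrc base address itself, with a carry
+// into rsrc[1], instead of advancing a separate memory offset.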
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 + ack_sqc_store_workaround() + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc +end + + +function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 + s_add_u32 s_mem_offset, s_mem_offset, 4 +end + +function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1 + s_sub_u32 s_mem_offset, s_mem_offset, 4*16 +end + + + +function get_lds_size_bytes(s_lds_size_byte) + // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW + s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size + s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW +end + +function get_vgpr_size_bytes(s_vgpr_size_byte) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible +end + +function get_sgpr_size_bytes(s_sgpr_size_byte) + s_getreg_b32 s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1 + s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4 (non-zero value) +end + +function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes +end + +function ack_sqc_store_workaround + if ACK_SQC_STORE + s_waitcnt lgkmcnt(0) + end +end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 59808a39ecf4..f64c5551cdba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, pr_debug("Queue Size: 0x%llX, %u\n", q_properties->queue_size, args->ring_size); - pr_debug("Queue r/w Pointers: %p, %p\n", + pr_debug("Queue r/w Pointers: %px, %px\n", q_properties->read_ptr, q_properties->write_ptr); @@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, /* Return gpu_id as doorbell offset for mmap usage */ - args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id); + args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; + args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); args->doorbell_offset <<= PAGE_SHIFT; + if (KFD_IS_SOC15(dev->device_info->asic_family)) + /* On SOC15 ASICs, doorbell allocation must be + * per-device, and independent from the per-process + * queue_id. Return the doorbell offset within the + * doorbell aperture to user mode. 
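+	 * For illustration (not part of this patch), user mode passes the
+	 * returned offset straight to mmap on the KFD fd:
+	 *   db = mmap(NULL, db_page_size, PROT_READ | PROT_WRITE,
+	 *             MAP_SHARED, kfd_fd, args.doorbell_offset);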
+ */ + args->doorbell_offset |= q_properties.doorbell_off; mutex_unlock(&p->mutex); @@ -1296,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), - GFP_KERNEL); + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), + GFP_KERNEL); if (!devices_arr) return -ENOMEM; @@ -1405,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), - GFP_KERNEL); + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), + GFP_KERNEL); if (!devices_arr) return -ENOMEM; @@ -1645,23 +1653,33 @@ err_i1: static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; + struct kfd_dev *dev = NULL; + unsigned long vm_pgoff; + unsigned int gpu_id; process = kfd_get_process(current); if (IS_ERR(process)) return PTR_ERR(process); - if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) == - KFD_MMAP_DOORBELL_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK; - return kfd_doorbell_mmap(process, vma); - } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) == - KFD_MMAP_EVENTS_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff); + gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff); + if (gpu_id) + dev = kfd_device_by_id(gpu_id); + + switch (vm_pgoff & KFD_MMAP_TYPE_MASK) { + case KFD_MMAP_TYPE_DOORBELL: + if (!dev) + return -ENODEV; + return kfd_doorbell_mmap(dev, process, vma); + + case KFD_MMAP_TYPE_EVENTS: return kfd_event_mmap(process, vma); - } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == - KFD_MMAP_RESERVED_MEM_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; - return kfd_reserved_mem_mmap(process, vma); + + case KFD_MMAP_TYPE_RESERVED_MEM: + if (!dev) + return -ENODEV; + return kfd_reserved_mem_mmap(dev, process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 4f126ef6139b..296b3f230280 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define fiji_cache_info carrizo_cache_info #define polaris10_cache_info carrizo_cache_info #define polaris11_cache_info carrizo_cache_info +/* TODO - check & update Vega10 cache details */ +#define vega10_cache_info carrizo_cache_info +#define raven_cache_info carrizo_cache_info static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) @@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = polaris11_cache_info; num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); break; + case CHIP_VEGA10: + pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; + case CHIP_RAVEN: + pcache_info = raven_cache_info; + num_of_cache_types = ARRAY_SIZE(raven_cache_info); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3346699960dd..7ee6cec2c060 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,16 +20,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) -#include <linux/amd-iommu.h> -#endif #include <linux/bsearch.h> #include <linux/pci.h> #include <linux/slab.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" -#include "cwsr_trap_handler_gfx8.asm" +#include "cwsr_trap_handler.h" #include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 @@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = { .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = { .max_pasid_bits = 16, /* max num of queues for CZ.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = { .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = { .asic_family = CHIP_TONGA, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = { .asic_family = CHIP_TONGA, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = { .asic_family = CHIP_FIJI, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .asic_family = CHIP_FIJI, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = { .asic_family = CHIP_POLARIS10, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .asic_family = CHIP_POLARIS10, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = { .asic_family = CHIP_POLARIS11, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = { 
.needs_pci_atomics = true, }; +static const struct kfd_device_info vega10_device_info = { + .asic_family = CHIP_VEGA10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info vega10_vf_device_info = { + .asic_family = CHIP_VEGA10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + struct kfd_deviceid { unsigned short did; @@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = { { 0x67EB, &polaris11_device_info }, /* Polaris11 */ { 0x67EF, &polaris11_device_info }, /* Polaris11 */ { 0x67FF, &polaris11_device_info }, /* Polaris11 */ + { 0x6860, &vega10_device_info }, /* Vega10 */ + { 0x6861, &vega10_device_info }, /* Vega10 */ + { 0x6862, &vega10_device_info }, /* Vega10 */ + { 0x6863, &vega10_device_info }, /* Vega10 */ + { 0x6864, &vega10_device_info }, /* Vega10 */ + { 0x6867, &vega10_device_info }, /* Vega10 */ + { 0x6868, &vega10_device_info }, /* Vega10 */ + { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ + { 0x687F, &vega10_device_info }, /* Vega10 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) { struct kfd_dev *kfd; - + int ret; const struct kfd_device_info *device_info = lookup_device_info(pdev->device); @@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } - if (device_info->needs_pci_atomics) { - /* Allow BIF to recode atomics to PCIe 3.0 - * AtomicOps. 32 and 64-bit requests are possible and - * must be supported. - */ - if (pci_enable_atomic_ops_to_root(pdev, - PCI_EXP_DEVCAP2_ATOMIC_COMP32 | - PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics", - pdev->vendor, pdev->device); - return NULL; - } + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. 
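+ * The request is now issued for every device; a failure is only + * treated as fatal below for ASICs that actually need PCIe atomics.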
+ */ + ret = pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64); + if (device_info->needs_pci_atomics && ret < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics\n", + pdev->vendor, pdev->device); + return NULL; } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); @@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info->supports_cwsr) { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + if (kfd->device_info->asic_family < CHIP_VEGA10) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx9_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); + } - kfd->cwsr_isa = cwsr_trap_gfx8_hex; - kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); kfd->cwsr_enabled = true; } } @@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock(&kfd->interrupt_lock); } +int kgd2kfd_quiesce_mm(struct mm_struct *mm) +{ + struct kfd_process *p; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function increments the process ref count. + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ESRCH; + + r = kfd_process_evict_queues(p); + + kfd_unref_process(p); + return r; +} + +int kgd2kfd_resume_mm(struct mm_struct *mm) +{ + struct kfd_process *p; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function increments the process ref count. + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ESRCH; + + r = kfd_process_restore_queues(p); + + kfd_unref_process(p); + return r; +} + /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will * prepare for safe eviction of KFD BOs that belong to the specified * process. @@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) return -ENOMEM; - *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); if ((*mem_obj) == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d55d29d31da4..668ad07ebe1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -110,6 +110,57 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, qpd->sh_mem_bases); } +static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) +{ + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family)) { + /* On pre-SOC15 chips we need to use the queue ID to + * preserve the user mode ABI. + */ + q->doorbell_id = q->properties.queue_id; + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + /* For SDMA queues on SOC15, use static doorbell + * assignments based on the engine and queue. 
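+ * The per-engine, per-queue IDs come from the sdma_doorbell + * table handed over in shared_resources.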
+ */ + q->doorbell_id = dev->shared_resources.sdma_doorbell + [q->properties.sdma_engine_id] + [q->properties.sdma_queue_id]; + } else { + /* For CP queues on SOC15 reserve a free doorbell ID */ + unsigned int found; + + found = find_first_zero_bit(qpd->doorbell_bitmap, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); + if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { + pr_debug("No doorbells available"); + return -EBUSY; + } + set_bit(found, qpd->doorbell_bitmap); + q->doorbell_id = found; + } + + q->properties.doorbell_off = + kfd_doorbell_id_to_offset(dev, q->process, + q->doorbell_id); + + return 0; +} + +static void deallocate_doorbell(struct qcm_process_device *qpd, + struct queue *q) +{ + unsigned int old; + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family) || + q->properties.type == KFD_QUEUE_TYPE_SDMA) + return; + + old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); + WARN_ON(!old); +} + static int allocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -145,15 +196,19 @@ static int allocate_vmid(struct device_queue_manager *dqm, static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, struct qcm_process_device *qpd) { - uint32_t len; + const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf; + int ret; if (!qpd->ib_kaddr) return -ENOMEM; - len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + if (ret) + return ret; return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, - qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); + qpd->ib_base, (uint32_t *)qpd->ib_kaddr, + pmf->release_mem_size / sizeof(uint32_t)); } static void deallocate_vmid(struct device_queue_manager *dqm, @@ -301,10 +356,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (retval) return retval; + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_hqd; + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_hqd; + goto out_deallocate_doorbell; pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", q->pipe, q->queue); @@ -324,6 +383,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, out_uninit_mqd: mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_hqd: deallocate_hqd(dqm, q); @@ -357,6 +418,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, } dqm->total_queue_count--; + deallocate_doorbell(qpd, q); + retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, @@ -861,6 +924,10 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_sdma_queue; + pr_debug("SDMA id is: %d\n", q->sdma_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -869,7 +936,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; retval = mqd->load_mqd(mqd, q->mqd, 0, 0, 
&q->properties, NULL); if (retval) @@ -879,6 +946,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, out_uninit_mqd: mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: deallocate_sdma_queue(dqm, q->sdma_id); @@ -1070,12 +1139,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; } + + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_sdma_queue; + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; } /* * Eviction state logic: we only mark active queues as evicted @@ -1093,7 +1167,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; list_add(&q->list, &qpd->queues_list); qpd->queue_count++; @@ -1117,6 +1191,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mutex_unlock(&dqm->lock); return retval; +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); @@ -1257,6 +1333,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, goto failed; } + deallocate_doorbell(qpd, q); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); @@ -1308,7 +1386,10 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - bool retval; + bool retval = true; + + if (!dqm->asic_ops.set_cache_memory_policy) + return retval; mutex_lock(&dqm->lock); @@ -1577,6 +1658,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_POLARIS11: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + device_queue_manager_init_v9(&dqm->asic_ops); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); @@ -1627,6 +1713,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) int pipe, queue; int r = 0; + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, + KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); + if (!r) { + seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", + KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, + KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), + KFD_CIK_HIQ_QUEUE); + seq_reg_dump(m, dump, n_regs); + + kfree(dump); + } + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 412beff3281d..59a6b1956932 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -200,6 +200,8 @@ void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_vi_tonga( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct 
qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c new file mode 100644 index 000000000000..79e5bcf6367c --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -0,0 +1,84 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_device_queue_manager.h" +#include "vega10_enum.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "sdma0/sdma0_4_0_sh_mask.h" + +static int update_qpd_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); + +void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->update_qpd = update_qpd_v9; + asic_ops->init_sdma_vm = init_sdma_vm_v9; +} + +static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) +{ + uint32_t shared_base = pdd->lds_base >> 48; + uint32_t private_base = pdd->scratch_base >> 48; + + return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) | + private_base; +} + +static int update_qpd_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + if (vega10_noretry && + !dqm->dev->device_info->needs_iommu_device) + qpd->sh_mem_config |= + 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd); + + pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases); + + return 0; +} + +static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + /* Not needed on SDMAv4 any more */ + q->properties.sdma_vm_addr = 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index ebb4da14e3df..c3744d89352c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -33,7 +33,6 @@ static DEFINE_IDA(doorbell_ida); static unsigned int max_doorbell_slices; 
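(Aside on the kfd_doorbell.c change that follows: the fixed 4-byte doorbell constant is dropped in favour of the per-ASIC device_info->doorbell_size, 4 bytes on pre-SOC15 parts and 8 bytes on Vega10. A minimal standalone sketch of the resulting per-process slice arithmetic, assuming KFD_MAX_NUM_OF_QUEUES_PER_PROCESS is 1024 and a 4 KiB page size - illustrative values, not quoted from this patch:)

/* Illustrative sketch only - mirrors the kfd_doorbell_process_slice() math. */
#include <stdio.h>

#define QUEUES_PER_PROCESS 1024UL /* assumed KFD_MAX_NUM_OF_QUEUES_PER_PROCESS */
#define PAGE_SZ 4096UL            /* assumed PAGE_SIZE */

static unsigned long doorbell_slice(unsigned long doorbell_size)
{
	unsigned long bytes = doorbell_size * QUEUES_PER_PROCESS;

	/* roundup(bytes, PAGE_SZ) */
	return (bytes + PAGE_SZ - 1) / PAGE_SZ * PAGE_SZ;
}

int main(void)
{
	printf("pre-SOC15: %lu bytes\n", doorbell_slice(4)); /* 4096: one page  */
	printf("SOC15:     %lu bytes\n", doorbell_slice(8)); /* 8192: two pages */
	return 0;
}

(With 8-byte doorbells the slice doubles to two pages, which is why the mmap path below must size the mapping per device rather than from a global constant.)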
-#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4 /* * Each device exposes a doorbell aperture, a PCI MMIO aperture that @@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices; */ /* # of doorbell bytes allocated for each process. */ -static inline size_t doorbell_process_allocation(void) +size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) { - return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES * + return roundup(kfd->device_info->doorbell_size * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, PAGE_SIZE); } @@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd) doorbell_start_offset = roundup(kfd->shared_resources.doorbell_start_offset, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); doorbell_aperture_size = rounddown(kfd->shared_resources.doorbell_aperture_size, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); if (doorbell_aperture_size > doorbell_start_offset) doorbell_process_limit = (doorbell_aperture_size - doorbell_start_offset) / - doorbell_process_allocation(); + kfd_doorbell_process_slice(kfd); else return -ENOSPC; @@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); if (!kfd->doorbell_kernel_ptr) return -ENOMEM; @@ -127,21 +126,16 @@ void kfd_doorbell_fini(struct kfd_dev *kfd) iounmap(kfd->doorbell_kernel_ptr); } -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma) { phys_addr_t address; - struct kfd_dev *dev; /* * For simplicity we only allow mapping of the entire doorbell * allocation of a single device & process. */ - if (vma->vm_end - vma->vm_start != doorbell_process_allocation()) - return -EINVAL; - - /* Find kfd device according to gpu id */ - dev = kfd_device_by_id(vma->vm_pgoff); - if (!dev) + if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) return -EINVAL; /* Calculate physical address of doorbell */ @@ -158,19 +152,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) " vm_flags == 0x%04lX\n" " size == 0x%04lX\n", (unsigned long long) vma->vm_start, address, vma->vm_flags, - doorbell_process_allocation()); + kfd_doorbell_process_slice(dev)); return io_remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, - doorbell_process_allocation(), + kfd_doorbell_process_slice(dev), vma->vm_page_prot); } /* get kernel iomem pointer for a doorbell */ -u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, +void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off) { u32 inx; @@ -185,6 +179,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; + inx *= kfd->device_info->doorbell_size / sizeof(u32); + /* * Calculating the kernel doorbell offset using the first * doorbell page.
@@ -210,7 +206,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) mutex_unlock(&kfd->doorbell_mutex); } -inline void write_kernel_doorbell(u32 __iomem *db, u32 value) +void write_kernel_doorbell(void __iomem *db, u32 value) { if (db) { writel(value, db); @@ -218,30 +214,37 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) } } -/* - * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 - * to doorbells with the process's doorbell page - */ -unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, +void write_kernel_doorbell64(void __iomem *db, u64 value) +{ + if (db) { + WARN(((unsigned long)db & 7) != 0, + "Unaligned 64-bit doorbell"); + writeq(value, (u64 __iomem *)db); + pr_debug("writing %llu to doorbell address %p\n", value, db); + } +} + +unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, - unsigned int queue_id) + unsigned int doorbell_id) { /* * doorbell_id_offset accounts for doorbells taken by KGD. - * index * doorbell_process_allocation/sizeof(u32) adjusts to - * the process's doorbells. + * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to + * the process's doorbells. The offset returned is in dword + * units regardless of the ASIC-dependent doorbell size. */ return kfd->doorbell_id_offset + process->doorbell_index - * doorbell_process_allocation() / sizeof(u32) + - queue_id; + * kfd_doorbell_process_slice(kfd) / sizeof(u32) + + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); } uint64_t kfd_get_number_elems(struct kfd_dev *kfd) { uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - kfd->shared_resources.doorbell_start_offset) / - doorbell_process_allocation() + 1; + kfd_doorbell_process_slice(kfd) + 1; return num_of_elems; @@ -251,7 +254,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process) { return dev->doorbell_base + - process->doorbell_index * doorbell_process_allocation(); + process->doorbell_index * kfd_doorbell_process_slice(dev); } int kfd_alloc_process_doorbells(struct kfd_process *process) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 4890a90f1e44..5562e94e786a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, case KFD_EVENT_TYPE_DEBUG: ret = create_signal_event(devkfd, p, ev); if (!ret) { - *event_page_offset = KFD_MMAP_EVENTS_MASK; + *event_page_offset = KFD_MMAP_TYPE_EVENTS; *event_page_offset <<= PAGE_SHIFT; *event_slot_index = ev->event_id; } @@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", partial_id, valid_id_bits); - if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) { /* With relatively few events, it's faster to * iterate over the event IDR */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 66852de410c8..97d5423c5673 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -275,23 +275,35 @@ * for FLAT_* / S_LOAD operations. 
*/ -#define MAKE_GPUVM_APP_BASE(gpu_num) \ +#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \ (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) #define MAKE_GPUVM_APP_LIMIT(base, size) \ (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1) -#define MAKE_SCRATCH_APP_BASE() \ +#define MAKE_SCRATCH_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x100000000L) #define MAKE_SCRATCH_APP_LIMIT(base) \ (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) -#define MAKE_LDS_APP_BASE() \ +#define MAKE_LDS_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x0) #define MAKE_LDS_APP_LIMIT(base) \ (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) +/* On GFXv9 the LDS and scratch apertures are programmed independently + * using the high 16 bits of the 64-bit virtual address. They must be + * in the hole, which will be the case as long as the high 16 bits are + * not 0. + * + * The aperture sizes are still 4GB implicitly. + * + * A GPUVM aperture is not applicable on GFXv9. + */ +#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48) +#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48) + /* User mode manages most of the SVM aperture address space. The low * 16MB are reserved for kernel use (CWSR trap handler and kernel IB * for now). @@ -300,6 +312,55 @@ #define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE) #define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE) +static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) +{ + /* + * The node id can't be 0 - the three MSB bits of + * the aperture shouldn't be 0 + */ + pdd->lds_base = MAKE_LDS_APP_BASE_VI(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + if (!pdd->dev->device_info->needs_iommu_device) { + /* dGPUs: SVM aperture starting at 0 + * with small reserved space for kernel. + * Set them to CANONICAL addresses. + */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + pdd->dev->shared_resources.gpuvm_size - 1; + } else { + /* set them to non-CANONICAL addresses, and no SVM is + * allocated. + */ + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1); + pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base, + pdd->dev->shared_resources.gpuvm_size); + } + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); +} + +static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) +{ + pdd->lds_base = MAKE_LDS_APP_BASE_V9(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + /* Raven needs SVM to support graphics handles, etc. Leave the small + * reserved space before SVM on Raven as well, even though we don't + * have to. + * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they + * are used in Thunk to reserve SVM.
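+ * No separate GPUVM aperture is programmed here; as noted above, a + * GPUVM aperture is not applicable on GFXv9.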
+ */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + pdd->dev->shared_resources.gpuvm_size - 1; + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); +} + int kfd_init_apertures(struct kfd_process *process) { uint8_t id = 0; @@ -307,9 +368,7 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_process_device *pdd; /*Iterating over all devices*/ - while (kfd_topology_enum_kfd_devices(id, &dev) == 0 && - id < NUM_OF_SUPPORTED_GPUS) { - + while (kfd_topology_enum_kfd_devices(id, &dev) == 0) { if (!dev) { id++; /* Skip non GPU devices */ continue; @@ -318,7 +377,7 @@ int kfd_init_apertures(struct kfd_process *process) pdd = kfd_create_process_device_data(dev, process); if (!pdd) { pr_err("Failed to create process device data\n"); - return -1; + return -ENOMEM; } /* * For 64 bit process apertures will be statically reserved in @@ -330,32 +389,30 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_base = pdd->gpuvm_limit = 0; pdd->scratch_base = pdd->scratch_limit = 0; } else { - /* Same LDS and scratch apertures can be used - * on all GPUs. This allows using more dGPUs - * than placement options for apertures. - */ - pdd->lds_base = MAKE_LDS_APP_BASE(); - pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); - - pdd->scratch_base = MAKE_SCRATCH_APP_BASE(); - pdd->scratch_limit = - MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + switch (dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + kfd_init_apertures_vi(pdd, id); + break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd_init_apertures_v9(pdd, id); + break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + return -EINVAL; + } - if (dev->device_info->needs_iommu_device) { - /* APUs: GPUVM aperture in - * non-canonical address space - */ - pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); - pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT( - pdd->gpuvm_base, - dev->shared_resources.gpuvm_size); - } else { - /* dGPUs: SVM aperture starting at 0 - * with small reserved space for kernel + if (!dev->device_info->needs_iommu_device) { + /* dGPUs: the reserved space for kernel + * before SVM */ - pdd->gpuvm_base = SVM_USER_BASE; - pdd->gpuvm_limit = - dev->shared_resources.gpuvm_size - 1; pdd->qpd.cwsr_base = SVM_CWSR_BASE; pdd->qpd.ib_base = SVM_IB_BASE; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c new file mode 100644 index 000000000000..37029baa3346 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -0,0 +1,92 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "kfd_priv.h" +#include "kfd_events.h" +#include "soc15_int.h" + + +static bool event_interrupt_isr_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, pasid, vmid; + const uint32_t *data = ih_ring_entry; + + /* Only handle interrupts from KFD VMIDs */ + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + + /* If there is no valid PASID, it's likely a firmware bug */ + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + + pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", + client_id, source_id, pasid); + pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); + + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return source_id == SOC15_INTSRC_CP_END_OF_PIPE || + source_id == SOC15_INTSRC_SDMA_TRAP || + source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || + source_id == SOC15_INTSRC_CP_BAD_OPCODE; +} + +static void event_interrupt_wq_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, pasid, vmid; + uint32_t context_id; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) + kfd_signal_event_interrupt(pasid, context_id, 32); + else if (source_id == SOC15_INTSRC_SDMA_TRAP) + kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) + kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + kfd_signal_hw_exception_event(pasid); + else if (client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_UTCL2) { + /* TODO */ + } +} + +const struct kfd_event_interrupt_class event_interrupt_class_v9 = { + .interrupt_isr = event_interrupt_isr_v9, + .interrupt_wq = event_interrupt_wq_v9, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 035c351f47c5..db6d9336b80d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work) { struct kfd_dev *dev = container_of(work, struct kfd_dev, interrupt_work); + uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; - uint32_t ih_ring_entry[DIV_ROUND_UP( - dev->device_info->ih_ring_entry_size, - sizeof(uint32_t))]; + if (dev->device_info->ih_ring_entry_size > 
sizeof(ih_ring_entry)) { + dev_err_once(kfd_chardev(), "Ring entry too small\n"); + return; + } while (dequeue_ih_ring_entry(dev, ih_ring_entry)) dev->device_info->event_interrupt_class->interrupt_wq(dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 69f496485331..476951d8c91c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, &kq->wptr_mem); if (retval != 0) @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, size_t available_size; size_t queue_size_dwords; uint32_t wptr, rptr; + uint64_t wptr64; unsigned int *queue_address; /* When rptr == wptr, the buffer is empty. @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * the opposite. So we can only use up to queue_size_dwords - 1 dwords. */ rptr = *kq->rptr_kernel; - wptr = *kq->wptr_kernel; + wptr = kq->pending_wptr; + wptr64 = kq->pending_wptr64; queue_address = (unsigned int *)kq->pq_kernel_addr; queue_size_dwords = kq->queue->properties.queue_size / 4; @@ -232,29 +234,33 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * make sure calling functions know * acquire_packet_buffer() failed */ - *buffer_ptr = NULL; - return -ENOMEM; + goto err_no_space; } if (wptr + packet_size_in_dwords >= queue_size_dwords) { /* make sure after rolling back to position 0, there is * still enough space. */ - if (packet_size_in_dwords >= rptr) { - *buffer_ptr = NULL; - return -ENOMEM; - } + if (packet_size_in_dwords >= rptr) + goto err_no_space; + /* fill nops, roll back and start at position 0 */ while (wptr > 0) { queue_address[wptr] = kq->nop_packet; wptr = (wptr + 1) % queue_size_dwords; + wptr64++; } } *buffer_ptr = &queue_address[wptr]; kq->pending_wptr = wptr + packet_size_in_dwords; + kq->pending_wptr64 = wptr64 + packet_size_in_dwords; return 0; + +err_no_space: + *buffer_ptr = NULL; + return -ENOMEM; } static void submit_packet(struct kernel_queue *kq) @@ -270,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq) pr_debug("\n"); #endif - *kq->wptr_kernel = kq->pending_wptr; - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, - kq->pending_wptr); + kq->ops_asic_specific.submit_packet(kq); } static void rollback_packet(struct kernel_queue *kq) { - kq->pending_wptr = *kq->queue->properties.write_ptr; + if (kq->dev->device_info->doorbell_size == 8) { + kq->pending_wptr64 = *kq->wptr64_kernel; + kq->pending_wptr = *kq->wptr_kernel % + (kq->queue->properties.queue_size / 4); + } else { + kq->pending_wptr = *kq->wptr_kernel; + } } struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -308,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_HAWAII: kernel_queue_init_cik(&kq->ops_asic_specific); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + kernel_queue_init_v9(&kq->ops_asic_specific); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 594053136ee4..97aff2041a5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -72,6 +72,7 @@ struct kernel_queue { struct kfd_dev *dev; struct mqd_manager *mqd; struct queue *queue; + uint64_t pending_wptr64; uint32_t pending_wptr; unsigned int nop_packet; @@ -79,7 +80,10 @@ struct kernel_queue { uint32_t *rptr_kernel; uint64_t rptr_gpu_addr; struct kfd_mem_obj *wptr_mem; - uint32_t *wptr_kernel; + union { + uint64_t *wptr64_kernel; + uint32_t *wptr_kernel; + }; uint64_t wptr_gpu_addr; struct kfd_mem_obj *pq; uint64_t pq_gpu_addr; @@ -97,5 +101,6 @@ struct kernel_queue { void kernel_queue_init_cik(struct kernel_queue_ops *ops); void kernel_queue_init_vi(struct kernel_queue_ops *ops); +void kernel_queue_init_v9(struct kernel_queue_ops *ops); #endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c index a90eb440b1fb..19e54acb4125 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c @@ -26,11 +26,13 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); static void uninitialize_cik(struct kernel_queue *kq); +static void submit_packet_cik(struct kernel_queue *kq); void kernel_queue_init_cik(struct kernel_queue_ops *ops) { ops->initialize = initialize_cik; ops->uninitialize = uninitialize_cik; + ops->submit_packet = submit_packet_cik; } static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, @@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, static void uninitialize_cik(struct kernel_queue *kq) { } + +static void submit_packet_cik(struct kernel_queue *kq) +{ + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c new file mode 100644 index 000000000000..684a3bf07efd --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -0,0 +1,340 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "kfd_kernel_queue.h" +#include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers_ai.h" +#include "kfd_pm4_opcodes.h" + +static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_v9(struct kernel_queue *kq); +static void submit_packet_v9(struct kernel_queue *kq); + +void kernel_queue_init_v9(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_v9; + ops->uninitialize = uninitialize_v9; + ops->submit_packet = submit_packet_v9; +} + +static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + + return true; +} + +static void uninitialize_v9(struct kernel_queue *kq) +{ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); +} + +static void submit_packet_v9(struct kernel_queue *kq) +{ + *kq->wptr64_kernel = kq->pending_wptr64; + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, + kq->pending_wptr64); +} + +static int pm_map_process_v9(struct packet_manager *pm, + uint32_t *buffer, struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process *packet; + uint64_t vm_page_table_base_addr = + (uint64_t)(qpd->page_table_base) << 12; + + packet = (struct pm4_mes_map_process *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields14.gds_size = qpd->gds_size; + packet->bitfields14.num_gws = qpd->num_gws; + packet->bitfields14.num_oac = qpd->num_oac; + packet->bitfields14.sdma_enable = 1; + packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); + packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + packet->vm_context_page_table_base_addr_lo32 = + lower_32_bits(vm_page_table_base_addr); + packet->vm_context_page_table_base_addr_hi32 = + upper_32_bits(vm_page_table_base_addr); + + return 0; +} + +static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_mes_runlist *packet; + + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). 
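+ * The result ends up in the process_cnt field of the runlist + * packet built below.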
+ */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->ib_base_hi = upper_32_bits(ib); + + return 0; +} + +static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + struct pm4_mes_unmap_queues *packet; + + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + WARN(1, "queue type %d", type); + return -EINVAL; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + 
packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_queues; + break; + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + break; + default: + WARN(1, "filter %d", filter); + return -EINVAL; + } + + return 0; + +} + +static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) +{ + struct pm4_mes_query_status *packet; + + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + + + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + return 0; +} + + +static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(struct pm4_mec_release_mem)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, + sizeof(struct pm4_mec_release_mem)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru; + + packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel__mec_release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return 0; +} + +const struct packet_manager_funcs kfd_v9_pm_funcs = { + .map_process = pm_map_process_v9, + .runlist = pm_runlist_v9, + .set_resources = pm_set_resources_vi, + .map_queues = pm_map_queues_v9, + .unmap_queues = pm_unmap_queues_v9, + .query_status = pm_query_status_v9, + .release_mem = pm_release_mem_v9, + .map_process_size = sizeof(struct pm4_mes_map_process), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = sizeof(struct pm4_mec_release_mem) +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index f1d48281e322..bf20c6d32ef3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -22,15 +22,20 @@ */ #include "kfd_kernel_queue.h" +#include "kfd_device_queue_manager.h" 
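(Aside before the kfd_kernel_queue_vi.c additions: both the VI and v9 packet writers size their PM4 headers with pm_build_pm4_header(), added below, which stores packet_size / 4 - 2 in the header's count field. A standalone sketch of that packing, assuming the usual PM4 TYPE_3 layout of type in bits 31:30, count in bits 29:16 and opcode in bits 15:8 - that layout is an assumption here, not quoted from this patch:)

/* Illustrative sketch only - PM4 TYPE_3 header packing as assumed above. */
#include <stdint.h>
#include <stdio.h>

static uint32_t build_pm4_header(unsigned int opcode, size_t packet_size)
{
	/* count = body dwords - 1 = total dwords - 2, as in pm_build_pm4_header() */
	uint32_t count = (uint32_t)(packet_size / 4 - 2);

	return (3u << 30) | ((count & 0x3fff) << 16) | ((opcode & 0xff) << 8);
}

int main(void)
{
	/* A 24-byte (6-dword) packet encodes count = 4; 0x21 is a
	 * placeholder opcode, not one of this patch's IT_* values.
	 */
	printf("0x%08x\n", build_pm4_header(0x21, 24));
	return 0;
}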
+#include "kfd_pm4_headers_vi.h" +#include "kfd_pm4_opcodes.h" static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); static void uninitialize_vi(struct kernel_queue *kq); +static void submit_packet_vi(struct kernel_queue *kq); void kernel_queue_init_vi(struct kernel_queue_ops *ops) { ops->initialize = initialize_vi; ops->uninitialize = uninitialize_vi; + ops->submit_packet = submit_packet_vi; } static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, @@ -54,3 +59,317 @@ static void uninitialize_vi(struct kernel_queue *kq) { kfd_gtt_sa_free(kq->dev, kq->eop_mem); } + +static void submit_packet_vi(struct kernel_queue *kq) +{ + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} + +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) +{ + union PM4_MES_TYPE_3_HEADER header; + + header.u32All = 0; + header.opcode = opcode; + header.count = packet_size / 4 - 2; + header.type = PM4_TYPE_3; + + return header.u32All; +} + +static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process *packet; + + packet = (struct pm4_mes_map_process *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields3.page_table_base = qpd->page_table_base; + packet->bitfields10.gds_size = qpd->gds_size; + packet->bitfields10.num_gws = qpd->num_gws; + packet->bitfields10.num_oac = qpd->num_oac; + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; + packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; + + packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + return 0; +} + +static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_mes_runlist *packet; + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + + if (WARN_ON(!ib)) + return -EFAULT; + + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 
1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->bitfields3.ib_base_hi = upper_32_bits(ib); + + return 0; +} + +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res) +{ + struct pm4_mes_set_resources *packet; + + packet = (struct pm4_mes_set_resources *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); + + packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_mes_set_resources)); + + packet->bitfields2.queue_type = + queue_type__mes_set_resources__hsa_interface_queue_hiq; + packet->bitfields2.vmid_mask = res->vmid_mask; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; + packet->bitfields7.oac_mask = res->oac_mask; + packet->bitfields8.gds_heap_base = res->gds_heap_base; + packet->bitfields8.gds_heap_size = res->gds_heap_size; + + packet->gws_mask_lo = lower_32_bits(res->gws_mask); + packet->gws_mask_hi = upper_32_bits(res->gws_mask); + + packet->queue_mask_lo = lower_32_bits(res->queue_mask); + packet->queue_mask_hi = upper_32_bits(res->queue_mask); + + return 0; +} + +static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + struct pm4_mes_unmap_queues *packet; + + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + 
engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + WARN(1, "queue type %d", type); + return -EINVAL; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_queues; + break; + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + break; + default: + WARN(1, "filter %d", filter); + return -EINVAL; + } + + return 0; + +} + +static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) +{ + struct pm4_mes_query_status *packet; + + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + return 0; +} + +static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(*packet)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, + sizeof(*packet)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + packet->bitfields2.atc = 0; + + packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel___release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return 0; +} + +const struct packet_manager_funcs kfd_vi_pm_funcs = { + .map_process = pm_map_process_vi, + .runlist = pm_runlist_vi, + .set_resources = pm_set_resources_vi, + .map_queues = pm_map_queues_vi, + .unmap_queues = pm_unmap_queues_vi, + .query_status = pm_query_status_vi, + .release_mem = pm_release_mem_vi, + .map_process_size = sizeof(struct pm4_mes_map_process), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + 
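+	/* These *_size fields let the ASIC-independent code in
+	 * kfd_packet_manager.c (pm_calc_rlib_size(), inc_wptr()) size the
+	 * runlist IB and step through it without knowing the packet layouts.
+	 */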
.map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = sizeof(struct pm4_mec_release_mem) +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index e0c07d24d251..76bf2dc8aec4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = { .interrupt = kgd2kfd_interrupt, .suspend = kgd2kfd_suspend, .resume = kgd2kfd_resume, + .quiesce_mm = kgd2kfd_quiesce_mm, + .resume_mm = kgd2kfd_resume_mm, .schedule_evict_and_restore_process = kgd2kfd_schedule_evict_and_restore_process, }; @@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444); MODULE_PARM_DESC(ignore_crat, "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); +int vega10_noretry; +module_param_named(noretry, vega10_noretry, int, 0644); +MODULE_PARM_DESC(noretry, + "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); + static int amdkfd_init_completed; int kgd2kfd_init(unsigned int interface_version, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index ee7061e1c466..4b8eb506642b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, case CHIP_POLARIS10: case CHIP_POLARIS11: return mqd_manager_init_vi_tonga(type, dev); + case CHIP_VEGA10: + case CHIP_RAVEN: + return mqd_manager_init_v9(type, dev); default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index c00c325ed3c9..06eaa218eba6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -79,10 +79,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_mqd_base_addr_lo = lower_32_bits(addr); m->cp_mqd_base_addr_hi = upper_32_bits(addr); - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; - /* Although WinKFD writes this, I suspect it should not be necessary */ - m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10); @@ -412,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c new file mode 100644 index 000000000000..684054ff02cd --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -0,0 +1,443 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v9_structs.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+	return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct v9_sdma_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	int retval;
+	uint64_t addr;
+	struct v9_mqd *m;
+	struct kfd_dev *kfd = mm->dev;
+
+	/* From V9, for CWSR, the control stack is located on the next page
+	 * boundary after the mqd, so we use the gtt allocation function
+	 * instead of the sub-allocation function.
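+	 *
+	 * A sketch of the resulting allocation, assuming both regions round
+	 * up to one page each:
+	 *   gpu_addr + 0         : struct v9_mqd
+	 *   gpu_addr + PAGE_SIZE : control stack (ctl_stack_size bytes)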
+ */ + if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { + *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + if (!*mqd_mem_obj) + return -ENOMEM; + retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd, + ALIGN(q->ctl_stack_size, PAGE_SIZE) + + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), + &((*mqd_mem_obj)->gtt_mem), + &((*mqd_mem_obj)->gpu_addr), + (void *)&((*mqd_mem_obj)->cpu_ptr)); + } else + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), + mqd_mem_obj); + if (retval != 0) + return -ENOMEM; + + m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, sizeof(struct v9_mqd)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | + 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; + + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | + 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | + 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_aql_control = + 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; + } + + if (q->tba_addr) { + m->compute_pgm_rsrc2 |= + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 
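+	/* a 64-byte AQL packet is 16 dwords, so shift the packet count by 4 */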
4 : 0); + + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, + wptr_shift, 0, mms); +} + +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + + m = get_mqd(mqd); + + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); + + m->cp_hqd_pq_doorbell_control = + q->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + m->cp_hqd_ib_control = + 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | + 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; + + /* + * HW does not clamp this field correctly. Maximum EOP queue size + * is constrained by per-SE EOP done signal count, which is 8-bit. + * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit + * more than (EOP entry count - 1) so a queue size of 0x800 dwords + * is safe, giving a maximum field value of 0xA. + */ + m->cp_hqd_eop_control = min(0xA, + order_base_2(q->eop_ring_buffer_size / 4) - 1); + m->cp_hqd_eop_base_addr_lo = + lower_32_bits(q->eop_ring_buffer_address >> 8); + m->cp_hqd_eop_base_addr_hi = + upper_32_bits(q->eop_ring_buffer_address >> 8); + + m->cp_hqd_iq_timer = 0; + + m->cp_hqd_vmid = q->vmid; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | + 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | + 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | + 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; + } + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) + m->cp_hqd_ctx_save_control = 0; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0 && + !q->is_evicted); + + return 0; +} + + +static int destroy_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_destroy + (mm->dev->kgd, mqd, type, timeout, + pipe_id, queue_id); +} + +static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + struct kfd_dev *kfd = mm->dev; + + if (mqd_mem_obj->gtt_mem) { + kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); + kfree(mqd_mem_obj); + } else { + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + } +} + +static bool is_occupied(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_is_occupied( + mm->dev->kgd, queue_address, + pipe_id, queue_id); +} + +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + struct v9_mqd *m; + int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q); + + if (retval != 0) + return retval; + + m = 
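+	/* The HIQ reuses the compute MQD set up by init_mqd(); the two bits
+	 * below only mark the queue as privileged and kernel-owned.
+	 */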
get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+
+	return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int retval = update_mqd(mm, mqd, q);
+
+	if (retval != 0)
+		return retval;
+
+	/* TODO: update_mqd() already programs cp_hqd_vmid; this re-write
+	 * looks redundant.
+	 */
+	m = get_mqd(mqd);
+	m->cp_hqd_vmid = q->vmid;
+	return retval;
+}
+
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	int retval;
+	struct v9_sdma_mqd *m;
+
+
+	retval = kfd_gtt_sa_allocate(mm->dev,
+			sizeof(struct v9_sdma_mqd),
+			mqd_mem_obj);
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+	memset(m, 0, sizeof(struct v9_sdma_mqd));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct kfd_mem_obj *mqd_mem_obj)
+{
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		uint32_t pipe_id, uint32_t queue_id,
+		struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+					(uint32_t __user *)p->write_ptr,
+					mms);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct queue_properties *q)
+{
+	struct v9_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_doorbell_offset =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+	q->is_active = (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0 &&
+			!q->is_evicted);
+
+	return 0;
+}
+
+/*
+ * The preempt type is ignored here because there is only one way
+ * to preempt an SDMA queue.
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type,
+		unsigned int timeout, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+		uint64_t queue_address, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v9_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v9_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+		struct 
kfd_dev *dev) +{ + struct mqd_manager *mqd; + + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; + + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + if (!mqd) + return NULL; + + mqd->dev = dev; + + switch (type) { + case KFD_MQD_TYPE_CP: + case KFD_MQD_TYPE_COMPUTE: + mqd->init_mqd = init_mqd; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_HIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; +#endif + break; + default: + kfree(mqd); + return NULL; + } + + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 89e4242e43e7..481307b8b4db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 89ba4c670ec5..c317feb43f69 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -26,8 +26,6 @@ #include "kfd_device_queue_manager.h" #include "kfd_kernel_queue.h" #include "kfd_priv.h" -#include "kfd_pm4_headers_vi.h" -#include "kfd_pm4_opcodes.h" static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) @@ -39,18 +37,6 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, *wptr = temp; } -static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) -{ - union PM4_MES_TYPE_3_HEADER header; - - header.u32All = 0; - header.opcode = opcode; - header.count = packet_size / 4 - 2; - header.type = PM4_TYPE_3; - - return header.u32All; -} - static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription) @@ -80,9 +66,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm, pr_debug("Over subscribed runlist\n"); } - map_queue_size = sizeof(struct pm4_mes_map_queues); + map_queue_size = pm->pmf->map_queues_size; /* calculate run list ib allocation size */ - *rlib_size = process_count * sizeof(struct pm4_mes_map_process) + + *rlib_size = process_count * pm->pmf->map_process_size + queue_count * map_queue_size; /* @@ -90,7 +76,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * when over subscription */ if (*over_subscription) - *rlib_size += sizeof(struct pm4_mes_runlist); + *rlib_size += pm->pmf->runlist_size; pr_debug("runlist ib size %d\n", *rlib_size); } @@ -108,12 +94,14 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, 
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); + mutex_lock(&pm->lock); + retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, &pm->ib_buffer_obj); if (retval) { pr_err("Failed to allocate runlist IB\n"); - return retval; + goto out; } *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr; @@ -121,138 +109,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, memset(*rl_buffer, 0, *rl_buffer_size); pm->allocated = true; - return retval; -} - -static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, - uint64_t ib, size_t ib_size_in_dwords, bool chain) -{ - struct pm4_mes_runlist *packet; - int concurrent_proc_cnt = 0; - struct kfd_dev *kfd = pm->dqm->dev; - - if (WARN_ON(!ib)) - return -EFAULT; - - /* Determine the number of processes to map together to HW: - * it can not exceed the number of VMIDs available to the - * scheduler, and it is determined by the smaller of the number - * of processes in the runlist and kfd module parameter - * hws_max_conc_proc. - * Note: the arbitration between the number of VMIDs and - * hws_max_conc_proc has been done in - * kgd2kfd_device_init(). - */ - concurrent_proc_cnt = min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); - - packet = (struct pm4_mes_runlist *)buffer; - - memset(buffer, 0, sizeof(struct pm4_mes_runlist)); - packet->header.u32All = build_pm4_header(IT_RUN_LIST, - sizeof(struct pm4_mes_runlist)); - - packet->bitfields4.ib_size = ib_size_in_dwords; - packet->bitfields4.chain = chain ? 1 : 0; - packet->bitfields4.offload_polling = 0; - packet->bitfields4.valid = 1; - packet->bitfields4.process_cnt = concurrent_proc_cnt; - packet->ordinal2 = lower_32_bits(ib); - packet->bitfields3.ib_base_hi = upper_32_bits(ib); - - return 0; -} - -static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, - struct qcm_process_device *qpd) -{ - struct pm4_mes_map_process *packet; - - packet = (struct pm4_mes_map_process *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_process)); - - packet->header.u32All = build_pm4_header(IT_MAP_PROCESS, - sizeof(struct pm4_mes_map_process)); - packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; - packet->bitfields2.process_quantum = 1; - packet->bitfields2.pasid = qpd->pqm->process->pasid; - packet->bitfields3.page_table_base = qpd->page_table_base; - packet->bitfields10.gds_size = qpd->gds_size; - packet->bitfields10.num_gws = qpd->num_gws; - packet->bitfields10.num_oac = qpd->num_oac; - packet->bitfields10.num_queues = (qpd->is_debug) ? 
0 : qpd->queue_count; - - packet->sh_mem_config = qpd->sh_mem_config; - packet->sh_mem_bases = qpd->sh_mem_bases; - packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; - packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; - - packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; - - packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); - packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); - - return 0; -} - -static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, - struct queue *q, bool is_static) -{ - struct pm4_mes_map_queues *packet; - bool use_static = is_static; - - packet = (struct pm4_mes_map_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); - - packet->header.u32All = build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; - packet->bitfields2.num_queues = 1; - packet->bitfields2.queue_sel = - queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; - - packet->bitfields2.engine_sel = - engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_compute_vi; - - switch (q->properties.type) { - case KFD_QUEUE_TYPE_COMPUTE: - if (use_static) - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_latency_static_queue_vi; - break; - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.queue_type = - queue_type__mes_map_queues__debug_interface_queue_vi; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = q->properties.sdma_engine_id + - engine_sel__mes_map_queues__sdma0_vi; - use_static = false; /* no static queues under SDMA */ - break; - default: - WARN(1, "queue type %d", q->properties.type); - return -EINVAL; - } - packet->bitfields3.doorbell_offset = - q->properties.doorbell_off; - - packet->mqd_addr_lo = - lower_32_bits(q->gart_mqd_addr); - - packet->mqd_addr_hi = - upper_32_bits(q->gart_mqd_addr); - - packet->wptr_addr_lo = - lower_32_bits((uint64_t)q->properties.write_ptr); - - packet->wptr_addr_hi = - upper_32_bits((uint64_t)q->properties.write_ptr); - - return 0; +out: + mutex_unlock(&pm->lock); + return retval; } static int pm_create_runlist_ib(struct packet_manager *pm, @@ -292,12 +152,12 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return -ENOMEM; } - retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); + retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd); if (retval) return retval; proccesses_mapped++; - inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process), + inc_wptr(&rl_wptr, pm->pmf->map_process_size, alloc_size_bytes); list_for_each_entry(kq, &qpd->priv_queue_list, list) { @@ -307,7 +167,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", kq->queue->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + retval = pm->pmf->map_queues(pm, &rl_buffer[rl_wptr], kq->queue, qpd->is_debug); @@ -315,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + pm->pmf->map_queues_size, alloc_size_bytes); } @@ -326,7 +186,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("static_queue, mapping user queue %d, is debug status %d\n", q->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + retval = pm->pmf->map_queues(pm, &rl_buffer[rl_wptr], q, qpd->is_debug); @@ -335,7 +195,7 @@ static int 
pm_create_runlist_ib(struct packet_manager *pm, return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + pm->pmf->map_queues_size, alloc_size_bytes); } } @@ -343,7 +203,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("Finished map process and queues to runlist\n"); if (is_over_subscription) - retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], + retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true); @@ -355,45 +215,29 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; } -/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size - * of this packet - * @gpu_addr - GPU address of the packet. It's a virtual address. - * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer - * Return - length of the packet - */ -uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) -{ - struct pm4_mec_release_mem *packet; - - WARN_ON(!buffer); - - packet = (struct pm4_mec_release_mem *)buffer; - memset(buffer, 0, sizeof(*packet)); - - packet->header.u32All = build_pm4_header(IT_RELEASE_MEM, - sizeof(*packet)); - - packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; - packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; - packet->bitfields2.tcl1_action_ena = 1; - packet->bitfields2.tc_action_ena = 1; - packet->bitfields2.cache_policy = cache_policy___release_mem__lru; - packet->bitfields2.atc = 0; - - packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; - packet->bitfields3.int_sel = - int_sel___release_mem__send_interrupt_after_write_confirm; - - packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; - packet->address_hi = upper_32_bits(gpu_addr); - - packet->data_lo = 0; - - return sizeof(*packet) / sizeof(unsigned int); -} - int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { + switch (dqm->dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + /* PM4 packet structures on CIK are the same as on VI */ + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + pm->pmf = &kfd_vi_pm_funcs; + break; + case CHIP_VEGA10: + case CHIP_RAVEN: + pm->pmf = &kfd_v9_pm_funcs; + break; + default: + WARN(1, "Unexpected ASIC family %u", + dqm->dev->device_info->asic_family); + return -EINVAL; + } + pm->dqm = dqm; mutex_init(&pm->lock); pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); @@ -415,38 +259,25 @@ void pm_uninit(struct packet_manager *pm) int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res) { - struct pm4_mes_set_resources *packet; + uint32_t *buffer, size; int retval = 0; + size = pm->pmf->set_resources_size; mutex_lock(&pm->lock); pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, - sizeof(*packet) / sizeof(uint32_t), - (unsigned int **)&packet); - if (!packet) { + size / sizeof(uint32_t), + (unsigned int **)&buffer); + if (!buffer) { pr_err("Failed to allocate buffer on kernel queue\n"); retval = -ENOMEM; goto out; } - memset(packet, 0, sizeof(struct pm4_mes_set_resources)); - packet->header.u32All = build_pm4_header(IT_SET_RESOURCES, - sizeof(struct pm4_mes_set_resources)); - - packet->bitfields2.queue_type = - queue_type__mes_set_resources__hsa_interface_queue_hiq; - packet->bitfields2.vmid_mask = res->vmid_mask; - packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; - packet->bitfields7.oac_mask = res->oac_mask; - 
packet->bitfields8.gds_heap_base = res->gds_heap_base; - packet->bitfields8.gds_heap_size = res->gds_heap_size; - - packet->gws_mask_lo = lower_32_bits(res->gws_mask); - packet->gws_mask_hi = upper_32_bits(res->gws_mask); - - packet->queue_mask_lo = lower_32_bits(res->queue_mask); - packet->queue_mask_hi = upper_32_bits(res->queue_mask); - - pm->priv_queue->ops.submit_packet(pm->priv_queue); + retval = pm->pmf->set_resources(pm, buffer, res); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); + else + pm->priv_queue->ops.rollback_packet(pm->priv_queue); out: mutex_unlock(&pm->lock); @@ -468,7 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); - packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t); + packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, @@ -476,7 +307,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval) goto fail_acquire_packet_buffer; - retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, + retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, rl_ib_size / sizeof(uint32_t), false); if (retval) goto fail_create_runlist; @@ -499,37 +330,29 @@ fail_create_runlist_ib: int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value) { - int retval; - struct pm4_mes_query_status *packet; + uint32_t *buffer, size; + int retval = 0; if (WARN_ON(!fence_address)) return -EFAULT; + size = pm->pmf->query_status_size; mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer( - pm->priv_queue, - sizeof(struct pm4_mes_query_status) / sizeof(uint32_t), - (unsigned int **)&packet); - if (retval) - goto fail_acquire_packet_buffer; - - packet->header.u32All = build_pm4_header(IT_QUERY_STATUS, - sizeof(struct pm4_mes_query_status)); - - packet->bitfields2.context_id = 0; - packet->bitfields2.interrupt_sel = - interrupt_sel__mes_query_status__completion_status; - packet->bitfields2.command = - command__mes_query_status__fence_only_after_write_ack; - - packet->addr_hi = upper_32_bits((uint64_t)fence_address); - packet->addr_lo = lower_32_bits((uint64_t)fence_address); - packet->data_hi = upper_32_bits((uint64_t)fence_value); - packet->data_lo = lower_32_bits((uint64_t)fence_value); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; + } - pm->priv_queue->ops.submit_packet(pm->priv_queue); + retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); + else + pm->priv_queue->ops.rollback_packet(pm->priv_queue); -fail_acquire_packet_buffer: +out: mutex_unlock(&pm->lock); return retval; } @@ -539,82 +362,27 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, uint32_t filter_param, bool reset, unsigned int sdma_engine) { - int retval; - uint32_t *buffer; - struct pm4_mes_unmap_queues *packet; + uint32_t *buffer, size; + int retval = 0; + size = pm->pmf->unmap_queues_size; mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer( - pm->priv_queue, - sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t), - &buffer); - if (retval) - goto err_acquire_packet_buffer; - - packet = (struct 
pm4_mes_unmap_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); - pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n", - filter, reset, type); - packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, - sizeof(struct pm4_mes_unmap_queues)); - switch (type) { - case KFD_QUEUE_TYPE_COMPUTE: - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__compute; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__sdma0 + sdma_engine; - break; - default: - WARN(1, "queue type %d", type); - retval = -EINVAL; - goto err_invalid; + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; } - if (reset) - packet->bitfields2.action = - action__mes_unmap_queues__reset_queues; + retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param, + reset, sdma_engine); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); else - packet->bitfields2.action = - action__mes_unmap_queues__preempt_queues; - - switch (filter) { - case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_specified_queues; - packet->bitfields2.num_queues = 1; - packet->bitfields3b.doorbell_offset0 = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_BY_PASID: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; - packet->bitfields3a.pasid = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_queues; - break; - case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: - /* in this case, we do not preempt static queues */ - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_non_static_queues; - break; - default: - WARN(1, "filter %d", filter); - retval = -EINVAL; - goto err_invalid; - } + pm->priv_queue->ops.rollback_packet(pm->priv_queue); - pm->priv_queue->ops.submit_packet(pm->priv_queue); - - mutex_unlock(&pm->lock); - return 0; - -err_invalid: - pm->priv_queue->ops.rollback_packet(pm->priv_queue); -err_acquire_packet_buffer: +out: mutex_unlock(&pm->lock); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h new file mode 100644 index 000000000000..f2bcf5c092ea --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -0,0 +1,583 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef F32_MES_PM4_PACKETS_H +#define F32_MES_PM4_PACKETS_H + +#ifndef PM4_MES_HEADER_DEFINED +#define PM4_MES_HEADER_DEFINED +union PM4_MES_TYPE_3_HEADER { + struct { + uint32_t reserved1 : 8; /* < reserved */ + uint32_t opcode : 8; /* < IT opcode */ + uint32_t count : 14;/* < number of DWORDs - 1 in the + * information body. + */ + uint32_t type : 2; /* < packet identifier. + * It should be 3 for type 3 packets + */ + }; + uint32_t u32All; +}; +#endif /* PM4_MES_HEADER_DEFINED */ + +/*--------------------MES_SET_RESOURCES--------------------*/ + +#ifndef PM4_MES_SET_RESOURCES_DEFINED +#define PM4_MES_SET_RESOURCES_DEFINED +enum mes_set_resources_queue_type_enum { + queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, + queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, + queue_type__mes_set_resources__hsa_debug_interface_queue = 4 +}; + + +struct pm4_mes_set_resources { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t vmid_mask:16; + uint32_t unmap_latency:8; + uint32_t reserved1:5; + enum mes_set_resources_queue_type_enum queue_type:3; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t queue_mask_lo; + uint32_t queue_mask_hi; + uint32_t gws_mask_lo; + uint32_t gws_mask_hi; + + union { + struct { + uint32_t oac_mask:16; + uint32_t reserved2:16; + } bitfields7; + uint32_t ordinal7; + }; + + union { + struct { + uint32_t gds_heap_base:6; + uint32_t reserved3:5; + uint32_t gds_heap_size:6; + uint32_t reserved4:15; + } bitfields8; + uint32_t ordinal8; + }; + +}; +#endif + +/*--------------------MES_RUN_LIST--------------------*/ + +#ifndef PM4_MES_RUN_LIST_DEFINED +#define PM4_MES_RUN_LIST_DEFINED + +struct pm4_mes_runlist { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:2; + uint32_t ib_base_lo:30; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t ib_base_hi; + + union { + struct { + uint32_t ib_size:20; + uint32_t chain:1; + uint32_t offload_polling:1; + uint32_t reserved2:1; + uint32_t valid:1; + uint32_t process_cnt:4; + uint32_t reserved3:4; + } bitfields4; + uint32_t ordinal4; + }; + +}; +#endif + +/*--------------------MES_MAP_PROCESS--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_DEFINED +#define PM4_MES_MAP_PROCESS_DEFINED + +struct pm4_mes_map_process { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:8; + uint32_t diq_enable:1; + uint32_t process_quantum:7; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t vm_context_page_table_base_addr_lo32; + + uint32_t vm_context_page_table_base_addr_hi32; + + uint32_t sh_mem_bases; + + uint32_t sh_mem_config; + + uint32_t sq_shader_tba_lo; + + uint32_t sq_shader_tba_hi; + + uint32_t sq_shader_tma_lo; + + uint32_t sq_shader_tma_hi; + + uint32_t reserved6; + + uint32_t gds_addr_lo; + + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:6; + uint32_t reserved7:1; + uint32_t sdma_enable:1; + uint32_t num_oac:4; + uint32_t reserved8:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields14; + uint32_t ordinal14; + }; + + uint32_t 
completion_signal_lo; + + uint32_t completion_signal_hi; + +}; + +#endif + +/*--------------------MES_MAP_PROCESS_VM--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED +#define PM4_MES_MAP_PROCESS_VM_DEFINED + +struct PM4_MES_MAP_PROCESS_VM { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + uint32_t reserved1; + + uint32_t vm_context_cntl; + + uint32_t reserved2; + + uint32_t vm_context_page_table_end_addr_lo32; + + uint32_t vm_context_page_table_end_addr_hi32; + + uint32_t vm_context_page_table_start_addr_lo32; + + uint32_t vm_context_page_table_start_addr_hi32; + + uint32_t reserved3; + + uint32_t reserved4; + + uint32_t reserved5; + + uint32_t reserved6; + + uint32_t reserved7; + + uint32_t reserved8; + + uint32_t completion_signal_lo32; + + uint32_t completion_signal_hi32; + +}; +#endif + +/*--------------------MES_MAP_QUEUES--------------------*/ + +#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED +#define PM4_MES_MAP_QUEUES_VI_DEFINED +enum mes_map_queues_queue_sel_enum { + queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0, +queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1 +}; + +enum mes_map_queues_queue_type_enum { + queue_type__mes_map_queues__normal_compute_vi = 0, + queue_type__mes_map_queues__debug_interface_queue_vi = 1, + queue_type__mes_map_queues__normal_latency_static_queue_vi = 2, +queue_type__mes_map_queues__low_latency_static_queue_vi = 3 +}; + +enum mes_map_queues_alloc_format_enum { + alloc_format__mes_map_queues__one_per_pipe_vi = 0, +alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 +}; + +enum mes_map_queues_engine_sel_enum { + engine_sel__mes_map_queues__compute_vi = 0, + engine_sel__mes_map_queues__sdma0_vi = 2, + engine_sel__mes_map_queues__sdma1_vi = 3 +}; + + +struct pm4_mes_map_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:4; + enum mes_map_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:15; + enum mes_map_queues_queue_type_enum queue_type:3; + enum mes_map_queues_alloc_format_enum alloc_format:2; + enum mes_map_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t reserved3:1; + uint32_t check_disable:1; + uint32_t doorbell_offset:26; + uint32_t reserved4:4; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t mqd_addr_lo; + uint32_t mqd_addr_hi; + uint32_t wptr_addr_lo; + uint32_t wptr_addr_hi; +}; +#endif + +/*--------------------MES_QUERY_STATUS--------------------*/ + +#ifndef PM4_MES_QUERY_STATUS_DEFINED +#define PM4_MES_QUERY_STATUS_DEFINED +enum mes_query_status_interrupt_sel_enum { + interrupt_sel__mes_query_status__completion_status = 0, + interrupt_sel__mes_query_status__process_status = 1, + interrupt_sel__mes_query_status__queue_status = 2 +}; + +enum mes_query_status_command_enum { + command__mes_query_status__interrupt_only = 0, + command__mes_query_status__fence_only_immediate = 1, + command__mes_query_status__fence_only_after_write_ack = 2, + command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 +}; + +enum mes_query_status_engine_sel_enum { + engine_sel__mes_query_status__compute = 0, + engine_sel__mes_query_status__sdma0_queue = 2, + engine_sel__mes_query_status__sdma1_queue = 3 +}; + +struct pm4_mes_query_status { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t context_id:28; + enum 
mes_query_status_interrupt_sel_enum interrupt_sel:2;
+			enum mes_query_status_command_enum command:2;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved2:2;
+			uint32_t doorbell_offset:26;
+			enum mes_query_status_engine_sel_enum engine_sel:3;
+			uint32_t reserved3:1;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	uint32_t addr_lo;
+	uint32_t addr_hi;
+	uint32_t data_lo;
+	uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+	action__mes_unmap_queues__preempt_queues = 0,
+	action__mes_unmap_queues__reset_queues = 1,
+	action__mes_unmap_queues__disable_process_queues = 2,
+	action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+	queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+	queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+	engine_sel__mes_unmap_queues__compute = 0,
+	engine_sel__mes_unmap_queues__sdma0 = 2,
+	engine_sel__mes_unmap_queues__sdma1 = 3
+};
+
+struct pm4_mes_unmap_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER header; /* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			enum mes_unmap_queues_action_enum action:2;
+			uint32_t reserved1:2;
+			enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+			uint32_t reserved2:20;
+			enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved3:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved4:2;
+			uint32_t doorbell_offset0:26;
+			uint32_t reserved5:4;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	union {
+		struct {
+			uint32_t reserved6:2;
+			uint32_t doorbell_offset1:26;
+			uint32_t reserved7:4;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+	union {
+		struct {
+			uint32_t reserved8:2;
+			uint32_t doorbell_offset2:26;
+			uint32_t reserved9:4;
+		} bitfields5;
+		uint32_t ordinal5;
+	};
+
+	union {
+		struct {
+			uint32_t reserved10:2;
+			uint32_t doorbell_offset3:26;
+			uint32_t reserved11:4;
+		} bitfields6;
+		uint32_t ordinal6;
+	};
+};
+#endif
+
+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+
+enum mec_release_mem_event_index_enum {
+	event_index__mec_release_mem__end_of_pipe = 5,
+	event_index__mec_release_mem__shader_done = 6
+};
+
+enum mec_release_mem_cache_policy_enum {
+	cache_policy__mec_release_mem__lru = 0,
+	cache_policy__mec_release_mem__stream = 1
+};
+
+enum mec_release_mem_pq_exe_status_enum {
+	pq_exe_status__mec_release_mem__default = 0,
+	pq_exe_status__mec_release_mem__phase_update = 1
+};
+
+enum mec_release_mem_dst_sel_enum {
+	dst_sel__mec_release_mem__memory_controller = 0,
+	dst_sel__mec_release_mem__tc_l2 = 1,
+	dst_sel__mec_release_mem__queue_write_pointer_register = 2,
+	dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum mec_release_mem_int_sel_enum {
+	int_sel__mec_release_mem__none = 0,
+	int_sel__mec_release_mem__send_interrupt_only = 1,
+	int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
+	int_sel__mec_release_mem__send_data_after_write_confirm = 3,
+	int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
+	
int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5, + int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6 +}; + +enum mec_release_mem_data_sel_enum { + data_sel__mec_release_mem__none = 0, + data_sel__mec_release_mem__send_32_bit_low = 1, + data_sel__mec_release_mem__send_64_bit_data = 2, + data_sel__mec_release_mem__send_gpu_clock_counter = 3, + data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4, + data_sel__mec_release_mem__store_gds_data_to_memory = 5 +}; + +struct pm4_mec_release_mem { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int event_type:6; + unsigned int reserved1:2; + enum mec_release_mem_event_index_enum event_index:4; + unsigned int tcl1_vol_action_ena:1; + unsigned int tc_vol_action_ena:1; + unsigned int reserved2:1; + unsigned int tc_wb_action_ena:1; + unsigned int tcl1_action_ena:1; + unsigned int tc_action_ena:1; + uint32_t reserved3:1; + uint32_t tc_nc_action_ena:1; + uint32_t tc_wc_action_ena:1; + uint32_t tc_md_action_ena:1; + uint32_t reserved4:3; + enum mec_release_mem_cache_policy_enum cache_policy:2; + uint32_t reserved5:2; + enum mec_release_mem_pq_exe_status_enum pq_exe_status:1; + uint32_t reserved6:2; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + uint32_t reserved7:16; + enum mec_release_mem_dst_sel_enum dst_sel:2; + uint32_t reserved8:6; + enum mec_release_mem_int_sel_enum int_sel:3; + uint32_t reserved9:2; + enum mec_release_mem_data_sel_enum data_sel:3; + } bitfields3; + unsigned int ordinal3; + }; + + union { + struct { + uint32_t reserved10:2; + unsigned int address_lo_32b:30; + } bitfields4; + struct { + uint32_t reserved11:3; + uint32_t address_lo_64b:29; + } bitfields4b; + uint32_t reserved12; + unsigned int ordinal4; + }; + + union { + uint32_t address_hi; + uint32_t reserved13; + uint32_t ordinal5; + }; + + union { + uint32_t data_lo; + uint32_t cmp_data_lo; + struct { + uint32_t dw_offset:16; + uint32_t num_dwords:16; + } bitfields6c; + uint32_t reserved14; + uint32_t ordinal6; + }; + + union { + uint32_t data_hi; + uint32_t cmp_data_hi; + uint32_t reserved15; + uint32_t reserved16; + uint32_t ordinal7; + }; + + uint32_t int_ctxid; + +}; + +#endif + +enum { + CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 +}; +#endif + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 96a9cc0f02c9..5e3990bb4c4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -39,11 +39,37 @@ #include "amd_shared.h" +#define KFD_MAX_RING_ENTRY_SIZE 8 + #define KFD_SYSFS_FILE_MODE 0444 -#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull -#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull -#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull +/* GPU ID hash width in bits */ +#define KFD_GPU_ID_HASH_WIDTH 16 + +/* Use upper bits of mmap offset to store KFD driver specific information. + * BITS[63:62] - Encode MMAP type + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to + * BITS[45:0] - MMAP offset value + * + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. 
Hence, these + * defines are w.r.t to PAGE_SIZE + */ +#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) +#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) + +#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) +#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ + << KFD_MMAP_GPU_ID_SHIFT) +#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\ + & KFD_MMAP_GPU_ID_MASK) +#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ + >> KFD_MMAP_GPU_ID_SHIFT) + +#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT) +#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK) /* * When working with cp scheduler we should assign the HIQ manually or via @@ -55,9 +81,6 @@ #define KFD_CIK_HIQ_PIPE 4 #define KFD_CIK_HIQ_QUEUE 0 -/* GPU ID hash width in bits */ -#define KFD_GPU_ID_HASH_WIDTH 16 - /* Macro for allocating structures */ #define kfd_alloc_struct(ptr_to_struct) \ ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) @@ -116,6 +139,11 @@ extern int debug_largebar; */ extern int ignore_crat; +/* + * Set sh_mem_config.retry_disable on Vega10 + */ +extern int vega10_noretry; + /** * enum kfd_sched_policy * @@ -148,6 +176,8 @@ enum cache_policy { cache_policy_noncoherent }; +#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) + struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry); @@ -160,6 +190,7 @@ struct kfd_device_info { const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; unsigned int max_no_of_hqd; + unsigned int doorbell_size; size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; @@ -173,6 +204,7 @@ struct kfd_mem_obj { uint32_t range_end; uint64_t gpu_addr; uint32_t *cpu_ptr; + void *gtt_mem; }; struct kfd_vmid_info { @@ -364,7 +396,7 @@ struct queue_properties { uint32_t queue_percent; uint32_t *read_ptr; uint32_t *write_ptr; - uint32_t __iomem *doorbell_ptr; + void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; bool is_evicted; @@ -427,6 +459,7 @@ struct queue { uint32_t queue; unsigned int sdma_id; + unsigned int doorbell_id; struct kfd_process *process; struct kfd_dev *device; @@ -501,6 +534,9 @@ struct qcm_process_device { /* IB memory */ uint64_t ib_base; void *ib_kaddr; + + /* doorbell resources per process per device */ + unsigned long *doorbell_bitmap; }; /* KFD Memory Eviction */ @@ -512,6 +548,8 @@ struct qcm_process_device { /* Approx. 
time before evicting the process again */ #define PROCESS_ACTIVE_TIME_MS 10 +int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); @@ -681,6 +719,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); void kfd_unref_process(struct kfd_process *p); +int kfd_process_evict_queues(struct kfd_process *p); +int kfd_process_restore_queues(struct kfd_process *p); void kfd_suspend_all_processes(void); int kfd_resume_all_processes(void); @@ -693,7 +733,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); -int kfd_reserved_mem_mmap(struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma); /* KFD process API for creating and translating handles */ @@ -721,17 +761,20 @@ unsigned int kfd_pasid_alloc(void); void kfd_pasid_free(unsigned int pasid); /* Doorbells */ +size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); int kfd_doorbell_init(struct kfd_dev *kfd); void kfd_doorbell_fini(struct kfd_dev *kfd); -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); -u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma); +void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off); void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); u32 read_kernel_doorbell(u32 __iomem *db); -void write_kernel_doorbell(u32 __iomem *db, u32 value); -unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, +void write_kernel_doorbell(void __iomem *db, u32 value); +void write_kernel_doorbell64(void __iomem *db, u64 value); +unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, - unsigned int queue_id); + unsigned int doorbell_id); phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process); int kfd_alloc_process_doorbells(struct kfd_process *process); @@ -788,6 +831,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -832,8 +877,42 @@ struct packet_manager { bool allocated; struct kfd_mem_obj *ib_buffer_obj; unsigned int ib_size_bytes; + + const struct packet_manager_funcs *pmf; +}; + +struct packet_manager_funcs { + /* Support ASIC-specific packet formats for PM4 packets */ + int (*map_process)(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd); + int (*runlist)(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain); + int (*set_resources)(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); + int (*map_queues)(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, 
bool is_static); + int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter mode, + uint32_t filter_param, bool reset, + unsigned int sdma_engine); + int (*query_status)(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value); + int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); + + /* Packet sizes */ + int map_process_size; + int runlist_size; + int set_resources_size; + int map_queues_size; + int unmap_queues_size; + int query_status_size; + int release_mem_size; }; +extern const struct packet_manager_funcs kfd_vi_pm_funcs; +extern const struct packet_manager_funcs kfd_v9_pm_funcs; + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); void pm_uninit(struct packet_manager *pm); int pm_send_set_resources(struct packet_manager *pm, @@ -849,12 +928,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, void pm_release_ib(struct packet_manager *pm); -uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); +/* Following PM funcs can be shared among VI and AI */ +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); uint64_t kfd_get_number_elems(struct kfd_dev *kfd); /* Events */ extern const struct kfd_event_interrupt_class event_interrupt_class_cik; +extern const struct kfd_event_interrupt_class event_interrupt_class_v9; + extern const struct kfd_device_global_init_class device_global_init_class_cik; void kfd_event_init_process(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1711ad0642f7..1d80b4f7c681 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -332,6 +332,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); + kfree(pdd->qpd.doorbell_bitmap); idr_destroy(&pdd->alloc_idr); kfree(pdd); @@ -451,7 +452,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) continue; - offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; + offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id)) + << PAGE_SHIFT; qpd->tba_addr = (int64_t)vm_mmap(filep, 0, KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, MAP_SHARED, offset); @@ -585,6 +587,31 @@ err_alloc_process: return ERR_PTR(err); } +static int init_doorbell_bitmap(struct qcm_process_device *qpd, + struct kfd_dev *dev) +{ + unsigned int i; + + if (!KFD_IS_SOC15(dev->device_info->asic_family)) + return 0; + + qpd->doorbell_bitmap = + kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + BITS_PER_BYTE), GFP_KERNEL); + if (!qpd->doorbell_bitmap) + return -ENOMEM; + + /* Mask out any reserved doorbells */ + for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++) + if ((dev->shared_resources.reserved_doorbell_mask & i) == + dev->shared_resources.reserved_doorbell_val) { + set_bit(i, qpd->doorbell_bitmap); + pr_debug("reserved doorbell 0x%03x\n", i); + } + + return 0; +} + struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p) { @@ -606,6 +633,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, if (!pdd) return NULL; + if (init_doorbell_bitmap(&pdd->qpd, dev)) 
{ + pr_err("Failed to init doorbell for process\n"); + kfree(pdd); + return NULL; + } + pdd->dev = dev; INIT_LIST_HEAD(&pdd->qpd.queues_list); INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); @@ -808,7 +841,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) * Eviction is reference-counted per process-device. This means multiple * evictions from different sources can be nested safely. */ -static int process_evict_queues(struct kfd_process *p) +int kfd_process_evict_queues(struct kfd_process *p) { struct kfd_process_device *pdd; int r = 0; @@ -844,7 +877,7 @@ fail: } /* process_restore_queues - Restore all user queues of a process */ -static int process_restore_queues(struct kfd_process *p) +int kfd_process_restore_queues(struct kfd_process *p) { struct kfd_process_device *pdd; int r, ret = 0; @@ -886,7 +919,7 @@ static void evict_process_worker(struct work_struct *work) flush_delayed_work(&p->restore_work); pr_debug("Started evicting pasid %d\n", p->pasid); - ret = process_evict_queues(p); + ret = kfd_process_evict_queues(p); if (!ret) { dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -946,7 +979,7 @@ static void restore_process_worker(struct work_struct *work) return; } - ret = process_restore_queues(p); + ret = kfd_process_restore_queues(p); if (!ret) pr_debug("Finished restoring pasid %d\n", p->pasid); else @@ -963,7 +996,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - if (process_evict_queues(p)) + if (kfd_process_evict_queues(p)) pr_err("Failed to suspend process %d\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -989,15 +1022,12 @@ int kfd_resume_all_processes(void) return ret; } -int kfd_reserved_mem_mmap(struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma) { - struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); struct kfd_process_device *pdd; struct qcm_process_device *qpd; - if (!dev) - return -EINVAL; if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { pr_err("Incorrect CWSR mapping size.\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7817e327ea6d..d65ce0436b31 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -119,9 +119,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, /* Doorbell initialized in user space*/ q_properties->doorbell_ptr = NULL; - q_properties->doorbell_off = - kfd_queue_id_to_doorbell(dev, pqm->process, qid); - /* let DQM handle it*/ q_properties->vmid = 0; q_properties->queue_id = qid; @@ -244,10 +241,20 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("DQM create queue failed\n"); + pr_err("Pasid %d DQM create queue %d failed. ret %d\n", + pqm->process->pasid, type, retval); goto err_create_queue; } + if (q) + /* Return the doorbell offset within the doorbell page + * to the caller so it can be passed up to user mode + * (in bytes). 
+ */ + properties->doorbell_off = + (q->properties.doorbell_off * sizeof(uint32_t)) & + (kfd_doorbell_process_slice(dev) - 1); + pr_debug("PQM After DQM create queue\n"); list_add(&pqn->process_queue_list, &pqm->queues); @@ -313,8 +320,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { - pr_debug("Destroy queue failed, returned %d\n", retval); - goto err_destroy_queue; + pr_err("Pasid %d destroy queue %d failed, ret %d\n", + pqm->process->pasid, + pqn->q->properties.queue_id, retval); + if (retval != -ETIME) + goto err_destroy_queue; } uninit_queue(pqn->q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index a5315d4f1c95..6dcd621e5b71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q) pr_debug("Queue Address: 0x%llX\n", q->queue_address); pr_debug("Queue Id: %u\n", q->queue_id); pr_debug("Queue Process Vmid: %u\n", q->vmid); - pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr); - pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr); + pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr); + pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr); pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr); pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off); } @@ -53,8 +53,8 @@ void print_queue(struct queue *q) pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address); pr_debug("Queue Id: %u\n", q->properties.queue_id); pr_debug("Queue Process Vmid: %u\n", q->properties.vmid); - pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr); - pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr); + pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr); + pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr); pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr); pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off); pr_debug("Queue MQD Address: 0x%p\n", q->mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ac28abc94e57..bc95d4dfee2e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1239,6 +1239,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; default: WARN(1, "Unexpected ASIC family %u", dev->gpu->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index eb54cfcaf039..7d9c3f948dff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -45,6 +45,7 @@ #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 #define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 struct kfd_node_properties { diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h new file mode 100644 index 000000000000..0bc0b25cb410 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -0,0 +1,47 @@ +/* + * Copyright 2016-2018 Advanced Micro 
Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef HSA_SOC15_INT_H_INCLUDED +#define HSA_SOC15_INT_H_INCLUDED + +#include "soc15_ih_clientid.h" + +#define SOC15_INTSRC_CP_END_OF_PIPE 181 +#define SOC15_INTSRC_CP_BAD_OPCODE 183 +#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 +#define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_SDMA_TRAP 224 + + +#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff) +#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff) +#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff) +#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf) +#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1) +#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff) +#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4])) +#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5])) +#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6])) +#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7])) + +#endif + diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 5b124a67404c..e6ca72c0d347 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -9,14 +9,6 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. -config DRM_AMD_DC_PRE_VEGA - bool "DC support for Polaris and older ASICs" - default y - help - Choose this option to enable the new DC support for older asics - by default. This includes Polaris, Carrizo, Tonga, Bonaire, - and Hawaii. - config DRM_AMD_DC_FBC bool "AMD FBC - Enable Frame Buffer Compression" depends on DRM_AMD_DC @@ -42,4 +34,10 @@ config DEBUG_KERNEL_DC if you want to hit kdgb_break in assert. 
+config DRM_AMD_DC_VEGAM + bool "VEGAM support" + depends on DRM_AMD_DC + help + Choose this option if you want to have + VEGAM support for display engine endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1dd1142246c2..f2f54a9df56f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -433,11 +433,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.dce_environment = DCE_ENV_PRODUCTION_DRV; - if (amdgpu_dc_log) - init_data.log_mask = DC_DEFAULT_LOG_MASK; - else - init_data.log_mask = DC_MIN_LOG_MASK; - /* * TODO debug why this doesn't work on Raven */ @@ -649,18 +644,6 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, static int dm_resume(void *handle) { struct amdgpu_device *adev = handle; - struct amdgpu_display_manager *dm = &adev->dm; - int ret = 0; - - /* power on hardware */ - dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); - - ret = amdgpu_dm_display_resume(adev); - return ret; -} - -int amdgpu_dm_display_resume(struct amdgpu_device *adev) -{ struct drm_device *ddev = adev->ddev; struct amdgpu_display_manager *dm = &adev->dm; struct amdgpu_dm_connector *aconnector; @@ -671,10 +654,12 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) struct drm_plane *plane; struct drm_plane_state *new_plane_state; struct dm_plane_state *dm_new_plane_state; - - int ret = 0; + int ret; int i; + /* power on hardware */ + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); + /* program HPD filter */ dc_resume(dm->dc); @@ -688,8 +673,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) amdgpu_dm_irq_resume_early(adev); /* Do detection*/ - list_for_each_entry(connector, - &ddev->mode_config.connector_list, head) { + list_for_each_entry(connector, &ddev->mode_config.connector_list, head) { aconnector = to_amdgpu_dm_connector(connector); /* @@ -711,7 +695,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) } /* Force mode set in atomic comit */ - for_each_new_crtc_in_state(adev->dm.cached_state, crtc, new_crtc_state, i) + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) new_crtc_state->active_changed = true; /* @@ -719,7 +703,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) * them here, since they were duplicated as part of the suspend * procedure. 
*/ - for_each_new_crtc_in_state(adev->dm.cached_state, crtc, new_crtc_state, i) { + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); if (dm_new_crtc_state->stream) { WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1); @@ -728,7 +712,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) } } - for_each_new_plane_in_state(adev->dm.cached_state, plane, new_plane_state, i) { + for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) { dm_new_plane_state = to_dm_plane_state(new_plane_state); if (dm_new_plane_state->dc_state) { WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1); @@ -737,9 +721,9 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) } } - ret = drm_atomic_helper_resume(ddev, adev->dm.cached_state); + ret = drm_atomic_helper_resume(ddev, dm->cached_state); - adev->dm.cached_state = NULL; + dm->cached_state = NULL; amdgpu_dm_irq_resume_late(adev); @@ -1529,6 +1513,9 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS10: case CHIP_POLARIS12: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case CHIP_VEGAM: +#endif case CHIP_VEGA10: case CHIP_VEGA12: if (dce110_register_irq_handlers(dm->adev)) { @@ -1549,7 +1536,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) break; #endif default: - DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type); + DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); goto fail; } @@ -1657,7 +1644,6 @@ static ssize_t s3_debug_store(struct device *device, if (ret == 0) { if (s3_state) { dm_resume(adev); - amdgpu_dm_display_resume(adev); drm_kms_helper_hotplug_event(adev->ddev); } else dm_suspend(adev); @@ -1722,6 +1708,9 @@ static int dm_early_init(void *handle) adev->mode_info.plane_type = dm_plane_type_default; break; case CHIP_POLARIS10: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case CHIP_VEGAM: +#endif adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; @@ -1743,7 +1732,7 @@ static int dm_early_init(void *handle) break; #endif default: - DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type); + DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); return -EINVAL; } @@ -1848,7 +1837,7 @@ static bool fill_rects_from_plane_state(const struct drm_plane_state *state, static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, uint64_t *tiling_flags) { - struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->obj); + struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]); int r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { @@ -2017,7 +2006,6 @@ static int fill_plane_attributes(struct amdgpu_device *adev, const struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(plane_state->fb); const struct drm_crtc *crtc = plane_state->crtc; - struct dc_transfer_func *input_tf; int ret = 0; if (!fill_rects_from_plane_state(plane_state, dc_plane_state)) @@ -2031,13 +2019,6 @@ static int fill_plane_attributes(struct amdgpu_device *adev, if (ret) return ret; - input_tf = dc_create_transfer_func(); - - if (input_tf == NULL) - return -ENOMEM; - - dc_plane_state->in_transfer_func = input_tf; - /* * Always set input transfer function, since plane state is refreshed * every time. 
@@ -2206,7 +2187,6 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, const struct drm_connector *connector) { struct dc_crtc_timing *timing_out = &stream->timing; - struct dc_transfer_func *tf = dc_create_transfer_func(); memset(timing_out, 0, sizeof(struct dc_crtc_timing)); @@ -2250,9 +2230,8 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, stream->output_color_space = get_output_color_space(timing_out); - tf->type = TF_TYPE_PREDEFINED; - tf->tf = TRANSFER_FUNCTION_SRGB; - stream->out_transfer_func = tf; + stream->out_transfer_func->type = TF_TYPE_PREDEFINED; + stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; } static void fill_audio_info(struct audio_info *audio_info, @@ -2488,6 +2467,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, update_stream_signal(stream); + if (dm_state && dm_state->freesync_capable) + stream->ignore_msa_timing_param = true; + return stream; } @@ -2710,18 +2692,15 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) const struct dc_link *link = aconnector->dc_link; struct amdgpu_device *adev = connector->dev->dev_private; struct amdgpu_display_manager *dm = &adev->dm; + #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) && - link->type != dc_connection_none) { - amdgpu_dm_register_backlight_device(dm); - - if (dm->backlight_dev) { - backlight_device_unregister(dm->backlight_dev); - dm->backlight_dev = NULL; - } - + link->type != dc_connection_none && + dm->backlight_dev) { + backlight_device_unregister(dm->backlight_dev); + dm->backlight_dev = NULL; } #endif drm_connector_unregister(connector); @@ -2855,7 +2834,7 @@ static void handle_edid_mgmt(struct amdgpu_dm_connector *aconnector) create_eml_sink(aconnector); } -int amdgpu_dm_connector_mode_valid(struct drm_connector *connector, +enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { int result = MODE_ERROR; @@ -3058,8 +3037,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - - obj = afb->obj; + obj = new_state->fb->obj[0]; rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); r = amdgpu_bo_reserve(rbo, false); @@ -3067,12 +3045,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, return r; if (plane->type != DRM_PLANE_TYPE_CURSOR) - domain = amdgpu_display_framebuffer_domains(adev); + domain = amdgpu_display_supported_domains(adev); else domain = AMDGPU_GEM_DOMAIN_VRAM; r = amdgpu_bo_pin(rbo, domain, &afb->address); - amdgpu_bo_unreserve(rbo); if (unlikely(r != 0)) { @@ -3123,14 +3100,12 @@ static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { struct amdgpu_bo *rbo; - struct amdgpu_framebuffer *afb; int r; if (!old_state->fb) return; - afb = to_amdgpu_framebuffer(old_state->fb); - rbo = gem_to_amdgpu_bo(afb->obj); + rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { DRM_ERROR("failed to reserve rbo before unpin\n"); @@ -3773,7 +3748,7 @@ static void remove_stream(struct amdgpu_device *adev, static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, struct dc_cursor_position *position) { - struct amdgpu_crtc *amdgpu_crtc = amdgpu_crtc = to_amdgpu_crtc(crtc); + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int x, y; 
int xorigin = 0, yorigin = 0; @@ -3905,7 +3880,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, int r, vpos, hpos; struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb); - struct amdgpu_bo *abo = gem_to_amdgpu_bo(afb->obj); + struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]); struct amdgpu_device *adev = crtc->dev->dev_private; bool async_flip = (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; struct dc_flip_addrs addr = { {0} }; @@ -3986,6 +3961,96 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } +/* + * TODO this whole function needs to go + * + * dc_surface_update is needlessly complex. See if we can just replace this + * with a dc_plane_state and follow the atomic model a bit more closely here. + */ +static bool commit_planes_to_stream( + struct dc *dc, + struct dc_plane_state **plane_states, + uint8_t new_plane_count, + struct dm_crtc_state *dm_new_crtc_state, + struct dm_crtc_state *dm_old_crtc_state, + struct dc_state *state) +{ + /* no need to dynamically allocate this. it's pretty small */ + struct dc_surface_update updates[MAX_SURFACES]; + struct dc_flip_addrs *flip_addr; + struct dc_plane_info *plane_info; + struct dc_scaling_info *scaling_info; + int i; + struct dc_stream_state *dc_stream = dm_new_crtc_state->stream; + struct dc_stream_update *stream_update = + kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL); + + if (!stream_update) { + BREAK_TO_DEBUGGER(); + return false; + } + + flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs), + GFP_KERNEL); + plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info), + GFP_KERNEL); + scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info), + GFP_KERNEL); + + if (!flip_addr || !plane_info || !scaling_info) { + kfree(flip_addr); + kfree(plane_info); + kfree(scaling_info); + kfree(stream_update); + return false; + } + + memset(updates, 0, sizeof(updates)); + + stream_update->src = dc_stream->src; + stream_update->dst = dc_stream->dst; + stream_update->out_transfer_func = dc_stream->out_transfer_func; + + for (i = 0; i < new_plane_count; i++) { + updates[i].surface = plane_states[i]; + updates[i].gamma = + (struct dc_gamma *)plane_states[i]->gamma_correction; + updates[i].in_transfer_func = plane_states[i]->in_transfer_func; + flip_addr[i].address = plane_states[i]->address; + flip_addr[i].flip_immediate = plane_states[i]->flip_immediate; + plane_info[i].color_space = plane_states[i]->color_space; + plane_info[i].format = plane_states[i]->format; + plane_info[i].plane_size = plane_states[i]->plane_size; + plane_info[i].rotation = plane_states[i]->rotation; + plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror; + plane_info[i].stereo_format = plane_states[i]->stereo_format; + plane_info[i].tiling_info = plane_states[i]->tiling_info; + plane_info[i].visible = plane_states[i]->visible; + plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha; + plane_info[i].dcc = plane_states[i]->dcc; + scaling_info[i].scaling_quality = plane_states[i]->scaling_quality; + scaling_info[i].src_rect = plane_states[i]->src_rect; + scaling_info[i].dst_rect = plane_states[i]->dst_rect; + scaling_info[i].clip_rect = plane_states[i]->clip_rect; + + updates[i].flip_addr = &flip_addr[i]; + updates[i].plane_info = &plane_info[i]; + updates[i].scaling_info = &scaling_info[i]; + } + + dc_commit_updates_for_stream( + dc, + updates, + new_plane_count, + dc_stream, 
stream_update, plane_states, state); + + kfree(flip_addr); + kfree(plane_info); + kfree(scaling_info); + kfree(stream_update); + return true; +} + static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_device *dev, struct amdgpu_display_manager *dm, @@ -4001,6 +4066,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_crtc_state *new_pcrtc_state = drm_atomic_get_new_crtc_state(state, pcrtc); struct dm_crtc_state *acrtc_state = to_dm_crtc_state(new_pcrtc_state); + struct dm_crtc_state *dm_old_crtc_state = + to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc)); struct dm_atomic_state *dm_state = to_dm_atomic_state(state); int planes_count = 0; unsigned long flags; @@ -4037,7 +4104,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, } spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - if (!pflip_needed) { + if (!pflip_needed || plane->type == DRM_PLANE_TYPE_OVERLAY) { WARN_ON(!dm_new_plane_state->dc_state); plane_states_constructed[planes_count] = dm_new_plane_state->dc_state; @@ -4079,10 +4146,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } - if (false == dc_commit_planes_to_stream(dm->dc, + + if (false == commit_planes_to_stream(dm->dc, plane_states_constructed, planes_count, - dc_stream_attach, + acrtc_state, + dm_old_crtc_state, dm_state->context)) dm_error("%s: Failed to attach plane!\n", __func__); } else { @@ -4307,8 +4376,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); struct dc_stream_status *status = NULL; - if (acrtc) + if (acrtc) { new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); + old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); + } /* Skip any modesets/resets */ if (!acrtc || drm_atomic_crtc_needs_modeset(new_crtc_state)) @@ -4331,11 +4402,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) WARN_ON(!status->plane_count); /*TODO How it works with MPO ?*/ - if (!dc_commit_planes_to_stream( + if (!commit_planes_to_stream( dm->dc, status->plane_states, status->plane_count, - dm_new_crtc_state->stream, + dm_new_crtc_state, + to_dm_crtc_state(old_crtc_state), dm_state->context)) dm_error("%s: Failed to update stream scaling!\n", __func__); } @@ -4578,7 +4650,7 @@ static int dm_update_crtcs_state(struct dc *dc, if (aconnector && enable) { // Make sure fake sink is created in plug-in scenario new_con_state = drm_atomic_get_connector_state(state, - &aconnector->base); + &aconnector->base); if (IS_ERR(new_con_state)) { ret = PTR_ERR_OR_ZERO(new_con_state); @@ -4755,7 +4827,8 @@ static int dm_update_planes_state(struct dc *dc, /* Remove any changed/removed planes */ if (!enable) { - if (pflip_needed) + if (pflip_needed && + plane->type != DRM_PLANE_TYPE_OVERLAY) continue; if (!old_plane_crtc) @@ -4802,7 +4875,8 @@ static int dm_update_planes_state(struct dc *dc, if (!dm_new_crtc_state->stream) continue; - if (pflip_needed) + if (pflip_needed && + plane->type != DRM_PLANE_TYPE_OVERLAY) continue; WARN_ON(dm_new_plane_state->dc_state); @@ -5009,17 +5083,24 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector, struct edid *edid) { int i; - uint64_t val_capable; bool edid_check_required; struct detailed_timing *timing; struct detailed_non_pixel *data; struct detailed_data_monitor_range *range; struct amdgpu_dm_connector 
*amdgpu_dm_connector = to_amdgpu_dm_connector(connector); + struct dm_connector_state *dm_con_state; struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; + if (!connector->state) { + DRM_ERROR("%s - Connector has no state", __func__); + return; + } + + dm_con_state = to_dm_connector_state(connector->state); + edid_check_required = false; if (!amdgpu_dm_connector->dc_sink) { DRM_ERROR("dc_sink NULL, could not add free_sync module.\n"); @@ -5038,7 +5119,7 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector, amdgpu_dm_connector); } } - val_capable = 0; + dm_con_state->freesync_capable = false; if (edid_check_required == true && (edid->version > 1 || (edid->version == 1 && edid->revision > 1))) { for (i = 0; i < 4; i++) { @@ -5074,7 +5155,7 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector, amdgpu_dm_connector->min_vfreq * 1000000; amdgpu_dm_connector->caps.max_refresh_in_micro_hz = amdgpu_dm_connector->max_vfreq * 1000000; - val_capable = 1; + dm_con_state->freesync_capable = true; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b68400c1154b..d5aa89ad5571 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -28,7 +28,6 @@ #include <drm/drmP.h> #include <drm/drm_atomic.h> -#include "dc.h" /* * This file contains the definition for amdgpu_display_manager @@ -53,6 +52,7 @@ struct amdgpu_device; struct drm_device; struct amdgpu_dm_irq_handler_data; +struct dc; struct amdgpu_dm_prev_state { struct drm_framebuffer *fb; @@ -220,6 +220,7 @@ struct dm_connector_state { uint8_t underscan_hborder; bool underscan_enable; struct mod_freesync_user_enable user_enable; + bool freesync_capable; }; #define to_dm_connector_state(x)\ @@ -246,7 +247,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, struct dc_link *link, int link_index); -int amdgpu_dm_connector_mode_valid(struct drm_connector *connector, +enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode); void dm_restore_drm_connector_state(struct drm_device *dev, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 25f064c01038..e3d90e918d1b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -25,6 +25,7 @@ #include "amdgpu_mode.h" #include "amdgpu_dm.h" +#include "dc.h" #include "modules/color/color_gamma.h" #define MAX_DRM_LUT_VALUE 0xFFFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 89342b48be6b..0229c7edb8ad 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -37,8 +37,17 @@ unsigned long long dm_get_timestamp(struct dc_context *ctx) { - /* TODO: return actual timestamp */ - return 0; + struct timespec64 time; + + getrawmonotonic64(&time); + return timespec64_to_ns(&time); +} + +unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx, + unsigned long long current_time_stamp, + unsigned long long last_time_stamp) +{ + return current_time_stamp - last_time_stamp; } void dm_perf_trace_timestamp(const char *func_name, unsigned int line) diff --git 
a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 8a9bba879207..7191c3213743 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -26,13 +26,13 @@ #include "dm_services.h" #include "include/fixed31_32.h" -static inline uint64_t abs_i64( - int64_t arg) +static inline unsigned long long abs_i64( + long long arg) { if (arg > 0) - return (uint64_t)arg; + return (unsigned long long)arg; else - return (uint64_t)(-arg); + return (unsigned long long)(-arg); } /* @@ -40,12 +40,12 @@ static inline uint64_t abs_i64( * result = dividend / divisor * *remainder = dividend % divisor */ -static inline uint64_t complete_integer_division_u64( - uint64_t dividend, - uint64_t divisor, - uint64_t *remainder) +static inline unsigned long long complete_integer_division_u64( + unsigned long long dividend, + unsigned long long divisor, + unsigned long long *remainder) { - uint64_t result; + unsigned long long result; ASSERT(divisor); @@ -65,29 +65,29 @@ static inline uint64_t complete_integer_division_u64( (FRACTIONAL_PART_MASK & (x)) struct fixed31_32 dal_fixed31_32_from_fraction( - int64_t numerator, - int64_t denominator) + long long numerator, + long long denominator) { struct fixed31_32 res; bool arg1_negative = numerator < 0; bool arg2_negative = denominator < 0; - uint64_t arg1_value = arg1_negative ? -numerator : numerator; - uint64_t arg2_value = arg2_negative ? -denominator : denominator; + unsigned long long arg1_value = arg1_negative ? -numerator : numerator; + unsigned long long arg2_value = arg2_negative ? -denominator : denominator; - uint64_t remainder; + unsigned long long remainder; /* determine integer part */ - uint64_t res_value = complete_integer_division_u64( + unsigned long long res_value = complete_integer_division_u64( arg1_value, arg2_value, &remainder); ASSERT(res_value <= LONG_MAX); /* determine fractional part */ { - uint32_t i = FIXED31_32_BITS_PER_FRACTIONAL_PART; + unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; do { remainder <<= 1; @@ -103,14 +103,14 @@ struct fixed31_32 dal_fixed31_32_from_fraction( /* round up LSB */ { - uint64_t summand = (remainder << 1) >= arg2_value; + unsigned long long summand = (remainder << 1) >= arg2_value; ASSERT(res_value <= LLONG_MAX - summand); res_value += summand; } - res.value = (int64_t)res_value; + res.value = (long long)res_value; if (arg1_negative ^ arg2_negative) res.value = -res.value; @@ -119,7 +119,7 @@ struct fixed31_32 dal_fixed31_32_from_fraction( } struct fixed31_32 dal_fixed31_32_from_int_nonconst( - int64_t arg) + long long arg) { struct fixed31_32 res; @@ -132,7 +132,7 @@ struct fixed31_32 dal_fixed31_32_from_int_nonconst( struct fixed31_32 dal_fixed31_32_shl( struct fixed31_32 arg, - uint8_t shift) + unsigned char shift) { struct fixed31_32 res; @@ -181,16 +181,16 @@ struct fixed31_32 dal_fixed31_32_mul( bool arg1_negative = arg1.value < 0; bool arg2_negative = arg2.value < 0; - uint64_t arg1_value = arg1_negative ? -arg1.value : arg1.value; - uint64_t arg2_value = arg2_negative ? -arg2.value : arg2.value; + unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; + unsigned long long arg2_value = arg2_negative ? 
-arg2.value : arg2.value; - uint64_t arg1_int = GET_INTEGER_PART(arg1_value); - uint64_t arg2_int = GET_INTEGER_PART(arg2_value); + unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); + unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); - uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value); - uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value); + unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); + unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); - uint64_t tmp; + unsigned long long tmp; res.value = arg1_int * arg2_int; @@ -200,22 +200,22 @@ struct fixed31_32 dal_fixed31_32_mul( tmp = arg1_int * arg2_fra; - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; tmp = arg2_int * arg1_fra; - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; tmp = arg1_fra * arg2_fra; tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + - (tmp >= (uint64_t)dal_fixed31_32_half.value); + (tmp >= (unsigned long long)dal_fixed31_32_half.value); - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; @@ -230,13 +230,13 @@ struct fixed31_32 dal_fixed31_32_sqr( { struct fixed31_32 res; - uint64_t arg_value = abs_i64(arg.value); + unsigned long long arg_value = abs_i64(arg.value); - uint64_t arg_int = GET_INTEGER_PART(arg_value); + unsigned long long arg_int = GET_INTEGER_PART(arg_value); - uint64_t arg_fra = GET_FRACTIONAL_PART(arg_value); + unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); - uint64_t tmp; + unsigned long long tmp; res.value = arg_int * arg_int; @@ -246,20 +246,20 @@ struct fixed31_32 dal_fixed31_32_sqr( tmp = arg_int * arg_fra; - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; tmp = arg_fra * arg_fra; tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + - (tmp >= (uint64_t)dal_fixed31_32_half.value); + (tmp >= (unsigned long long)dal_fixed31_32_half.value); - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; @@ -288,7 +288,7 @@ struct fixed31_32 dal_fixed31_32_sinc( struct fixed31_32 res = dal_fixed31_32_one; - int32_t n = 27; + int n = 27; struct fixed31_32 arg_norm = arg; @@ -299,7 +299,7 @@ struct fixed31_32 dal_fixed31_32_sinc( arg_norm, dal_fixed31_32_mul_int( dal_fixed31_32_two_pi, - (int32_t)div64_s64( + (int)div64_s64( arg_norm.value, dal_fixed31_32_two_pi.value))); } @@ -343,7 +343,7 @@ struct fixed31_32 dal_fixed31_32_cos( struct fixed31_32 res = dal_fixed31_32_one; - int32_t n = 26; + int n = 26; do { res = dal_fixed31_32_sub( @@ -370,7 +370,7 @@ struct fixed31_32 dal_fixed31_32_cos( static struct fixed31_32 fixed31_32_exp_from_taylor_series( struct fixed31_32 arg) { - uint32_t n = 9; + unsigned int n = 9; struct fixed31_32 res = dal_fixed31_32_from_fraction( n + 2, @@ -409,7 +409,7 @@ struct fixed31_32 dal_fixed31_32_exp( if (dal_fixed31_32_le( dal_fixed31_32_ln2_div_2, dal_fixed31_32_abs(arg))) { - int32_t m = dal_fixed31_32_round( + int m = dal_fixed31_32_round( dal_fixed31_32_div( arg, dal_fixed31_32_ln2)); @@ -429,7 +429,7 @@ struct fixed31_32 dal_fixed31_32_exp( if (m > 0) return dal_fixed31_32_shl( 
fixed31_32_exp_from_taylor_series(r), - (uint8_t)m); + (unsigned char)m); else return dal_fixed31_32_div_int( fixed31_32_exp_from_taylor_series(r), @@ -482,50 +482,50 @@ struct fixed31_32 dal_fixed31_32_pow( arg2)); } -int32_t dal_fixed31_32_floor( +int dal_fixed31_32_floor( struct fixed31_32 arg) { - uint64_t arg_value = abs_i64(arg.value); + unsigned long long arg_value = abs_i64(arg.value); if (arg.value >= 0) - return (int32_t)GET_INTEGER_PART(arg_value); + return (int)GET_INTEGER_PART(arg_value); else - return -(int32_t)GET_INTEGER_PART(arg_value); + return -(int)GET_INTEGER_PART(arg_value); } -int32_t dal_fixed31_32_round( +int dal_fixed31_32_round( struct fixed31_32 arg) { - uint64_t arg_value = abs_i64(arg.value); + unsigned long long arg_value = abs_i64(arg.value); - const int64_t summand = dal_fixed31_32_half.value; + const long long summand = dal_fixed31_32_half.value; - ASSERT(LLONG_MAX - (int64_t)arg_value >= summand); + ASSERT(LLONG_MAX - (long long)arg_value >= summand); arg_value += summand; if (arg.value >= 0) - return (int32_t)GET_INTEGER_PART(arg_value); + return (int)GET_INTEGER_PART(arg_value); else - return -(int32_t)GET_INTEGER_PART(arg_value); + return -(int)GET_INTEGER_PART(arg_value); } -int32_t dal_fixed31_32_ceil( +int dal_fixed31_32_ceil( struct fixed31_32 arg) { - uint64_t arg_value = abs_i64(arg.value); + unsigned long long arg_value = abs_i64(arg.value); - const int64_t summand = dal_fixed31_32_one.value - + const long long summand = dal_fixed31_32_one.value - dal_fixed31_32_epsilon.value; - ASSERT(LLONG_MAX - (int64_t)arg_value >= summand); + ASSERT(LLONG_MAX - (long long)arg_value >= summand); arg_value += summand; if (arg.value >= 0) - return (int32_t)GET_INTEGER_PART(arg_value); + return (int)GET_INTEGER_PART(arg_value); else - return -(int32_t)GET_INTEGER_PART(arg_value); + return -(int)GET_INTEGER_PART(arg_value); } /* this function is a generic helper to translate fixed point value to @@ -535,15 +535,15 @@ int32_t dal_fixed31_32_ceil( * part in 32 bits. It is used in hw programming (scaler) */ -static inline uint32_t ux_dy( - int64_t value, - uint32_t integer_bits, - uint32_t fractional_bits) +static inline unsigned int ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits) { /* 1. create mask of integer part */ - uint32_t result = (1 << integer_bits) - 1; + unsigned int result = (1 << integer_bits) - 1; /* 2. mask out fractional part */ - uint32_t fractional_part = FRACTIONAL_PART_MASK & value; + unsigned int fractional_part = FRACTIONAL_PART_MASK & value; /* 3. shrink fixed point integer part to be of integer_bits width*/ result &= GET_INTEGER_PART(value); /* 4. 
make space for fractional part to be filled in after integer */ @@ -554,13 +554,13 @@ static inline uint32_t ux_dy( return result | fractional_part; } -static inline uint32_t clamp_ux_dy( - int64_t value, - uint32_t integer_bits, - uint32_t fractional_bits, - uint32_t min_clamp) +static inline unsigned int clamp_ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits, + unsigned int min_clamp) { - uint32_t truncated_val = ux_dy(value, integer_bits, fractional_bits); + unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) return (1 << (integer_bits + fractional_bits)) - 1; @@ -570,35 +570,35 @@ static inline uint32_t clamp_ux_dy( return min_clamp; } -uint32_t dal_fixed31_32_u2d19( +unsigned int dal_fixed31_32_u2d19( struct fixed31_32 arg) { return ux_dy(arg.value, 2, 19); } -uint32_t dal_fixed31_32_u0d19( +unsigned int dal_fixed31_32_u0d19( struct fixed31_32 arg) { return ux_dy(arg.value, 0, 19); } -uint32_t dal_fixed31_32_clamp_u0d14( +unsigned int dal_fixed31_32_clamp_u0d14( struct fixed31_32 arg) { return clamp_ux_dy(arg.value, 0, 14, 1); } -uint32_t dal_fixed31_32_clamp_u0d10( +unsigned int dal_fixed31_32_clamp_u0d10( struct fixed31_32 arg) { return clamp_ux_dy(arg.value, 0, 10, 1); } -int32_t dal_fixed31_32_s4d19( +int dal_fixed31_32_s4d19( struct fixed31_32 arg) { if (arg.value < 0) - return -(int32_t)ux_dy(dal_fixed31_32_abs(arg).value, 4, 19); + return -(int)ux_dy(dal_fixed31_32_abs(arg).value, 4, 19); else return ux_dy(arg.value, 4, 19); } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 4b5fdd577848..651e1fd4622f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -24,7 +24,7 @@ */ #include "dm_services.h" - +#include "amdgpu.h" #include "atom.h" #include "include/bios_parser_interface.h" @@ -35,16 +35,16 @@ #include "bios_parser_types_internal.h" #define EXEC_BIOS_CMD_TABLE(command, params)\ - (cgs_atom_exec_cmd_table(bp->base.ctx->cgs_device, \ + (amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \ GetIndexIntoMasterTable(COMMAND, command), \ - &params) == 0) + (uint32_t *)&params) == 0) #define BIOS_CMD_TABLE_REVISION(command, frev, crev)\ - cgs_atom_get_cmd_table_revs(bp->base.ctx->cgs_device, \ + amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \ GetIndexIntoMasterTable(COMMAND, command), &frev, &crev) #define BIOS_CMD_TABLE_PARA_REVISION(command)\ - bios_cmd_table_para_revision(bp->base.ctx->cgs_device, \ + bios_cmd_table_para_revision(bp->base.ctx->driver_context, \ GetIndexIntoMasterTable(COMMAND, command)) static void init_dig_encoder_control(struct bios_parser *bp); @@ -82,16 +82,18 @@ void dal_bios_parser_init_cmd_tbl(struct bios_parser *bp) init_set_dce_clock(bp); } -static uint32_t bios_cmd_table_para_revision(void *cgs_device, +static uint32_t bios_cmd_table_para_revision(void *dev, uint32_t index) { + struct amdgpu_device *adev = dev; uint8_t frev, crev; - if (cgs_atom_get_cmd_table_revs(cgs_device, + if (amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, - &frev, &crev) != 0) + &frev, &crev)) + return crev; + else return 0; - return crev; } /******************************************************************************* @@ -368,7 +370,7 @@ static void
init_transmitter_control(struct bios_parser *bp) uint8_t crev; if (BIOS_CMD_TABLE_REVISION(UNIPHYTransmitterControl, - frev, crev) != 0) + frev, crev) == false) BREAK_TO_DEBUGGER(); switch (crev) { case 2: diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 3f63f712c8a4..752b08a42d3e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -26,14 +26,18 @@ #include "dm_services.h" #include "ObjectID.h" -#include "atomfirmware.h" +#include "atomfirmware.h" +#include "atom.h" #include "include/bios_parser_interface.h" #include "command_table2.h" #include "command_table_helper2.h" #include "bios_parser_helper.h" #include "bios_parser_types_internal2.h" +#include "amdgpu.h" + + #define DC_LOGGER \ bp->base.ctx->logger @@ -43,16 +47,16 @@ ->FieldName)-(char *)0)/sizeof(uint16_t)) #define EXEC_BIOS_CMD_TABLE(fname, params)\ - (cgs_atom_exec_cmd_table(bp->base.ctx->cgs_device, \ + (amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \ GET_INDEX_INTO_MASTER_TABLE(command, fname), \ - &params) == 0) + (uint32_t *)&params) == 0) #define BIOS_CMD_TABLE_REVISION(fname, frev, crev)\ - cgs_atom_get_cmd_table_revs(bp->base.ctx->cgs_device, \ + amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \ GET_INDEX_INTO_MASTER_TABLE(command, fname), &frev, &crev) #define BIOS_CMD_TABLE_PARA_REVISION(fname)\ - bios_cmd_table_para_revision(bp->base.ctx->cgs_device, \ + bios_cmd_table_para_revision(bp->base.ctx->driver_context, \ GET_INDEX_INTO_MASTER_TABLE(command, fname)) static void init_dig_encoder_control(struct bios_parser *bp); @@ -86,16 +90,18 @@ void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp) init_get_smu_clock_info(bp); } -static uint32_t bios_cmd_table_para_revision(void *cgs_device, +static uint32_t bios_cmd_table_para_revision(void *dev, uint32_t index) { + struct amdgpu_device *adev = dev; uint8_t frev, crev; - if (cgs_atom_get_cmd_table_revs(cgs_device, + if (amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, - &frev, &crev) != 0) + &frev, &crev)) + return crev; + else return 0; - return crev; } /****************************************************************************** @@ -201,7 +207,7 @@ static void init_transmitter_control(struct bios_parser *bp) uint8_t frev; uint8_t crev; - if (BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) != 0) + if (BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) == false) BREAK_TO_DEBUGGER(); switch (crev) { case 6: diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c index 2979358c6a55..be066c49b984 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c @@ -51,6 +51,9 @@ bool dal_bios_parser_init_cmd_tbl_helper( return true; case DCE_VERSION_11_2: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case DCE_VERSION_11_22: +#endif *h = dal_cmd_tbl_helper_dce112_get_table(); return true; diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c index 9a4d30dd4969..9b9e06995805 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c @@ -52,6 +52,9 @@ bool
dal_bios_parser_init_cmd_tbl_helper2( return true; case DCE_VERSION_11_2: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case DCE_VERSION_11_22: +#endif *h = dal_cmd_tbl_helper_dce112_get_table2(); return true; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h b/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h new file mode 100644 index 000000000000..fc3f98fb09ea --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h @@ -0,0 +1,579 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _CALCS_CALCS_LOGGER_H_ +#define _CALCS_CALCS_LOGGER_H_ +#define DC_LOGGER \ + logger + +static void print_bw_calcs_dceip(struct dal_logger *logger, const struct bw_calcs_dceip *dceip) +{ + + DC_LOG_BANDWIDTH_CALCS("#####################################################################"); + DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_dceip"); + DC_LOG_BANDWIDTH_CALCS("#####################################################################"); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_calcs_version version %d", dceip->version); + DC_LOG_BANDWIDTH_CALCS(" [bool] large_cursor: %d", dceip->large_cursor); + DC_LOG_BANDWIDTH_CALCS(" [bool] dmif_pipe_en_fbc_chunk_tracker: %d", dceip->dmif_pipe_en_fbc_chunk_tracker); + DC_LOG_BANDWIDTH_CALCS(" [bool] display_write_back_supported: %d", dceip->display_write_back_supported); + DC_LOG_BANDWIDTH_CALCS(" [bool] argb_compression_support: %d", dceip->argb_compression_support); + DC_LOG_BANDWIDTH_CALCS(" [bool] pre_downscaler_enabled: %d", dceip->pre_downscaler_enabled); + DC_LOG_BANDWIDTH_CALCS(" [bool] underlay_downscale_prefetch_enabled: %d", + dceip->underlay_downscale_prefetch_enabled); + DC_LOG_BANDWIDTH_CALCS(" [bool] graphics_lb_nodownscaling_multi_line_prefetching: %d", + dceip->graphics_lb_nodownscaling_multi_line_prefetching); + DC_LOG_BANDWIDTH_CALCS(" [bool] limit_excessive_outstanding_dmif_requests: %d", + dceip->limit_excessive_outstanding_dmif_requests); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] cursor_max_outstanding_group_num: %d", + dceip->cursor_max_outstanding_group_num); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] lines_interleaved_into_lb: %d", dceip->lines_interleaved_into_lb); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] low_power_tiling_mode: %d", dceip->low_power_tiling_mode); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] chunk_width: %d", dceip->chunk_width); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] 
number_of_graphics_pipes: %d", dceip->number_of_graphics_pipes); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_underlay_pipes: %d", dceip->number_of_underlay_pipes); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] max_dmif_buffer_allocated: %d", dceip->max_dmif_buffer_allocated); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] graphics_dmif_size: %d", dceip->graphics_dmif_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_luma_dmif_size: %d", dceip->underlay_luma_dmif_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_chroma_dmif_size: %d", dceip->underlay_chroma_dmif_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] scatter_gather_lines_of_pte_prefetching_in_linear_mode: %d", + dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] display_write_back420_luma_mcifwr_buffer_size: %d", + dceip->display_write_back420_luma_mcifwr_buffer_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] display_write_back420_chroma_mcifwr_buffer_size: %d", + dceip->display_write_back420_chroma_mcifwr_buffer_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] scatter_gather_pte_request_rows_in_tiling_mode: %d", + dceip->scatter_gather_pte_request_rows_in_tiling_mode); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_vscaler_efficiency10_bit_per_component: %d", + bw_fixed_to_int(dceip->underlay_vscaler_efficiency10_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_vscaler_efficiency12_bit_per_component: %d", + bw_fixed_to_int(dceip->underlay_vscaler_efficiency12_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency6_bit_per_component: %d", + bw_fixed_to_int(dceip->graphics_vscaler_efficiency6_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency8_bit_per_component: %d", + bw_fixed_to_int(dceip->graphics_vscaler_efficiency8_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency10_bit_per_component: %d", + bw_fixed_to_int(dceip->graphics_vscaler_efficiency10_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency12_bit_per_component: %d", + bw_fixed_to_int(dceip->graphics_vscaler_efficiency12_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] alpha_vscaler_efficiency: %d", + bw_fixed_to_int(dceip->alpha_vscaler_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_write_pixels_per_dispclk: %d", + bw_fixed_to_int(dceip->lb_write_pixels_per_dispclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_size_per_component444: %d", + bw_fixed_to_int(dceip->lb_size_per_component444)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_and_dram_clock_state_change_gated_before_cursor: %d", + bw_fixed_to_int(dceip->stutter_and_dram_clock_state_change_gated_before_cursor)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay420_luma_lb_size_per_component: %d", + bw_fixed_to_int(dceip->underlay420_luma_lb_size_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay420_chroma_lb_size_per_component: %d", + bw_fixed_to_int(dceip->underlay420_chroma_lb_size_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay422_lb_size_per_component: %d", + bw_fixed_to_int(dceip->underlay422_lb_size_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_chunk_width: %d", bw_fixed_to_int(dceip->cursor_chunk_width)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_dcp_buffer_lines: %d", + bw_fixed_to_int(dceip->cursor_dcp_buffer_lines)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_width_efficient_for_tiling: %d", + 
bw_fixed_to_int(dceip->underlay_maximum_width_efficient_for_tiling));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_height_efficient_for_tiling: %d",
+ bw_fixed_to_int(dceip->underlay_maximum_height_efficient_for_tiling));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display: %d",
+ bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation: %d",
+ bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_outstanding_pte_request_limit: %d",
+ bw_fixed_to_int(dceip->minimum_outstanding_pte_request_limit));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_total_outstanding_pte_requests_allowed_by_saw: %d",
+ bw_fixed_to_int(dceip->maximum_total_outstanding_pte_requests_allowed_by_saw));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] linear_mode_line_request_alternation_slice: %d",
+ bw_fixed_to_int(dceip->linear_mode_line_request_alternation_slice));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] request_efficiency: %d", bw_fixed_to_int(dceip->request_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_per_request: %d", bw_fixed_to_int(dceip->dispclk_per_request));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_ramping_factor: %d",
+ bw_fixed_to_int(dceip->dispclk_ramping_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_pipe_throughput_factor: %d",
+ bw_fixed_to_int(dceip->display_pipe_throughput_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_all_surfaces_burst_time: %d",
+ bw_fixed_to_int(dceip->mcifwr_all_surfaces_burst_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_request_buffer_size: %d",
+ bw_fixed_to_int(dceip->dmif_request_buffer_size));
+
+
+}
+
+static void print_bw_calcs_vbios(struct dal_logger *logger, const struct bw_calcs_vbios *vbios)
+{
+
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_vbios vbios");
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines memory_type: %d", vbios->memory_type);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] dram_channel_width_in_bits: %d", vbios->dram_channel_width_in_bits);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_channels: %d", vbios->number_of_dram_channels);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_banks: %d", vbios->number_of_dram_banks);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_yclk: %d", bw_fixed_to_int(vbios->low_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_yclk: %d", bw_fixed_to_int(vbios->mid_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_yclk: %d", bw_fixed_to_int(vbios->high_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_sclk: %d", bw_fixed_to_int(vbios->low_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid1_sclk: %d", bw_fixed_to_int(vbios->mid1_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid2_sclk: %d", bw_fixed_to_int(vbios->mid2_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid3_sclk: %d", bw_fixed_to_int(vbios->mid3_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid4_sclk: %d", bw_fixed_to_int(vbios->mid4_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid5_sclk: %d", bw_fixed_to_int(vbios->mid5_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid6_sclk: %d", bw_fixed_to_int(vbios->mid6_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_sclk: %d", bw_fixed_to_int(vbios->high_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_voltage_max_dispclk: %d",
+ bw_fixed_to_int(vbios->low_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_voltage_max_dispclk: %d",
+ bw_fixed_to_int(vbios->mid_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_voltage_max_dispclk: %d",
+ bw_fixed_to_int(vbios->high_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->low_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->mid_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->high_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] data_return_bus_width: %d", bw_fixed_to_int(vbios->data_return_bus_width));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] trc: %d", bw_fixed_to_int(vbios->trc));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifmc_urgent_latency: %d", bw_fixed_to_int(vbios->dmifmc_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_self_refresh_exit_latency: %d",
+ bw_fixed_to_int(vbios->stutter_self_refresh_exit_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_self_refresh_entry_latency: %d",
+ bw_fixed_to_int(vbios->stutter_self_refresh_entry_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_change_latency: %d",
+ bw_fixed_to_int(vbios->nbp_state_change_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwrmc_urgent_latency: %d",
+ bw_fixed_to_int(vbios->mcifwrmc_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bool] scatter_gather_enable: %d", vbios->scatter_gather_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] down_spread_percentage: %d",
+ bw_fixed_to_int(vbios->down_spread_percentage));
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] cursor_width: %d", vbios->cursor_width);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] average_compression_rate: %d", vbios->average_compression_rate);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_request_slots_gmc_reserves_for_dmif_per_channel: %d",
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_duration: %d", bw_fixed_to_int(vbios->blackout_duration));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_blackout_recovery_time: %d",
+ bw_fixed_to_int(vbios->maximum_blackout_recovery_time));
+
+
+}
+
+static void print_bw_calcs_data(struct dal_logger *logger, struct bw_calcs_data *data)
+{
+
+ int i, j, k;
+
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_data data");
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_displays: %d", data->number_of_displays);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_surface_type: %d", data->underlay_surface_type);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines panning_and_bezel_adjustment: %d",
+ data->panning_and_bezel_adjustment);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines graphics_tiling_mode: %d", data->graphics_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] graphics_lb_bpc: %d", data->graphics_lb_bpc);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_lb_bpc: %d", data->underlay_lb_bpc);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_tiling_mode: 
%d", data->underlay_tiling_mode); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines d0_underlay_mode: %d", data->d0_underlay_mode); + DC_LOG_BANDWIDTH_CALCS(" [bool] d1_display_write_back_dwb_enable: %d", data->d1_display_write_back_dwb_enable); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines d1_underlay_mode: %d", data->d1_underlay_mode); + DC_LOG_BANDWIDTH_CALCS(" [bool] cpup_state_change_enable: %d", data->cpup_state_change_enable); + DC_LOG_BANDWIDTH_CALCS(" [bool] cpuc_state_change_enable: %d", data->cpuc_state_change_enable); + DC_LOG_BANDWIDTH_CALCS(" [bool] nbp_state_change_enable: %d", data->nbp_state_change_enable); + DC_LOG_BANDWIDTH_CALCS(" [bool] stutter_mode_enable: %d", data->stutter_mode_enable); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] y_clk_level: %d", data->y_clk_level); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] sclk_level: %d", data->sclk_level); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_underlay_surfaces: %d", data->number_of_underlay_surfaces); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_wrchannels: %d", data->number_of_dram_wrchannels); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] chunk_request_delay: %d", data->chunk_request_delay); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_channels: %d", data->number_of_dram_channels); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_micro_tile_mode: %d", data->underlay_micro_tile_mode); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines graphics_micro_tile_mode: %d", data->graphics_micro_tile_mode); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] max_phyclk: %d", bw_fixed_to_int(data->max_phyclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_efficiency: %d", bw_fixed_to_int(data->dram_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width_after_surface_type: %d", + bw_fixed_to_int(data->src_width_after_surface_type)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height_after_surface_type: %d", + bw_fixed_to_int(data->src_height_after_surface_type)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_surface_type: %d", + bw_fixed_to_int(data->hsr_after_surface_type)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_surface_type: %d", bw_fixed_to_int(data->vsr_after_surface_type)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width_after_rotation: %d", + bw_fixed_to_int(data->src_width_after_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height_after_rotation: %d", + bw_fixed_to_int(data->src_height_after_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_rotation: %d", bw_fixed_to_int(data->hsr_after_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_rotation: %d", bw_fixed_to_int(data->vsr_after_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_height_pixels: %d", bw_fixed_to_int(data->source_height_pixels)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_stereo: %d", bw_fixed_to_int(data->hsr_after_stereo)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_stereo: %d", bw_fixed_to_int(data->vsr_after_stereo)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_in_lb: %d", bw_fixed_to_int(data->source_width_in_lb)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_line_pitch: %d", bw_fixed_to_int(data->lb_line_pitch)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_source_efficient_for_tiling: %d", + bw_fixed_to_int(data->underlay_maximum_source_efficient_for_tiling)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] num_lines_at_frame_start: %d", + bw_fixed_to_int(data->num_lines_at_frame_start)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_dmif_size_in_time: %d", bw_fixed_to_int(data->min_dmif_size_in_time)); + 
DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_mcifwr_size_in_time: %d", + bw_fixed_to_int(data->min_mcifwr_size_in_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_requests_for_dmif_size: %d", + bw_fixed_to_int(data->total_requests_for_dmif_size)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting: %d", + bw_fixed_to_int(data->peak_pte_request_to_eviction_ratio_limiting)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] useful_pte_per_pte_request: %d", + bw_fixed_to_int(data->useful_pte_per_pte_request)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_request_rows: %d", + bw_fixed_to_int(data->scatter_gather_pte_request_rows)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_row_height: %d", + bw_fixed_to_int(data->scatter_gather_row_height)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_requests_in_vblank: %d", + bw_fixed_to_int(data->scatter_gather_pte_requests_in_vblank)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] inefficient_linear_pitch_in_bytes: %d", + bw_fixed_to_int(data->inefficient_linear_pitch_in_bytes)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_total_data: %d", bw_fixed_to_int(data->cursor_total_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_total_request_groups: %d", + bw_fixed_to_int(data->cursor_total_request_groups)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_total_pte_requests: %d", + bw_fixed_to_int(data->scatter_gather_total_pte_requests)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_total_pte_request_groups: %d", + bw_fixed_to_int(data->scatter_gather_total_pte_request_groups)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] tile_width_in_pixels: %d", bw_fixed_to_int(data->tile_width_in_pixels)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_total_number_of_data_request_page_close_open: %d", + bw_fixed_to_int(data->dmif_total_number_of_data_request_page_close_open)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_total_number_of_data_request_page_close_open: %d", + bw_fixed_to_int(data->mcifwr_total_number_of_data_request_page_close_open)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] bytes_per_page_close_open: %d", + bw_fixed_to_int(data->bytes_per_page_close_open)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_total_page_close_open_time: %d", + bw_fixed_to_int(data->mcifwr_total_page_close_open_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_requests_for_adjusted_dmif_size: %d", + bw_fixed_to_int(data->total_requests_for_adjusted_dmif_size)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dmifmc_urgent_trips: %d", + bw_fixed_to_int(data->total_dmifmc_urgent_trips)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dmifmc_urgent_latency: %d", + bw_fixed_to_int(data->total_dmifmc_urgent_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_reads_required_data: %d", + bw_fixed_to_int(data->total_display_reads_required_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_reads_required_dram_access_data: %d", + bw_fixed_to_int(data->total_display_reads_required_dram_access_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_writes_required_data: %d", + bw_fixed_to_int(data->total_display_writes_required_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_writes_required_dram_access_data: %d", + bw_fixed_to_int(data->total_display_writes_required_dram_access_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_required_data: %d", + bw_fixed_to_int(data->display_reads_required_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_required_dram_access_data: %d", + 
bw_fixed_to_int(data->display_reads_required_dram_access_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_total_page_close_open_time: %d", + bw_fixed_to_int(data->dmif_total_page_close_open_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_cursor_memory_interface_buffer_size_in_time: %d", + bw_fixed_to_int(data->min_cursor_memory_interface_buffer_size_in_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_read_buffer_size_in_time: %d", + bw_fixed_to_int(data->min_read_buffer_size_in_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_time_for_data_transfer: %d", + bw_fixed_to_int(data->display_reads_time_for_data_transfer)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_writes_time_for_data_transfer: %d", + bw_fixed_to_int(data->display_writes_time_for_data_transfer)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_dram_bandwidth: %d", + bw_fixed_to_int(data->dmif_required_dram_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_required_dram_bandwidth: %d", + bw_fixed_to_int(data->mcifwr_required_dram_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_dmifmc_urgent_latency_for_page_close_open: %d", + bw_fixed_to_int(data->required_dmifmc_urgent_latency_for_page_close_open)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_mcifmcwr_urgent_latency: %d", + bw_fixed_to_int(data->required_mcifmcwr_urgent_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_dram_bandwidth_gbyte_per_second: %d", + bw_fixed_to_int(data->required_dram_bandwidth_gbyte_per_second)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_bandwidth: %d", bw_fixed_to_int(data->dram_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_sclk: %d", bw_fixed_to_int(data->dmif_required_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_required_sclk: %d", bw_fixed_to_int(data->mcifwr_required_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_sclk: %d", bw_fixed_to_int(data->required_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] downspread_factor: %d", bw_fixed_to_int(data->downspread_factor)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_scaler_efficiency: %d", bw_fixed_to_int(data->v_scaler_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scaler_limits_factor: %d", bw_fixed_to_int(data->scaler_limits_factor)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_pipe_pixel_throughput: %d", + bw_fixed_to_int(data->display_pipe_pixel_throughput)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_with_ramping: %d", + bw_fixed_to_int(data->total_dispclk_required_with_ramping)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_without_ramping: %d", + bw_fixed_to_int(data->total_dispclk_required_without_ramping)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_read_request_bandwidth: %d", + bw_fixed_to_int(data->total_read_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_write_request_bandwidth: %d", + bw_fixed_to_int(data->total_write_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_total_read_request_bandwidth: %d", + bw_fixed_to_int(data->dispclk_required_for_total_read_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_with_ramping_with_request_bandwidth: %d", + bw_fixed_to_int(data->total_dispclk_required_with_ramping_with_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_without_ramping_with_request_bandwidth: %d", + bw_fixed_to_int(data->total_dispclk_required_without_ramping_with_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk: %d", 
bw_fixed_to_int(data->dispclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_recovery_time: %d", bw_fixed_to_int(data->blackout_recovery_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_pixels_per_data_fifo_entry: %d", + bw_fixed_to_int(data->min_pixels_per_data_fifo_entry)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] sclk_deep_sleep: %d", bw_fixed_to_int(data->sclk_deep_sleep)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] chunk_request_time: %d", bw_fixed_to_int(data->chunk_request_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_request_time: %d", bw_fixed_to_int(data->cursor_request_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] line_source_pixels_transfer_time: %d", + bw_fixed_to_int(data->line_source_pixels_transfer_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifdram_access_efficiency: %d", + bw_fixed_to_int(data->dmifdram_access_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwrdram_access_efficiency: %d", + bw_fixed_to_int(data->mcifwrdram_access_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_average_bandwidth_no_compression: %d", + bw_fixed_to_int(data->total_average_bandwidth_no_compression)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_average_bandwidth: %d", + bw_fixed_to_int(data->total_average_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_stutter_cycle_duration: %d", + bw_fixed_to_int(data->total_stutter_cycle_duration)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_burst_time: %d", bw_fixed_to_int(data->stutter_burst_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] time_in_self_refresh: %d", bw_fixed_to_int(data->time_in_self_refresh)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_efficiency: %d", bw_fixed_to_int(data->stutter_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] worst_number_of_trips_to_memory: %d", + bw_fixed_to_int(data->worst_number_of_trips_to_memory)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] immediate_flip_time: %d", bw_fixed_to_int(data->immediate_flip_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_for_non_dmif_clients: %d", + bw_fixed_to_int(data->latency_for_non_dmif_clients)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_for_non_mcifwr_clients: %d", + bw_fixed_to_int(data->latency_for_non_mcifwr_clients)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifmc_urgent_latency_supported_in_high_sclk_and_yclk: %d", + bw_fixed_to_int(data->dmifmc_urgent_latency_supported_in_high_sclk_and_yclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_dram_speed_change_margin: %d", + bw_fixed_to_int(data->nbp_state_dram_speed_change_margin)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_time_for_data_transfer_and_urgent_latency: %d", + bw_fixed_to_int(data->display_reads_time_for_data_transfer_and_urgent_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_speed_change_margin: %d", + bw_fixed_to_int(data->dram_speed_change_margin)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_vblank_dram_speed_change_margin: %d", + bw_fixed_to_int(data->min_vblank_dram_speed_change_margin)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_stutter_refresh_duration: %d", + bw_fixed_to_int(data->min_stutter_refresh_duration)); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] total_stutter_dmif_buffer_size: %d", data->total_stutter_dmif_buffer_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] total_bytes_requested: %d", data->total_bytes_requested); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] min_stutter_dmif_buffer_size: %d", data->min_stutter_dmif_buffer_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] num_stutter_bursts: %d", data->num_stutter_bursts); + DC_LOG_BANDWIDTH_CALCS(" 
[bw_fixed] v_blank_nbp_state_dram_speed_change_latency_supported: %d", + bw_fixed_to_int(data->v_blank_nbp_state_dram_speed_change_latency_supported)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_dram_speed_change_latency_supported: %d", + bw_fixed_to_int(data->nbp_state_dram_speed_change_latency_supported)); + + for (i = 0; i < maximum_number_of_surfaces; i++) { + DC_LOG_BANDWIDTH_CALCS(" [bool] fbc_en[%d]:%d\n", i, data->fbc_en[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] lpt_en[%d]:%d", i, data->lpt_en[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] displays_match_flag[%d]:%d", i, data->displays_match_flag[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] use_alpha[%d]:%d", i, data->use_alpha[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] orthogonal_rotation[%d]:%d", i, data->orthogonal_rotation[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] enable[%d]:%d", i, data->enable[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] access_one_channel_only[%d]:%d", i, data->access_one_channel_only[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] scatter_gather_enable_for_pipe[%d]:%d", + i, data->scatter_gather_enable_for_pipe[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] interlace_mode[%d]:%d", + i, data->interlace_mode[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] display_pstate_change_enable[%d]:%d", + i, data->display_pstate_change_enable[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] line_buffer_prefetch[%d]:%d", i, data->line_buffer_prefetch[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] bytes_per_pixel[%d]:%d", i, data->bytes_per_pixel[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] max_chunks_non_fbc_mode[%d]:%d", + i, data->max_chunks_non_fbc_mode[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] lb_bpc[%d]:%d", i, data->lb_bpc[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bpphdmi[%d]:%d", i, data->output_bpphdmi[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr[%d]:%d", i, data->output_bppdp4_lane_hbr[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr2[%d]:%d", + i, data->output_bppdp4_lane_hbr2[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr3[%d]:%d", + i, data->output_bppdp4_lane_hbr3[i]); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines stereo_mode[%d]:%d", i, data->stereo_mode[i]); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_buffer_transfer_time[%d]:%d", + i, bw_fixed_to_int(data->dmif_buffer_transfer_time[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] displays_with_same_mode[%d]:%d", + i, bw_fixed_to_int(data->displays_with_same_mode[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_dmif_buffer_size[%d]:%d", + i, bw_fixed_to_int(data->stutter_dmif_buffer_size[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_refresh_duration[%d]:%d", + i, bw_fixed_to_int(data->stutter_refresh_duration[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_exit_watermark[%d]:%d", + i, bw_fixed_to_int(data->stutter_exit_watermark[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_entry_watermark[%d]:%d", + i, bw_fixed_to_int(data->stutter_entry_watermark[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_total[%d]:%d", i, bw_fixed_to_int(data->h_total[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_total[%d]:%d", i, bw_fixed_to_int(data->v_total[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pixel_rate[%d]:%d", i, bw_fixed_to_int(data->pixel_rate[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width[%d]:%d", i, bw_fixed_to_int(data->src_width[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pitch_in_pixels[%d]:%d", + i, bw_fixed_to_int(data->pitch_in_pixels[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pitch_in_pixels_after_surface_type[%d]:%d", + i, 
bw_fixed_to_int(data->pitch_in_pixels_after_surface_type[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height[%d]:%d", i, bw_fixed_to_int(data->src_height[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scale_ratio[%d]:%d", i, bw_fixed_to_int(data->scale_ratio[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_taps[%d]:%d", i, bw_fixed_to_int(data->h_taps[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_taps[%d]:%d", i, bw_fixed_to_int(data->v_taps[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->h_scale_ratio[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->v_scale_ratio[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] rotation_angle[%d]:%d", + i, bw_fixed_to_int(data->rotation_angle[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] compression_rate[%d]:%d", + i, bw_fixed_to_int(data->compression_rate[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr[%d]:%d", i, bw_fixed_to_int(data->hsr[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr[%d]:%d", i, bw_fixed_to_int(data->vsr[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_rounded_up_to_chunks[%d]:%d", + i, bw_fixed_to_int(data->source_width_rounded_up_to_chunks[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_pixels[%d]:%d", + i, bw_fixed_to_int(data->source_width_pixels[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_height_rounded_up_to_chunks[%d]:%d", + i, bw_fixed_to_int(data->source_height_rounded_up_to_chunks[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_bandwidth[%d]:%d", + i, bw_fixed_to_int(data->display_bandwidth[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] request_bandwidth[%d]:%d", + i, bw_fixed_to_int(data->request_bandwidth[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] bytes_per_request[%d]:%d", + i, bw_fixed_to_int(data->bytes_per_request[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] useful_bytes_per_request[%d]:%d", + i, bw_fixed_to_int(data->useful_bytes_per_request[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lines_interleaved_in_mem_access[%d]:%d", + i, bw_fixed_to_int(data->lines_interleaved_in_mem_access[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_hiding_lines[%d]:%d", + i, bw_fixed_to_int(data->latency_hiding_lines[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_partitions[%d]:%d", + i, bw_fixed_to_int(data->lb_partitions[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_partitions_max[%d]:%d", + i, bw_fixed_to_int(data->lb_partitions_max[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_with_ramping[%d]:%d", + i, bw_fixed_to_int(data->dispclk_required_with_ramping[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_without_ramping[%d]:%d", + i, bw_fixed_to_int(data->dispclk_required_without_ramping[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] data_buffer_size[%d]:%d", + i, bw_fixed_to_int(data->data_buffer_size[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] outstanding_chunk_request_limit[%d]:%d", + i, bw_fixed_to_int(data->outstanding_chunk_request_limit[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] urgent_watermark[%d]:%d", + i, bw_fixed_to_int(data->urgent_watermark[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_change_watermark[%d]:%d", + i, bw_fixed_to_int(data->nbp_state_change_watermark[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_filter_init[%d]:%d", i, bw_fixed_to_int(data->v_filter_init[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_cycle_duration[%d]:%d", + i, bw_fixed_to_int(data->stutter_cycle_duration[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] average_bandwidth[%d]:%d", + i, 
bw_fixed_to_int(data->average_bandwidth[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] average_bandwidth_no_compression[%d]:%d", + i, bw_fixed_to_int(data->average_bandwidth_no_compression[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_request_limit[%d]:%d", + i, bw_fixed_to_int(data->scatter_gather_pte_request_limit[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_size_per_component[%d]:%d", + i, bw_fixed_to_int(data->lb_size_per_component[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] memory_chunk_size_in_bytes[%d]:%d", + i, bw_fixed_to_int(data->memory_chunk_size_in_bytes[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pipe_chunk_size_in_bytes[%d]:%d", + i, bw_fixed_to_int(data->pipe_chunk_size_in_bytes[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] number_of_trips_to_memory_for_getting_apte_row[%d]:%d", + i, bw_fixed_to_int(data->number_of_trips_to_memory_for_getting_apte_row[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] adjusted_data_buffer_size[%d]:%d", + i, bw_fixed_to_int(data->adjusted_data_buffer_size[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] adjusted_data_buffer_size_in_memory[%d]:%d", + i, bw_fixed_to_int(data->adjusted_data_buffer_size_in_memory[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pixels_per_data_fifo_entry[%d]:%d", + i, bw_fixed_to_int(data->pixels_per_data_fifo_entry[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_requests_in_row[%d]:%d", + i, bw_fixed_to_int(data->scatter_gather_pte_requests_in_row[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pte_request_per_chunk[%d]:%d", + i, bw_fixed_to_int(data->pte_request_per_chunk[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_page_width[%d]:%d", + i, bw_fixed_to_int(data->scatter_gather_page_width[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_page_height[%d]:%d", + i, bw_fixed_to_int(data->scatter_gather_page_height[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_lines_in_per_line_out_in_beginning_of_frame[%d]:%d", + i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_beginning_of_frame[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_lines_in_per_line_out_in_middle_of_frame[%d]:%d", + i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_middle_of_frame[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_width_pixels[%d]:%d", + i, bw_fixed_to_int(data->cursor_width_pixels[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_latency_hiding[%d]:%d", + i, bw_fixed_to_int(data->minimum_latency_hiding[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_latency_hiding[%d]:%d", + i, bw_fixed_to_int(data->maximum_latency_hiding[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_latency_hiding_with_cursor[%d]:%d", + i, bw_fixed_to_int(data->minimum_latency_hiding_with_cursor[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_latency_hiding_with_cursor[%d]:%d", + i, bw_fixed_to_int(data->maximum_latency_hiding_with_cursor[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_pixels_for_first_output_pixel[%d]:%d", + i, bw_fixed_to_int(data->src_pixels_for_first_output_pixel[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_pixels_for_last_output_pixel[%d]:%d", + i, bw_fixed_to_int(data->src_pixels_for_last_output_pixel[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_data_for_first_output_pixel[%d]:%d", + i, bw_fixed_to_int(data->src_data_for_first_output_pixel[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_data_for_last_output_pixel[%d]:%d", + i, bw_fixed_to_int(data->src_data_for_last_output_pixel[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] active_time[%d]:%d", i, bw_fixed_to_int(data->active_time[i])); 
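/*
 * All of the "[bw_fixed]" values in this trace are printed through
 * bw_fixed_to_int(), which simply drops the fractional part of DC's
 * fixed-point type, so a stored 1.05 is traced as 1. A minimal sketch of
 * the conversion, assuming the signed 64-bit representation with 24
 * fractional bits used by dc/inc/bw_fixed.h; the _sketch names below are
 * illustrative, not the driver's own symbols.
 */
#include <linux/types.h>

struct bw_fixed_sketch { int64_t value; };
#define BW_FIXED_FRACTIONAL_BITS 24

static inline struct bw_fixed_sketch int_to_fixed_sketch(int64_t v)
{
	/* the integer part lives above bit 24; the low 24 bits hold the fraction */
	struct bw_fixed_sketch res = { .value = v << BW_FIXED_FRACTIONAL_BITS };
	return res;
}

static inline int64_t fixed_to_int_sketch(struct bw_fixed_sketch v)
{
	/* arithmetic shift discards the 24 fractional bits: truncation, not rounding */
	return v.value >> BW_FIXED_FRACTIONAL_BITS;
}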
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] horizontal_blank_and_chunk_granularity_factor[%d]:%d", + i, bw_fixed_to_int(data->horizontal_blank_and_chunk_granularity_factor[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_latency_hiding[%d]:%d", + i, bw_fixed_to_int(data->cursor_latency_hiding[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_blank_dram_speed_change_margin[%d]:%d", + i, bw_fixed_to_int(data->v_blank_dram_speed_change_margin[i])); + } + + for (i = 0; i < maximum_number_of_surfaces; i++) { + for (j = 0; j < 3; j++) { + for (k = 0; k < 8; k++) { + + DC_LOG_BANDWIDTH_CALCS("\n [bw_fixed] line_source_transfer_time[%d][%d][%d]:%d", + i, j, k, bw_fixed_to_int(data->line_source_transfer_time[i][j][k])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_speed_change_line_source_transfer_time[%d][%d][%d]:%d", + i, j, k, + bw_fixed_to_int(data->dram_speed_change_line_source_transfer_time[i][j][k])); + } + } + } + + for (i = 0; i < 3; i++) { + for (j = 0; j < 8; j++) { + + DC_LOG_BANDWIDTH_CALCS("\n [uint32_t] num_displays_with_margin[%d][%d]:%d", + i, j, data->num_displays_with_margin[i][j]); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_burst_time[%d][%d]:%d", + i, j, bw_fixed_to_int(data->dmif_burst_time[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_burst_time[%d][%d]:%d", + i, j, bw_fixed_to_int(data->mcifwr_burst_time[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_dram_speed_change_margin[%d][%d]:%d", + i, j, bw_fixed_to_int(data->min_dram_speed_change_margin[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_dram_speed_change[%d][%d]:%d", + i, j, bw_fixed_to_int(data->dispclk_required_for_dram_speed_change[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_duration_margin[%d][%d]:%d", + i, j, bw_fixed_to_int(data->blackout_duration_margin[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_blackout_duration[%d][%d]:%d", + i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_duration[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_blackout_recovery[%d][%d]:%d", + i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_recovery[i][j])); + } + } + + for (i = 0; i < 6; i++) { + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_sclk_for_urgent_latency[%d]:%d", + i, bw_fixed_to_int(data->dmif_required_sclk_for_urgent_latency[i])); + } +} +; + +#endif /* _CALCS_CALCS_LOGGER_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index 0cbab81ab304..4ee3c26f7c13 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -28,6 +28,7 @@ #include "dc.h" #include "core_types.h" #include "dal_asic_id.h" +#include "calcs_logger.h" /* * NOTE: @@ -52,11 +53,16 @@ static enum bw_calcs_version bw_calcs_version_from_asic_id(struct hw_asic_id asi return BW_CALCS_VERSION_CARRIZO; case FAMILY_VI: + if (ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) + return BW_CALCS_VERSION_POLARIS12; if (ASIC_REV_IS_POLARIS10_P(asic_id.hw_internal_rev)) return BW_CALCS_VERSION_POLARIS10; - if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev) || - ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) + if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev)) return BW_CALCS_VERSION_POLARIS11; +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev)) + return BW_CALCS_VERSION_VEGAM; +#endif return BW_CALCS_VERSION_INVALID; case FAMILY_AI: @@ -2145,6 +2151,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, 
dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); /* todo: this is a bug*/ break; case BW_CALCS_VERSION_POLARIS10: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + /* TODO: Treat VEGAM the same as P10 for now + * Need to tune the para for VEGAM if needed */ + case BW_CALCS_VERSION_VEGAM: +#endif vbios.memory_type = bw_def_gddr5; vbios.dram_channel_width_in_bits = 32; vbios.number_of_dram_channels = asic_id.vram_width / vbios.dram_channel_width_in_bits; @@ -2373,6 +2384,122 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, dceip.scatter_gather_pte_request_rows_in_tiling_mode = 2; dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); break; + case BW_CALCS_VERSION_POLARIS12: + vbios.memory_type = bw_def_gddr5; + vbios.dram_channel_width_in_bits = 32; + vbios.number_of_dram_channels = asic_id.vram_width / vbios.dram_channel_width_in_bits; + vbios.number_of_dram_banks = 8; + vbios.high_yclk = bw_int_to_fixed(6000); + vbios.mid_yclk = bw_int_to_fixed(3200); + vbios.low_yclk = bw_int_to_fixed(1000); + vbios.low_sclk = bw_int_to_fixed(678); + vbios.mid1_sclk = bw_int_to_fixed(864); + vbios.mid2_sclk = bw_int_to_fixed(900); + vbios.mid3_sclk = bw_int_to_fixed(920); + vbios.mid4_sclk = bw_int_to_fixed(940); + vbios.mid5_sclk = bw_int_to_fixed(960); + vbios.mid6_sclk = bw_int_to_fixed(980); + vbios.high_sclk = bw_int_to_fixed(1049); + vbios.low_voltage_max_dispclk = bw_int_to_fixed(459); + vbios.mid_voltage_max_dispclk = bw_int_to_fixed(654); + vbios.high_voltage_max_dispclk = bw_int_to_fixed(1108); + vbios.low_voltage_max_phyclk = bw_int_to_fixed(540); + vbios.mid_voltage_max_phyclk = bw_int_to_fixed(810); + vbios.high_voltage_max_phyclk = bw_int_to_fixed(810); + vbios.data_return_bus_width = bw_int_to_fixed(32); + vbios.trc = bw_int_to_fixed(48); + if (vbios.number_of_dram_channels == 2) // 64-bit + vbios.dmifmc_urgent_latency = bw_int_to_fixed(4); + else + vbios.dmifmc_urgent_latency = bw_int_to_fixed(3); + vbios.stutter_self_refresh_exit_latency = bw_int_to_fixed(5); + vbios.stutter_self_refresh_entry_latency = bw_int_to_fixed(0); + vbios.nbp_state_change_latency = bw_int_to_fixed(250); + vbios.mcifwrmc_urgent_latency = bw_int_to_fixed(10); + vbios.scatter_gather_enable = false; + vbios.down_spread_percentage = bw_frc_to_fixed(5, 10); + vbios.cursor_width = 32; + vbios.average_compression_rate = 4; + vbios.number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256; + vbios.blackout_duration = bw_int_to_fixed(0); /* us */ + vbios.maximum_blackout_recovery_time = bw_int_to_fixed(0); + + dceip.max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100; + dceip.max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100; + dceip.percent_of_ideal_port_bw_received_after_urgent_latency = 100; + dceip.large_cursor = false; + dceip.dmif_request_buffer_size = bw_int_to_fixed(768); + dceip.dmif_pipe_en_fbc_chunk_tracker = false; + dceip.cursor_max_outstanding_group_num = 1; + dceip.lines_interleaved_into_lb = 2; + dceip.chunk_width = 256; + dceip.number_of_graphics_pipes = 5; + dceip.number_of_underlay_pipes = 0; + dceip.low_power_tiling_mode = 0; + dceip.display_write_back_supported = true; + dceip.argb_compression_support = true; + dceip.underlay_vscaler_efficiency6_bit_per_component = + bw_frc_to_fixed(35556, 10000); + dceip.underlay_vscaler_efficiency8_bit_per_component = + bw_frc_to_fixed(34286, 10000); + dceip.underlay_vscaler_efficiency10_bit_per_component = + bw_frc_to_fixed(32, 10); + 
dceip.underlay_vscaler_efficiency12_bit_per_component = + bw_int_to_fixed(3); + dceip.graphics_vscaler_efficiency6_bit_per_component = + bw_frc_to_fixed(35, 10); + dceip.graphics_vscaler_efficiency8_bit_per_component = + bw_frc_to_fixed(34286, 10000); + dceip.graphics_vscaler_efficiency10_bit_per_component = + bw_frc_to_fixed(32, 10); + dceip.graphics_vscaler_efficiency12_bit_per_component = + bw_int_to_fixed(3); + dceip.alpha_vscaler_efficiency = bw_int_to_fixed(3); + dceip.max_dmif_buffer_allocated = 4; + dceip.graphics_dmif_size = 12288; + dceip.underlay_luma_dmif_size = 19456; + dceip.underlay_chroma_dmif_size = 23552; + dceip.pre_downscaler_enabled = true; + dceip.underlay_downscale_prefetch_enabled = true; + dceip.lb_write_pixels_per_dispclk = bw_int_to_fixed(1); + dceip.lb_size_per_component444 = bw_int_to_fixed(245952); + dceip.graphics_lb_nodownscaling_multi_line_prefetching = true; + dceip.stutter_and_dram_clock_state_change_gated_before_cursor = + bw_int_to_fixed(1); + dceip.underlay420_luma_lb_size_per_component = bw_int_to_fixed( + 82176); + dceip.underlay420_chroma_lb_size_per_component = + bw_int_to_fixed(164352); + dceip.underlay422_lb_size_per_component = bw_int_to_fixed( + 82176); + dceip.cursor_chunk_width = bw_int_to_fixed(64); + dceip.cursor_dcp_buffer_lines = bw_int_to_fixed(4); + dceip.underlay_maximum_width_efficient_for_tiling = + bw_int_to_fixed(1920); + dceip.underlay_maximum_height_efficient_for_tiling = + bw_int_to_fixed(1080); + dceip.peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display = + bw_frc_to_fixed(3, 10); + dceip.peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation = + bw_int_to_fixed(25); + dceip.minimum_outstanding_pte_request_limit = bw_int_to_fixed( + 2); + dceip.maximum_total_outstanding_pte_requests_allowed_by_saw = + bw_int_to_fixed(128); + dceip.limit_excessive_outstanding_dmif_requests = true; + dceip.linear_mode_line_request_alternation_slice = + bw_int_to_fixed(64); + dceip.scatter_gather_lines_of_pte_prefetching_in_linear_mode = + 32; + dceip.display_write_back420_luma_mcifwr_buffer_size = 12288; + dceip.display_write_back420_chroma_mcifwr_buffer_size = 8192; + dceip.request_efficiency = bw_frc_to_fixed(8, 10); + dceip.dispclk_per_request = bw_int_to_fixed(2); + dceip.dispclk_ramping_factor = bw_frc_to_fixed(105, 100); + dceip.display_pipe_throughput_factor = bw_frc_to_fixed(105, 100); + dceip.scatter_gather_pte_request_rows_in_tiling_mode = 2; + dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); + break; case BW_CALCS_VERSION_STONEY: vbios.memory_type = bw_def_gddr5; vbios.dram_channel_width_in_bits = 64; @@ -2815,6 +2942,19 @@ static void populate_initial_data( data->bytes_per_pixel[num_displays + 4] = 4; break; } + } else if (pipe[i].stream->dst.width != 0 && + pipe[i].stream->dst.height != 0 && + pipe[i].stream->src.width != 0 && + pipe[i].stream->src.height != 0) { + data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.width); + data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4]; + data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.height); + data->h_taps[num_displays + 4] = pipe[i].stream->src.width == pipe[i].stream->dst.width ? bw_int_to_fixed(1) : bw_int_to_fixed(2); + data->v_taps[num_displays + 4] = pipe[i].stream->src.height == pipe[i].stream->dst.height ? 
bw_int_to_fixed(1) : bw_int_to_fixed(2); + data->h_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.width, pipe[i].stream->dst.width); + data->v_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.height, pipe[i].stream->dst.height); + data->rotation_angle[num_displays + 4] = bw_int_to_fixed(0); + data->bytes_per_pixel[num_displays + 4] = 4; } else { data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_addressable); data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4]; @@ -2873,6 +3013,11 @@ bool bw_calcs(struct dc_context *ctx, struct bw_fixed mid_yclk = vbios->mid_yclk; struct bw_fixed low_yclk = vbios->low_yclk; + if (ctx->dc->debug.bandwidth_calcs_trace) { + print_bw_calcs_dceip(ctx->logger, dceip); + print_bw_calcs_vbios(ctx->logger, vbios); + print_bw_calcs_data(ctx->logger, data); + } calculate_bandwidth(dceip, vbios, data); yclk_lvl = data->y_clk_level; @@ -2968,7 +3113,33 @@ bool bw_calcs(struct dc_context *ctx, bw_fixed_to_int(bw_mul(data-> stutter_exit_watermark[9], bw_int_to_fixed(1000))); - + calcs_output->stutter_entry_wm_ns[0].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[4], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[1].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[5], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[2].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[6], bw_int_to_fixed(1000))); + if (ctx->dc->caps.max_slave_planes) { + calcs_output->stutter_entry_wm_ns[3].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[0], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[1], bw_int_to_fixed(1000))); + } else { + calcs_output->stutter_entry_wm_ns[3].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[7], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[8], bw_int_to_fixed(1000))); + } + calcs_output->stutter_entry_wm_ns[5].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[9], bw_int_to_fixed(1000))); calcs_output->urgent_wm_ns[0].a_mark = bw_fixed_to_int(bw_mul(data-> @@ -3063,7 +3234,33 @@ bool bw_calcs(struct dc_context *ctx, bw_fixed_to_int(bw_mul(data-> stutter_exit_watermark[9], bw_int_to_fixed(1000))); - + calcs_output->stutter_entry_wm_ns[0].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[4], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[1].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[5], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[2].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[6], bw_int_to_fixed(1000))); + if (ctx->dc->caps.max_slave_planes) { + calcs_output->stutter_entry_wm_ns[3].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[0], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[1], bw_int_to_fixed(1000))); + } else { + calcs_output->stutter_entry_wm_ns[3].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[7], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[8], bw_int_to_fixed(1000))); + } + calcs_output->stutter_entry_wm_ns[5].b_mark = + bw_fixed_to_int(bw_mul(data-> + 
stutter_entry_watermark[9], bw_int_to_fixed(1000))); calcs_output->urgent_wm_ns[0].b_mark = bw_fixed_to_int(bw_mul(data-> @@ -3156,6 +3353,34 @@ bool bw_calcs(struct dc_context *ctx, bw_fixed_to_int(bw_mul(data-> stutter_exit_watermark[9], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[0].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[4], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[1].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[5], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[2].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[6], bw_int_to_fixed(1000))); + if (ctx->dc->caps.max_slave_planes) { + calcs_output->stutter_entry_wm_ns[3].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[0], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[1], bw_int_to_fixed(1000))); + } else { + calcs_output->stutter_entry_wm_ns[3].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[7], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[8], bw_int_to_fixed(1000))); + } + calcs_output->stutter_entry_wm_ns[5].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[9], bw_int_to_fixed(1000))); + calcs_output->urgent_wm_ns[0].c_mark = bw_fixed_to_int(bw_mul(data-> urgent_watermark[4], bw_int_to_fixed(1000))); @@ -3260,6 +3485,33 @@ bool bw_calcs(struct dc_context *ctx, bw_fixed_to_int(bw_mul(data-> stutter_exit_watermark[9], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[0].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[4], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[1].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[5], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[2].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[6], bw_int_to_fixed(1000))); + if (ctx->dc->caps.max_slave_planes) { + calcs_output->stutter_entry_wm_ns[3].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[0], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[1], bw_int_to_fixed(1000))); + } else { + calcs_output->stutter_entry_wm_ns[3].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[7], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[8], bw_int_to_fixed(1000))); + } + calcs_output->stutter_entry_wm_ns[5].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[9], bw_int_to_fixed(1000))); calcs_output->urgent_wm_ns[0].d_mark = bw_fixed_to_int(bw_mul(data-> diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 4bb43a371292..a102c192328d 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -1459,39 +1459,39 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) void dcn_bw_sync_calcs_and_dml(struct dc *dc) { kernel_fpu_begin(); - DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %d ns\n" - "sr_enter_plus_exit_time: %d ns\n" - "urgent_latency: %d ns\n" - "write_back_latency: %d ns\n" - "percent_of_ideal_drambw_received_after_urg_latency: %d %\n" + DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n" + 
"sr_enter_plus_exit_time: %f ns\n" + "urgent_latency: %f ns\n" + "write_back_latency: %f ns\n" + "percent_of_ideal_drambw_received_after_urg_latency: %f %%\n" "max_request_size: %d bytes\n" - "dcfclkv_max0p9: %d kHz\n" - "dcfclkv_nom0p8: %d kHz\n" - "dcfclkv_mid0p72: %d kHz\n" - "dcfclkv_min0p65: %d kHz\n" - "max_dispclk_vmax0p9: %d kHz\n" - "max_dispclk_vnom0p8: %d kHz\n" - "max_dispclk_vmid0p72: %d kHz\n" - "max_dispclk_vmin0p65: %d kHz\n" - "max_dppclk_vmax0p9: %d kHz\n" - "max_dppclk_vnom0p8: %d kHz\n" - "max_dppclk_vmid0p72: %d kHz\n" - "max_dppclk_vmin0p65: %d kHz\n" - "socclk: %d kHz\n" - "fabric_and_dram_bandwidth_vmax0p9: %d MB/s\n" - "fabric_and_dram_bandwidth_vnom0p8: %d MB/s\n" - "fabric_and_dram_bandwidth_vmid0p72: %d MB/s\n" - "fabric_and_dram_bandwidth_vmin0p65: %d MB/s\n" - "phyclkv_max0p9: %d kHz\n" - "phyclkv_nom0p8: %d kHz\n" - "phyclkv_mid0p72: %d kHz\n" - "phyclkv_min0p65: %d kHz\n" - "downspreading: %d %\n" + "dcfclkv_max0p9: %f kHz\n" + "dcfclkv_nom0p8: %f kHz\n" + "dcfclkv_mid0p72: %f kHz\n" + "dcfclkv_min0p65: %f kHz\n" + "max_dispclk_vmax0p9: %f kHz\n" + "max_dispclk_vnom0p8: %f kHz\n" + "max_dispclk_vmid0p72: %f kHz\n" + "max_dispclk_vmin0p65: %f kHz\n" + "max_dppclk_vmax0p9: %f kHz\n" + "max_dppclk_vnom0p8: %f kHz\n" + "max_dppclk_vmid0p72: %f kHz\n" + "max_dppclk_vmin0p65: %f kHz\n" + "socclk: %f kHz\n" + "fabric_and_dram_bandwidth_vmax0p9: %f MB/s\n" + "fabric_and_dram_bandwidth_vnom0p8: %f MB/s\n" + "fabric_and_dram_bandwidth_vmid0p72: %f MB/s\n" + "fabric_and_dram_bandwidth_vmin0p65: %f MB/s\n" + "phyclkv_max0p9: %f kHz\n" + "phyclkv_nom0p8: %f kHz\n" + "phyclkv_mid0p72: %f kHz\n" + "phyclkv_min0p65: %f kHz\n" + "downspreading: %f %%\n" "round_trip_ping_latency_cycles: %d DCFCLK Cycles\n" "urgent_out_of_order_return_per_channel: %d Bytes\n" "number_of_channels: %d\n" "vmm_page_size: %d Bytes\n" - "dram_clock_change_latency: %d ns\n" + "dram_clock_change_latency: %f ns\n" "return_bus_width: %d Bytes\n", dc->dcn_soc->sr_exit_time * 1000, dc->dcn_soc->sr_enter_plus_exit_time * 1000, @@ -1527,11 +1527,11 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc) dc->dcn_soc->vmm_page_size, dc->dcn_soc->dram_clock_change_latency * 1000, dc->dcn_soc->return_bus_width); - DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %d\n" - "det_buffer_size_in_kbyte: %d\n" - "dpp_output_buffer_pixels: %d\n" - "opp_output_buffer_lines: %d\n" - "pixel_chunk_size_in_kbyte: %d\n" + DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %f\n" + "det_buffer_size_in_kbyte: %f\n" + "dpp_output_buffer_pixels: %f\n" + "opp_output_buffer_lines: %f\n" + "pixel_chunk_size_in_kbyte: %f\n" "pte_enable: %d\n" "pte_chunk_size: %d kbytes\n" "meta_chunk_size: %d kbytes\n" @@ -1550,13 +1550,13 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc) "max_pscl_tolb_throughput: %d pixels/dppclk\n" "max_lb_tovscl_throughput: %d pixels/dppclk\n" "max_vscl_tohscl_throughput: %d pixels/dppclk\n" - "max_hscl_ratio: %d\n" - "max_vscl_ratio: %d\n" + "max_hscl_ratio: %f\n" + "max_vscl_ratio: %f\n" "max_hscl_taps: %d\n" "max_vscl_taps: %d\n" "pte_buffer_size_in_requests: %d\n" - "dispclk_ramping_margin: %d %\n" - "under_scan_factor: %d %\n" + "dispclk_ramping_margin: %f %%\n" + "under_scan_factor: %f %%\n" "max_inter_dcn_tile_repeaters: %d\n" "can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one: %d\n" "bug_forcing_luma_and_chroma_request_to_same_size_fixed: %d\n" diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9cd3566def8d..644b2187507b 100644 --- 
a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -936,95 +936,6 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc) return true; } -/* - * TODO this whole function needs to go - * - * dc_surface_update is needlessly complex. See if we can just replace this - * with a dc_plane_state and follow the atomic model a bit more closely here. - */ -bool dc_commit_planes_to_stream( - struct dc *dc, - struct dc_plane_state **plane_states, - uint8_t new_plane_count, - struct dc_stream_state *dc_stream, - struct dc_state *state) -{ - /* no need to dynamically allocate this. it's pretty small */ - struct dc_surface_update updates[MAX_SURFACES]; - struct dc_flip_addrs *flip_addr; - struct dc_plane_info *plane_info; - struct dc_scaling_info *scaling_info; - int i; - struct dc_stream_update *stream_update = - kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL); - - if (!stream_update) { - BREAK_TO_DEBUGGER(); - return false; - } - - flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs), - GFP_KERNEL); - plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info), - GFP_KERNEL); - scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info), - GFP_KERNEL); - - if (!flip_addr || !plane_info || !scaling_info) { - kfree(flip_addr); - kfree(plane_info); - kfree(scaling_info); - kfree(stream_update); - return false; - } - - memset(updates, 0, sizeof(updates)); - - stream_update->src = dc_stream->src; - stream_update->dst = dc_stream->dst; - stream_update->out_transfer_func = dc_stream->out_transfer_func; - - for (i = 0; i < new_plane_count; i++) { - updates[i].surface = plane_states[i]; - updates[i].gamma = - (struct dc_gamma *)plane_states[i]->gamma_correction; - updates[i].in_transfer_func = plane_states[i]->in_transfer_func; - flip_addr[i].address = plane_states[i]->address; - flip_addr[i].flip_immediate = plane_states[i]->flip_immediate; - plane_info[i].color_space = plane_states[i]->color_space; - plane_info[i].input_tf = plane_states[i]->input_tf; - plane_info[i].format = plane_states[i]->format; - plane_info[i].plane_size = plane_states[i]->plane_size; - plane_info[i].rotation = plane_states[i]->rotation; - plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror; - plane_info[i].stereo_format = plane_states[i]->stereo_format; - plane_info[i].tiling_info = plane_states[i]->tiling_info; - plane_info[i].visible = plane_states[i]->visible; - plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha; - plane_info[i].dcc = plane_states[i]->dcc; - scaling_info[i].scaling_quality = plane_states[i]->scaling_quality; - scaling_info[i].src_rect = plane_states[i]->src_rect; - scaling_info[i].dst_rect = plane_states[i]->dst_rect; - scaling_info[i].clip_rect = plane_states[i]->clip_rect; - - updates[i].flip_addr = &flip_addr[i]; - updates[i].plane_info = &plane_info[i]; - updates[i].scaling_info = &scaling_info[i]; - } - - dc_commit_updates_for_stream( - dc, - updates, - new_plane_count, - dc_stream, stream_update, plane_states, state); - - kfree(flip_addr); - kfree(plane_info); - kfree(scaling_info); - kfree(stream_update); - return true; -} - struct dc_state *dc_create_state(void) { struct dc_state *context = kzalloc(sizeof(struct dc_state), @@ -1107,9 +1018,6 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa if (u->plane_info->color_space != u->surface->color_space) update_flags->bits.color_space_change = 1; - if (u->plane_info->input_tf != u->surface->input_tf) - 
update_flags->bits.input_tf_change = 1; - if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) update_flags->bits.horizontal_mirror_change = 1; @@ -1243,12 +1151,20 @@ static enum surface_update_type det_surface_update(const struct dc *dc, if (u->input_csc_color_matrix) update_flags->bits.input_csc_change = 1; - if (update_flags->bits.in_transfer_func_change - || update_flags->bits.input_csc_change) { + if (u->coeff_reduction_factor) + update_flags->bits.coeff_reduction_change = 1; + + if (update_flags->bits.in_transfer_func_change) { type = UPDATE_TYPE_MED; elevate_update_type(&overall_type, type); } + if (update_flags->bits.input_csc_change + || update_flags->bits.coeff_reduction_change) { + type = UPDATE_TYPE_FULL; + elevate_update_type(&overall_type, type); + } + return overall_type; } @@ -1297,7 +1213,7 @@ enum surface_update_type dc_check_update_surfaces_for_stream( type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status); if (type == UPDATE_TYPE_FULL) for (i = 0; i < surface_count; i++) - updates[i].surface->update_flags.bits.full_update = 1; + updates[i].surface->update_flags.raw = 0xFFFFFFFF; return type; } @@ -1375,6 +1291,12 @@ static void commit_planes_for_stream(struct dc *dc, pipe_ctx->stream_res.abm->funcs->set_abm_level( pipe_ctx->stream_res.abm, stream->abm_level); } + + if (stream_update && stream_update->periodic_fn_vsync_delta && + pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) + pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, + pipe_ctx->stream->periodic_fn_vsync_delta); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 5a552cb3f8a7..267c76766dea 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -36,8 +36,9 @@ #include "hw_sequencer.h" #include "resource.h" -#define DC_LOGGER \ - logger + +#define DC_LOGGER_INIT(logger) + #define SURFACE_TRACE(...) 
do {\ if (dc->debug.surface_trace) \ @@ -60,8 +61,7 @@ void pre_surface_trace( int surface_count) { int i; - struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; + DC_LOGGER_INIT(dc->ctx->logger); for (i = 0; i < surface_count; i++) { const struct dc_plane_state *plane_state = plane_states[i]; @@ -72,8 +72,8 @@ void pre_surface_trace( "plane_state->visible = %d;\n" "plane_state->flip_immediate = %d;\n" "plane_state->address.type = %d;\n" - "plane_state->address.grph.addr.quad_part = 0x%X;\n" - "plane_state->address.grph.meta_addr.quad_part = 0x%X;\n" + "plane_state->address.grph.addr.quad_part = 0x%llX;\n" + "plane_state->address.grph.meta_addr.quad_part = 0x%llX;\n" "plane_state->scaling_quality.h_taps = %d;\n" "plane_state->scaling_quality.v_taps = %d;\n" "plane_state->scaling_quality.h_taps_c = %d;\n" @@ -155,7 +155,6 @@ void pre_surface_trace( "plane_state->tiling_info.gfx8.pipe_config = %d;\n" "plane_state->tiling_info.gfx8.array_mode = %d;\n" "plane_state->color_space = %d;\n" - "plane_state->input_tf = %d;\n" "plane_state->dcc.enable = %d;\n" "plane_state->format = %d;\n" "plane_state->rotation = %d;\n" @@ -163,7 +162,6 @@ void pre_surface_trace( plane_state->tiling_info.gfx8.pipe_config, plane_state->tiling_info.gfx8.array_mode, plane_state->color_space, - plane_state->input_tf, plane_state->dcc.enable, plane_state->format, plane_state->rotation, @@ -183,8 +181,7 @@ void update_surface_trace( int surface_count) { int i; - struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; + DC_LOGGER_INIT(dc->ctx->logger); for (i = 0; i < surface_count; i++) { const struct dc_surface_update *update = &updates[i]; @@ -192,8 +189,8 @@ void update_surface_trace( SURFACE_TRACE("Update %d\n", i); if (update->flip_addr) { SURFACE_TRACE("flip_addr->address.type = %d;\n" - "flip_addr->address.grph.addr.quad_part = 0x%X;\n" - "flip_addr->address.grph.meta_addr.quad_part = 0x%X;\n" + "flip_addr->address.grph.addr.quad_part = 0x%llX;\n" + "flip_addr->address.grph.meta_addr.quad_part = 0x%llX;\n" "flip_addr->flip_immediate = %d;\n", update->flip_addr->address.type, update->flip_addr->address.grph.addr.quad_part, @@ -204,16 +201,15 @@ void update_surface_trace( if (update->plane_info) { SURFACE_TRACE( "plane_info->color_space = %d;\n" - "plane_info->input_tf = %d;\n" "plane_info->format = %d;\n" "plane_info->plane_size.grph.surface_pitch = %d;\n" "plane_info->plane_size.grph.surface_size.height = %d;\n" "plane_info->plane_size.grph.surface_size.width = %d;\n" "plane_info->plane_size.grph.surface_size.x = %d;\n" "plane_info->plane_size.grph.surface_size.y = %d;\n" - "plane_info->rotation = %d;\n", + "plane_info->rotation = %d;\n" + "plane_info->stereo_format = %d;\n", update->plane_info->color_space, - update->plane_info->input_tf, update->plane_info->format, update->plane_info->plane_size.grph.surface_pitch, update->plane_info->plane_size.grph.surface_size.height, @@ -303,8 +299,7 @@ void update_surface_trace( void post_surface_trace(struct dc *dc) { - struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; + DC_LOGGER_INIT(dc->ctx->logger); SURFACE_TRACE("post surface process.\n"); @@ -316,10 +311,10 @@ void context_timing_trace( { int i; struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; int h_pos[MAX_PIPES], v_pos[MAX_PIPES]; struct crtc_position position; unsigned int underlay_idx = core_dc->res_pool->underlay_pipe_index; + DC_LOGGER_INIT(dc->ctx->logger); for (i = 0; i < core_dc->res_pool->pipe_count; i++) { @@ 
-354,9 +349,7 @@ void context_clock_trace( struct dc_state *context) { #if defined(CONFIG_DRM_AMD_DC_DCN1_0) - struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; - + DC_LOGGER_INIT(dc->ctx->logger); CLOCK_TRACE("Current: dispclk_khz:%d max_dppclk_khz:%d dcfclk_khz:%d\n" "dcfclk_deep_sleep_khz:%d fclk_khz:%d socclk_khz:%d\n", context->bw.dcn.calc_clk.dispclk_khz, @@ -371,6 +364,7 @@ void context_clock_trace( context->bw.dcn.calc_clk.dppclk_khz, context->bw.dcn.calc_clk.dcfclk_khz, context->bw.dcn.calc_clk.dcfclk_deep_sleep_khz, - context->bw.dcn.calc_clk.fclk_khz); + context->bw.dcn.calc_clk.fclk_khz, + context->bw.dcn.calc_clk.socclk_khz); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index ebc96b720083..83d121510ef5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -208,6 +208,7 @@ void color_space_to_black_color( case COLOR_SPACE_YCBCR709: case COLOR_SPACE_YCBCR601_LIMITED: case COLOR_SPACE_YCBCR709_LIMITED: + case COLOR_SPACE_2020_YCBCR: *black_color = black_color_format[BLACK_COLOR_FORMAT_YUV_CV]; break; @@ -216,7 +217,25 @@ void color_space_to_black_color( black_color_format[BLACK_COLOR_FORMAT_RGB_LIMITED]; break; - default: + /** * Remove the default case and list every color space explicitly, * so that the compiler warns us when a new color space is added * but not handled here */ + case COLOR_SPACE_UNKNOWN: + case COLOR_SPACE_SRGB: + case COLOR_SPACE_XR_RGB: + case COLOR_SPACE_MSREF_SCRGB: + case COLOR_SPACE_XV_YCC_709: + case COLOR_SPACE_XV_YCC_601: + case COLOR_SPACE_2020_RGB_FULLRANGE: + case COLOR_SPACE_2020_RGB_LIMITEDRANGE: + case COLOR_SPACE_ADOBERGB: + case COLOR_SPACE_DCIP3: + case COLOR_SPACE_DISPLAYNATIVE: + case COLOR_SPACE_DOLBYVISION: + case COLOR_SPACE_APPCTRL: + case COLOR_SPACE_CUSTOMPOINTS: /* default is sRGB black (full range). */ *black_color = black_color_format[BLACK_COLOR_FORMAT_RGB_FULLRANGE]; @@ -230,6 +249,9 @@ bool hwss_wait_for_blank_complete( { int counter; + /* Not applicable if the pipe is not primary; saves 300 ms of boot time */ + if (!tg->funcs->is_blanked) + return true; for (counter = 0; counter < 100; counter++) { if (tg->funcs->is_blanked(tg)) break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 6d1c4981a185..b44cf52090a5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -45,8 +45,9 @@ #include "dce/dce_11_0_d.h" #include "dce/dce_11_0_enum.h" #include "dce/dce_11_0_sh_mask.h" -#define DC_LOGGER \ - dc_ctx->logger + +#define DC_LOGGER_INIT(logger) + #define LINK_INFO(...)
\ DC_LOG_HW_HOTPLUG( \ @@ -561,7 +562,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) struct dc_context *dc_ctx = link->ctx; struct dc_sink *sink = NULL; enum dc_connection_type new_connection_type = dc_connection_none; - + DC_LOGGER_INIT(link->ctx->logger); if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) return false; @@ -927,6 +928,7 @@ static bool construct( struct integrated_info info = {{{ 0 }}}; struct dc_bios *bios = init_params->dc->ctx->dc_bios; const struct dc_vbios_funcs *bp_funcs = bios->funcs; + DC_LOGGER_INIT(dc_ctx->logger); link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID; @@ -1135,7 +1137,8 @@ static void dpcd_configure_panel_mode( { union dpcd_edp_config edp_config_set; bool panel_mode_edp = false; - struct dc_context *dc_ctx = link->ctx; + DC_LOGGER_INIT(link->ctx->logger); + memset(&edp_config_set, '\0', sizeof(union dpcd_edp_config)); if (DP_PANEL_MODE_DEFAULT != panel_mode) { @@ -1183,16 +1186,21 @@ static void enable_stream_features(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->sink->link; - union down_spread_ctrl downspread; + union down_spread_ctrl old_downspread; + union down_spread_ctrl new_downspread; core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL, - &downspread.raw, sizeof(downspread)); + &old_downspread.raw, sizeof(old_downspread)); - downspread.bits.IGNORE_MSA_TIMING_PARAM = + new_downspread.raw = old_downspread.raw; + + new_downspread.bits.IGNORE_MSA_TIMING_PARAM = (stream->ignore_msa_timing_param) ? 1 : 0; - core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, - &downspread.raw, sizeof(downspread)); + if (new_downspread.raw != old_downspread.raw) { + core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, + &new_downspread.raw, sizeof(new_downspread)); + } } static enum dc_status enable_link_dp( @@ -1843,9 +1851,22 @@ static void disable_link(struct dc_link *link, enum signal_type signal) static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, - const struct dc_dongle_caps *dongle_caps) + const struct dpcd_caps *dpcd_caps) { unsigned int required_pix_clk = timing->pix_clk_khz; + const struct dc_dongle_caps *dongle_caps = &dpcd_caps->dongle_caps; + + switch (dpcd_caps->dongle_type) { + case DISPLAY_DONGLE_DP_VGA_CONVERTER: + case DISPLAY_DONGLE_DP_DVI_CONVERTER: + case DISPLAY_DONGLE_DP_DVI_DONGLE: + if (timing->pixel_encoding == PIXEL_ENCODING_RGB) + return true; + else + return false; + default: + break; + } if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER || dongle_caps->extendedCapValid == false) @@ -1911,7 +1932,7 @@ enum dc_status dc_link_validate_mode_timing( const struct dc_crtc_timing *timing) { uint32_t max_pix_clk = stream->sink->dongle_max_pix_clk; - struct dc_dongle_caps *dongle_caps = &link->dpcd_caps.dongle_caps; + struct dpcd_caps *dpcd_caps = &link->dpcd_caps; /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle @@ -1924,7 +1945,7 @@ enum dc_status dc_link_validate_mode_timing( return DC_EXCEED_DONGLE_CAP; /* Active Dongle*/ - if (!dp_active_dongle_validate_timing(timing, dongle_caps)) + if (!dp_active_dongle_validate_timing(timing, dpcd_caps)) return DC_EXCEED_DONGLE_CAP; switch (stream->signal) { @@ -1950,10 +1971,10 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, struct dc *core_dc = link->ctx->dc; struct abm *abm = core_dc->res_pool->abm; struct dmcu *dmcu 
= core_dc->res_pool->dmcu; - struct dc_context *dc_ctx = link->ctx; unsigned int controller_id = 0; bool use_smooth_brightness = true; int i; + DC_LOGGER_INIT(link->ctx->logger); if ((dmcu == NULL) || (abm == NULL) || @@ -1961,7 +1982,7 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, return false; if (stream) { - if (stream->bl_pwm_level == 0) + if (stream->bl_pwm_level == EDP_BACKLIGHT_RAMP_DISABLE_LEVEL) frame_ramp = 0; ((struct dc_stream_state *)stream)->bl_pwm_level = level; @@ -2149,8 +2170,8 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) struct fixed31_32 avg_time_slots_per_mtp; struct fixed31_32 pbn; struct fixed31_32 pbn_per_slot; - struct dc_context *dc_ctx = link->ctx; uint8_t i; + DC_LOGGER_INIT(link->ctx->logger); /* enable_link_dp_mst already check link->enabled_stream_count * and stream is in link->stream[]. This is called during set mode, @@ -2178,11 +2199,11 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) link->mst_stream_alloc_table.stream_count); for (i = 0; i < MAX_CONTROLLER_NUM; i++) { - DC_LOG_MST("stream_enc[%d]: 0x%x " + DC_LOG_MST("stream_enc[%d]: %p " "stream[%d].vcp_id: %d " "stream[%d].slot_count: %d\n", i, - link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, i, link->mst_stream_alloc_table.stream_allocations[i].vcp_id, i, @@ -2229,7 +2250,7 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) struct fixed31_32 avg_time_slots_per_mtp = dal_fixed31_32_from_int(0); uint8_t i; bool mst_mode = (link->type == dc_connection_mst_branch); - struct dc_context *dc_ctx = link->ctx; + DC_LOGGER_INIT(link->ctx->logger); /* deallocate_mst_payload is called before disable link. When mode or * disable/enable monitor, new stream is created which is not in link @@ -2268,11 +2289,11 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) link->mst_stream_alloc_table.stream_count); for (i = 0; i < MAX_CONTROLLER_NUM; i++) { - DC_LOG_MST("stream_enc[%d]: 0x%x " + DC_LOG_MST("stream_enc[%d]: %p " "stream[%d].vcp_id: %d " "stream[%d].slot_count: %d\n", i, - link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, i, link->mst_stream_alloc_table.stream_allocations[i].vcp_id, i, @@ -2302,8 +2323,8 @@ void core_link_enable_stream( struct pipe_ctx *pipe_ctx) { struct dc *core_dc = pipe_ctx->stream->ctx->dc; - struct dc_context *dc_ctx = pipe_ctx->stream->ctx; enum dc_status status; + DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* eDP lit up by bios already, no need to enable again. 
*/ if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 3b5053570229..7d609c71394b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1378,8 +1378,8 @@ static uint32_t bandwidth_in_kbps_from_timing( { uint32_t bits_per_channel = 0; uint32_t kbps; - switch (timing->display_color_depth) { + switch (timing->display_color_depth) { case COLOR_DEPTH_666: bits_per_channel = 6; break; @@ -1401,14 +1401,20 @@ static uint32_t bandwidth_in_kbps_from_timing( default: break; } + ASSERT(bits_per_channel != 0); kbps = timing->pix_clk_khz; kbps *= bits_per_channel; - if (timing->flags.Y_ONLY != 1) + if (timing->flags.Y_ONLY != 1) { /* Only Y-Only reduces bandwidth to 1/3 compared to RGB */ kbps *= 3; + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + kbps /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + kbps = kbps * 2 / 3; + } return kbps; @@ -2278,6 +2284,8 @@ static bool retrieve_link_cap(struct dc_link *link) union edp_configuration_cap edp_config_cap; union dp_downstream_port_present ds_port = { 0 }; enum dc_status status = DC_ERROR_UNEXPECTED; + uint32_t read_dpcd_retry_cnt = 3; + int i; memset(dpcd_data, '\0', sizeof(dpcd_data)); memset(&down_strm_port_count, @@ -2285,11 +2293,15 @@ static bool retrieve_link_cap(struct dc_link *link) memset(&edp_config_cap, '\0', sizeof(union edp_configuration_cap)); - status = core_link_read_dpcd( - link, - DP_DPCD_REV, - dpcd_data, - sizeof(dpcd_data)); + for (i = 0; i < read_dpcd_retry_cnt; i++) { + status = core_link_read_dpcd( + link, + DP_DPCD_REV, + dpcd_data, + sizeof(dpcd_data)); + if (status == DC_OK) + break; + } if (status != DC_OK) { dm_error("%s: Read dpcd data failed.\n", __func__); @@ -2376,6 +2388,10 @@ bool detect_dp_sink_caps(struct dc_link *link) void detect_edp_sink_caps(struct dc_link *link) { retrieve_link_cap(link); + + if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) + link->reported_link_cap.link_rate = LINK_RATE_HIGH2; + link->verified_link_cap = link->reported_link_cap; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index ba3487e97361..9eb731fb5251 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -45,8 +45,9 @@ #include "dcn10/dcn10_resource.h" #endif #include "dce120/dce120_resource.h" -#define DC_LOGGER \ - ctx->logger + +#define DC_LOGGER_INIT(logger) + enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) { enum dce_version dc_version = DCE_VERSION_UNKNOWN; @@ -78,6 +79,10 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) { dc_version = DCE_VERSION_11_2; } +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev)) + dc_version = DCE_VERSION_11_22; +#endif break; case FAMILY_AI: dc_version = DCE_VERSION_12_0; @@ -124,6 +129,9 @@ struct resource_pool *dc_create_resource_pool( num_virtual_links, dc, asic_id); break; case DCE_VERSION_11_2: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case DCE_VERSION_11_22: +#endif res_pool = dce112_create_resource_pool( num_virtual_links, dc); break; @@ -835,7 +843,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; struct view recout_skip = { 0 }; bool res =
false; - struct dc_context *ctx = pipe_ctx->stream->ctx; + DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* Important: scaling ratio calculation requires pixel format, * lb depth calculation requires recout and taps require scaling ratios. * Inits require viewport, taps, ratios and recout of split pipe @@ -843,6 +851,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface( pipe_ctx->plane_state->format); + if (pipe_ctx->stream->timing.flags.INTERLACE) + pipe_ctx->stream->dst.height *= 2; + calculate_scaling_ratios(pipe_ctx); calculate_viewport(pipe_ctx); @@ -863,6 +874,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; + if (pipe_ctx->stream->timing.flags.INTERLACE) + pipe_ctx->plane_res.scl_data.v_active *= 2; /* Taps calculations */ @@ -908,6 +921,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) plane_state->dst_rect.x, plane_state->dst_rect.y); + if (pipe_ctx->stream->timing.flags.INTERLACE) + pipe_ctx->stream->dst.height /= 2; + return res; } @@ -1294,6 +1310,19 @@ bool dc_add_all_planes_for_stream( } +static bool is_hdr_static_meta_changed(struct dc_stream_state *cur_stream, + struct dc_stream_state *new_stream) +{ + if (cur_stream == NULL) + return true; + + if (memcmp(&cur_stream->hdr_static_metadata, + &new_stream->hdr_static_metadata, + sizeof(struct dc_info_packet)) != 0) + return true; + + return false; +} static bool is_timing_changed(struct dc_stream_state *cur_stream, struct dc_stream_state *new_stream) @@ -1329,6 +1358,9 @@ static bool are_stream_backends_same( if (is_timing_changed(stream_a, stream_b)) return false; + if (is_hdr_static_meta_changed(stream_a, stream_b)) + return false; + return true; } @@ -1599,18 +1631,6 @@ enum dc_status dc_remove_stream_from_ctx( return DC_OK; } -static void copy_pipe_ctx( - const struct pipe_ctx *from_pipe_ctx, struct pipe_ctx *to_pipe_ctx) -{ - struct dc_plane_state *plane_state = to_pipe_ctx->plane_state; - struct dc_stream_state *stream = to_pipe_ctx->stream; - - *to_pipe_ctx = *from_pipe_ctx; - to_pipe_ctx->stream = stream; - if (plane_state != NULL) - to_pipe_ctx->plane_state = plane_state; -} - static struct dc_stream_state *find_pll_sharable_stream( struct dc_stream_state *stream_needs_pll, struct dc_state *context) @@ -1703,7 +1723,7 @@ enum dc_status resource_map_pool_resources( pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream); #endif - if (pipe_idx < 0) + if (pipe_idx < 0 || context->res_ctx.pipe_ctx[pipe_idx].stream_res.tg == NULL) return DC_NO_CONTROLLER_RESOURCE; pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx]; @@ -1752,26 +1772,6 @@ enum dc_status resource_map_pool_resources( return DC_ERROR_UNEXPECTED; } -/* first stream in the context is used to populate the rest */ -void validate_guaranteed_copy_streams( - struct dc_state *context, - int max_streams) -{ - int i; - - for (i = 1; i < max_streams; i++) { - context->streams[i] = context->streams[0]; - - copy_pipe_ctx(&context->res_ctx.pipe_ctx[0], - &context->res_ctx.pipe_ctx[i]); - context->res_ctx.pipe_ctx[i].stream = - context->res_ctx.pipe_ctx[0].stream; - - dc_stream_retain(context->streams[i]); - context->stream_count++; - } -} - void dc_resource_state_copy_construct_current( const struct dc *dc, struct dc_state 
*dst_ctx) @@ -1798,9 +1798,9 @@ enum dc_status dc_validate_global_state( return DC_ERROR_UNEXPECTED; if (dc->res_pool->funcs->validate_global) { - result = dc->res_pool->funcs->validate_global(dc, new_ctx); - if (result != DC_OK) - return result; + result = dc->res_pool->funcs->validate_global(dc, new_ctx); + if (result != DC_OK) + return result; } for (i = 0; i < new_ctx->stream_count; i++) { @@ -1843,7 +1843,7 @@ enum dc_status dc_validate_global_state( } static void patch_gamut_packet_checksum( - struct encoder_info_packet *gamut_packet) + struct dc_info_packet *gamut_packet) { /* For gamut we recalc checksum */ if (gamut_packet->valid) { @@ -1862,12 +1862,11 @@ static void patch_gamut_packet_checksum( } static void set_avi_info_frame( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; enum dc_color_space color_space = COLOR_SPACE_UNKNOWN; - struct info_frame info_frame = { {0} }; uint32_t pixel_encoding = 0; enum scanning_type scan_type = SCANNING_TYPE_NODATA; enum dc_aspect_ratio aspect = ASPECT_RATIO_NO_DATA; @@ -1877,22 +1876,24 @@ static void set_avi_info_frame( unsigned int cn0_cn1_value = 0; uint8_t *check_sum = NULL; uint8_t byte_index = 0; - union hdmi_info_packet *hdmi_info = &info_frame.avi_info_packet.info_packet_hdmi; + union hdmi_info_packet hdmi_info; union display_content_support support = {0}; unsigned int vic = pipe_ctx->stream->timing.vic; enum dc_timing_3d_format format; + memset(&hdmi_info, 0, sizeof(union hdmi_info_packet)); + color_space = pipe_ctx->stream->output_color_space; if (color_space == COLOR_SPACE_UNKNOWN) color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? COLOR_SPACE_SRGB:COLOR_SPACE_YCBCR709; /* Initialize header */ - hdmi_info->bits.header.info_frame_type = HDMI_INFOFRAME_TYPE_AVI; + hdmi_info.bits.header.info_frame_type = HDMI_INFOFRAME_TYPE_AVI; /* InfoFrameVersion_3 is defined by CEA861F (Section 6.4), but shall * not be used in HDMI 2.0 (Section 10.1) */ - hdmi_info->bits.header.version = 2; - hdmi_info->bits.header.length = HDMI_AVI_INFOFRAME_SIZE; + hdmi_info.bits.header.version = 2; + hdmi_info.bits.header.length = HDMI_AVI_INFOFRAME_SIZE; /* * IDO-defined (Y2,Y1,Y0 = 1,1,1) shall not be used by devices built @@ -1918,39 +1919,39 @@ static void set_avi_info_frame( /* Y0_Y1_Y2 : The pixel encoding */ /* H14b AVI InfoFrame has extension on Y-field from 2 bits to 3 bits */ - hdmi_info->bits.Y0_Y1_Y2 = pixel_encoding; + hdmi_info.bits.Y0_Y1_Y2 = pixel_encoding; /* A0 = 1 Active Format Information valid */ - hdmi_info->bits.A0 = ACTIVE_FORMAT_VALID; + hdmi_info.bits.A0 = ACTIVE_FORMAT_VALID; /* B0, B1 = 3; Bar info data is valid */ - hdmi_info->bits.B0_B1 = BAR_INFO_BOTH_VALID; + hdmi_info.bits.B0_B1 = BAR_INFO_BOTH_VALID; - hdmi_info->bits.SC0_SC1 = PICTURE_SCALING_UNIFORM; + hdmi_info.bits.SC0_SC1 = PICTURE_SCALING_UNIFORM; /* S0, S1 : Underscan / Overscan */ /* TODO: un-hardcode scan type */ scan_type = SCANNING_TYPE_UNDERSCAN; - hdmi_info->bits.S0_S1 = scan_type; + hdmi_info.bits.S0_S1 = scan_type; /* C0, C1 : Colorimetry */ if (color_space == COLOR_SPACE_YCBCR709 || color_space == COLOR_SPACE_YCBCR709_LIMITED) - hdmi_info->bits.C0_C1 = COLORIMETRY_ITU709; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; else if (color_space == COLOR_SPACE_YCBCR601 || color_space == COLOR_SPACE_YCBCR601_LIMITED) - hdmi_info->bits.C0_C1 = COLORIMETRY_ITU601; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU601; else { - hdmi_info->bits.C0_C1 = 
COLORIMETRY_NO_DATA; + hdmi_info.bits.C0_C1 = COLORIMETRY_NO_DATA; } if (color_space == COLOR_SPACE_2020_RGB_FULLRANGE || color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE || color_space == COLOR_SPACE_2020_YCBCR) { - hdmi_info->bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR; - hdmi_info->bits.C0_C1 = COLORIMETRY_EXTENDED; + hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR; + hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } else if (color_space == COLOR_SPACE_ADOBERGB) { - hdmi_info->bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB; - hdmi_info->bits.C0_C1 = COLORIMETRY_EXTENDED; + hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB; + hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } /* TODO: un-hardcode aspect ratio */ @@ -1959,18 +1960,18 @@ static void set_avi_info_frame( switch (aspect) { case ASPECT_RATIO_4_3: case ASPECT_RATIO_16_9: - hdmi_info->bits.M0_M1 = aspect; + hdmi_info.bits.M0_M1 = aspect; break; case ASPECT_RATIO_NO_DATA: case ASPECT_RATIO_64_27: case ASPECT_RATIO_256_135: default: - hdmi_info->bits.M0_M1 = 0; + hdmi_info.bits.M0_M1 = 0; } /* Active Format Aspect ratio - same as Picture Aspect Ratio. */ - hdmi_info->bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE; + hdmi_info.bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE; /* TODO: un-hardcode cn0_cn1 and itc */ @@ -2013,8 +2014,8 @@ static void set_avi_info_frame( } } } - hdmi_info->bits.CN0_CN1 = cn0_cn1_value; - hdmi_info->bits.ITC = itc_value; + hdmi_info.bits.CN0_CN1 = cn0_cn1_value; + hdmi_info.bits.ITC = itc_value; } /* TODO : We should handle YCC quantization */ @@ -2023,19 +2024,19 @@ static void set_avi_info_frame( stream->sink->edid_caps.qy_bit == 1) { if (color_space == COLOR_SPACE_SRGB || color_space == COLOR_SPACE_2020_RGB_FULLRANGE) { - hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE; - hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE; + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE; } else if (color_space == COLOR_SPACE_SRGB_LIMITED || color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) { - hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_LIMITED_RANGE; - hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; } else { - hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; - hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; } } else { - hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; - hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; } ///VIC @@ -2060,51 +2061,49 @@ static void set_avi_info_frame( break; } } - hdmi_info->bits.VIC0_VIC7 = vic; + hdmi_info.bits.VIC0_VIC7 = vic; /* pixel repetition * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel * repetition start from 1 */ - hdmi_info->bits.PR0_PR3 = 0; + hdmi_info.bits.PR0_PR3 = 0; /* Bar Info * barTop: Line Number of End of Top Bar. * barBottom: Line Number of Start of Bottom Bar. * barLeft: Pixel Number of End of Left Bar. * barRight: Pixel Number of Start of Right Bar. 
*/ - hdmi_info->bits.bar_top = stream->timing.v_border_top; - hdmi_info->bits.bar_bottom = (stream->timing.v_total + hdmi_info.bits.bar_top = stream->timing.v_border_top; + hdmi_info.bits.bar_bottom = (stream->timing.v_total - stream->timing.v_border_bottom + 1); - hdmi_info->bits.bar_left = stream->timing.h_border_left; - hdmi_info->bits.bar_right = (stream->timing.h_total + hdmi_info.bits.bar_left = stream->timing.h_border_left; + hdmi_info.bits.bar_right = (stream->timing.h_total - stream->timing.h_border_right + 1); /* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */ - check_sum = &info_frame.avi_info_packet.info_packet_hdmi.packet_raw_data.sb[0]; + check_sum = &hdmi_info.packet_raw_data.sb[0]; *check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2; for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++) - *check_sum += hdmi_info->packet_raw_data.sb[byte_index]; + *check_sum += hdmi_info.packet_raw_data.sb[byte_index]; /* one byte complement */ *check_sum = (uint8_t) (0x100 - *check_sum); /* Store in hw_path_mode */ - info_packet->hb0 = hdmi_info->packet_raw_data.hb0; - info_packet->hb1 = hdmi_info->packet_raw_data.hb1; - info_packet->hb2 = hdmi_info->packet_raw_data.hb2; + info_packet->hb0 = hdmi_info.packet_raw_data.hb0; + info_packet->hb1 = hdmi_info.packet_raw_data.hb1; + info_packet->hb2 = hdmi_info.packet_raw_data.hb2; - for (byte_index = 0; byte_index < sizeof(info_frame.avi_info_packet. - info_packet_hdmi.packet_raw_data.sb); byte_index++) - info_packet->sb[byte_index] = info_frame.avi_info_packet. - info_packet_hdmi.packet_raw_data.sb[byte_index]; + for (byte_index = 0; byte_index < sizeof(hdmi_info.packet_raw_data.sb); byte_index++) + info_packet->sb[byte_index] = hdmi_info.packet_raw_data.sb[byte_index]; info_packet->valid = true; } static void set_vendor_info_packet( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct dc_stream_state *stream) { uint32_t length = 0; @@ -2217,7 +2216,7 @@ static void set_vendor_info_packet( } static void set_spd_info_packet( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct dc_stream_state *stream) { /* SPD info packet for FreeSync */ @@ -2338,104 +2337,19 @@ static void set_spd_info_packet( } static void set_hdr_static_info_packet( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct dc_stream_state *stream) { - uint16_t i = 0; - enum signal_type signal = stream->signal; - uint32_t data; + /* HDR Static Metadata info packet for HDR10 */ - if (!stream->hdr_static_metadata.hdr_supported) + if (!stream->hdr_static_metadata.valid) return; - if (dc_is_hdmi_signal(signal)) { - info_packet->valid = true; - - info_packet->hb0 = 0x87; - info_packet->hb1 = 0x01; - info_packet->hb2 = 0x1A; - i = 1; - } else if (dc_is_dp_signal(signal)) { - info_packet->valid = true; - - info_packet->hb0 = 0x00; - info_packet->hb1 = 0x87; - info_packet->hb2 = 0x1D; - info_packet->hb3 = (0x13 << 2); - i = 2; - } - - data = stream->hdr_static_metadata.is_hdr; - info_packet->sb[i++] = data ? 
0x02 : 0x00; - info_packet->sb[i++] = 0x00; - - data = stream->hdr_static_metadata.chromaticity_green_x / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_green_y / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_blue_x / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_blue_y / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_red_x / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_red_y / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_white_point_x / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_white_point_y / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.max_luminance; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.min_luminance; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.maximum_content_light_level; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.maximum_frame_average_light_level; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - if (dc_is_hdmi_signal(signal)) { - uint32_t checksum = 0; - - checksum += info_packet->hb0; - checksum += info_packet->hb1; - checksum += info_packet->hb2; - - for (i = 1; i <= info_packet->hb2; i++) - checksum += info_packet->sb[i]; - - info_packet->sb[0] = 0x100 - checksum; - } else if (dc_is_dp_signal(signal)) { - info_packet->sb[0] = 0x01; - info_packet->sb[1] = 0x1A; - } + *info_packet = stream->hdr_static_metadata; } static void set_vsc_info_packet( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct dc_stream_state *stream) { unsigned int vscPacketRevision = 0; @@ -2650,6 +2564,8 @@ bool pipe_need_reprogram( if (is_timing_changed(pipe_ctx_old->stream, pipe_ctx->stream)) return true; + if (is_hdr_static_meta_changed(pipe_ctx_old->stream, pipe_ctx->stream)) + return true; return false; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index ce0747ed0f00..3732a1de9d6c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -101,14 +101,16 @@ static void construct(struct dc_stream_state *stream, stream->status.link = stream->sink->link; update_stream_signal(stream); + + stream->out_transfer_func = dc_create_transfer_func(); + stream->out_transfer_func->type = TF_TYPE_BYPASS; } static void destruct(struct dc_stream_state *stream) { dc_sink_release(stream->sink); if (stream->out_transfer_func != NULL) { - dc_transfer_func_release( - stream->out_transfer_func); + dc_transfer_func_release(stream->out_transfer_func); stream->out_transfer_func = NULL; } } @@ -176,6 +178,7 @@ bool dc_stream_set_cursor_attributes( int i; struct 
dc *core_dc; struct resource_context *res_ctx; + struct pipe_ctx *pipe_to_program = NULL; if (NULL == stream) { dm_error("DC: dc_stream is NULL!\n"); @@ -203,9 +206,17 @@ bool dc_stream_set_cursor_attributes( if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state) continue; + if (!pipe_to_program) { + pipe_to_program = pipe_ctx; + core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true); + } core_dc->hwss.set_cursor_attribute(pipe_ctx); } + + if (pipe_to_program) + core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false); + return true; } @@ -216,6 +227,7 @@ bool dc_stream_set_cursor_position( int i; struct dc *core_dc; struct resource_context *res_ctx; + struct pipe_ctx *pipe_to_program = NULL; if (NULL == stream) { dm_error("DC: dc_stream is NULL!\n"); @@ -241,9 +253,17 @@ bool dc_stream_set_cursor_position( !pipe_ctx->plane_res.ipp) continue; + if (!pipe_to_program) { + pipe_to_program = pipe_ctx; + core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true); + } + core_dc->hwss.set_cursor_position(pipe_ctx); } + if (pipe_to_program) + core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false); + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index 132eef3826e2..68a71adeb12e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -38,6 +38,12 @@ static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state) { plane_state->ctx = ctx; + + plane_state->gamma_correction = dc_create_gamma(); + plane_state->gamma_correction->is_identity = true; + + plane_state->in_transfer_func = dc_create_transfer_func(); + plane_state->in_transfer_func->type = TF_TYPE_BYPASS; } static void destruct(struct dc_plane_state *plane_state) @@ -175,7 +181,7 @@ void dc_transfer_func_release(struct dc_transfer_func *tf) kref_put(&tf->refcount, dc_transfer_func_free); } -struct dc_transfer_func *dc_create_transfer_func(void) +struct dc_transfer_func *dc_create_transfer_func() { struct dc_transfer_func *tf = kvzalloc(sizeof(*tf), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index fa4b3c8b3bb7..cd4f4341cb53 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -38,7 +38,7 @@ #include "inc/compressor.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.1.38" +#define DC_VER "3.1.44" #define MAX_SURFACES 3 #define MAX_STREAMS 6 @@ -202,6 +202,7 @@ struct dc_debug { bool timing_trace; bool clock_trace; bool validation_trace; + bool bandwidth_calcs_trace; /* stutter efficiency related */ bool disable_stutter; @@ -332,20 +333,6 @@ enum { TRANSFER_FUNC_POINTS = 1025 }; -// Moved here from color module for linux -enum color_transfer_func { - transfer_func_unknown, - transfer_func_srgb, - transfer_func_bt709, - transfer_func_pq2084, - transfer_func_pq2084_interim, - transfer_func_linear_0_1, - transfer_func_linear_0_125, - transfer_func_dolbyvision, - transfer_func_gamma_22, - transfer_func_gamma_26 -}; - struct dc_hdr_static_metadata { /* display chromaticities and white point in units of 0.00001 */ unsigned int chromaticity_green_x; @@ -361,9 +348,6 @@ struct dc_hdr_static_metadata { uint32_t max_luminance; uint32_t maximum_content_light_level; uint32_t maximum_frame_average_light_level; - - bool hdr_supported; - bool is_hdr; }; enum dc_transfer_func_type { @@ -419,7 +403,6 @@ union surface_update_flags { /* Medium 
updates */ uint32_t dcc_change:1; uint32_t color_space_change:1; - uint32_t input_tf_change:1; uint32_t horizontal_mirror_change:1; uint32_t per_pixel_alpha_change:1; uint32_t rotation_change:1; @@ -428,6 +411,7 @@ union surface_update_flags { uint32_t position_change:1; uint32_t in_transfer_func_change:1; uint32_t input_csc_change:1; + uint32_t coeff_reduction_change:1; uint32_t output_tf_change:1; uint32_t pixel_format_change:1; @@ -460,7 +444,7 @@ struct dc_plane_state { struct dc_gamma *gamma_correction; struct dc_transfer_func *in_transfer_func; struct dc_bias_and_scale *bias_and_scale; - struct csc_transform input_csc_color_matrix; + struct dc_csc_transform input_csc_color_matrix; struct fixed31_32 coeff_reduction_factor; uint32_t sdr_white_level; @@ -468,7 +452,6 @@ struct dc_plane_state { struct dc_hdr_static_metadata hdr_static_ctx; enum dc_color_space color_space; - enum color_transfer_func input_tf; enum surface_pixel_format format; enum dc_rotation_angle rotation; @@ -498,7 +481,6 @@ struct dc_plane_info { enum dc_rotation_angle rotation; enum plane_stereo_format stereo_format; enum dc_color_space color_space; - enum color_transfer_func input_tf; unsigned int sdr_white_level; bool horizontal_mirror; bool visible; @@ -525,10 +507,9 @@ struct dc_surface_update { * null means no updates */ struct dc_gamma *gamma; - enum color_transfer_func color_input_tf; struct dc_transfer_func *in_transfer_func; - struct csc_transform *input_csc_color_matrix; + struct dc_csc_transform *input_csc_color_matrix; struct fixed31_32 *coeff_reduction_factor; }; @@ -699,6 +680,7 @@ struct dc_cursor { struct dc_cursor_attributes attributes; }; + /******************************************************************************* * Interrupt interfaces ******************************************************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 48e1fcf53d43..bd0fda0ceb91 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -117,6 +117,65 @@ uint32_t generic_reg_get5(const struct dc_context *ctx, uint32_t addr, return reg_val; } +uint32_t generic_reg_get6(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6) +{ + uint32_t reg_val = dm_read_reg(ctx, addr); + *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1); + *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2); + *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3); + *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4); + *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5); + *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6); + return reg_val; +} + +uint32_t generic_reg_get7(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6, + uint8_t shift7, uint32_t mask7, uint32_t 
*field_value7) +{ + uint32_t reg_val = dm_read_reg(ctx, addr); + *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1); + *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2); + *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3); + *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4); + *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5); + *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6); + *field_value7 = get_reg_field_value_ex(reg_val, mask7, shift7); + return reg_val; +} + +uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6, + uint8_t shift7, uint32_t mask7, uint32_t *field_value7, + uint8_t shift8, uint32_t mask8, uint32_t *field_value8) +{ + uint32_t reg_val = dm_read_reg(ctx, addr); + *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1); + *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2); + *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3); + *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4); + *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5); + *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6); + *field_value7 = get_reg_field_value_ex(reg_val, mask7, shift7); + *field_value8 = get_reg_field_value_ex(reg_val, mask8, shift8); + return reg_val; +} /* note: a va version of this is a pretty bad idea, since there is an output parameter passed by pointer; * the compiler won't be able to check for a size mismatch and it is prone to stack-corruption type bugs diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index b83a7dc2f5a9..b1f70579d61b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -423,6 +423,11 @@ enum dc_gamma_type { GAMMA_CS_TFM_1D = 3, }; +struct dc_csc_transform { + uint16_t matrix[12]; + bool enable_adjustment; +}; + struct dc_gamma { struct kref refcount; enum dc_gamma_type type; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index dc34515ef01f..8a716baa1203 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -51,6 +51,14 @@ struct link_mst_stream_allocation_table { struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM]; }; +struct time_stamp { + uint64_t edp_poweroff; + uint64_t edp_poweron; +}; + +struct link_trace { + struct time_stamp time_stamp; +}; /* * A link contains one or more sinks and their connected status. * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
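[Editor's note] The new time_stamp/link_trace fields above record when the eDP panel was last powered off and on. A minimal sketch of how a caller can use them to honor the panel power-cycle requirement; the enforcement actually lands in hwss_edp_power_control() later in this patch, from which the 500 ms bound is taken. The helper name here is hypothetical, and the timestamps are assumed to be in nanoseconds as returned by dm_get_timestamp():

static void edp_wait_for_power_cycle(struct dc_context *ctx, struct dc_link *link)
{
	/* ns timestamp recorded when LCDVDD was last switched off */
	unsigned long long poweroff = link->link_trace.time_stamp.edp_poweroff;
	unsigned long long now = dm_get_timestamp(ctx);
	unsigned long long elapsed_ms;
	unsigned long long wait_ms = 0;

	if (poweroff == 0) {
		/* no power-off recorded since boot: assume the worst case */
		wait_ms = 500;
	} else {
		elapsed_ms = div64_u64(dm_get_elapse_time_in_ns(ctx, now, poweroff), 1000000);
		if (elapsed_ms < 500)
			wait_ms = 500 - elapsed_ms;
	}

	if (wait_ms)
		msleep(wait_ms);	/* max 500 ms from LCDVDD off to on */
}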
@@ -114,6 +122,7 @@ struct dc_link { struct dc_link_status link_status; + struct link_trace link_trace; }; const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link); diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index d017df56b2ba..d7e6d53bb383 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -58,18 +58,20 @@ struct dc_stream_state { struct freesync_context freesync_ctx; - struct dc_hdr_static_metadata hdr_static_metadata; + struct dc_info_packet hdr_static_metadata; struct dc_transfer_func *out_transfer_func; struct colorspace_transform gamut_remap_matrix; - struct csc_transform csc_color_matrix; + struct dc_csc_transform csc_color_matrix; enum dc_color_space output_color_space; enum dc_dither_option dither_option; enum view_3d_format view_format; - enum color_transfer_func output_tf; bool ignore_msa_timing_param; + + unsigned long long periodic_fn_vsync_delta; + /* TODO: custom INFO packets */ /* TODO: ABM info (DMCU) */ /* PSR info */ @@ -110,9 +112,10 @@ struct dc_stream_update { struct rect src; struct rect dst; struct dc_transfer_func *out_transfer_func; - struct dc_hdr_static_metadata *hdr_static_metadata; - enum color_transfer_func color_output_tf; + struct dc_info_packet *hdr_static_metadata; unsigned int *abm_level; + + unsigned long long *periodic_fn_vsync_delta; }; bool dc_is_stream_unchanged( @@ -131,13 +134,6 @@ bool dc_is_stream_scaling_unchanged( * This does not trigger a flip. No surface address is programmed. */ -bool dc_commit_planes_to_stream( - struct dc *dc, - struct dc_plane_state **plane_states, - uint8_t new_plane_count, - struct dc_stream_state *dc_stream, - struct dc_state *state); - void dc_commit_updates_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, @@ -209,14 +205,6 @@ bool dc_add_all_planes_for_stream( enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream); /* - * This function takes a stream and checks if it is guaranteed to be supported. - * Guaranteed means that MAX_COFUNC similar streams are supported. - * - * After this call: - * No hardware is programmed for call. Only validation is done. - */ - -/* * Set up streams and links associated to drive sinks * The streams parameter is an absolute set of all active streams. 
* diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 9441305d3ab5..9defe3b17617 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -370,12 +370,6 @@ struct dc_csc_adjustments { struct fixed31_32 hue; }; -enum { - MAX_LANES = 2, - MAX_COFUNC_PATH = 6, - LAYER_INDEX_PRIMARY = -1, -}; - enum dpcd_downstream_port_max_bpc { DOWN_STREAM_MAX_8BPC = 0, DOWN_STREAM_MAX_10BPC, @@ -530,6 +524,15 @@ struct vrr_params { uint32_t frame_counter; }; +struct dc_info_packet { + bool valid; + uint8_t hb0; + uint8_t hb1; + uint8_t hb2; + uint8_t hb3; + uint8_t sb[32]; +}; + #define DC_PLANE_UPDATE_TIMES_MAX 10 struct dc_plane_flip_time { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 6d5cdcdc8ec9..7f6d724686f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -33,8 +33,9 @@ #define CTX \ aud->base.ctx -#define DC_LOGGER \ - aud->base.ctx->logger + +#define DC_LOGGER_INIT() + #define REG(reg)\ (aud->regs->reg) @@ -348,8 +349,8 @@ static void set_audio_latency( void dce_aud_az_enable(struct audio *audio) { - struct dce_audio *aud = DCE_AUD(audio); uint32_t value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); + DC_LOGGER_INIT(); set_reg_field_value(value, 1, AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, @@ -371,7 +372,7 @@ void dce_aud_az_enable(struct audio *audio) void dce_aud_az_disable(struct audio *audio) { uint32_t value; - struct dce_audio *aud = DCE_AUD(audio); + DC_LOGGER_INIT(); value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); set_reg_field_value(value, 1, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 0aa2cda60890..223db98a568a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -41,8 +41,9 @@ #define CTX \ clk_src->base.ctx -#define DC_LOGGER \ - calc_pll_cs->ctx->logger + +#define DC_LOGGER_INIT() + #undef FN #define FN(reg_name, field_name) \ clk_src->cs_shift->field_name, clk_src->cs_mask->field_name @@ -467,7 +468,7 @@ static uint32_t dce110_get_pix_clk_dividers_helper ( { uint32_t field = 0; uint32_t pll_calc_error = MAX_PLL_CALC_ERROR; - struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll; + DC_LOGGER_INIT(); /* Check if reference clock is external (not pcie/xtalin) * HW Dce80 spec: * 00 - PCIE_REFCLK, 01 - XTALIN, 02 - GENERICA, 03 - GENERICB @@ -557,8 +558,8 @@ static uint32_t dce110_get_pix_clk_dividers( struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(cs); - struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll; uint32_t pll_calc_error = MAX_PLL_CALC_ERROR; + DC_LOGGER_INIT(); if (pix_clk_params == NULL || pll_settings == NULL || pix_clk_params->requested_pix_clk == 0) { @@ -589,6 +590,9 @@ static uint32_t dce110_get_pix_clk_dividers( pll_settings, pix_clk_params); break; case DCE_VERSION_11_2: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case DCE_VERSION_11_22: +#endif case DCE_VERSION_12_0: #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: @@ -978,6 +982,9 @@ static bool dce110_program_pix_clk( break; case DCE_VERSION_11_2: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case DCE_VERSION_11_22: +#endif case DCE_VERSION_12_0: #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: @@ -1054,7 +1061,7 @@ 
static void get_ss_info_from_atombios( struct spread_spectrum_info *ss_info_cur; struct spread_spectrum_data *ss_data_cur; uint32_t i; - struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll; + DC_LOGGER_INIT(); if (ss_entries_num == NULL) { DC_LOG_SYNC( "Invalid entry !!!\n"); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c index 487724345d9d..0275d6d60da4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c @@ -53,7 +53,8 @@ void dce_pipe_control_lock(struct dc *dc, struct dce_hwseq *hws = dc->hwseq; /* Not lock pipe when blank */ - if (lock && pipe->stream_res.tg->funcs->is_blanked(pipe->stream_res.tg)) + if (lock && pipe->stream_res.tg->funcs->is_blanked && + pipe->stream_res.tg->funcs->is_blanked(pipe->stream_res.tg)) return; val = REG_GET_4(BLND_V_UPDATE_LOCK[pipe->stream_res.tg->inst], diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 8167cad7bcf7..dbe3b26b6d9e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -113,6 +113,7 @@ static const struct link_encoder_funcs dce110_lnk_enc_funcs = { .connect_dig_be_to_fe = dce110_link_encoder_connect_dig_be_to_fe, .enable_hpd = dce110_link_encoder_enable_hpd, .disable_hpd = dce110_link_encoder_disable_hpd, + .is_dig_enabled = dce110_is_dig_enabled, .destroy = dce110_link_encoder_destroy }; @@ -535,8 +536,9 @@ void dce110_psr_program_secondary_packet(struct link_encoder *enc, DP_SEC_GSP0_PRIORITY, 1); } -static bool is_dig_enabled(const struct dce110_link_encoder *enc110) +bool dce110_is_dig_enabled(struct link_encoder *enc) { + struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc); uint32_t value; REG_GET(DIG_BE_EN_CNTL, DIG_ENABLE, &value); @@ -1031,7 +1033,7 @@ void dce110_link_encoder_disable_output( struct bp_transmitter_control cntl = { 0 }; enum bp_result result; - if (!is_dig_enabled(enc110)) { + if (!dce110_is_dig_enabled(enc)) { /* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */ return; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h index 0ec3433d34b6..347069461a22 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h @@ -263,4 +263,6 @@ void dce110_psr_program_dp_dphy_fast_training(struct link_encoder *enc, void dce110_psr_program_secondary_packet(struct link_encoder *enc, unsigned int sdp_transmit_line_num_deadline); +bool dce110_is_dig_enabled(struct link_encoder *enc); + #endif /* __DC_LINK_ENCODER__DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index 0790f25c7b3b..b235a75355b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -174,6 +174,25 @@ static void program_urgency_watermark( URGENCY_HIGH_WATERMARK, urgency_high_wm); } +static void dce120_program_urgency_watermark( + struct dce_mem_input *dce_mi, + uint32_t wm_select, + uint32_t urgency_low_wm, + uint32_t urgency_high_wm) +{ + REG_UPDATE(DPG_WATERMARK_MASK_CONTROL, + URGENCY_WATERMARK_MASK, wm_select); + + REG_SET_2(DPG_PIPE_URGENCY_CONTROL, 0, + URGENCY_LOW_WATERMARK, urgency_low_wm, + URGENCY_HIGH_WATERMARK, urgency_high_wm); + + REG_SET_2(DPG_PIPE_URGENT_LEVEL_CONTROL, 0, + 
URGENT_LEVEL_LOW_WATERMARK, urgency_low_wm, + URGENT_LEVEL_HIGH_WATERMARK, urgency_high_wm); + +} + static void program_nbp_watermark( struct dce_mem_input *dce_mi, uint32_t wm_select, @@ -206,6 +225,25 @@ static void program_nbp_watermark( } } +static void dce120_program_stutter_watermark( + struct dce_mem_input *dce_mi, + uint32_t wm_select, + uint32_t stutter_mark, + uint32_t stutter_entry) +{ + REG_UPDATE(DPG_WATERMARK_MASK_CONTROL, + STUTTER_EXIT_SELF_REFRESH_WATERMARK_MASK, wm_select); + + if (REG(DPG_PIPE_STUTTER_CONTROL2)) + REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL2, + STUTTER_EXIT_SELF_REFRESH_WATERMARK, stutter_mark, + STUTTER_ENTER_SELF_REFRESH_WATERMARK, stutter_entry); + else + REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL, + STUTTER_EXIT_SELF_REFRESH_WATERMARK, stutter_mark, + STUTTER_ENTER_SELF_REFRESH_WATERMARK, stutter_entry); +} + static void program_stutter_watermark( struct dce_mem_input *dce_mi, uint32_t wm_select, @@ -225,7 +263,8 @@ static void program_stutter_watermark( static void dce_mi_program_display_marks( struct mem_input *mi, struct dce_watermarks nbp, - struct dce_watermarks stutter, + struct dce_watermarks stutter_exit, + struct dce_watermarks stutter_enter, struct dce_watermarks urgent, uint32_t total_dest_line_time_ns) { @@ -243,13 +282,14 @@ static void dce_mi_program_display_marks( program_nbp_watermark(dce_mi, 2, nbp.a_mark); /* set a */ program_nbp_watermark(dce_mi, 1, nbp.d_mark); /* set d */ - program_stutter_watermark(dce_mi, 2, stutter.a_mark); /* set a */ - program_stutter_watermark(dce_mi, 1, stutter.d_mark); /* set d */ + program_stutter_watermark(dce_mi, 2, stutter_exit.a_mark); /* set a */ + program_stutter_watermark(dce_mi, 1, stutter_exit.d_mark); /* set d */ } -static void dce120_mi_program_display_marks(struct mem_input *mi, +static void dce112_mi_program_display_marks(struct mem_input *mi, struct dce_watermarks nbp, - struct dce_watermarks stutter, + struct dce_watermarks stutter_exit, + struct dce_watermarks stutter_entry, struct dce_watermarks urgent, uint32_t total_dest_line_time_ns) { @@ -273,10 +313,43 @@ static void dce120_mi_program_display_marks(struct mem_input *mi, program_nbp_watermark(dce_mi, 2, nbp.c_mark); /* set c */ program_nbp_watermark(dce_mi, 3, nbp.d_mark); /* set d */ - program_stutter_watermark(dce_mi, 0, stutter.a_mark); /* set a */ - program_stutter_watermark(dce_mi, 1, stutter.b_mark); /* set b */ - program_stutter_watermark(dce_mi, 2, stutter.c_mark); /* set c */ - program_stutter_watermark(dce_mi, 3, stutter.d_mark); /* set d */ + program_stutter_watermark(dce_mi, 0, stutter_exit.a_mark); /* set a */ + program_stutter_watermark(dce_mi, 1, stutter_exit.b_mark); /* set b */ + program_stutter_watermark(dce_mi, 2, stutter_exit.c_mark); /* set c */ + program_stutter_watermark(dce_mi, 3, stutter_exit.d_mark); /* set d */ +} + +static void dce120_mi_program_display_marks(struct mem_input *mi, + struct dce_watermarks nbp, + struct dce_watermarks stutter_exit, + struct dce_watermarks stutter_entry, + struct dce_watermarks urgent, + uint32_t total_dest_line_time_ns) +{ + struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi); + uint32_t stutter_en = mi->ctx->dc->debug.disable_stutter ? 
0 : 1; + + dce120_program_urgency_watermark(dce_mi, 0, /* set a */ + urgent.a_mark, total_dest_line_time_ns); + dce120_program_urgency_watermark(dce_mi, 1, /* set b */ + urgent.b_mark, total_dest_line_time_ns); + dce120_program_urgency_watermark(dce_mi, 2, /* set c */ + urgent.c_mark, total_dest_line_time_ns); + dce120_program_urgency_watermark(dce_mi, 3, /* set d */ + urgent.d_mark, total_dest_line_time_ns); + + REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL, + STUTTER_ENABLE, stutter_en, + STUTTER_IGNORE_FBC, 1); + program_nbp_watermark(dce_mi, 0, nbp.a_mark); /* set a */ + program_nbp_watermark(dce_mi, 1, nbp.b_mark); /* set b */ + program_nbp_watermark(dce_mi, 2, nbp.c_mark); /* set c */ + program_nbp_watermark(dce_mi, 3, nbp.d_mark); /* set d */ + + dce120_program_stutter_watermark(dce_mi, 0, stutter_exit.a_mark, stutter_entry.a_mark); /* set a */ + dce120_program_stutter_watermark(dce_mi, 1, stutter_exit.b_mark, stutter_entry.b_mark); /* set b */ + dce120_program_stutter_watermark(dce_mi, 2, stutter_exit.c_mark, stutter_entry.c_mark); /* set c */ + dce120_program_stutter_watermark(dce_mi, 3, stutter_exit.d_mark, stutter_entry.d_mark); /* set d */ } static void program_tiling( @@ -696,5 +769,17 @@ void dce112_mem_input_construct( const struct dce_mem_input_mask *mi_mask) { dce_mem_input_construct(dce_mi, ctx, inst, regs, mi_shift, mi_mask); + dce_mi->base.funcs->mem_input_program_display_marks = dce112_mi_program_display_marks; +} + +void dce120_mem_input_construct( + struct dce_mem_input *dce_mi, + struct dc_context *ctx, + int inst, + const struct dce_mem_input_registers *regs, + const struct dce_mem_input_shift *mi_shift, + const struct dce_mem_input_mask *mi_mask) +{ + dce_mem_input_construct(dce_mi, ctx, inst, regs, mi_shift, mi_mask); dce_mi->base.funcs->mem_input_program_display_marks = dce120_mi_program_display_marks; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h index 05d39c0cbe87..d15b0d7f47fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h @@ -106,6 +106,7 @@ struct dce_mem_input_registers { uint32_t DPG_PIPE_ARBITRATION_CONTROL1; uint32_t DPG_WATERMARK_MASK_CONTROL; uint32_t DPG_PIPE_URGENCY_CONTROL; + uint32_t DPG_PIPE_URGENT_LEVEL_CONTROL; uint32_t DPG_PIPE_NB_PSTATE_CHANGE_CONTROL; uint32_t DPG_PIPE_LOW_POWER_CONTROL; uint32_t DPG_PIPE_STUTTER_CONTROL; @@ -213,6 +214,11 @@ struct dce_mem_input_registers { #define MI_DCE12_DMIF_PG_MASK_SH_LIST(mask_sh, blk)\ SFB(blk, DPG_PIPE_STUTTER_CONTROL2, STUTTER_EXIT_SELF_REFRESH_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_STUTTER_CONTROL2, STUTTER_ENTER_SELF_REFRESH_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_URGENT_LEVEL_CONTROL, URGENT_LEVEL_LOW_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_URGENT_LEVEL_CONTROL, URGENT_LEVEL_HIGH_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, mask_sh),\ SFB(blk, DPG_WATERMARK_MASK_CONTROL, PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\ SFB(blk, DPG_PIPE_LOW_POWER_CONTROL, PSTATE_CHANGE_ENABLE, mask_sh),\ SFB(blk, DPG_PIPE_LOW_POWER_CONTROL, PSTATE_CHANGE_URGENT_DURING_REQUEST, mask_sh),\ @@ -286,6 +292,8 @@ struct dce_mem_input_registers { type STUTTER_EXIT_SELF_REFRESH_WATERMARK_MASK; \ type URGENCY_LOW_WATERMARK; \ type URGENCY_HIGH_WATERMARK; \ + type URGENT_LEVEL_LOW_WATERMARK;\ + type URGENT_LEVEL_HIGH_WATERMARK;\ type NB_PSTATE_CHANGE_ENABLE; \ type 
NB_PSTATE_CHANGE_URGENT_DURING_REQUEST; \ type NB_PSTATE_CHANGE_NOT_SELF_REFRESH_DURING_REQUEST; \ @@ -297,6 +305,7 @@ struct dce_mem_input_registers { type STUTTER_ENABLE; \ type STUTTER_IGNORE_FBC; \ type STUTTER_EXIT_SELF_REFRESH_WATERMARK; \ + type STUTTER_ENTER_SELF_REFRESH_WATERMARK; \ type DMIF_BUFFERS_ALLOCATED; \ type DMIF_BUFFERS_ALLOCATION_COMPLETED; \ type ENABLE; /* MC_HUB_RDREQ_DMIF_LIMIT */\ @@ -344,4 +353,12 @@ void dce112_mem_input_construct( const struct dce_mem_input_shift *mi_shift, const struct dce_mem_input_mask *mi_mask); +void dce120_mem_input_construct( + struct dce_mem_input *dce_mi, + struct dc_context *ctx, + int inst, + const struct dce_mem_input_registers *regs, + const struct dce_mem_input_shift *mi_shift, + const struct dce_mem_input_mask *mi_mask); + #endif /*__DCE_MEM_INPUT_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 162f6a6c4208..e265a0abe361 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -26,27 +26,10 @@ #include "dc_bios_types.h" #include "dce_stream_encoder.h" #include "reg_helper.h" +#include "hw_shared.h" + #define DC_LOGGER \ enc110->base.ctx->logger -enum DP_PIXEL_ENCODING { -DP_PIXEL_ENCODING_RGB444 = 0x00000000, -DP_PIXEL_ENCODING_YCBCR422 = 0x00000001, -DP_PIXEL_ENCODING_YCBCR444 = 0x00000002, -DP_PIXEL_ENCODING_RGB_WIDE_GAMUT = 0x00000003, -DP_PIXEL_ENCODING_Y_ONLY = 0x00000004, -DP_PIXEL_ENCODING_YCBCR420 = 0x00000005, -DP_PIXEL_ENCODING_RESERVED = 0x00000006, -}; - - -enum DP_COMPONENT_DEPTH { -DP_COMPONENT_DEPTH_6BPC = 0x00000000, -DP_COMPONENT_DEPTH_8BPC = 0x00000001, -DP_COMPONENT_DEPTH_10BPC = 0x00000002, -DP_COMPONENT_DEPTH_12BPC = 0x00000003, -DP_COMPONENT_DEPTH_16BPC = 0x00000004, -DP_COMPONENT_DEPTH_RESERVED = 0x00000005, -}; #define REG(reg)\ @@ -80,7 +63,7 @@ enum { static void dce110_update_generic_info_packet( struct dce110_stream_encoder *enc110, uint32_t packet_index, - const struct encoder_info_packet *info_packet) + const struct dc_info_packet *info_packet) { uint32_t regval; /* TODOFPGA Figure out a proper number for max_retries polling for lock @@ -196,7 +179,7 @@ static void dce110_update_generic_info_packet( static void dce110_update_hdmi_info_packet( struct dce110_stream_encoder *enc110, uint32_t packet_index, - const struct encoder_info_packet *info_packet) + const struct dc_info_packet *info_packet) { uint32_t cont, send, line; @@ -314,11 +297,11 @@ static void dce110_stream_encoder_dp_set_stream_attribute( switch (crtc_timing->pixel_encoding) { case PIXEL_ENCODING_YCBCR422: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_YCBCR422); + DP_PIXEL_ENCODING_TYPE_YCBCR422); break; case PIXEL_ENCODING_YCBCR444: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_YCBCR444); + DP_PIXEL_ENCODING_TYPE_YCBCR444); if (crtc_timing->flags.Y_ONLY) if (crtc_timing->display_color_depth != COLOR_DEPTH_666) @@ -326,7 +309,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( * Color depth of Y-only could be * 8, 10, 12, 16 bits */ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_Y_ONLY); + DP_PIXEL_ENCODING_TYPE_Y_ONLY); /* Note: DP_MSA_MISC1 bit 7 is the indicator * of Y-only mode. 
* This bit is set in HW if register @@ -334,7 +317,7 @@ break; case PIXEL_ENCODING_YCBCR420: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_YCBCR420); + DP_PIXEL_ENCODING_TYPE_YCBCR420); if (enc110->se_mask->DP_VID_M_DOUBLE_VALUE_EN) REG_UPDATE(DP_VID_TIMING, DP_VID_M_DOUBLE_VALUE_EN, 1); @@ -345,7 +328,7 @@ break; default: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_RGB444); + DP_PIXEL_ENCODING_TYPE_RGB444); break; } @@ -363,20 +346,20 @@ break; case COLOR_DEPTH_888: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_DEPTH_8BPC); + DP_COMPONENT_PIXEL_DEPTH_8BPC); break; case COLOR_DEPTH_101010: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_DEPTH_10BPC); + DP_COMPONENT_PIXEL_DEPTH_10BPC); break; case COLOR_DEPTH_121212: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_DEPTH_12BPC); + DP_COMPONENT_PIXEL_DEPTH_12BPC); break; default: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_DEPTH_6BPC); + DP_COMPONENT_PIXEL_DEPTH_6BPC); break; } @@ -836,7 +819,7 @@ static void dce110_stream_encoder_update_dp_info_packets( const struct encoder_info_frame *info_frame) { struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t value = REG_READ(DP_SEC_CNTL); + uint32_t value = 0; if (info_frame->vsc.valid) dce110_update_generic_info_packet( @@ -870,6 +853,7 @@ static void dce110_stream_encoder_update_dp_info_packets( * Therefore we need to enable master bit * if at least one of the fields is not 0 */ + value = REG_READ(DP_SEC_CNTL); if (value) REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); } @@ -879,7 +863,7 @@ static void dce110_stream_encoder_stop_dp_info_packets( { /* stop generic packets on DP */ struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t value = REG_READ(DP_SEC_CNTL); + uint32_t value = 0; if (enc110->se_mask->DP_SEC_AVI_ENABLE) { REG_SET_7(DP_SEC_CNTL, 0, @@ -892,25 +876,10 @@ static void dce110_stream_encoder_stop_dp_info_packets( DP_SEC_STREAM_ENABLE, 0); } -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) - if (enc110->se_mask->DP_SEC_GSP7_ENABLE) { - REG_SET_10(DP_SEC_CNTL, 0, - DP_SEC_GSP0_ENABLE, 0, - DP_SEC_GSP1_ENABLE, 0, - DP_SEC_GSP2_ENABLE, 0, - DP_SEC_GSP3_ENABLE, 0, - DP_SEC_GSP4_ENABLE, 0, - DP_SEC_GSP5_ENABLE, 0, - DP_SEC_GSP6_ENABLE, 0, - DP_SEC_GSP7_ENABLE, 0, - DP_SEC_MPG_ENABLE, 0, - DP_SEC_STREAM_ENABLE, 0); - } -#endif /* this register is shared with audio info frame. * therefore we need to keep master enabled * if at least one of the fields is not 0 */ - + value = REG_READ(DP_SEC_CNTL); if (value) REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); @@ -1513,7 +1482,7 @@ static void dce110_se_disable_dp_audio( struct stream_encoder *enc) { struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t value = REG_READ(DP_SEC_CNTL); + uint32_t value = 0; /* Disable Audio packets */ REG_UPDATE_5(DP_SEC_CNTL, @@ -1525,6 +1494,7 @@ static void dce110_se_disable_dp_audio( /* This register is shared with encoder info frame.
Therefore we need to keep master enabled if at least one of the fields is not 0 */ + value = REG_READ(DP_SEC_CNTL); if (value != 0) REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 3092f76bdb75..38ec0d609297 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -733,38 +733,6 @@ enum dc_status dce100_add_stream_to_ctx( return result; } -enum dc_status dce100_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = dc_stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = resource_map_clock_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, dc_stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - - if (result == DC_OK) - if (!dce100_validate_bandwidth(dc, context)) - result = DC_FAIL_BANDWIDTH_VALIDATE; - - return result; -} - static void dce100_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -786,7 +754,6 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s static const struct resource_funcs dce100_res_pool_funcs = { .destroy = dce100_destroy_resource_pool, .link_enc_create = dce100_link_encoder_create, - .validate_guaranteed = dce100_validate_guaranteed, .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index d0575999f172..2288d0aa773b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -70,8 +70,9 @@ #define CTX \ hws->ctx -#define DC_LOGGER \ - ctx->logger + +#define DC_LOGGER_INIT() + #define REG(reg)\ hws->regs->reg @@ -279,7 +280,9 @@ dce110_set_input_transfer_func(struct pipe_ctx *pipe_ctx, build_prescale_params(&prescale_params, plane_state); ipp->funcs->ipp_program_prescale(ipp, &prescale_params); - if (plane_state->gamma_correction && dce_use_lut(plane_state->format)) + if (plane_state->gamma_correction && + !plane_state->gamma_correction->is_identity && + dce_use_lut(plane_state->format)) ipp->funcs->ipp_program_input_lut(ipp, plane_state->gamma_correction); if (tf == NULL) { @@ -851,6 +854,28 @@ void hwss_edp_power_control( if (power_up != is_panel_powered_on(hwseq)) { /* Send VBIOS command to prompt eDP panel power */ + if (power_up) { + unsigned long long current_ts = dm_get_timestamp(ctx); + unsigned long long duration_in_ms = + dm_get_elapse_time_in_ns( + ctx, + current_ts, + div64_u64(link->link_trace.time_stamp.edp_poweroff, 1000000)); + unsigned long long wait_time_ms = 0; + + /* max 500ms from LCDVDD off to on */ + if (link->link_trace.time_stamp.edp_poweroff == 0) + wait_time_ms = 500; + else if (duration_in_ms < 500) + wait_time_ms = 500 - duration_in_ms; + + if (wait_time_ms) { + msleep(wait_time_ms); + dm_output_to_console("%s: 
wait %lld ms to power on eDP.\n", + __func__, wait_time_ms); + } + + } DC_LOG_HW_RESUME_S3( "%s: Panel Power action: %s\n", @@ -864,9 +889,14 @@ cntl.coherent = false; cntl.lanes_number = LANE_COUNT_FOUR; cntl.hpd_sel = link->link_enc->hpd_source; - bp_result = link_transmitter_control(ctx->dc_bios, &cntl); + if (!power_up) + /* save driver power-off time stamp */ + link->link_trace.time_stamp.edp_poweroff = dm_get_timestamp(ctx); + else + link->link_trace.time_stamp.edp_poweron = dm_get_timestamp(ctx); + if (bp_result != BP_RESULT_OK) DC_LOG_ERROR( "%s: Panel Power bp_result: %d\n", @@ -1011,7 +1041,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { link->dc->hwss.edp_backlight_control(link, true); - stream->bl_pwm_level = 0; + stream->bl_pwm_level = EDP_BACKLIGHT_RAMP_DISABLE_LEVEL; } } void dce110_blank_stream(struct pipe_ctx *pipe_ctx) @@ -1203,7 +1233,7 @@ static void program_scaler(const struct dc *dc, &pipe_ctx->plane_res.scl_data); } -static enum dc_status dce110_prog_pixclk_crtc_otg( +static enum dc_status dce110_enable_stream_timing( struct pipe_ctx *pipe_ctx, struct dc_state *context, struct dc *dc) @@ -1269,7 +1299,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx[pipe_ctx->pipe_idx]; /* */ - dc->hwss.prog_pixclk_crtc_otg(pipe_ctx, context, dc); + dc->hwss.enable_stream_timing(pipe_ctx, context, dc); /* FPGA does not program backend */ if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { @@ -1441,6 +1471,17 @@ static void disable_vga_and_power_gate_all_controllers( } } +static struct dc_link *get_link_for_edp(struct dc *dc) +{ + int i; + + for (i = 0; i < dc->link_count; i++) { + if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) + return dc->links[i]; + } + return NULL; +} + static struct dc_link *get_link_for_edp_not_in_use( struct dc *dc, struct dc_state *context) @@ -1475,20 +1516,21 @@ static struct dc_link *get_link_for_edp_not_in_use( */ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) { - struct dc_bios *dcb = dc->ctx->dc_bios; - - /* vbios already light up eDP, so we can leverage vbios and skip eDP - * programming - */ - bool can_eDP_fast_boot_optimize = - (dcb->funcs->get_vga_enabled_displays(dc->ctx->dc_bios) == ATOM_DISPLAY_LCD1_ACTIVE); - - /* if OS doesn't light up eDP and eDP link is available, we want to disable */ struct dc_link *edp_link_to_turnoff = NULL; + struct dc_link *edp_link = get_link_for_edp(dc); + bool can_eDP_fast_boot_optimize = false; + + if (edp_link) { + can_eDP_fast_boot_optimize = + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc); + } if (can_eDP_fast_boot_optimize) { edp_link_to_turnoff = get_link_for_edp_not_in_use(dc, context); + /* if OS doesn't light up eDP and eDP link is available, we want to disable + * If resuming from S4/S5, we should apply the optimization.
+ */ if (!edp_link_to_turnoff) dc->apply_edp_fast_boot_optimization = true; } @@ -1544,6 +1586,7 @@ static void dce110_set_displaymarks( pipe_ctx->plane_res.mi, context->bw.dce.nbp_state_change_wm_ns[num_pipes], context->bw.dce.stutter_exit_wm_ns[num_pipes], + context->bw.dce.stutter_entry_wm_ns[num_pipes], context->bw.dce.urgent_wm_ns[num_pipes], total_dest_line_time_ns); if (i == underlay_idx) { @@ -1569,6 +1612,7 @@ static void set_safe_displaymarks( MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK }; struct dce_watermarks nbp_marks = { SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK }; + struct dce_watermarks min_marks = { 0, 0, 0, 0}; for (i = 0; i < MAX_PIPES; i++) { if (res_ctx->pipe_ctx[i].stream == NULL || res_ctx->pipe_ctx[i].plane_res.mi == NULL) @@ -1578,6 +1622,7 @@ static void set_safe_displaymarks( res_ctx->pipe_ctx[i].plane_res.mi, nbp_marks, max_marks, + min_marks, max_marks, MAX_WATERMARK); @@ -1803,6 +1848,9 @@ static bool should_enable_fbc(struct dc *dc, } } + /* Pipe context should be found */ + ASSERT(pipe_ctx); + /* Only supports eDP */ if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP) return false; @@ -2699,8 +2747,11 @@ static void dce110_program_front_end_for_pipe( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct xfm_grph_csc_adjustment adjust; struct out_csc_color_matrix tbl_entry; +#if defined(CONFIG_DRM_AMD_DC_FBC) + unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; +#endif unsigned int i; - struct dc_context *ctx = dc->ctx; + DC_LOGGER_INIT(); memset(&tbl_entry, 0, sizeof(tbl_entry)); if (dc->current_state) @@ -2740,7 +2791,9 @@ static void dce110_program_front_end_for_pipe( program_scaler(dc, pipe_ctx); #if defined(CONFIG_DRM_AMD_DC_FBC) - if (dc->fbc_compressor && old_pipe->stream) { + /* fbc not applicable on Underlay pipe */ + if (dc->fbc_compressor && old_pipe->stream && + pipe_ctx->pipe_idx != underlay_idx) { if (plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL) dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor); else @@ -2776,13 +2829,13 @@ static void dce110_program_front_end_for_pipe( dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream); DC_LOG_SURFACE( - "Pipe:%d 0x%x: addr hi:0x%x, " + "Pipe:%d %p: addr hi:0x%x, " "addr low:0x%x, " "src: %d, %d, %d," " %d; dst: %d, %d, %d, %d;" "clip: %d, %d, %d, %d\n", pipe_ctx->pipe_idx, - pipe_ctx->plane_state, + (void *) pipe_ctx->plane_state, pipe_ctx->plane_state->address.grph.addr.high_part, pipe_ctx->plane_state->address.grph.addr.low_part, pipe_ctx->plane_state->src_rect.x, @@ -2993,7 +3046,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .get_position = get_position, .set_static_screen_control = set_static_screen_control, .reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap, - .prog_pixclk_crtc_otg = dce110_prog_pixclk_crtc_otg, + .enable_stream_timing = dce110_enable_stream_timing, .setup_stereo = NULL, .set_avmute = dce110_set_avmute, .wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c index 7bab8c6d2a73..0564c8e31252 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c @@ -923,6 +923,7 @@ void dce_mem_input_v_program_display_marks( struct mem_input *mem_input, struct dce_watermarks nbp, struct dce_watermarks stutter, + struct dce_watermarks stutter_enter, struct 
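The hwss_edp_power_control() hunk above enforces the panel's minimum LCDVDD off-to-on spacing: power-off is timestamped, and on power-up the code sleeps only for whatever part of the 500 ms budget has not yet elapsed, falling back to the full 500 ms when no power-off was ever recorded. A standalone sketch of the arithmetic, working in milliseconds instead of the driver's nanosecond timestamps (required_wait_ms is a made-up helper name):

#include <stdio.h>

static unsigned long long required_wait_ms(unsigned long long poweroff_ts_ms,
					   unsigned long long now_ms)
{
	unsigned long long elapsed;

	if (poweroff_ts_ms == 0)	/* no recorded power-off: be conservative */
		return 500;
	elapsed = now_ms - poweroff_ts_ms;
	return elapsed < 500 ? 500 - elapsed : 0;
}

int main(void)
{
	printf("%llu\n", required_wait_ms(0, 1000));	/* 500: unknown history */
	printf("%llu\n", required_wait_ms(800, 1000));	/* 300: 200 ms elapsed */
	printf("%llu\n", required_wait_ms(100, 1000));	/* 0: spacing satisfied */
	return 0;
}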
dce_watermarks urgent, uint32_t total_dest_line_time_ns) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index b1f14be20fdf..ee33786bdef6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -930,38 +930,6 @@ static enum dc_status dce110_add_stream_to_ctx( return result; } -static enum dc_status dce110_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = dc_stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = resource_map_clock_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, dc_stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - - if (result == DC_OK) - if (!dce110_validate_bandwidth(dc, context)) - result = DC_FAIL_BANDWIDTH_VALIDATE; - - return result; -} - static struct pipe_ctx *dce110_acquire_underlay( struct dc_state *context, const struct resource_pool *pool, @@ -1036,7 +1004,6 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool) static const struct resource_funcs dce110_res_pool_funcs = { .destroy = dce110_destroy_resource_pool, .link_enc_create = dce110_link_encoder_create, - .validate_guaranteed = dce110_validate_guaranteed, .validate_bandwidth = dce110_validate_bandwidth, .validate_plane = dce110_validate_plane, .acquire_idle_pipe_for_layer = dce110_acquire_underlay, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index be7153924a70..1b2fe0df347f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -431,14 +431,6 @@ void dce110_timing_generator_set_drr( 0, CRTC_V_TOTAL_CONTROL, CRTC_SET_V_TOTAL_MIN_MASK); - set_reg_field_value(v_total_min, - 0, - CRTC_V_TOTAL_MIN, - CRTC_V_TOTAL_MIN); - set_reg_field_value(v_total_max, - 0, - CRTC_V_TOTAL_MAX, - CRTC_V_TOTAL_MAX); set_reg_field_value(v_total_cntl, 0, CRTC_V_TOTAL_CONTROL, @@ -447,6 +439,14 @@ void dce110_timing_generator_set_drr( 0, CRTC_V_TOTAL_CONTROL, CRTC_V_TOTAL_MAX_SEL); + set_reg_field_value(v_total_min, + 0, + CRTC_V_TOTAL_MIN, + CRTC_V_TOTAL_MIN); + set_reg_field_value(v_total_max, + 0, + CRTC_V_TOTAL_MAX, + CRTC_V_TOTAL_MAX); set_reg_field_value(v_total_cntl, 0, CRTC_V_TOTAL_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index 8ad04816e7d3..a3cef60380ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -648,12 +648,6 @@ static void dce110_timing_generator_v_disable_vga( return; } -static bool dce110_tg_v_is_blanked(struct timing_generator *tg) -{ - /* Signal comes from the primary pipe, underlay is never blanked. 
*/ - return false; -} - /** ******************************************************************************************** * * DCE11 Timing Generator Constructor / Destructor @@ -670,7 +664,6 @@ static const struct timing_generator_funcs dce110_tg_v_funcs = { .set_early_control = dce110_timing_generator_v_set_early_control, .wait_for_state = dce110_timing_generator_v_wait_for_state, .set_blank = dce110_timing_generator_v_set_blank, - .is_blanked = dce110_tg_v_is_blanked, .set_colors = dce110_timing_generator_v_set_colors, .set_overscan_blank_color = dce110_timing_generator_v_set_overscan_color_black, diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index cd1e3f72c44e..00c0a1ef15eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -430,7 +430,7 @@ static struct stream_encoder *dce112_stream_encoder_create( if (!enc110) return NULL; - + dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, &stream_enc_regs[eng_id], &se_shift, &se_mask); @@ -867,38 +867,6 @@ enum dc_status dce112_add_stream_to_ctx( return result; } -enum dc_status dce112_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, stream); - - if (result == DC_OK) - result = resource_map_phy_clock_resources(dc, context, stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - - if (result == DC_OK) - if (!dce112_validate_bandwidth(dc, context)) - result = DC_FAIL_BANDWIDTH_VALIDATE; - - return result; -} - enum dc_status dce112_validate_global( struct dc *dc, struct dc_state *context) @@ -921,7 +889,6 @@ static void dce112_destroy_resource_pool(struct resource_pool **pool) static const struct resource_funcs dce112_res_pool_funcs = { .destroy = dce112_destroy_resource_pool, .link_enc_create = dce112_link_encoder_create, - .validate_guaranteed = dce112_validate_guaranteed, .validate_bandwidth = dce112_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce112_add_stream_to_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h index d5c19d34eb0a..95a403396219 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h @@ -42,11 +42,6 @@ enum dc_status dce112_validate_with_context( struct dc_state *context, struct dc_state *old_context); -enum dc_status dce112_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context); - bool dce112_validate_bandwidth( struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 4659a4bfabaa..fda01574d1ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -652,7 +652,7 @@ static struct mem_input *dce120_mem_input_create( return NULL; } - dce112_mem_input_construct(dce_mi, ctx, 
inst, &mi_regs[inst], &mi_shifts, &mi_masks); + dce120_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); return &dce_mi->base; } @@ -684,7 +684,6 @@ static void dce120_destroy_resource_pool(struct resource_pool **pool) static const struct resource_funcs dce120_res_pool_funcs = { .destroy = dce120_destroy_resource_pool, .link_enc_create = dce120_link_encoder_create, - .validate_guaranteed = dce112_validate_guaranteed, .validate_bandwidth = dce112_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce112_add_stream_to_ctx diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 7bee78172d85..2ea490f8482e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -570,12 +570,6 @@ void dce120_timing_generator_set_drr( 0x180); } else { - CRTC_REG_UPDATE( - CRTC0_CRTC_V_TOTAL_MIN, - CRTC_V_TOTAL_MIN, 0); - CRTC_REG_UPDATE( - CRTC0_CRTC_V_TOTAL_MAX, - CRTC_V_TOTAL_MAX, 0); CRTC_REG_SET_N(CRTC0_CRTC_V_TOTAL_CONTROL, 5, FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_V_TOTAL_MIN_SEL), 0, FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_V_TOTAL_MAX_SEL), 0, @@ -583,6 +577,12 @@ void dce120_timing_generator_set_drr( FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_FORCE_LOCK_TO_MASTER_VSYNC), 0, FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_SET_V_TOTAL_MIN_MASK), 0); CRTC_REG_UPDATE( + CRTC0_CRTC_V_TOTAL_MIN, + CRTC_V_TOTAL_MIN, 0); + CRTC_REG_UPDATE( + CRTC0_CRTC_V_TOTAL_MAX, + CRTC_V_TOTAL_MAX, 0); + CRTC_REG_UPDATE( CRTC0_CRTC_STATIC_SCREEN_CONTROL, CRTC_STATIC_SCREEN_EVENT_MASK, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 5d854a37a978..48a068964722 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -691,23 +691,6 @@ static void destruct(struct dce110_resource_pool *pool) } } -static enum dc_status build_mapped_resource( - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *stream) -{ - struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - dce110_resource_build_pipe_hw_param(pipe_ctx); - - resource_build_info_frame(pipe_ctx); - - return DC_OK; -} - bool dce80_validate_bandwidth( struct dc *dc, struct dc_state *context) @@ -749,37 +732,6 @@ enum dc_status dce80_validate_global( return DC_OK; } -enum dc_status dce80_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = dc_stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = resource_map_clock_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, dc_stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - - if (result == DC_OK) - result = dce80_validate_bandwidth(dc, context); - - return result; -} - static void dce80_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -792,7 +744,6 @@ static 
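Both set_drr() hunks (dce110 earlier, dce120 just above) apply the same reordering on the disable path: the V_TOTAL_MIN/MAX select bits in V_TOTAL_CONTROL are cleared first, and only then are V_TOTAL_MIN and V_TOTAL_MAX zeroed. The patch does not state the rationale, but the order matters if the CRTC could otherwise still be following the limits while they are being cleared. A toy model with plain variables standing in for the register fields:

#include <stdio.h>

static unsigned v_total_min_sel = 1, v_total_max_sel = 1;
static unsigned v_total_min = 1200, v_total_max = 1500;

static void disable_drr(void)
{
	/* 1. detach the CRTC from the min/max limits... */
	v_total_min_sel = 0;
	v_total_max_sel = 0;
	/* 2. ...and only then clear the limits themselves */
	v_total_min = 0;
	v_total_max = 0;
}

int main(void)
{
	disable_drr();
	printf("sel=%u/%u min=%u max=%u\n",
	       v_total_min_sel, v_total_max_sel, v_total_min, v_total_max);
	return 0;
}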
void dce80_destroy_resource_pool(struct resource_pool **pool) static const struct resource_funcs dce80_res_pool_funcs = { .destroy = dce80_destroy_resource_pool, .link_enc_create = dce80_link_encoder_create, - .validate_guaranteed = dce80_validate_guaranteed, .validate_bandwidth = dce80_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 5469bdfe19f3..5c69743a4b4f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -26,7 +26,7 @@ DCN10 = dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \ dcn10_dpp.o dcn10_opp.o dcn10_optc.o \ dcn10_hubp.o dcn10_mpc.o \ dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \ - dcn10_hubbub.o + dcn10_hubbub.o dcn10_stream_encoder.o AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 881a1bff94d2..96d5878e9ccd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -367,15 +367,15 @@ bool cm_helper_translate_curve_to_hw_format( lut_params->hw_points_num = hw_points; - i = 1; - for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) { + k = 0; + for (i = 1; i < MAX_REGIONS_NUMBER; i++) { if (seg_distr[k] != -1) { lut_params->arr_curve_points[k].segments_num = seg_distr[k]; lut_params->arr_curve_points[i].offset = lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); } - i++; + k++; } if (seg_distr[k] != -1) @@ -529,15 +529,15 @@ bool cm_helper_translate_curve_to_degamma_hw_format( lut_params->hw_points_num = hw_points; - i = 1; - for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) { + k = 0; + for (i = 1; i < MAX_REGIONS_NUMBER; i++) { if (seg_distr[k] != -1) { lut_params->arr_curve_points[k].segments_num = seg_distr[k]; lut_params->arr_curve_points[i].offset = lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); } - i++; + k++; } if (seg_distr[k] != -1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index e305c28c98de..8c4d9e523331 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -98,6 +98,30 @@ enum gamut_remap_select { GAMUT_REMAP_COMB_COEFF }; +void dpp_read_state(struct dpp *dpp_base, + struct dcn_dpp_state *s) +{ + struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); + + REG_GET(CM_IGAM_CONTROL, + CM_IGAM_LUT_MODE, &s->igam_lut_mode); + REG_GET(CM_IGAM_CONTROL, + CM_IGAM_INPUT_FORMAT, &s->igam_input_format); + REG_GET(CM_DGAM_CONTROL, + CM_DGAM_LUT_MODE, &s->dgam_lut_mode); + REG_GET(CM_RGAM_CONTROL, + CM_RGAM_LUT_MODE, &s->rgam_lut_mode); + REG_GET(CM_GAMUT_REMAP_CONTROL, + CM_GAMUT_REMAP_MODE, &s->gamut_remap_mode); + + s->gamut_remap_c11_c12 = REG_READ(CM_GAMUT_REMAP_C11_C12); + s->gamut_remap_c13_c14 = REG_READ(CM_GAMUT_REMAP_C13_C14); + s->gamut_remap_c21_c22 = REG_READ(CM_GAMUT_REMAP_C21_C22); + s->gamut_remap_c23_c24 = REG_READ(CM_GAMUT_REMAP_C23_C24); + s->gamut_remap_c31_c32 = REG_READ(CM_GAMUT_REMAP_C31_C32); + s->gamut_remap_c33_c34 = REG_READ(CM_GAMUT_REMAP_C33_C34); +} + /* Program gamut remap in bypass mode */ void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp) { @@ -121,6 +145,13 @@ bool dpp_get_optimal_number_of_taps( else 
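The cm_helper_translate_curve_to_hw_format() and ..._degamma_hw_format() hunks above swap which index drives the region loop: i now iterates and k trails it by one, making explicit the invariant that region i's offset starts where region k = i - 1 ended, advancing by 2^seg_distr[k] segments each step; the computed offsets are unchanged and the redundant double bound check is dropped. A standalone toy version of the corrected loop (the seg_distr values are made up):

#include <stdio.h>

#define MAX_REGIONS_NUMBER 8

int main(void)
{
	int seg_distr[MAX_REGIONS_NUMBER] = { 3, 4, 4, 5, -1, -1, -1, -1 };
	int offset[MAX_REGIONS_NUMBER] = { 0 };
	int i, k;

	k = 0;
	for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
		if (seg_distr[k] != -1)
			offset[i] = offset[k] + (1 << seg_distr[k]);
		k++;
	}

	for (i = 0; i < 4; i++)
		printf("region %d starts at segment %d\n", i, offset[i]);
	/* prints 0, 8, 24, 40: each region begins where the previous ended */
	return 0;
}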
pixel_width = scl_data->viewport.width; + /* Some ASICs do not support FP16 scaling, so we reject modes that require it */ + if (scl_data->viewport.width != scl_data->h_active && + scl_data->viewport.height != scl_data->v_active && + dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT && + scl_data->format == PIXEL_FORMAT_FP16) + return false; + /* TODO: add lb check */ /* No support for programming ratio of 4, drop to 3.99999.. */ @@ -257,7 +288,7 @@ void dpp1_cnv_setup ( struct dpp *dpp_base, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space) { uint32_t pixel_format; @@ -416,7 +447,7 @@ void dpp1_set_cursor_position( if (src_x_offset >= (int)param->viewport_width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)width < 0) + if (src_x_offset + (int)width <= 0) cur_en = 0; /* not visible beyond left edge*/ REG_UPDATE(CURSOR0_CONTROL, @@ -443,6 +474,7 @@ void dpp1_dppclk_control( } static const struct dpp_funcs dcn10_dpp_funcs = { + .dpp_read_state = dpp_read_state, .dpp_reset = dpp_reset, .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale, .dpp_get_optimal_number_of_taps = dpp_get_optimal_number_of_taps, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h index 17b062a8f88a..5944a3ba0409 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h @@ -44,6 +44,10 @@ #define TF_REG_LIST_DCN(id) \ SRI(CM_GAMUT_REMAP_CONTROL, CM, id),\ SRI(CM_GAMUT_REMAP_C11_C12, CM, id),\ + SRI(CM_GAMUT_REMAP_C13_C14, CM, id),\ + SRI(CM_GAMUT_REMAP_C21_C22, CM, id),\ + SRI(CM_GAMUT_REMAP_C23_C24, CM, id),\ + SRI(CM_GAMUT_REMAP_C31_C32, CM, id),\ SRI(CM_GAMUT_REMAP_C33_C34, CM, id),\ SRI(DSCL_EXT_OVERSCAN_LEFT_RIGHT, DSCL, id), \ SRI(DSCL_EXT_OVERSCAN_TOP_BOTTOM, DSCL, id), \ @@ -108,6 +112,8 @@ SRI(CM_DGAM_LUT_DATA, CM, id), \ SRI(CM_CONTROL, CM, id), \ SRI(CM_DGAM_CONTROL, CM, id), \ + SRI(CM_TEST_DEBUG_INDEX, CM, id), \ + SRI(CM_TEST_DEBUG_DATA, CM, id), \ SRI(FORMAT_CONTROL, CNVC_CFG, id), \ SRI(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ SRI(CURSOR0_CONTROL, CNVC_CUR, id), \ @@ -175,6 +181,14 @@ TF_SF(CM0_CM_GAMUT_REMAP_CONTROL, CM_GAMUT_REMAP_MODE, mask_sh),\ TF_SF(CM0_CM_GAMUT_REMAP_C11_C12, CM_GAMUT_REMAP_C11, mask_sh),\ TF_SF(CM0_CM_GAMUT_REMAP_C11_C12, CM_GAMUT_REMAP_C12, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C13_C14, CM_GAMUT_REMAP_C13, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C13_C14, CM_GAMUT_REMAP_C14, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C21_C22, CM_GAMUT_REMAP_C21, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C21_C22, CM_GAMUT_REMAP_C22, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C23_C24, CM_GAMUT_REMAP_C23, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C23_C24, CM_GAMUT_REMAP_C24, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C31_C32, CM_GAMUT_REMAP_C31, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C31_C32, CM_GAMUT_REMAP_C32, mask_sh),\ TF_SF(CM0_CM_GAMUT_REMAP_C33_C34, CM_GAMUT_REMAP_C33, mask_sh),\ TF_SF(CM0_CM_GAMUT_REMAP_C33_C34, CM_GAMUT_REMAP_C34, mask_sh),\ TF_SF(DSCL0_DSCL_EXT_OVERSCAN_LEFT_RIGHT, EXT_OVERSCAN_LEFT, mask_sh),\ @@ -300,6 +314,7 @@ TF_SF(CM0_CM_DGAM_LUT_INDEX, CM_DGAM_LUT_INDEX, mask_sh), \ TF_SF(CM0_CM_DGAM_LUT_DATA, CM_DGAM_LUT_DATA, mask_sh), \ TF_SF(CM0_CM_DGAM_CONTROL, CM_DGAM_LUT_MODE, mask_sh), \ + TF_SF(CM0_CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_INDEX, mask_sh), \ TF_SF(CNVC_CFG0_FORMAT_CONTROL, 
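The dpp1_set_cursor_position() hunk above tightens the left-edge visibility test from < 0 to <= 0: a cursor at src_x_offset covers pixels [src_x_offset, src_x_offset + width), so when its right edge lands exactly at x == 0 nothing is visible. The same fix is applied to hubp1_cursor_set_position() further down. A toy version of the check:

#include <stdio.h>

static int cursor_visible(int src_x_offset, int width, int viewport_width)
{
	if (src_x_offset >= viewport_width)
		return 0;		/* fully beyond the right edge */
	if (src_x_offset + width <= 0)
		return 0;		/* fully beyond the left edge */
	return 1;
}

int main(void)
{
	/* width 64 at offset -64 covers pixels -64..-1: nothing on screen */
	printf("%d\n", cursor_visible(-64, 64, 1920));	/* 0 with the <= fix */
	printf("%d\n", cursor_visible(-63, 64, 1920));	/* 1: pixel 0 visible */
	return 0;
}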
CNVC_BYPASS, mask_sh), \ TF2_SF(CNVC_CFG0, FORMAT_CONTROL__ALPHA_EN, mask_sh), \ TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_EXPANSION_MODE, mask_sh), \ @@ -417,6 +432,41 @@ TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \ TF_SF(DPP_TOP0_DPP_CONTROL, DPPCLK_RATE_CONTROL, mask_sh) +/* + * + DCN1 CM debug status register definition + + register :ID9_CM_STATUS do + implement_ref :cm + map to: :cmdebugind, at: j + width 32 + disclosure NEVER + + field :ID9_VUPDATE_CFG, [0], R + field :ID9_IGAM_LUT_MODE, [2..1], R + field :ID9_BNS_BYPASS, [3], R + field :ID9_ICSC_MODE, [5..4], R + field :ID9_DGAM_LUT_MODE, [8..6], R + field :ID9_HDR_BYPASS, [9], R + field :ID9_GAMUT_REMAP_MODE, [11..10], R + field :ID9_RGAM_LUT_MODE, [14..12], R + #1 free bit + field :ID9_OCSC_MODE, [18..16], R + field :ID9_DENORM_MODE, [21..19], R + field :ID9_ROUND_TRUNC_MODE, [25..22], R + field :ID9_DITHER_EN, [26], R + field :ID9_DITHER_MODE, [28..27], R + end +*/ + +#define TF_DEBUG_REG_LIST_SH_DCN10 \ + .CM_TEST_DEBUG_DATA_ID9_ICSC_MODE = 4, \ + .CM_TEST_DEBUG_DATA_ID9_OCSC_MODE = 16 + +#define TF_DEBUG_REG_LIST_MASK_DCN10 \ + .CM_TEST_DEBUG_DATA_ID9_ICSC_MODE = 0x30, \ + .CM_TEST_DEBUG_DATA_ID9_OCSC_MODE = 0x70000 + #define TF_REG_FIELD_LIST(type) \ type EXT_OVERSCAN_LEFT; \ type EXT_OVERSCAN_RIGHT; \ @@ -486,6 +536,14 @@ type CM_GAMUT_REMAP_MODE; \ type CM_GAMUT_REMAP_C11; \ type CM_GAMUT_REMAP_C12; \ + type CM_GAMUT_REMAP_C13; \ + type CM_GAMUT_REMAP_C14; \ + type CM_GAMUT_REMAP_C21; \ + type CM_GAMUT_REMAP_C22; \ + type CM_GAMUT_REMAP_C23; \ + type CM_GAMUT_REMAP_C24; \ + type CM_GAMUT_REMAP_C31; \ + type CM_GAMUT_REMAP_C32; \ type CM_GAMUT_REMAP_C33; \ type CM_GAMUT_REMAP_C34; \ type CM_COMA_C11; \ @@ -1010,6 +1068,9 @@ type CUR0_EXPANSION_MODE; \ type CUR0_ENABLE; \ type CM_BYPASS; \ + type CM_TEST_DEBUG_INDEX; \ + type CM_TEST_DEBUG_DATA_ID9_ICSC_MODE; \ + type CM_TEST_DEBUG_DATA_ID9_OCSC_MODE;\ type FORMAT_CONTROL__ALPHA_EN; \ type CUR0_COLOR0; \ type CUR0_COLOR1; \ @@ -1054,6 +1115,10 @@ struct dcn_dpp_mask { uint32_t RECOUT_SIZE; \ uint32_t CM_GAMUT_REMAP_CONTROL; \ uint32_t CM_GAMUT_REMAP_C11_C12; \ + uint32_t CM_GAMUT_REMAP_C13_C14; \ + uint32_t CM_GAMUT_REMAP_C21_C22; \ + uint32_t CM_GAMUT_REMAP_C23_C24; \ + uint32_t CM_GAMUT_REMAP_C31_C32; \ uint32_t CM_GAMUT_REMAP_C33_C34; \ uint32_t CM_COMA_C11_C12; \ uint32_t CM_COMA_C33_C34; \ @@ -1255,6 +1320,8 @@ struct dcn_dpp_mask { uint32_t CM_IGAM_LUT_RW_CONTROL; \ uint32_t CM_IGAM_LUT_RW_INDEX; \ uint32_t CM_IGAM_LUT_SEQ_COLOR; \ + uint32_t CM_TEST_DEBUG_INDEX; \ + uint32_t CM_TEST_DEBUG_DATA; \ uint32_t FORMAT_CONTROL; \ uint32_t CNVC_SURFACE_PIXEL_FORMAT; \ uint32_t CURSOR_CONTROL; \ @@ -1289,8 +1356,8 @@ struct dcn10_dpp { enum dcn10_input_csc_select { INPUT_CSC_SELECT_BYPASS = 0, - INPUT_CSC_SELECT_ICSC, - INPUT_CSC_SELECT_COMA + INPUT_CSC_SELECT_ICSC = 1, + INPUT_CSC_SELECT_COMA = 2 }; void dpp1_set_cursor_attributes( @@ -1364,6 +1431,9 @@ bool dpp_get_optimal_number_of_taps( struct scaler_data *scl_data, const struct scaling_taps *in_taps); +void dpp_read_state(struct dpp *dpp_base, + struct dcn_dpp_state *s); + void dpp_reset(struct dpp *dpp_base); void dpp1_cm_program_regamma_lut( @@ -1408,7 +1478,7 @@ void dpp1_cnv_setup ( struct dpp *dpp_base, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space); void dpp1_full_bypass(struct dpp *dpp_base); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index fb32975e4b67..4f373c97804f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -216,41 +216,55 @@ static void dpp1_cm_program_color_matrix( struct dcn10_dpp *dpp, const uint16_t *regval) { - uint32_t mode; + uint32_t ocsc_mode; + uint32_t cur_mode; struct color_matrices_reg gam_regs; - REG_GET(CM_OCSC_CONTROL, CM_OCSC_MODE, &mode); - if (regval == NULL) { BREAK_TO_DEBUGGER(); return; } - mode = 4; + + /* determine which CSC matrix (ocsc or comb) we are using + * currently. select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + REG_SET(CM_TEST_DEBUG_INDEX, 0, + CM_TEST_DEBUG_INDEX, 9); + + REG_GET(CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_ID9_OCSC_MODE, &cur_mode); + + if (cur_mode != 4) + ocsc_mode = 4; + else + ocsc_mode = 5; + + gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_OCSC_C11; gam_regs.masks.csc_c11 = dpp->tf_mask->CM_OCSC_C11; gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_OCSC_C12; gam_regs.masks.csc_c12 = dpp->tf_mask->CM_OCSC_C12; - if (mode == 4) { + if (ocsc_mode == 4) { gam_regs.csc_c11_c12 = REG(CM_OCSC_C11_C12); gam_regs.csc_c33_c34 = REG(CM_OCSC_C33_C34); - cm_helper_program_color_matrices( - dpp->base.ctx, - regval, - &gam_regs); - } else { gam_regs.csc_c11_c12 = REG(CM_COMB_C11_C12); gam_regs.csc_c33_c34 = REG(CM_COMB_C33_C34); - cm_helper_program_color_matrices( - dpp->base.ctx, - regval, - &gam_regs); } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &gam_regs); + + REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); + } void dpp1_cm_set_output_csc_default( @@ -260,15 +274,14 @@ void dpp1_cm_set_output_csc_default( struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); const uint16_t *regval = NULL; int arr_size; - uint32_t ocsc_mode = 4; regval = find_color_matrix(colorspace, &arr_size); if (regval == NULL) { BREAK_TO_DEBUGGER(); return; } + dpp1_cm_program_color_matrix(dpp, regval); - REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); } static void dpp1_cm_get_reg_field( @@ -329,9 +342,8 @@ void dpp1_cm_set_output_csc_adjustment( const uint16_t *regval) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - uint32_t ocsc_mode = 4; + dpp1_cm_program_color_matrix(dpp, regval); - REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); } void dpp1_cm_power_on_regamma_lut(struct dpp *dpp_base, @@ -437,17 +449,18 @@ void dpp1_cm_program_regamma_lutb_settings( void dpp1_program_input_csc( struct dpp *dpp_base, enum dc_color_space color_space, - enum dcn10_input_csc_select select, + enum dcn10_input_csc_select input_select, const struct out_csc_color_matrix *tbl_entry) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); int i; int arr_size = sizeof(dcn10_input_csc_matrix)/sizeof(struct dcn10_input_csc_matrix); const uint16_t *regval = NULL; - uint32_t selection = 1; + uint32_t cur_select = 0; + enum dcn10_input_csc_select select; struct color_matrices_reg gam_regs; - if (select == INPUT_CSC_SELECT_BYPASS) { + if (input_select == INPUT_CSC_SELECT_BYPASS) { REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0); return; } @@ -467,36 +480,45 @@ void dpp1_program_input_csc( regval = tbl_entry->regval; } - if (select == INPUT_CSC_SELECT_COMA) - selection = 2; - REG_SET(CM_ICSC_CONTROL, 0, - CM_ICSC_MODE, selection); + /* determine which CSC matrix (icsc or coma) we are using + * currently. 
select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + REG_SET(CM_TEST_DEBUG_INDEX, 0, + CM_TEST_DEBUG_INDEX, 9); + + REG_GET(CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_ID9_ICSC_MODE, &cur_select); + + if (cur_select != INPUT_CSC_SELECT_ICSC) + select = INPUT_CSC_SELECT_ICSC; + else + select = INPUT_CSC_SELECT_COMA; gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11; gam_regs.masks.csc_c11 = dpp->tf_mask->CM_ICSC_C11; gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12; gam_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12; - if (select == INPUT_CSC_SELECT_ICSC) { gam_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12); gam_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34); - cm_helper_program_color_matrices( - dpp->base.ctx, - regval, - &gam_regs); } else { gam_regs.csc_c11_c12 = REG(CM_COMA_C11_C12); gam_regs.csc_c33_c34 = REG(CM_COMA_C33_C34); - cm_helper_program_color_matrices( - dpp->base.ctx, - regval, - &gam_regs); } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &gam_regs); + + REG_SET(CM_ICSC_CONTROL, 0, + CM_ICSC_MODE, select); } //keep here for now, decide multi dce support later diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index 738f67ffd1b4..b9fb14a3224b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -476,8 +476,227 @@ void hubbub1_toggle_watermark_change_req(struct hubbub *hubbub) DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, watermark_change_req); } +static bool hubbub1_dcc_support_swizzle( + enum swizzle_mode_values swizzle, + unsigned int bytes_per_element, + enum segment_order *segment_order_horz, + enum segment_order *segment_order_vert) +{ + bool standard_swizzle = false; + bool display_swizzle = false; + + switch (swizzle) { + case DC_SW_4KB_S: + case DC_SW_64KB_S: + case DC_SW_VAR_S: + case DC_SW_4KB_S_X: + case DC_SW_64KB_S_X: + case DC_SW_VAR_S_X: + standard_swizzle = true; + break; + case DC_SW_4KB_D: + case DC_SW_64KB_D: + case DC_SW_VAR_D: + case DC_SW_4KB_D_X: + case DC_SW_64KB_D_X: + case DC_SW_VAR_D_X: + display_swizzle = true; + break; + default: + break; + } + + if (bytes_per_element == 1 && standard_swizzle) { + *segment_order_horz = segment_order__contiguous; + *segment_order_vert = segment_order__na; + return true; + } + if (bytes_per_element == 2 && standard_swizzle) { + *segment_order_horz = segment_order__non_contiguous; + *segment_order_vert = segment_order__contiguous; + return true; + } + if (bytes_per_element == 4 && standard_swizzle) { + *segment_order_horz = segment_order__non_contiguous; + *segment_order_vert = segment_order__contiguous; + return true; + } + if (bytes_per_element == 8 && standard_swizzle) { + *segment_order_horz = segment_order__na; + *segment_order_vert = segment_order__contiguous; + return true; + } + if (bytes_per_element == 8 && display_swizzle) { + *segment_order_horz = segment_order__contiguous; + *segment_order_vert = segment_order__non_contiguous; + return true; + } + + return false; +} + +static bool hubbub1_dcc_support_pixel_format( + enum surface_pixel_format format, + unsigned int *bytes_per_element) +{ + /* DML: get_bytes_per_element */ + switch (format) { + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + *bytes_per_element = 2; + return true; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010: + case 
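The dpp1_cm_program_color_matrix() and dpp1_program_input_csc() rewrites above double-buffer the CSC update: the code reads which matrix set the hardware is actively using through the CM_TEST_DEBUG_INDEX/DATA pair (status word 9, per the register notes earlier), programs the idle set, and only then flips the mode so the swap lands on a frame boundary. A standalone toy model; the registers are plain variables and the debug-bus readout is simulated:

#include <stdint.h>
#include <stdio.h>

static uint32_t cm_test_debug_index;
static uint32_t cm_icsc_mode;		/* 1 = ICSC set active, 2 = COMA set */

static uint32_t debug_bus_read(uint32_t index)
{
	cm_test_debug_index = index;	/* select status word 9 (ID9) */
	return cm_icsc_mode;		/* toy: ID9 reports the live mode */
}

static void program_input_csc(const int *matrix)
{
	uint32_t cur = debug_bus_read(9);
	uint32_t target = (cur != 1) ? 1 : 2;	/* pick the idle set */

	(void)matrix;	/* here: write the coefficients into the idle set */
	cm_icsc_mode = target;			/* flip on the next frame */
}

int main(void)
{
	int matrix[12] = { 0 };

	cm_icsc_mode = 1;
	program_input_csc(matrix);
	printf("active set now %u\n", (unsigned)cm_icsc_mode);	/* 2 */
	return 0;
}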
SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010: + *bytes_per_element = 4; + return true; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + *bytes_per_element = 8; + return true; + default: + return false; + } +} + +static void hubbub1_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height, + unsigned int bytes_per_element) +{ + /* copied from DML. might want to refactor DML to leverage from DML */ + /* DML : get_blk256_size */ + if (bytes_per_element == 1) { + *blk256_width = 16; + *blk256_height = 16; + } else if (bytes_per_element == 2) { + *blk256_width = 16; + *blk256_height = 8; + } else if (bytes_per_element == 4) { + *blk256_width = 8; + *blk256_height = 8; + } else if (bytes_per_element == 8) { + *blk256_width = 8; + *blk256_height = 4; + } +} + +static void hubbub1_det_request_size( + unsigned int height, + unsigned int width, + unsigned int bpe, + bool *req128_horz_wc, + bool *req128_vert_wc) +{ + unsigned int detile_buf_size = 164 * 1024; /* 164KB for DCN1.0 */ + + unsigned int blk256_height = 0; + unsigned int blk256_width = 0; + unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc; + + hubbub1_get_blk256_size(&blk256_width, &blk256_height, bpe); + + swath_bytes_horz_wc = height * blk256_height * bpe; + swath_bytes_vert_wc = width * blk256_width * bpe; + + *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ + + *req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ +} + +static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output) +{ + struct dc *dc = hubbub->ctx->dc; + /* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */ + enum dcc_control dcc_control; + unsigned int bpe; + enum segment_order segment_order_horz, segment_order_vert; + bool req128_horz_wc, req128_vert_wc; + + memset(output, 0, sizeof(*output)); + + if (dc->debug.disable_dcc == DCC_DISABLE) + return false; + + if (!hubbub->funcs->dcc_support_pixel_format(input->format, &bpe)) + return false; + + if (!hubbub->funcs->dcc_support_swizzle(input->swizzle_mode, bpe, + &segment_order_horz, &segment_order_vert)) + return false; + + hubbub1_det_request_size(input->surface_size.height, input->surface_size.width, + bpe, &req128_horz_wc, &req128_vert_wc); + + if (!req128_horz_wc && !req128_vert_wc) { + dcc_control = dcc_control__256_256_xxx; + } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) { + if (!req128_horz_wc) + dcc_control = dcc_control__256_256_xxx; + else if (segment_order_horz == segment_order__contiguous) + dcc_control = dcc_control__128_128_xxx; + else + dcc_control = dcc_control__256_64_64; + } else if (input->scan == SCAN_DIRECTION_VERTICAL) { + if (!req128_vert_wc) + dcc_control = dcc_control__256_256_xxx; + else if (segment_order_vert == segment_order__contiguous) + dcc_control = dcc_control__128_128_xxx; + else + dcc_control = dcc_control__256_64_64; + } else { + if ((req128_horz_wc && + segment_order_horz == segment_order__non_contiguous) || + (req128_vert_wc && + segment_order_vert == segment_order__non_contiguous)) + /* access_dir not known, must use most constraining */ + dcc_control = dcc_control__256_64_64; + else + /* reg128 is true for either horz and vert + * but segment_order is contiguous + */ + dcc_control = dcc_control__128_128_xxx; + } + + 
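hubbub1_det_request_size() above chooses between full 256-byte and half 128-byte DCC requests by testing whether two swaths fit in DCN1's 164 KB detile buffer. A standalone check of that arithmetic for an assumed 3840x2160, 4 bytes-per-element surface (blk256_height = 8 for bpe = 4, following hubbub1_get_blk256_size()):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	const unsigned detile_buf_size = 164 * 1024;	/* 167936 bytes */
	unsigned height = 2160, bpe = 4, blk256_height = 8;
	unsigned swath_bytes = height * blk256_height * bpe;	/* 69120 */
	bool req128 = 2 * swath_bytes > detile_buf_size;

	/* 2 * 69120 = 138240 < 167936, so this surface keeps 256B requests */
	printf("swath=%u -> %s requests\n", swath_bytes,
	       req128 ? "128B" : "256B");
	return 0;
}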
if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE && + dcc_control != dcc_control__256_256_xxx) + return false; + + switch (dcc_control) { + case dcc_control__256_256_xxx: + output->grph.rgb.max_uncompressed_blk_size = 256; + output->grph.rgb.max_compressed_blk_size = 256; + output->grph.rgb.independent_64b_blks = false; + break; + case dcc_control__128_128_xxx: + output->grph.rgb.max_uncompressed_blk_size = 128; + output->grph.rgb.max_compressed_blk_size = 128; + output->grph.rgb.independent_64b_blks = false; + break; + case dcc_control__256_64_64: + output->grph.rgb.max_uncompressed_blk_size = 256; + output->grph.rgb.max_compressed_blk_size = 64; + output->grph.rgb.independent_64b_blks = true; + break; + } + + output->capable = true; + output->const_color_support = false; + + return true; +} + static const struct hubbub_funcs hubbub1_funcs = { - .update_dchub = hubbub1_update_dchub + .update_dchub = hubbub1_update_dchub, + .dcc_support_swizzle = hubbub1_dcc_support_swizzle, + .dcc_support_pixel_format = hubbub1_dcc_support_pixel_format, + .get_dcc_compression_cap = hubbub1_get_dcc_compression_cap, }; void hubbub1_construct(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index a16e908821a0..f479f54e5bb2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -27,6 +27,7 @@ #define __DC_HUBBUB_DCN10_H__ #include "core_types.h" +#include "dchubbub.h" #define HUBHUB_REG_LIST_DCN()\ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\ @@ -173,12 +174,6 @@ struct dcn_hubbub_wm { struct dcn_hubbub_wm_set sets[4]; }; -struct hubbub_funcs { - void (*update_dchub)( - struct hubbub *hubbub, - struct dchub_init_data *dh_data); -}; - struct hubbub { const struct hubbub_funcs *funcs; struct dc_context *ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 39b72f696ae9..0cbc83edd37f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -146,6 +146,9 @@ void hubp1_program_size_and_rotation( * 444 or 420 luma */ if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + ASSERT(plane_size->video.chroma_pitch != 0); + /* Chroma pitch zero can cause system hang! 
*/ + pitch = plane_size->video.luma_pitch - 1; meta_pitch = dcc->video.meta_pitch_l - 1; pitch_c = plane_size->video.chroma_pitch - 1; @@ -535,11 +538,13 @@ void hubp1_program_deadline( REG_SET(VBLANK_PARAMETERS_3, 0, REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l); - REG_SET(NOM_PARAMETERS_0, 0, - DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l); + if (REG(NOM_PARAMETERS_0)) + REG_SET(NOM_PARAMETERS_0, 0, + DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l); - REG_SET(NOM_PARAMETERS_1, 0, - REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l); + if (REG(NOM_PARAMETERS_1)) + REG_SET(NOM_PARAMETERS_1, 0, + REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l); REG_SET(NOM_PARAMETERS_4, 0, DST_Y_PER_META_ROW_NOM_L, dlg_attr->dst_y_per_meta_row_nom_l); @@ -568,11 +573,13 @@ void hubp1_program_deadline( REG_SET(VBLANK_PARAMETERS_4, 0, REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c); - REG_SET(NOM_PARAMETERS_2, 0, - DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c); + if (REG(NOM_PARAMETERS_2)) + REG_SET(NOM_PARAMETERS_2, 0, + DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c); - REG_SET(NOM_PARAMETERS_3, 0, - REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c); + if (REG(NOM_PARAMETERS_3)) + REG_SET(NOM_PARAMETERS_3, 0, + REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c); REG_SET(NOM_PARAMETERS_6, 0, DST_Y_PER_META_ROW_NOM_C, dlg_attr->dst_y_per_meta_row_nom_c); @@ -609,6 +616,13 @@ void hubp1_program_deadline( REG_SET(DCN_SURF1_TTU_CNTL1, 0, REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_c); + + REG_SET_3(DCN_CUR0_TTU_CNTL0, 0, + REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_cur0, + QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_cur0, + QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_cur0); + REG_SET(DCN_CUR0_TTU_CNTL1, 0, + REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0); } static void hubp1_setup( @@ -752,9 +766,159 @@ void min_set_viewport( PRI_VIEWPORT_Y_START_C, viewport_c->y); } -void hubp1_read_state(struct dcn10_hubp *hubp1, - struct dcn_hubp_state *s) +void hubp1_read_state(struct hubp *hubp) { + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + struct dcn_hubp_state *s = &hubp1->state; + struct _vcs_dpi_display_dlg_regs_st *dlg_attr = &s->dlg_attr; + struct _vcs_dpi_display_ttu_regs_st *ttu_attr = &s->ttu_attr; + struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs; + + /* Requester */ + REG_GET(HUBPRET_CONTROL, + DET_BUF_PLANE1_BASE_ADDRESS, &rq_regs->plane1_base_address); + REG_GET_4(DCN_EXPANSION_MODE, + DRQ_EXPANSION_MODE, &rq_regs->drq_expansion_mode, + PRQ_EXPANSION_MODE, &rq_regs->prq_expansion_mode, + MRQ_EXPANSION_MODE, &rq_regs->mrq_expansion_mode, + CRQ_EXPANSION_MODE, &rq_regs->crq_expansion_mode); + REG_GET_8(DCHUBP_REQ_SIZE_CONFIG, + CHUNK_SIZE, &rq_regs->rq_regs_l.chunk_size, + MIN_CHUNK_SIZE, &rq_regs->rq_regs_l.min_chunk_size, + META_CHUNK_SIZE, &rq_regs->rq_regs_l.meta_chunk_size, + MIN_META_CHUNK_SIZE, &rq_regs->rq_regs_l.min_meta_chunk_size, + DPTE_GROUP_SIZE, &rq_regs->rq_regs_l.dpte_group_size, + MPTE_GROUP_SIZE, &rq_regs->rq_regs_l.mpte_group_size, + SWATH_HEIGHT, &rq_regs->rq_regs_l.swath_height, + PTE_ROW_HEIGHT_LINEAR, &rq_regs->rq_regs_l.pte_row_height_linear); + REG_GET_8(DCHUBP_REQ_SIZE_CONFIG_C, + CHUNK_SIZE_C, &rq_regs->rq_regs_c.chunk_size, + MIN_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_chunk_size, + META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.meta_chunk_size, 
+ MIN_META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_meta_chunk_size, + DPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.dpte_group_size, + MPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.mpte_group_size, + SWATH_HEIGHT_C, &rq_regs->rq_regs_c.swath_height, + PTE_ROW_HEIGHT_LINEAR_C, &rq_regs->rq_regs_c.pte_row_height_linear); + + /* DLG - Per hubp */ + REG_GET_2(BLANK_OFFSET_0, + REFCYC_H_BLANK_END, &dlg_attr->refcyc_h_blank_end, + DLG_V_BLANK_END, &dlg_attr->dlg_vblank_end); + + REG_GET(BLANK_OFFSET_1, + MIN_DST_Y_NEXT_START, &dlg_attr->min_dst_y_next_start); + + REG_GET(DST_DIMENSIONS, + REFCYC_PER_HTOTAL, &dlg_attr->refcyc_per_htotal); + + REG_GET_2(DST_AFTER_SCALER, + REFCYC_X_AFTER_SCALER, &dlg_attr->refcyc_x_after_scaler, + DST_Y_AFTER_SCALER, &dlg_attr->dst_y_after_scaler); + + if (REG(PREFETCH_SETTINS)) + REG_GET_2(PREFETCH_SETTINS, + DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch, + VRATIO_PREFETCH, &dlg_attr->vratio_prefetch); + else + REG_GET_2(PREFETCH_SETTINGS, + DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch, + VRATIO_PREFETCH, &dlg_attr->vratio_prefetch); + + REG_GET_2(VBLANK_PARAMETERS_0, + DST_Y_PER_VM_VBLANK, &dlg_attr->dst_y_per_vm_vblank, + DST_Y_PER_ROW_VBLANK, &dlg_attr->dst_y_per_row_vblank); + + REG_GET(REF_FREQ_TO_PIX_FREQ, + REF_FREQ_TO_PIX_FREQ, &dlg_attr->ref_freq_to_pix_freq); + + /* DLG - Per luma/chroma */ + REG_GET(VBLANK_PARAMETERS_1, + REFCYC_PER_PTE_GROUP_VBLANK_L, &dlg_attr->refcyc_per_pte_group_vblank_l); + + REG_GET(VBLANK_PARAMETERS_3, + REFCYC_PER_META_CHUNK_VBLANK_L, &dlg_attr->refcyc_per_meta_chunk_vblank_l); + + if (REG(NOM_PARAMETERS_0)) + REG_GET(NOM_PARAMETERS_0, + DST_Y_PER_PTE_ROW_NOM_L, &dlg_attr->dst_y_per_pte_row_nom_l); + + if (REG(NOM_PARAMETERS_1)) + REG_GET(NOM_PARAMETERS_1, + REFCYC_PER_PTE_GROUP_NOM_L, &dlg_attr->refcyc_per_pte_group_nom_l); + + REG_GET(NOM_PARAMETERS_4, + DST_Y_PER_META_ROW_NOM_L, &dlg_attr->dst_y_per_meta_row_nom_l); + + REG_GET(NOM_PARAMETERS_5, + REFCYC_PER_META_CHUNK_NOM_L, &dlg_attr->refcyc_per_meta_chunk_nom_l); + + REG_GET_2(PER_LINE_DELIVERY_PRE, + REFCYC_PER_LINE_DELIVERY_PRE_L, &dlg_attr->refcyc_per_line_delivery_pre_l, + REFCYC_PER_LINE_DELIVERY_PRE_C, &dlg_attr->refcyc_per_line_delivery_pre_c); + + REG_GET_2(PER_LINE_DELIVERY, + REFCYC_PER_LINE_DELIVERY_L, &dlg_attr->refcyc_per_line_delivery_l, + REFCYC_PER_LINE_DELIVERY_C, &dlg_attr->refcyc_per_line_delivery_c); + + if (REG(PREFETCH_SETTINS_C)) + REG_GET(PREFETCH_SETTINS_C, + VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c); + else + REG_GET(PREFETCH_SETTINGS_C, + VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c); + + REG_GET(VBLANK_PARAMETERS_2, + REFCYC_PER_PTE_GROUP_VBLANK_C, &dlg_attr->refcyc_per_pte_group_vblank_c); + + REG_GET(VBLANK_PARAMETERS_4, + REFCYC_PER_META_CHUNK_VBLANK_C, &dlg_attr->refcyc_per_meta_chunk_vblank_c); + + if (REG(NOM_PARAMETERS_2)) + REG_GET(NOM_PARAMETERS_2, + DST_Y_PER_PTE_ROW_NOM_C, &dlg_attr->dst_y_per_pte_row_nom_c); + + if (REG(NOM_PARAMETERS_3)) + REG_GET(NOM_PARAMETERS_3, + REFCYC_PER_PTE_GROUP_NOM_C, &dlg_attr->refcyc_per_pte_group_nom_c); + + REG_GET(NOM_PARAMETERS_6, + DST_Y_PER_META_ROW_NOM_C, &dlg_attr->dst_y_per_meta_row_nom_c); + + REG_GET(NOM_PARAMETERS_7, + REFCYC_PER_META_CHUNK_NOM_C, &dlg_attr->refcyc_per_meta_chunk_nom_c); + + /* TTU - per hubp */ + REG_GET_2(DCN_TTU_QOS_WM, + QoS_LEVEL_LOW_WM, &ttu_attr->qos_level_low_wm, + QoS_LEVEL_HIGH_WM, &ttu_attr->qos_level_high_wm); + + REG_GET_2(DCN_GLOBAL_TTU_CNTL, + MIN_TTU_VBLANK, &ttu_attr->min_ttu_vblank, + QoS_LEVEL_FLIP, &ttu_attr->qos_level_flip); + + /* TTU - per luma/chroma */ + /* 
Assumed surf0 is luma and 1 is chroma */ + + REG_GET_3(DCN_SURF0_TTU_CNTL0, + REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_l, + QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_l, + QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_l); + + REG_GET(DCN_SURF0_TTU_CNTL1, + REFCYC_PER_REQ_DELIVERY_PRE, + &ttu_attr->refcyc_per_req_delivery_pre_l); + + REG_GET_3(DCN_SURF1_TTU_CNTL0, + REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_c, + QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_c, + QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_c); + + REG_GET(DCN_SURF1_TTU_CNTL1, + REFCYC_PER_REQ_DELIVERY_PRE, + &ttu_attr->refcyc_per_req_delivery_pre_c); + + /* Rest of hubp */ REG_GET(DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, &s->pixel_format); @@ -897,7 +1061,7 @@ void hubp1_cursor_set_position( if (src_x_offset >= (int)param->viewport_width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)hubp->curs_attr.width < 0) + if (src_x_offset + (int)hubp->curs_attr.width <= 0) cur_en = 0; /* not visible beyond left edge*/ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) @@ -952,6 +1116,7 @@ static struct hubp_funcs dcn10_hubp_funcs = { .hubp_disconnect = hubp1_disconnect, .hubp_clk_cntl = hubp1_clk_cntl, .hubp_vtg_sel = hubp1_vtg_sel, + .hubp_read_state = hubp1_read_state, }; /*****************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index 4a3703e12ea1..fe9b8c4a91ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -30,6 +30,7 @@ #define TO_DCN10_HUBP(hubp)\ container_of(hubp, struct dcn10_hubp, base) +/* Register address initialization macro for all ASICs (including those with reduced functionality) */ #define HUBP_REG_LIST_DCN(id)\ SRI(DCHUBP_CNTL, HUBP, id),\ SRI(HUBPREQ_DEBUG_DB, HUBP, id),\ @@ -78,16 +79,12 @@ SRI(REF_FREQ_TO_PIX_FREQ, HUBPREQ, id),\ SRI(VBLANK_PARAMETERS_1, HUBPREQ, id),\ SRI(VBLANK_PARAMETERS_3, HUBPREQ, id),\ - SRI(NOM_PARAMETERS_0, HUBPREQ, id),\ - SRI(NOM_PARAMETERS_1, HUBPREQ, id),\ SRI(NOM_PARAMETERS_4, HUBPREQ, id),\ SRI(NOM_PARAMETERS_5, HUBPREQ, id),\ SRI(PER_LINE_DELIVERY_PRE, HUBPREQ, id),\ SRI(PER_LINE_DELIVERY, HUBPREQ, id),\ SRI(VBLANK_PARAMETERS_2, HUBPREQ, id),\ SRI(VBLANK_PARAMETERS_4, HUBPREQ, id),\ - SRI(NOM_PARAMETERS_2, HUBPREQ, id),\ - SRI(NOM_PARAMETERS_3, HUBPREQ, id),\ SRI(NOM_PARAMETERS_6, HUBPREQ, id),\ SRI(NOM_PARAMETERS_7, HUBPREQ, id),\ SRI(DCN_TTU_QOS_WM, HUBPREQ, id),\ @@ -96,11 +93,21 @@ SRI(DCN_SURF0_TTU_CNTL1, HUBPREQ, id),\ SRI(DCN_SURF1_TTU_CNTL0, HUBPREQ, id),\ SRI(DCN_SURF1_TTU_CNTL1, HUBPREQ, id),\ - SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id),\ + SRI(DCN_CUR0_TTU_CNTL0, HUBPREQ, id),\ + SRI(DCN_CUR0_TTU_CNTL1, HUBPREQ, id),\ SRI(HUBP_CLK_CNTL, HUBP, id) +/* Register address initialization macro for ASICs with VM */ +#define HUBP_REG_LIST_DCN_VM(id)\ + SRI(NOM_PARAMETERS_0, HUBPREQ, id),\ + SRI(NOM_PARAMETERS_1, HUBPREQ, id),\ + SRI(NOM_PARAMETERS_2, HUBPREQ, id),\ + SRI(NOM_PARAMETERS_3, HUBPREQ, id),\ + SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id) + #define HUBP_REG_LIST_DCN10(id)\ HUBP_REG_LIST_DCN(id),\ + HUBP_REG_LIST_DCN_VM(id),\ SRI(PREFETCH_SETTINS, HUBPREQ, id),\ SRI(PREFETCH_SETTINS_C, HUBPREQ, id),\ SRI(DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB, HUBPREQ, id),\ @@ -198,6 +205,8 @@ uint32_t DCN_SURF0_TTU_CNTL1; \ uint32_t DCN_SURF1_TTU_CNTL0; \ uint32_t DCN_SURF1_TTU_CNTL1; \ + uint32_t DCN_CUR0_TTU_CNTL0; \ + uint32_t 
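The dcn10_hubp.h hunks above split the register list into HUBP_REG_LIST_DCN and HUBP_REG_LIST_DCN_VM, which is why hubp1_program_deadline() and hubp1_read_state() earlier wrap the NOM_PARAMETERS accesses in if (REG(...)) guards: on ASICs without VM those address slots are never initialized, stay zero, and the access is skipped. A toy model of the guard:

#include <stdio.h>

struct hubp_regs {
	unsigned NOM_PARAMETERS_0;	/* left 0 when the ASIC has no VM */
};

static void program_deadline(const struct hubp_regs *regs)
{
	if (regs->NOM_PARAMETERS_0)
		printf("programming NOM_PARAMETERS_0 at %#x\n",
		       regs->NOM_PARAMETERS_0);
	else
		printf("no VM: NOM_PARAMETERS_0 skipped\n");
}

int main(void)
{
	struct hubp_regs vm_asic = { .NOM_PARAMETERS_0 = 0x1234 };	/* made-up address */
	struct hubp_regs no_vm_asic = { 0 };

	program_deadline(&vm_asic);
	program_deadline(&no_vm_asic);
	return 0;
}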
DCN_CUR0_TTU_CNTL1; \ uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB; \ uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LSB; \ uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_MSB; \ @@ -237,6 +246,7 @@ #define HUBP_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix +/* Mask/shift struct generation macro for all ASICs (including those with reduced functionality) */ #define HUBP_MASK_SH_LIST_DCN(mask_sh)\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_BLANK_EN, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\ @@ -335,8 +345,6 @@ HUBP_SF(HUBPREQ0_REF_FREQ_TO_PIX_FREQ, REF_FREQ_TO_PIX_FREQ, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_1, REFCYC_PER_PTE_GROUP_VBLANK_L, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_3, REFCYC_PER_META_CHUNK_VBLANK_L, mask_sh),\ - HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\ - HUBP_SF(HUBPREQ0_NOM_PARAMETERS_1, REFCYC_PER_PTE_GROUP_NOM_L, mask_sh),\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_4, DST_Y_PER_META_ROW_NOM_L, mask_sh),\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_5, REFCYC_PER_META_CHUNK_NOM_L, mask_sh),\ HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY_PRE, REFCYC_PER_LINE_DELIVERY_PRE_L, mask_sh),\ @@ -345,8 +353,6 @@ HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY, REFCYC_PER_LINE_DELIVERY_C, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_2, REFCYC_PER_PTE_GROUP_VBLANK_C, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_4, REFCYC_PER_META_CHUNK_VBLANK_C, mask_sh),\ - HUBP_SF(HUBPREQ0_NOM_PARAMETERS_2, DST_Y_PER_PTE_ROW_NOM_C, mask_sh),\ - HUBP_SF(HUBPREQ0_NOM_PARAMETERS_3, REFCYC_PER_PTE_GROUP_NOM_C, mask_sh),\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_6, DST_Y_PER_META_ROW_NOM_C, mask_sh),\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_7, REFCYC_PER_META_CHUNK_NOM_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_TTU_QOS_WM, QoS_LEVEL_LOW_WM, mask_sh),\ @@ -357,12 +363,24 @@ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh),\ + HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh) + +/* Mask/shift struct generation macro for ASICs with VM */ +#define HUBP_MASK_SH_LIST_DCN_VM(mask_sh)\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_1, REFCYC_PER_PTE_GROUP_NOM_L, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_2, DST_Y_PER_PTE_ROW_NOM_C, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_3, REFCYC_PER_PTE_GROUP_NOM_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, mask_sh),\ - HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh) + HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, REFCYC_PER_REQ_DELIVERY, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh) #define HUBP_MASK_SH_LIST_DCN10(mask_sh)\ HUBP_MASK_SH_LIST_DCN(mask_sh),\ + HUBP_MASK_SH_LIST_DCN_VM(mask_sh),\ HUBP_SF(HUBPREQ0_PREFETCH_SETTINS, DST_Y_PREFETCH, mask_sh),\ HUBP_SF(HUBPREQ0_PREFETCH_SETTINS, VRATIO_PREFETCH, mask_sh),\ HUBP_SF(HUBPREQ0_PREFETCH_SETTINS_C, VRATIO_PREFETCH_C, mask_sh),\ @@ -601,8 +619,29 @@ struct dcn_mi_mask { DCN_HUBP_REG_FIELD_LIST(uint32_t); }; +struct dcn_hubp_state { + struct _vcs_dpi_display_dlg_regs_st dlg_attr; + struct _vcs_dpi_display_ttu_regs_st ttu_attr; + struct _vcs_dpi_display_rq_regs_st rq_regs; + uint32_t 
pixel_format; + uint32_t inuse_addr_hi; + uint32_t viewport_width; + uint32_t viewport_height; + uint32_t rotation_angle; + uint32_t h_mirror_en; + uint32_t sw_mode; + uint32_t dcc_en; + uint32_t blank_en; + uint32_t underflow_status; + uint32_t ttu_disable; + uint32_t min_ttu_vblank; + uint32_t qos_level_low_wm; + uint32_t qos_level_high_wm; +}; + struct dcn10_hubp { struct hubp base; + struct dcn_hubp_state state; const struct dcn_mi_registers *hubp_regs; const struct dcn_mi_shift *hubp_shift; const struct dcn_mi_mask *hubp_mask; @@ -680,25 +719,7 @@ void dcn10_hubp_construct( const struct dcn_mi_shift *hubp_shift, const struct dcn_mi_mask *hubp_mask); - -struct dcn_hubp_state { - uint32_t pixel_format; - uint32_t inuse_addr_hi; - uint32_t viewport_width; - uint32_t viewport_height; - uint32_t rotation_angle; - uint32_t h_mirror_en; - uint32_t sw_mode; - uint32_t dcc_en; - uint32_t blank_en; - uint32_t underflow_status; - uint32_t ttu_disable; - uint32_t min_ttu_vblank; - uint32_t qos_level_low_wm; - uint32_t qos_level_high_wm; -}; -void hubp1_read_state(struct dcn10_hubp *hubp1, - struct dcn_hubp_state *s); +void hubp1_read_state(struct hubp *hubp); enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 8b0f6b8a5627..572fa601a0eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -45,8 +45,8 @@ #include "dcn10_hubbub.h" #include "dcn10_cm_common.h" -#define DC_LOGGER \ - ctx->logger +#define DC_LOGGER_INIT(logger) + #define CTX \ hws->ctx #define REG(reg)\ @@ -56,16 +56,17 @@ #define FN(reg_name, field_name) \ hws->shifts->field_name, hws->masks->field_name +/*print is 17 wide, first two characters are spaces*/ #define DTN_INFO_MICRO_SEC(ref_cycle) \ print_microsec(dc_ctx, ref_cycle) void print_microsec(struct dc_context *dc_ctx, uint32_t ref_cycle) { - static const uint32_t ref_clk_mhz = 48; - static const unsigned int frac = 10; + const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000; + static const unsigned int frac = 1000; uint32_t us_x10 = (ref_cycle * frac) / ref_clk_mhz; - DTN_INFO("%d.%d \t ", + DTN_INFO(" %11d.%03d", us_x10 / frac, us_x10 % frac); } @@ -92,14 +93,14 @@ void dcn10_log_hubbub_state(struct dc *dc) hubbub1_wm_read_state(dc->res_pool->hubbub, &wm); - DTN_INFO("HUBBUB WM: \t data_urgent \t pte_meta_urgent \t " - "sr_enter \t sr_exit \t dram_clk_change \n"); + DTN_INFO("HUBBUB WM: data_urgent pte_meta_urgent" + " sr_enter sr_exit dram_clk_change\n"); for (i = 0; i < 4; i++) { struct dcn_hubbub_wm_set *s; s = &wm.sets[i]; - DTN_INFO("WM_Set[%d]:\t ", s->wm_set); + DTN_INFO("WM_Set[%d]:", s->wm_set); DTN_INFO_MICRO_SEC(s->data_urgent); DTN_INFO_MICRO_SEC(s->pte_meta_urgent); DTN_INFO_MICRO_SEC(s->sr_enter); @@ -111,6 +112,116 @@ void dcn10_log_hubbub_state(struct dc *dc) DTN_INFO("\n"); } +static void dcn10_log_hubp_states(struct dc *dc) +{ + struct dc_context *dc_ctx = dc->ctx; + struct resource_pool *pool = dc->res_pool; + int i; + + DTN_INFO("HUBP: format addr_hi width height" + " rot mir sw_mode dcc_en blank_en ttu_dis underflow" + " min_ttu_vblank qos_low_wm qos_high_wm\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct hubp *hubp = pool->hubps[i]; + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(hubp)->state); + + hubp->funcs->hubp_read_state(hubp); + + DTN_INFO("[%2d]: %5xh %6xh %5d %6d %2xh %2xh %6xh" + " %6d %8d 
%7d %8xh", + hubp->inst, + s->pixel_format, + s->inuse_addr_hi, + s->viewport_width, + s->viewport_height, + s->rotation_angle, + s->h_mirror_en, + s->sw_mode, + s->dcc_en, + s->blank_en, + s->ttu_disable, + s->underflow_status); + DTN_INFO_MICRO_SEC(s->min_ttu_vblank); + DTN_INFO_MICRO_SEC(s->qos_level_low_wm); + DTN_INFO_MICRO_SEC(s->qos_level_high_wm); + DTN_INFO("\n"); + } + + DTN_INFO("\n=========RQ========\n"); + DTN_INFO("HUBP: drq_exp_m prq_exp_m mrq_exp_m crq_exp_m plane1_ba L:chunk_s min_chu_s meta_ch_s" + " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h C:chunk_s min_chu_s meta_ch_s" + " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state); + struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs; + + DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n", + i, rq_regs->drq_expansion_mode, rq_regs->prq_expansion_mode, rq_regs->mrq_expansion_mode, + rq_regs->crq_expansion_mode, rq_regs->plane1_base_address, rq_regs->rq_regs_l.chunk_size, + rq_regs->rq_regs_l.min_chunk_size, rq_regs->rq_regs_l.meta_chunk_size, + rq_regs->rq_regs_l.min_meta_chunk_size, rq_regs->rq_regs_l.dpte_group_size, + rq_regs->rq_regs_l.mpte_group_size, rq_regs->rq_regs_l.swath_height, + rq_regs->rq_regs_l.pte_row_height_linear, rq_regs->rq_regs_c.chunk_size, rq_regs->rq_regs_c.min_chunk_size, + rq_regs->rq_regs_c.meta_chunk_size, rq_regs->rq_regs_c.min_meta_chunk_size, + rq_regs->rq_regs_c.dpte_group_size, rq_regs->rq_regs_c.mpte_group_size, + rq_regs->rq_regs_c.swath_height, rq_regs->rq_regs_c.pte_row_height_linear); + } + + DTN_INFO("========DLG========\n"); + DTN_INFO("HUBP: rc_hbe dlg_vbe min_d_y_n rc_per_ht rc_x_a_s " + " dst_y_a_s dst_y_pf dst_y_vvb dst_y_rvb dst_y_vfl dst_y_rfl rf_pix_fq" + " vratio_pf vrat_pf_c rc_pg_vbl rc_pg_vbc rc_mc_vbl rc_mc_vbc rc_pg_fll" + " rc_pg_flc rc_mc_fll rc_mc_flc pr_nom_l pr_nom_c rc_pg_nl rc_pg_nc " + " mr_nom_l mr_nom_c rc_mc_nl rc_mc_nc rc_ld_pl rc_ld_pc rc_ld_l " + " rc_ld_c cha_cur0 ofst_cur1 cha_cur1 vr_af_vc0 ddrq_limt x_rt_dlay" + " x_rp_dlay x_rr_sfl\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state); + struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr; + + DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh" + "% 8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh" + " %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n", + i, dlg_regs->refcyc_h_blank_end, dlg_regs->dlg_vblank_end, dlg_regs->min_dst_y_next_start, + dlg_regs->refcyc_per_htotal, dlg_regs->refcyc_x_after_scaler, dlg_regs->dst_y_after_scaler, + dlg_regs->dst_y_prefetch, dlg_regs->dst_y_per_vm_vblank, dlg_regs->dst_y_per_row_vblank, + dlg_regs->dst_y_per_vm_flip, dlg_regs->dst_y_per_row_flip, dlg_regs->ref_freq_to_pix_freq, + dlg_regs->vratio_prefetch, dlg_regs->vratio_prefetch_c, dlg_regs->refcyc_per_pte_group_vblank_l, + dlg_regs->refcyc_per_pte_group_vblank_c, dlg_regs->refcyc_per_meta_chunk_vblank_l, + dlg_regs->refcyc_per_meta_chunk_vblank_c, dlg_regs->refcyc_per_pte_group_flip_l, + dlg_regs->refcyc_per_pte_group_flip_c, dlg_regs->refcyc_per_meta_chunk_flip_l, + dlg_regs->refcyc_per_meta_chunk_flip_c, dlg_regs->dst_y_per_pte_row_nom_l, + dlg_regs->dst_y_per_pte_row_nom_c, dlg_regs->refcyc_per_pte_group_nom_l, + dlg_regs->refcyc_per_pte_group_nom_c, 
dlg_regs->dst_y_per_meta_row_nom_l, + dlg_regs->dst_y_per_meta_row_nom_c, dlg_regs->refcyc_per_meta_chunk_nom_l, + dlg_regs->refcyc_per_meta_chunk_nom_c, dlg_regs->refcyc_per_line_delivery_pre_l, + dlg_regs->refcyc_per_line_delivery_pre_c, dlg_regs->refcyc_per_line_delivery_l, + dlg_regs->refcyc_per_line_delivery_c, dlg_regs->chunk_hdl_adjust_cur0, dlg_regs->dst_y_offset_cur1, + dlg_regs->chunk_hdl_adjust_cur1, dlg_regs->vready_after_vcount0, dlg_regs->dst_y_delta_drq_limit, + dlg_regs->xfc_reg_transfer_delay, dlg_regs->xfc_reg_precharge_delay, + dlg_regs->xfc_reg_remote_surface_flip_latency); + } + + DTN_INFO("========TTU========\n"); + DTN_INFO("HUBP: qos_ll_wm qos_lh_wm mn_ttu_vb qos_l_flp rc_rd_p_l rc_rd_l rc_rd_p_c" + " rc_rd_c rc_rd_c0 rc_rd_pc0 rc_rd_c1 rc_rd_pc1 qos_lf_l qos_rds_l" + " qos_lf_c qos_rds_c qos_lf_c0 qos_rds_c0 qos_lf_c1 qos_rds_c1\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state); + struct _vcs_dpi_display_ttu_regs_st *ttu_regs = &s->ttu_attr; + + DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n", + i, ttu_regs->qos_level_low_wm, ttu_regs->qos_level_high_wm, ttu_regs->min_ttu_vblank, + ttu_regs->qos_level_flip, ttu_regs->refcyc_per_req_delivery_pre_l, ttu_regs->refcyc_per_req_delivery_l, + ttu_regs->refcyc_per_req_delivery_pre_c, ttu_regs->refcyc_per_req_delivery_c, ttu_regs->refcyc_per_req_delivery_cur0, + ttu_regs->refcyc_per_req_delivery_pre_cur0, ttu_regs->refcyc_per_req_delivery_cur1, + ttu_regs->refcyc_per_req_delivery_pre_cur1, ttu_regs->qos_level_fixed_l, ttu_regs->qos_ramp_disable_l, + ttu_regs->qos_level_fixed_c, ttu_regs->qos_ramp_disable_c, ttu_regs->qos_level_fixed_cur0, + ttu_regs->qos_ramp_disable_cur0, ttu_regs->qos_level_fixed_cur1, ttu_regs->qos_ramp_disable_cur1); + } + DTN_INFO("\n"); +} + void dcn10_log_hw_state(struct dc *dc) { struct dc_context *dc_ctx = dc->ctx; @@ -121,41 +232,64 @@ void dcn10_log_hw_state(struct dc *dc) dcn10_log_hubbub_state(dc); - DTN_INFO("HUBP:\t format \t addr_hi \t width \t height \t " - "rotation \t mirror \t sw_mode \t " - "dcc_en \t blank_en \t ttu_dis \t underflow \t " - "min_ttu_vblank \t qos_low_wm \t qos_high_wm \n"); + dcn10_log_hubp_states(dc); + DTN_INFO("DPP: IGAM format IGAM mode DGAM mode RGAM mode" + " GAMUT mode C11 C12 C13 C14 C21 C22 C23 C24 " + "C31 C32 C33 C34\n"); for (i = 0; i < pool->pipe_count; i++) { - struct hubp *hubp = pool->hubps[i]; - struct dcn_hubp_state s; + struct dpp *dpp = pool->dpps[i]; + struct dcn_dpp_state s; + + dpp->funcs->dpp_read_state(dpp, &s); + + DTN_INFO("[%2d]: %11xh %-11s %-11s %-11s" + "%8x %08xh %08xh %08xh %08xh %08xh %08xh", + dpp->inst, + s.igam_input_format, + (s.igam_lut_mode == 0) ? "BypassFixed" : + ((s.igam_lut_mode == 1) ? "BypassFloat" : + ((s.igam_lut_mode == 2) ? "RAM" : + ((s.igam_lut_mode == 3) ? "RAM" : + "Unknown"))), + (s.dgam_lut_mode == 0) ? "Bypass" : + ((s.dgam_lut_mode == 1) ? "sRGB" : + ((s.dgam_lut_mode == 2) ? "Ycc" : + ((s.dgam_lut_mode == 3) ? "RAM" : + ((s.dgam_lut_mode == 4) ? "RAM" : + "Unknown")))), + (s.rgam_lut_mode == 0) ? "Bypass" : + ((s.rgam_lut_mode == 1) ? "sRGB" : + ((s.rgam_lut_mode == 2) ? "Ycc" : + ((s.rgam_lut_mode == 3) ? "RAM" : + ((s.rgam_lut_mode == 4) ? 
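+				/* modes 3 and 4 select the two LUT RAM banks (A/B); both read back as "RAM" */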
"RAM" : + "Unknown")))), + s.gamut_remap_mode, + s.gamut_remap_c11_c12, + s.gamut_remap_c13_c14, + s.gamut_remap_c21_c22, + s.gamut_remap_c23_c24, + s.gamut_remap_c31_c32, + s.gamut_remap_c33_c34); + DTN_INFO("\n"); + } + DTN_INFO("\n"); - hubp1_read_state(TO_DCN10_HUBP(hubp), &s); + DTN_INFO("MPCC: OPP DPP MPCCBOT MODE ALPHA_MODE PREMULT OVERLAP_ONLY IDLE\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct mpcc_state s = {0}; - DTN_INFO("[%d]:\t %xh \t %xh \t %d \t %d \t " - "%xh \t %xh \t %xh \t " - "%d \t %d \t %d \t %xh \t", - hubp->inst, - s.pixel_format, - s.inuse_addr_hi, - s.viewport_width, - s.viewport_height, - s.rotation_angle, - s.h_mirror_en, - s.sw_mode, - s.dcc_en, - s.blank_en, - s.ttu_disable, - s.underflow_status); - DTN_INFO_MICRO_SEC(s.min_ttu_vblank); - DTN_INFO_MICRO_SEC(s.qos_level_low_wm); - DTN_INFO_MICRO_SEC(s.qos_level_high_wm); - DTN_INFO("\n"); + pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s); + if (s.opp_id != 0xf) + DTN_INFO("[%2d]: %2xh %2xh %6xh %4d %10d %7d %12d %4d\n", + i, s.opp_id, s.dpp_id, s.bot_mpcc_id, + s.mode, s.alpha_mode, s.pre_multiplied_alpha, s.overlap_only, + s.idle); } DTN_INFO("\n"); - DTN_INFO("OTG:\t v_bs \t v_be \t v_ss \t v_se \t vpol \t vmax \t vmin \t " - "h_bs \t h_be \t h_ss \t h_se \t hpol \t htot \t vtot \t underflow\n"); + DTN_INFO("OTG: v_bs v_be v_ss v_se vpol vmax vmin vmax_sel vmin_sel" + " h_bs h_be h_ss h_se hpol htot vtot underflow\n"); for (i = 0; i < pool->timing_generator_count; i++) { struct timing_generator *tg = pool->timing_generators[i]; @@ -167,9 +301,8 @@ void dcn10_log_hw_state(struct dc *dc) if ((s.otg_enabled & 1) == 0) continue; - DTN_INFO("[%d]:\t %d \t %d \t %d \t %d \t " - "%d \t %d \t %d \t %d \t %d \t %d \t " - "%d \t %d \t %d \t %d \t %d \t ", + DTN_INFO("[%d]: %5d %5d %5d %5d %5d %5d %5d %9d %9d %5d %5d %5d" + " %5d %5d %5d %5d %9d\n", tg->inst, s.v_blank_start, s.v_blank_end, @@ -178,6 +311,8 @@ void dcn10_log_hw_state(struct dc *dc) s.v_sync_a_pol, s.v_total_max, s.v_total_min, + s.v_total_max_sel, + s.v_total_min_sel, s.h_blank_start, s.h_blank_end, s.h_sync_a_start, @@ -186,10 +321,19 @@ void dcn10_log_hw_state(struct dc *dc) s.h_total, s.v_total, s.underflow_occurred_status); - DTN_INFO("\n"); } DTN_INFO("\n"); + DTN_INFO("\nCALCULATED Clocks: dcfclk_khz:%d dcfclk_deep_sleep_khz:%d dispclk_khz:%d\n" + "dppclk_khz:%d max_supported_dppclk_khz:%d fclk_khz:%d socclk_khz:%d\n\n", + dc->current_state->bw.dcn.calc_clk.dcfclk_khz, + dc->current_state->bw.dcn.calc_clk.dcfclk_deep_sleep_khz, + dc->current_state->bw.dcn.calc_clk.dispclk_khz, + dc->current_state->bw.dcn.calc_clk.dppclk_khz, + dc->current_state->bw.dcn.calc_clk.max_supported_dppclk_khz, + dc->current_state->bw.dcn.calc_clk.fclk_khz, + dc->current_state->bw.dcn.calc_clk.socclk_khz); + log_mpc_crc(dc); DTN_INFO_END(); @@ -354,7 +498,7 @@ static void power_on_plane( struct dce_hwseq *hws, int plane_id) { - struct dc_context *ctx = hws->ctx; + DC_LOGGER_INIT(hws->ctx->logger); if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); @@ -461,7 +605,7 @@ static void false_optc_underflow_wa( tg->funcs->clear_optc_underflow(tg); } -static enum dc_status dcn10_prog_pixclk_crtc_otg( +static enum dc_status dcn10_enable_stream_timing( struct pipe_ctx *pipe_ctx, struct dc_state *context, struct dc *dc) @@ -553,7 +697,7 @@ static void reset_back_end_for_pipe( struct dc_state *context) { int i; - struct dc_context *ctx = dc->ctx; + DC_LOGGER_INIT(dc->ctx->logger); if (pipe_ctx->stream_res.stream_enc == NULL) { 
pipe_ctx->stream = NULL; return; @@ -649,7 +793,7 @@ static void plane_atomic_power_down(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; struct dpp *dpp = pipe_ctx->plane_res.dpp; - struct dc_context *ctx = dc->ctx; + DC_LOGGER_INIT(dc->ctx->logger); if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, @@ -699,7 +843,7 @@ static void plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) { - struct dc_context *ctx = dc->ctx; + DC_LOGGER_INIT(dc->ctx->logger); if (!pipe_ctx->plane_res.hubp || pipe_ctx->plane_res.hubp->power_gated) return; @@ -945,9 +1089,8 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx, tf = plane_state->in_transfer_func; if (plane_state->gamma_correction && - plane_state->gamma_correction->is_identity) - dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS); - else if (plane_state->gamma_correction && dce_use_lut(plane_state->format)) + !plane_state->gamma_correction->is_identity + && dce_use_lut(plane_state->format)) dpp_base->funcs->dpp_program_input_lut(dpp_base, plane_state->gamma_correction); if (tf == NULL) @@ -1433,7 +1576,7 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx, } } -static void program_output_csc(struct dc *dc, +static void dcn10_program_output_csc(struct dc *dc, struct pipe_ctx *pipe_ctx, enum dc_color_space colorspace, uint16_t *matrix, @@ -1623,6 +1766,8 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); + + /* TODO: proper fix once fpga works */ if (dc->debug.surface_visual_confirm) @@ -1649,6 +1794,7 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->stream->output_color_space) && per_pixel_alpha; + /* * TODO: remove hack * Note: currently there is a bug in init_hw such that @@ -1659,6 +1805,12 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) */ mpcc_id = hubp->inst; + /* If there is no full update, don't need to touch MPC tree*/ + if (!pipe_ctx->plane_state->update_flags.bits.full_update) { + mpc->funcs->update_blending(mpc, &blnd_cfg, mpcc_id); + return; + } + /* check if this MPCC is already being used */ new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id); /* remove MPCC if being used */ @@ -1777,7 +1929,7 @@ static void update_dchubp_dpp( /*gamut remap*/ program_gamut_remap(pipe_ctx); - program_output_csc(dc, + dc->hwss.program_output_csc(dc, pipe_ctx, pipe_ctx->stream->output_color_space, pipe_ctx->stream->csc_color_matrix.matrix, @@ -1810,9 +1962,9 @@ static void update_dchubp_dpp( hubp->funcs->set_blank(hubp, false); } -static void dcn10_otg_blank( +static void dcn10_blank_pixel_data( struct dc *dc, - struct stream_resource stream_res, + struct stream_resource *stream_res, struct dc_stream_state *stream, bool blank) { @@ -1823,21 +1975,21 @@ static void dcn10_otg_blank( color_space = stream->output_color_space; color_space_to_black_color(dc, color_space, &black_color); - if (stream_res.tg->funcs->set_blank_color) - stream_res.tg->funcs->set_blank_color( - stream_res.tg, + if (stream_res->tg->funcs->set_blank_color) + stream_res->tg->funcs->set_blank_color( + stream_res->tg, &black_color); if (!blank) { - if (stream_res.tg->funcs->set_blank) - stream_res.tg->funcs->set_blank(stream_res.tg, blank); - if (stream_res.abm) - stream_res.abm->funcs->set_abm_level(stream_res.abm, 
stream->abm_level); + if (stream_res->tg->funcs->set_blank) + stream_res->tg->funcs->set_blank(stream_res->tg, blank); + if (stream_res->abm) + stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level); } else if (blank) { - if (stream_res.abm) - stream_res.abm->funcs->set_abm_immediate_disable(stream_res.abm); - if (stream_res.tg->funcs->set_blank) - stream_res.tg->funcs->set_blank(stream_res.tg, blank); + if (stream_res->abm) + stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm); + if (stream_res->tg->funcs->set_blank) + stream_res->tg->funcs->set_blank(stream_res->tg, blank); } } @@ -1876,7 +2028,7 @@ static void program_all_pipe_in_tree( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg); - dcn10_otg_blank(dc, pipe_ctx->stream_res, + dc->hwss.blank_pixel_data(dc, &pipe_ctx->stream_res, pipe_ctx->stream, blank); } @@ -1983,9 +2135,9 @@ static void dcn10_apply_ctx_for_surface( bool removed_pipe[4] = { false }; unsigned int ref_clk_mhz = dc->res_pool->ref_clock_inKhz/1000; bool program_water_mark = false; - struct dc_context *ctx = dc->ctx; struct pipe_ctx *top_pipe_to_program = find_top_pipe_for_stream(dc, context, stream); + DC_LOGGER_INIT(dc->ctx->logger); if (!top_pipe_to_program) return; @@ -1996,7 +2148,7 @@ static void dcn10_apply_ctx_for_surface( if (num_planes == 0) { /* OTG blank before remove all front end */ - dcn10_otg_blank(dc, top_pipe_to_program->stream_res, top_pipe_to_program->stream, true); + dc->hwss.blank_pixel_data(dc, &top_pipe_to_program->stream_res, top_pipe_to_program->stream, true); } /* Disconnect unused mpcc */ @@ -2527,6 +2679,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .update_pending_status = dcn10_update_pending_status, .set_input_transfer_func = dcn10_set_input_transfer_func, .set_output_transfer_func = dcn10_set_output_transfer_func, + .program_output_csc = dcn10_program_output_csc, .power_down = dce110_power_down, .enable_accelerated_mode = dce110_enable_accelerated_mode, .enable_timing_synchronization = dcn10_enable_timing_synchronization, @@ -2538,10 +2691,11 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .blank_stream = dce110_blank_stream, .enable_display_power_gating = dcn10_dummy_display_power_gating, .disable_plane = dcn10_disable_plane, + .blank_pixel_data = dcn10_blank_pixel_data, .pipe_control_lock = dcn10_pipe_control_lock, .set_bandwidth = dcn10_set_bandwidth, .reset_hw_ctx_wrap = reset_hw_ctx_wrap, - .prog_pixclk_crtc_otg = dcn10_prog_pixclk_crtc_otg, + .enable_stream_timing = dcn10_enable_stream_timing, .set_drr = set_drr, .get_position = get_position, .set_static_screen_control = set_static_screen_control, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 179890b1a8c4..9ca51ae46de7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -65,6 +65,7 @@ static void mpc1_update_blending( int mpcc_id) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + struct mpcc *mpcc = mpc1_get_mpcc(mpc, mpcc_id); REG_UPDATE_5(MPCC_CONTROL[mpcc_id], MPCC_ALPHA_BLND_MODE, blnd_cfg->alpha_mode, @@ -74,6 +75,7 @@ static void mpc1_update_blending( MPCC_GLOBAL_GAIN, blnd_cfg->global_gain); mpc1_set_bg_color(mpc, &blnd_cfg->black_color, mpcc_id); + mpcc->blnd_cfg = *blnd_cfg; } void mpc1_update_stereo_mix( @@ -235,8 +237,7 @@ struct mpcc *mpc1_insert_plane( } /* update the blending configuration */ - new_mpcc->blnd_cfg = *blnd_cfg; - 
mpc->funcs->update_blending(mpc, &new_mpcc->blnd_cfg, mpcc_id); + mpc->funcs->update_blending(mpc, blnd_cfg, mpcc_id); /* update the stereo mix settings, if provided */ if (sm_cfg != NULL) { @@ -409,7 +410,26 @@ void mpc1_init_mpcc_list_from_hw( } } +void mpc1_read_mpcc_state( + struct mpc *mpc, + int mpcc_inst, + struct mpcc_state *s) +{ + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + + REG_GET(MPCC_OPP_ID[mpcc_inst], MPCC_OPP_ID, &s->opp_id); + REG_GET(MPCC_TOP_SEL[mpcc_inst], MPCC_TOP_SEL, &s->dpp_id); + REG_GET(MPCC_BOT_SEL[mpcc_inst], MPCC_BOT_SEL, &s->bot_mpcc_id); + REG_GET_4(MPCC_CONTROL[mpcc_inst], MPCC_MODE, &s->mode, + MPCC_ALPHA_BLND_MODE, &s->alpha_mode, + MPCC_ALPHA_MULTIPLIED_MODE, &s->pre_multiplied_alpha, + MPCC_BLND_ACTIVE_OVERLAP_ONLY, &s->overlap_only); + REG_GET_2(MPCC_STATUS[mpcc_inst], MPCC_IDLE, &s->idle, + MPCC_BUSY, &s->busy); +} + const struct mpc_funcs dcn10_mpc_funcs = { + .read_mpcc_state = mpc1_read_mpcc_state, .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, .mpc_init = mpc1_mpc_init, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h index 267a2995ef6e..d3d16c4cbea3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h @@ -183,4 +183,9 @@ struct mpcc *mpc1_get_mpcc_for_dpp( struct mpc_tree *tree, int dpp_id); +void mpc1_read_mpcc_state( + struct mpc *mpc, + int mpcc_inst, + struct mpcc_state *s); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 4bf64d1b2c60..c734b7fa5835 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -93,6 +93,81 @@ static void optc1_disable_stereo(struct timing_generator *optc) OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); } +static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) +{ + struct dc_crtc_timing patched_crtc_timing; + int vesa_sync_start; + int asic_blank_end; + int interlace_factor; + int vertical_line_start; + + patched_crtc_timing = *dc_crtc_timing; + optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); + + vesa_sync_start = patched_crtc_timing.h_addressable + + patched_crtc_timing.h_border_right + + patched_crtc_timing.h_front_porch; + + asic_blank_end = patched_crtc_timing.h_total - + vesa_sync_start - + patched_crtc_timing.h_border_left; + + interlace_factor = patched_crtc_timing.flags.INTERLACE ? 
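+			/* vertical counts are doubled for interlaced timings */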
2 : 1; + + vesa_sync_start = patched_crtc_timing.v_addressable + + patched_crtc_timing.v_border_bottom + + patched_crtc_timing.v_front_porch; + + asic_blank_end = (patched_crtc_timing.v_total - + vesa_sync_start - + patched_crtc_timing.v_border_top) + * interlace_factor; + + vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; + if (vertical_line_start < 0) { + ASSERT(0); + vertical_line_start = 0; + } + + return vertical_line_start; +} + +void optc1_program_vline_interrupt( + struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + unsigned long long vsync_delta) +{ + + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000); + unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_khz + 99), 100); + uint32_t req_delta_lines = (uint32_t) div64_u64( + (req_delta_tens_of_usec * pix_clk_hundreds_khz + dc_crtc_timing->h_total - 1), + dc_crtc_timing->h_total); + + uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing); + uint32_t start_line = 0; + uint32_t endLine = 0; + + if (req_delta_lines != 0) + req_delta_lines--; + + if (req_delta_lines > vsync_line) + start_line = dc_crtc_timing->v_total - (req_delta_lines - vsync_line) - 1; + else + start_line = vsync_line - req_delta_lines; + + endLine = start_line + 2; + + if (endLine >= dc_crtc_timing->v_total) + endLine = 2; + + REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, + OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, + OTG_VERTICAL_INTERRUPT0_LINE_END, endLine); +} + /** * program_timing_generator used by mode timing set * Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition. @@ -780,17 +855,17 @@ void optc1_set_drr( OTG_SET_V_TOTAL_MIN_MASK_EN, 0, OTG_SET_V_TOTAL_MIN_MASK, 0); } else { - REG_SET(OTG_V_TOTAL_MIN, 0, - OTG_V_TOTAL_MIN, 0); - - REG_SET(OTG_V_TOTAL_MAX, 0, - OTG_V_TOTAL_MAX, 0); - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, 0, OTG_V_TOTAL_MIN_SEL, 0, OTG_V_TOTAL_MAX_SEL, 0, OTG_FORCE_LOCK_ON_EVENT, 0); + + REG_SET(OTG_V_TOTAL_MIN, 0, + OTG_V_TOTAL_MIN, 0); + + REG_SET(OTG_V_TOTAL_MAX, 0, + OTG_V_TOTAL_MAX, 0); } } @@ -1154,6 +1229,12 @@ void optc1_read_otg_state(struct optc *optc1, REG_GET(OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, &s->v_total_min); + REG_GET(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MAX_SEL, &s->v_total_max_sel); + + REG_GET(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, &s->v_total_min_sel); + REG_GET_2(OTG_V_SYNC_A, OTG_V_SYNC_A_START, &s->v_sync_a_start, OTG_V_SYNC_A_END, &s->v_sync_a_end); @@ -1215,6 +1296,7 @@ static bool optc1_is_optc_underflow_occurred(struct timing_generator *optc) static const struct timing_generator_funcs dcn10_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, + .program_vline_interrupt = optc1_program_vline_interrupt, .program_global_sync = optc1_program_global_sync, .enable_crtc = optc1_enable_crtc, .disable_crtc = optc1_disable_crtc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index d25e7bf0d0d7..89e09e5327a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -65,6 +65,8 @@ SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ SRI(OTG_BLACK_COLOR, OTG, inst),\ SRI(OTG_CLOCK_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ 
SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ @@ -124,6 +126,8 @@ struct dcn_optc_registers { uint32_t OTG_TEST_PATTERN_CONTROL; uint32_t OTG_TEST_PATTERN_COLOR; uint32_t OTG_CLOCK_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT0_POSITION; uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL; uint32_t OTG_VERTICAL_INTERRUPT2_POSITION; uint32_t OPTC_INPUT_CLOCK_CONTROL; @@ -206,6 +210,9 @@ struct dcn_optc_registers { SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ @@ -323,6 +330,9 @@ struct dcn_optc_registers { type OTG_CLOCK_EN;\ type OTG_CLOCK_ON;\ type OTG_CLOCK_GATE_DIS;\ + type OTG_VERTICAL_INTERRUPT0_INT_ENABLE;\ + type OTG_VERTICAL_INTERRUPT0_LINE_START;\ + type OTG_VERTICAL_INTERRUPT0_LINE_END;\ type OTG_VERTICAL_INTERRUPT2_INT_ENABLE;\ type OTG_VERTICAL_INTERRUPT2_LINE_START;\ type OPTC_INPUT_CLK_EN;\ @@ -396,6 +406,8 @@ struct dcn_otg_state { uint32_t v_total; uint32_t v_total_max; uint32_t v_total_min; + uint32_t v_total_min_sel; + uint32_t v_total_max_sel; uint32_t v_sync_a_start; uint32_t v_sync_a_end; uint32_t h_blank_start; @@ -420,6 +432,10 @@ void optc1_program_timing( const struct dc_crtc_timing *dc_crtc_timing, bool use_vbios); +void optc1_program_vline_interrupt(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + unsigned long long vsync_delta); + void optc1_program_global_sync( struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 02bd664aed3e..2c0a3150bf2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -39,7 +39,7 @@ #include "dce110/dce110_hw_sequencer.h" #include "dcn10/dcn10_opp.h" #include "dce/dce_link_encoder.h" -#include "dce/dce_stream_encoder.h" +#include "dcn10/dcn10_stream_encoder.h" #include "dce/dce_clocks.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" @@ -166,36 +166,22 @@ static const struct dce_abm_mask abm_mask = { #define stream_enc_regs(id)\ [id] = {\ - SE_DCN_REG_LIST(id),\ - .TMDS_CNTL = 0,\ - .AFMT_AVI_INFO0 = 0,\ - .AFMT_AVI_INFO1 = 0,\ - .AFMT_AVI_INFO2 = 0,\ - .AFMT_AVI_INFO3 = 0,\ + SE_DCN_REG_LIST(id)\ } -static const struct dce110_stream_enc_registers stream_enc_regs[] = { +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { stream_enc_regs(0), stream_enc_regs(1), stream_enc_regs(2), stream_enc_regs(3), }; -static const struct dce_stream_encoder_shift se_shift = { +static const struct dcn10_stream_encoder_shift se_shift = { SE_COMMON_MASK_SH_LIST_DCN10(__SHIFT) }; -static const struct dce_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN10(_MASK), - .AFMT_GENERIC0_UPDATE = 0, - .AFMT_GENERIC2_UPDATE = 0, - .DP_DYN_RANGE = 0, - .DP_YCBCR_RANGE = 0, - .HDMI_AVI_INFO_SEND = 0, - .HDMI_AVI_INFO_CONT = 
0, - .HDMI_AVI_INFO_LINE = 0, - .DP_SEC_AVI_ENABLE = 0, - .AFMT_AVI_INFO_VERSION = 0 +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN10(_MASK) }; #define audio_regs(id)\ @@ -320,11 +306,14 @@ static const struct dcn_dpp_registers tf_regs[] = { }; static const struct dcn_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN10(__SHIFT) + TF_REG_LIST_SH_MASK_DCN10(__SHIFT), + TF_DEBUG_REG_LIST_SH_DCN10 + }; static const struct dcn_dpp_mask tf_mask = { TF_REG_LIST_SH_MASK_DCN10(_MASK), + TF_DEBUG_REG_LIST_MASK_DCN10 }; static const struct dcn_mpc_registers mpc_regs = { @@ -650,16 +639,16 @@ static struct stream_encoder *dcn10_stream_encoder_create( enum engine_id eng_id, struct dc_context *ctx) { - struct dce110_stream_encoder *enc110 = - kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); + struct dcn10_stream_encoder *enc1 = + kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - if (!enc110) + if (!enc1) return NULL; - dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, + dcn10_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, &stream_enc_regs[eng_id], &se_shift, &se_mask); - return &enc110->base; + return &enc1->base; } static const struct dce_hwseq_registers hwseq_reg = { @@ -918,36 +907,6 @@ enum dc_status dcn10_add_stream_to_ctx( return result; } -enum dc_status dcn10_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = dc_stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = resource_map_phy_clock_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, dc_stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - if (result == DC_OK && !dcn_validate_bandwidth(dc, context)) - return DC_FAIL_BANDWIDTH_VALIDATE; - - return result; -} - static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( struct dc_state *context, const struct resource_pool *pool, @@ -978,235 +937,16 @@ static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( return idle_pipe; } -enum dcc_control { - dcc_control__256_256_xxx, - dcc_control__128_128_xxx, - dcc_control__256_64_64, -}; - -enum segment_order { - segment_order__na, - segment_order__contiguous, - segment_order__non_contiguous, -}; - -static bool dcc_support_pixel_format( - enum surface_pixel_format format, - unsigned int *bytes_per_element) -{ - /* DML: get_bytes_per_element */ - switch (format) { - case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: - case SURFACE_PIXEL_FORMAT_GRPH_RGB565: - *bytes_per_element = 2; - return true; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010: - *bytes_per_element = 4; - return true; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: - *bytes_per_element = 8; - return true; - default: - return false; - } -} - -static bool dcc_support_swizzle( - enum swizzle_mode_values swizzle, - unsigned int bytes_per_element, - enum segment_order *segment_order_horz, - enum segment_order *segment_order_vert) -{ - bool standard_swizzle = false; - 
bool display_swizzle = false; - - switch (swizzle) { - case DC_SW_4KB_S: - case DC_SW_64KB_S: - case DC_SW_VAR_S: - case DC_SW_4KB_S_X: - case DC_SW_64KB_S_X: - case DC_SW_VAR_S_X: - standard_swizzle = true; - break; - case DC_SW_4KB_D: - case DC_SW_64KB_D: - case DC_SW_VAR_D: - case DC_SW_4KB_D_X: - case DC_SW_64KB_D_X: - case DC_SW_VAR_D_X: - display_swizzle = true; - break; - default: - break; - } - - if (bytes_per_element == 1 && standard_swizzle) { - *segment_order_horz = segment_order__contiguous; - *segment_order_vert = segment_order__na; - return true; - } - if (bytes_per_element == 2 && standard_swizzle) { - *segment_order_horz = segment_order__non_contiguous; - *segment_order_vert = segment_order__contiguous; - return true; - } - if (bytes_per_element == 4 && standard_swizzle) { - *segment_order_horz = segment_order__non_contiguous; - *segment_order_vert = segment_order__contiguous; - return true; - } - if (bytes_per_element == 8 && standard_swizzle) { - *segment_order_horz = segment_order__na; - *segment_order_vert = segment_order__contiguous; - return true; - } - if (bytes_per_element == 8 && display_swizzle) { - *segment_order_horz = segment_order__contiguous; - *segment_order_vert = segment_order__non_contiguous; - return true; - } - - return false; -} - -static void get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height, - unsigned int bytes_per_element) -{ - /* copied from DML. might want to refactor DML to leverage from DML */ - /* DML : get_blk256_size */ - if (bytes_per_element == 1) { - *blk256_width = 16; - *blk256_height = 16; - } else if (bytes_per_element == 2) { - *blk256_width = 16; - *blk256_height = 8; - } else if (bytes_per_element == 4) { - *blk256_width = 8; - *blk256_height = 8; - } else if (bytes_per_element == 8) { - *blk256_width = 8; - *blk256_height = 4; - } -} - -static void det_request_size( - unsigned int height, - unsigned int width, - unsigned int bpe, - bool *req128_horz_wc, - bool *req128_vert_wc) -{ - unsigned int detile_buf_size = 164 * 1024; /* 164KB for DCN1.0 */ - - unsigned int blk256_height = 0; - unsigned int blk256_width = 0; - unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc; - - get_blk256_size(&blk256_width, &blk256_height, bpe); - - swath_bytes_horz_wc = height * blk256_height * bpe; - swath_bytes_vert_wc = width * blk256_width * bpe; - - *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ? - false : /* full 256B request */ - true; /* half 128b request */ - - *req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ? 
- false : /* full 256B request */ - true; /* half 128b request */ -} - -static bool get_dcc_compression_cap(const struct dc *dc, +static bool dcn10_get_dcc_compression_cap(const struct dc *dc, const struct dc_dcc_surface_param *input, struct dc_surface_dcc_cap *output) { - /* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */ - enum dcc_control dcc_control; - unsigned int bpe; - enum segment_order segment_order_horz, segment_order_vert; - bool req128_horz_wc, req128_vert_wc; - - memset(output, 0, sizeof(*output)); - - if (dc->debug.disable_dcc == DCC_DISABLE) - return false; - - if (!dcc_support_pixel_format(input->format, - &bpe)) - return false; - - if (!dcc_support_swizzle(input->swizzle_mode, bpe, - &segment_order_horz, &segment_order_vert)) - return false; - - det_request_size(input->surface_size.height, input->surface_size.width, - bpe, &req128_horz_wc, &req128_vert_wc); - - if (!req128_horz_wc && !req128_vert_wc) { - dcc_control = dcc_control__256_256_xxx; - } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) { - if (!req128_horz_wc) - dcc_control = dcc_control__256_256_xxx; - else if (segment_order_horz == segment_order__contiguous) - dcc_control = dcc_control__128_128_xxx; - else - dcc_control = dcc_control__256_64_64; - } else if (input->scan == SCAN_DIRECTION_VERTICAL) { - if (!req128_vert_wc) - dcc_control = dcc_control__256_256_xxx; - else if (segment_order_vert == segment_order__contiguous) - dcc_control = dcc_control__128_128_xxx; - else - dcc_control = dcc_control__256_64_64; - } else { - if ((req128_horz_wc && - segment_order_horz == segment_order__non_contiguous) || - (req128_vert_wc && - segment_order_vert == segment_order__non_contiguous)) - /* access_dir not known, must use most constraining */ - dcc_control = dcc_control__256_64_64; - else - /* reg128 is true for either horz and vert - * but segment_order is contiguous - */ - dcc_control = dcc_control__128_128_xxx; - } - - if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE && - dcc_control != dcc_control__256_256_xxx) - return false; - - switch (dcc_control) { - case dcc_control__256_256_xxx: - output->grph.rgb.max_uncompressed_blk_size = 256; - output->grph.rgb.max_compressed_blk_size = 256; - output->grph.rgb.independent_64b_blks = false; - break; - case dcc_control__128_128_xxx: - output->grph.rgb.max_uncompressed_blk_size = 128; - output->grph.rgb.max_compressed_blk_size = 128; - output->grph.rgb.independent_64b_blks = false; - break; - case dcc_control__256_64_64: - output->grph.rgb.max_uncompressed_blk_size = 256; - output->grph.rgb.max_compressed_blk_size = 64; - output->grph.rgb.independent_64b_blks = true; - break; - } - - output->capable = true; - output->const_color_support = false; - - return true; + return dc->res_pool->hubbub->funcs->get_dcc_compression_cap( + dc->res_pool->hubbub, + input, + output); } - static void dcn10_destroy_resource_pool(struct resource_pool **pool) { struct dcn10_resource_pool *dcn10_pool = TO_DCN10_RES_POOL(*pool); @@ -1227,13 +967,12 @@ static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_st } static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = get_dcc_compression_cap + .get_dcc_compression_cap = dcn10_get_dcc_compression_cap }; static struct resource_funcs dcn10_res_pool_funcs = { .destroy = dcn10_destroy_resource_pool, .link_enc_create = dcn10_link_encoder_create, - .validate_guaranteed = dcn10_validate_guaranteed, .validate_bandwidth = dcn_validate_bandwidth, .acquire_idle_pipe_for_layer = dcn10_acquire_idle_pipe_for_layer, 
.validate_plane = dcn10_validate_plane,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
new file mode 100644
index 000000000000..befd8639ad55
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -0,0 +1,1490 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dc_bios_types.h"
+#include "dcn10_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+
+#define DC_LOGGER \
+		enc1->base.ctx->logger
+
+
+#define REG(reg)\
+	(enc1->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+	enc1->se_shift->field_name, enc1->se_mask->field_name
+
+#define VBI_LINE_0 0
+#define DP_BLANK_MAX_RETRY 20
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000
+
+
+enum {
+	DP_MST_UPDATE_MAX_RETRY = 50
+};
+
+#define CTX \
+	enc1->base.ctx
+
+void enc1_update_generic_info_packet(
+	struct dcn10_stream_encoder *enc1,
+	uint32_t packet_index,
+	const struct dc_info_packet *info_packet)
+{
+	uint32_t regval;
+	/* TODOFPGA Figure out a proper number for max_retries polling for lock
+	 * use 50 for now.
+	 */
+	uint32_t max_retries = 50;
+
+	/* we need to turn on the clock before programming the AFMT block */
+	REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1);
+
+	if (packet_index >= 8)
+		ASSERT(0);
+
+	/* poll until dig_update_lock is not locked -> asic internal signal
+	 * assume otg master lock will unlock it
+	 */
+/*	REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_LOCK_STATUS,
+		0, 10, max_retries);*/
+
+	/* check if HW is still reading GSP memory */
+	REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT,
+		0, 10, max_retries);
+
+	/* HW reading GSP memory for too long means something is wrong:
+	 * clear the GSP memory access conflict that is raised while
+ * hw SW is writing to GSP memory + */ + REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1); + + /* choose which generic packet to use */ + regval = REG_READ(AFMT_VBI_PACKET_CONTROL); + REG_UPDATE(AFMT_VBI_PACKET_CONTROL, + AFMT_GENERIC_INDEX, packet_index); + + /* write generic packet header + * (4th byte is for GENERIC0 only) + */ + REG_SET_4(AFMT_GENERIC_HDR, 0, + AFMT_GENERIC_HB0, info_packet->hb0, + AFMT_GENERIC_HB1, info_packet->hb1, + AFMT_GENERIC_HB2, info_packet->hb2, + AFMT_GENERIC_HB3, info_packet->hb3); + + /* write generic packet contents + * (we never use last 4 bytes) + * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers + */ + { + const uint32_t *content = + (const uint32_t *) &info_packet->sb[0]; + + REG_WRITE(AFMT_GENERIC_0, *content++); + REG_WRITE(AFMT_GENERIC_1, *content++); + REG_WRITE(AFMT_GENERIC_2, *content++); + REG_WRITE(AFMT_GENERIC_3, *content++); + REG_WRITE(AFMT_GENERIC_4, *content++); + REG_WRITE(AFMT_GENERIC_5, *content++); + REG_WRITE(AFMT_GENERIC_6, *content++); + REG_WRITE(AFMT_GENERIC_7, *content); + } + + switch (packet_index) { + case 0: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC0_FRAME_UPDATE, 1); + break; + case 1: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC1_FRAME_UPDATE, 1); + break; + case 2: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC2_FRAME_UPDATE, 1); + break; + case 3: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC3_FRAME_UPDATE, 1); + break; + case 4: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC4_FRAME_UPDATE, 1); + break; + case 5: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC5_FRAME_UPDATE, 1); + break; + case 6: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC6_FRAME_UPDATE, 1); + break; + case 7: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC7_FRAME_UPDATE, 1); + break; + default: + break; + } +} + +static void enc1_update_hdmi_info_packet( + struct dcn10_stream_encoder *enc1, + uint32_t packet_index, + const struct dc_info_packet *info_packet) +{ + uint32_t cont, send, line; + + if (info_packet->valid) { + enc1_update_generic_info_packet( + enc1, + packet_index, + info_packet); + + /* enable transmission of packet(s) - + * packet transmission begins on the next frame + */ + cont = 1; + /* send packet(s) every frame */ + send = 1; + /* select line number to send packets on */ + line = 2; + } else { + cont = 0; + send = 0; + line = 0; + } + + /* choose which generic packet control to use */ + switch (packet_index) { + case 0: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL0, + HDMI_GENERIC0_CONT, cont, + HDMI_GENERIC0_SEND, send, + HDMI_GENERIC0_LINE, line); + break; + case 1: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL0, + HDMI_GENERIC1_CONT, cont, + HDMI_GENERIC1_SEND, send, + HDMI_GENERIC1_LINE, line); + break; + case 2: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL1, + HDMI_GENERIC0_CONT, cont, + HDMI_GENERIC0_SEND, send, + HDMI_GENERIC0_LINE, line); + break; + case 3: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL1, + HDMI_GENERIC1_CONT, cont, + HDMI_GENERIC1_SEND, send, + HDMI_GENERIC1_LINE, line); + break; + case 4: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2, + HDMI_GENERIC0_CONT, cont, + HDMI_GENERIC0_SEND, send, + HDMI_GENERIC0_LINE, line); + break; + case 5: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2, + HDMI_GENERIC1_CONT, cont, + HDMI_GENERIC1_SEND, send, + HDMI_GENERIC1_LINE, line); + break; + case 6: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL3, + HDMI_GENERIC0_CONT, cont, + HDMI_GENERIC0_SEND, send, + HDMI_GENERIC0_LINE, line); + break; + case 7: + 
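+		/* generic packets 6 and 7 share HDMI_GENERIC_PACKET_CONTROL3 */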
REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL3, + HDMI_GENERIC1_CONT, cont, + HDMI_GENERIC1_SEND, send, + HDMI_GENERIC1_LINE, line); + break; + default: + /* invalid HW packet index */ + DC_LOG_WARNING( + "Invalid HW packet index: %s()\n", + __func__); + return; + } +} + +/* setup stream encoder in dp mode */ +void enc1_stream_encoder_dp_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + enum dc_color_space output_color_space) +{ + uint32_t h_active_start; + uint32_t v_active_start; + uint32_t misc0 = 0; + uint32_t misc1 = 0; + uint32_t h_blank; + uint32_t h_back_porch; + uint8_t synchronous_clock = 0; /* asynchronous mode */ + uint8_t colorimetry_bpc; + uint8_t dynamic_range_rgb = 0; /*full range*/ + uint8_t dynamic_range_ycbcr = 1; /*bt709*/ + + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE(DP_DB_CNTL, DP_DB_DISABLE, 1); + + /* set pixel encoding */ + switch (crtc_timing->pixel_encoding) { + case PIXEL_ENCODING_YCBCR422: + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_YCBCR422); + break; + case PIXEL_ENCODING_YCBCR444: + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_YCBCR444); + + if (crtc_timing->flags.Y_ONLY) + if (crtc_timing->display_color_depth != COLOR_DEPTH_666) + /* HW testing only, no use case yet. + * Color depth of Y-only could be + * 8, 10, 12, 16 bits + */ + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_Y_ONLY); + /* Note: DP_MSA_MISC1 bit 7 is the indicator + * of Y-only mode. + * This bit is set in HW if register + * DP_PIXEL_ENCODING is programmed to 0x4 + */ + break; + case PIXEL_ENCODING_YCBCR420: + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_YCBCR420); + REG_UPDATE(DP_VID_TIMING, DP_VID_N_MUL, 1); + break; + default: + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_RGB444); + break; + } + + misc1 = REG_READ(DP_MSA_MISC); + + /* set color depth */ + + switch (crtc_timing->display_color_depth) { + case COLOR_DEPTH_666: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + 0); + break; + case COLOR_DEPTH_888: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_8BPC); + break; + case COLOR_DEPTH_101010: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_10BPC); + + break; + case COLOR_DEPTH_121212: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_12BPC); + break; + default: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_6BPC); + break; + } + + /* set dynamic range and YCbCr range */ + + switch (crtc_timing->display_color_depth) { + case COLOR_DEPTH_666: + colorimetry_bpc = 0; + break; + case COLOR_DEPTH_888: + colorimetry_bpc = 1; + break; + case COLOR_DEPTH_101010: + colorimetry_bpc = 2; + break; + case COLOR_DEPTH_121212: + colorimetry_bpc = 3; + break; + default: + colorimetry_bpc = 0; + break; + } + + misc0 = misc0 | synchronous_clock; + misc0 = colorimetry_bpc << 5; + + switch (output_color_space) { + case COLOR_SPACE_SRGB: + misc0 = misc0 | 0x0; + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + dynamic_range_rgb = 0; /*full range*/ + break; + case COLOR_SPACE_SRGB_LIMITED: + misc0 = misc0 | 0x8; /* bit3=1 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + dynamic_range_rgb = 1; /*limited range*/ + break; + case COLOR_SPACE_YCBCR601: + case COLOR_SPACE_YCBCR601_LIMITED: + misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + dynamic_range_ycbcr = 0; 
/*bt601*/ + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ + else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) + misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ + break; + case COLOR_SPACE_YCBCR709: + case COLOR_SPACE_YCBCR709_LIMITED: + misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + dynamic_range_ycbcr = 1; /*bt709*/ + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ + else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) + misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ + break; + case COLOR_SPACE_2020_RGB_LIMITEDRANGE: + dynamic_range_rgb = 1; /*limited range*/ + break; + case COLOR_SPACE_2020_RGB_FULLRANGE: + case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_XR_RGB: + case COLOR_SPACE_MSREF_SCRGB: + case COLOR_SPACE_ADOBERGB: + case COLOR_SPACE_DCIP3: + case COLOR_SPACE_XV_YCC_709: + case COLOR_SPACE_XV_YCC_601: + case COLOR_SPACE_DISPLAYNATIVE: + case COLOR_SPACE_DOLBYVISION: + case COLOR_SPACE_APPCTRL: + case COLOR_SPACE_CUSTOMPOINTS: + case COLOR_SPACE_UNKNOWN: + /* do nothing */ + break; + } + + REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0); + REG_WRITE(DP_MSA_MISC, misc1); /* MSA_MISC1 */ + + /* dcn new register + * dc_crtc_timing is vesa dmt struct. data from edid + */ + REG_SET_2(DP_MSA_TIMING_PARAM1, 0, + DP_MSA_HTOTAL, crtc_timing->h_total, + DP_MSA_VTOTAL, crtc_timing->v_total); + + /* calculate from vesa timing parameters + * h_active_start related to leading edge of sync + */ + + h_blank = crtc_timing->h_total - crtc_timing->h_border_left - + crtc_timing->h_addressable - crtc_timing->h_border_right; + + h_back_porch = h_blank - crtc_timing->h_front_porch - + crtc_timing->h_sync_width; + + /* start at beginning of left border */ + h_active_start = crtc_timing->h_sync_width + h_back_porch; + + + v_active_start = crtc_timing->v_total - crtc_timing->v_border_top - + crtc_timing->v_addressable - crtc_timing->v_border_bottom - + crtc_timing->v_front_porch; + + + /* start at beginning of left border */ + REG_SET_2(DP_MSA_TIMING_PARAM2, 0, + DP_MSA_HSTART, h_active_start, + DP_MSA_VSTART, v_active_start); + + REG_SET_4(DP_MSA_TIMING_PARAM3, 0, + DP_MSA_HSYNCWIDTH, + crtc_timing->h_sync_width, + DP_MSA_HSYNCPOLARITY, + !crtc_timing->flags.HSYNC_POSITIVE_POLARITY, + DP_MSA_VSYNCWIDTH, + crtc_timing->v_sync_width, + DP_MSA_VSYNCPOLARITY, + !crtc_timing->flags.VSYNC_POSITIVE_POLARITY); + + /* HWDITH include border or overscan */ + REG_SET_2(DP_MSA_TIMING_PARAM4, 0, + DP_MSA_HWIDTH, crtc_timing->h_border_left + + crtc_timing->h_addressable + crtc_timing->h_border_right, + DP_MSA_VHEIGHT, crtc_timing->v_border_top + + crtc_timing->v_addressable + crtc_timing->v_border_bottom); +} + +static void enc1_stream_encoder_set_stream_attribute_helper( + struct dcn10_stream_encoder *enc1, + struct dc_crtc_timing *crtc_timing) +{ + switch (crtc_timing->pixel_encoding) { + case PIXEL_ENCODING_YCBCR422: + REG_UPDATE(DIG_FE_CNTL, TMDS_PIXEL_ENCODING, 1); + break; + default: + REG_UPDATE(DIG_FE_CNTL, TMDS_PIXEL_ENCODING, 0); + break; + } + REG_UPDATE(DIG_FE_CNTL, TMDS_COLOR_FORMAT, 0); +} + +/* setup stream encoder in hdmi mode */ +void enc1_stream_encoder_hdmi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + int actual_pix_clk_khz, + bool enable_audio) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + struct bp_encoder_control cntl = {0}; + + cntl.action = 
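+	/* encoder setup is routed through the VBIOS encoder_control command table */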
ENCODER_CONTROL_SETUP; + cntl.engine_id = enc1->base.id; + cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A; + cntl.enable_dp_audio = enable_audio; + cntl.pixel_clock = actual_pix_clk_khz; + cntl.lanes_number = LANE_COUNT_FOUR; + + if (enc1->base.bp->funcs->encoder_control( + enc1->base.bp, &cntl) != BP_RESULT_OK) + return; + + enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing); + + /* setup HDMI engine */ + REG_UPDATE_5(HDMI_CONTROL, + HDMI_PACKET_GEN_VERSION, 1, + HDMI_KEEPOUT_MODE, 1, + HDMI_DEEP_COLOR_ENABLE, 0, + HDMI_DATA_SCRAMBLE_EN, 0, + HDMI_CLOCK_CHANNEL_RATE, 0); + + + switch (crtc_timing->display_color_depth) { + case COLOR_DEPTH_888: + REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0); + break; + case COLOR_DEPTH_101010: + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 1, + HDMI_DEEP_COLOR_ENABLE, 0); + } else { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 1, + HDMI_DEEP_COLOR_ENABLE, 1); + } + break; + case COLOR_DEPTH_121212: + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 2, + HDMI_DEEP_COLOR_ENABLE, 0); + } else { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 2, + HDMI_DEEP_COLOR_ENABLE, 1); + } + break; + case COLOR_DEPTH_161616: + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 3, + HDMI_DEEP_COLOR_ENABLE, 1); + break; + default: + break; + } + + if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) { + /* enable HDMI data scrambler + * HDMI_CLOCK_CHANNEL_RATE_MORE_340M + * Clock channel frequency is 1/4 of character rate. + */ + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DATA_SCRAMBLE_EN, 1, + HDMI_CLOCK_CHANNEL_RATE, 1); + } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) { + + /* TODO: New feature for DCE11, still need to implement */ + + /* enable HDMI data scrambler + * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE + * Clock channel frequency is the same + * as character rate + */ + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DATA_SCRAMBLE_EN, 1, + HDMI_CLOCK_CHANNEL_RATE, 0); + } + + + REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL, + HDMI_GC_CONT, 1, + HDMI_GC_SEND, 1, + HDMI_NULL_SEND, 1); + + /* following belongs to audio */ + REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); + + REG_UPDATE(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); + + REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, + VBI_LINE_0 + 2); + + REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0); +} + +/* setup stream encoder in dvi mode */ +void enc1_stream_encoder_dvi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + bool is_dual_link) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + struct bp_encoder_control cntl = {0}; + + cntl.action = ENCODER_CONTROL_SETUP; + cntl.engine_id = enc1->base.id; + cntl.signal = is_dual_link ? + SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK; + cntl.enable_dp_audio = false; + cntl.pixel_clock = crtc_timing->pix_clk_khz; + cntl.lanes_number = (is_dual_link) ? 
LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+
+	if (enc1->base.bp->funcs->encoder_control(
+			enc1->base.bp, &cntl) != BP_RESULT_OK)
+		return;
+
+	ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+	ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+	enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+
+void enc1_stream_encoder_set_mst_bandwidth(
+	struct stream_encoder *enc,
+	struct fixed31_32 avg_time_slots_per_mtp)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t x = dal_fixed31_32_floor(
+		avg_time_slots_per_mtp);
+	uint32_t y = dal_fixed31_32_ceil(
+		dal_fixed31_32_shl(
+			dal_fixed31_32_sub_int(
+				avg_time_slots_per_mtp,
+				x),
+			26));
+
+	REG_SET_2(DP_MSE_RATE_CNTL, 0,
+		DP_MSE_RATE_X, x,
+		DP_MSE_RATE_Y, y);
+
+	/* wait for the update to complete on the link, i.e. until the
+	 * read-only DP_MSE_RATE_UPDATE_PENDING field resets to 0 (not pending)
+	 */
+	REG_WAIT(DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING,
+			0,
+			10, DP_MST_UPDATE_MAX_RETRY);
+}
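The X/Y split above stores the integer part of avg_time_slots_per_mtp and its fraction scaled into a 26-bit field, rounded up. A plain-C equivalent of that split (a sketch only; the driver itself works through the dal_fixed31_32 helpers, and the 26-bit width is inferred from the shift above):

	#include <stdint.h>
	#include <math.h>

	static void mse_rate_split(double avg_time_slots_per_mtp,
				   uint32_t *x, uint32_t *y)
	{
		*x = (uint32_t)floor(avg_time_slots_per_mtp);
		/* fractional part scaled to 1/2^26 units, rounded up */
		*y = (uint32_t)ceil((avg_time_slots_per_mtp - (double)*x) *
				    (double)(1u << 26));
	}

For avg_time_slots_per_mtp = 3.5 this yields X = 3 and Y = 33554432 (0.5 * 2^26).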
+
+static void enc1_stream_encoder_update_hdmi_info_packets(
+	struct stream_encoder *enc,
+	const struct encoder_info_frame *info_frame)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* for bring-up, disable double buffering. TODO */
+	REG_UPDATE(HDMI_DB_CONTROL, HDMI_DB_DISABLE, 1);
+
+	enc1_update_hdmi_info_packet(enc1, 0, &info_frame->avi);
+	enc1_update_hdmi_info_packet(enc1, 1, &info_frame->vendor);
+	enc1_update_hdmi_info_packet(enc1, 2, &info_frame->gamut);
+	enc1_update_hdmi_info_packet(enc1, 3, &info_frame->spd);
+	enc1_update_hdmi_info_packet(enc1, 4, &info_frame->hdrsmd);
+}
+
+static void enc1_stream_encoder_stop_hdmi_info_packets(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* stop generic packets 0 & 1 on HDMI */
+	REG_SET_6(HDMI_GENERIC_PACKET_CONTROL0, 0,
+		HDMI_GENERIC1_CONT, 0,
+		HDMI_GENERIC1_LINE, 0,
+		HDMI_GENERIC1_SEND, 0,
+		HDMI_GENERIC0_CONT, 0,
+		HDMI_GENERIC0_LINE, 0,
+		HDMI_GENERIC0_SEND, 0);
+
+	/* stop generic packets 2 & 3 on HDMI */
+	REG_SET_6(HDMI_GENERIC_PACKET_CONTROL1, 0,
+		HDMI_GENERIC0_CONT, 0,
+		HDMI_GENERIC0_LINE, 0,
+		HDMI_GENERIC0_SEND, 0,
+		HDMI_GENERIC1_CONT, 0,
+		HDMI_GENERIC1_LINE, 0,
+		HDMI_GENERIC1_SEND, 0);
+
+	/* stop generic packets 4 & 5 on HDMI */
+	REG_SET_6(HDMI_GENERIC_PACKET_CONTROL2, 0,
+		HDMI_GENERIC0_CONT, 0,
+		HDMI_GENERIC0_LINE, 0,
+		HDMI_GENERIC0_SEND, 0,
+		HDMI_GENERIC1_CONT, 0,
+		HDMI_GENERIC1_LINE, 0,
+		HDMI_GENERIC1_SEND, 0);
+
+	/* stop generic packets 6 & 7 on HDMI */
+	REG_SET_6(HDMI_GENERIC_PACKET_CONTROL3, 0,
+		HDMI_GENERIC0_CONT, 0,
+		HDMI_GENERIC0_LINE, 0,
+		HDMI_GENERIC0_SEND, 0,
+		HDMI_GENERIC1_CONT, 0,
+		HDMI_GENERIC1_LINE, 0,
+		HDMI_GENERIC1_SEND, 0);
+}
+
+void enc1_stream_encoder_update_dp_info_packets(
+	struct stream_encoder *enc,
+	const struct encoder_info_frame *info_frame)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t value = 0;
+
+	if (info_frame->vsc.valid)
+		enc1_update_generic_info_packet(
+				enc1,
+				0, /* packetIndex */
+				&info_frame->vsc);
+
+	if (info_frame->spd.valid)
+		enc1_update_generic_info_packet(
+				enc1,
+				2, /* packetIndex */
+				&info_frame->spd);
+
+	if (info_frame->hdrsmd.valid)
+		enc1_update_generic_info_packet(
+				enc1,
+				3, /* packetIndex */
+				&info_frame->hdrsmd);
+
+	/* enable/disable transmission of packet(s);
+	 * if enabled, packet transmission begins on the next frame
+	 */
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid);
+
+	/* This bit is the master enable bit.
+	 * When enabling the secondary stream engine,
+	 * this master bit must also be set.
+	 * This register is shared with the audio info frame.
+	 * Therefore we need to enable the master bit
+	 * if at least one of the fields is not 0.
+	 */
+	value = REG_READ(DP_SEC_CNTL);
+	if (value)
+		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_stream_encoder_stop_dp_info_packets(
+	struct stream_encoder *enc)
+{
+	/* stop generic packets on DP */
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t value = 0;
+
+	REG_SET_10(DP_SEC_CNTL, 0,
+		DP_SEC_GSP0_ENABLE, 0,
+		DP_SEC_GSP1_ENABLE, 0,
+		DP_SEC_GSP2_ENABLE, 0,
+		DP_SEC_GSP3_ENABLE, 0,
+		DP_SEC_GSP4_ENABLE, 0,
+		DP_SEC_GSP5_ENABLE, 0,
+		DP_SEC_GSP6_ENABLE, 0,
+		DP_SEC_GSP7_ENABLE, 0,
+		DP_SEC_MPG_ENABLE, 0,
+		DP_SEC_STREAM_ENABLE, 0);
+
+	/* this register is shared with the audio info frame;
+	 * therefore we need to keep the master enabled
+	 * if at least one of the fields is not 0
+	 */
+	value = REG_READ(DP_SEC_CNTL);
+	if (value)
+		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_stream_encoder_dp_blank(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t retries = 0;
+	uint32_t reg1 = 0;
+	uint32_t max_retries = DP_BLANK_MAX_RETRY * 10;
+
+	/* Note: For CZ, we are changing the driver default to disable
+	 * the stream deferred to the next VBLANK. If results are positive, we
+	 * will make the same change to all DCE versions. There are a
+	 * handful of panels that cannot handle a stream disable at
+	 * HBLANK; it results in a white line flash across the
+	 * screen on stream disable.
+	 */
+	REG_GET(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, &reg1);
+	if ((reg1 & 0x1) == 0)
+		/* stream not enabled */
+		return;
+
+	/* Specify the video stream disable point
+	 * (2 = start of the next vertical blank)
+	 */
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2);
+
+	/* Larger delay to wait until VBLANK - use a max retry of
+	 * 10us*3000=30ms. This covers the 16.6 ms of a typical 60 Hz mode
+	 * plus a little more, because we may not fully trust delay accuracy.
+	 */
+	max_retries = DP_BLANK_MAX_RETRY * 150;
+
+	/* disable DP stream */
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0);
+
+	/* the encoder stops sending the video stream
+	 * at the start of the vertical blanking;
+	 * poll for DP_VID_STREAM_STATUS == 0
+	 */
+	REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS,
+			0,
+			10, max_retries);
+
+	ASSERT(retries <= max_retries);
+
+	/* Tell the DP encoder to ignore timing from the CRTC; this must be
+	 * done after the polling. If we set DP_STEER_FIFO_RESET before the DP
+	 * stream blank is complete, stream status will be stuck in the video
+	 * stream enabled state, i.e. DP_VID_STREAM_STATUS stuck at 1.
+	 */
+	REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, true);
+}
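The retry budget above can be sanity-checked: the in-code comment "10us*3000=30ms" implies DP_BLANK_MAX_RETRY is 20, so DP_BLANK_MAX_RETRY * 150 polls at a 10 us interval cover roughly two 60 Hz frames. A minimal check under that assumption:

	#include <assert.h>

	int main(void)
	{
		unsigned int max_retries = 20 * 150;       /* assumed DP_BLANK_MAX_RETRY == 20 */
		unsigned int budget_us = max_retries * 10; /* 10 us poll interval */

		assert(budget_us == 30000); /* 30 ms, > 16.6 ms per 60 Hz frame */
		return 0;
	}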
+
+/* output video stream to link encoder */
+void enc1_stream_encoder_dp_unblank(
+	struct stream_encoder *enc,
+	const struct encoder_unblank_param *param)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+		uint32_t n_vid = 0x8000;
+		uint32_t m_vid;
+
+		/* M / N = Fstream / Flink
+		 * m_vid / n_vid = pixel rate / link rate
+		 */
+		uint64_t m_vid_l = n_vid;
+
+		m_vid_l *= param->pixel_clk_khz;
+		m_vid_l = div_u64(m_vid_l,
+			param->link_settings.link_rate
+				* LINK_RATE_REF_FREQ_IN_KHZ);
+
+		m_vid = (uint32_t) m_vid_l;
+
+		/* temporarily disable auto measurement before seeding M/N */
+		REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+		/* auto measurement needs one full 0x8000 symbol cycle to kick
+		 * in; therefore program initial values for Mvid and Nvid
+		 */
+		REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+		REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+		REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 1);
+	}
+
+	/* set DIG_START to 0x1 to resync the FIFO */
+	REG_UPDATE(DIG_FE_CNTL, DIG_START, 1);
+
+	/* switch the DP encoder to CRTC data */
+	REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+	/* wait 100us for the DIG/DP logic to prime
+	 * (i.e. a few video lines)
+	 */
+	udelay(100);
+
+	/* the hardware will start sending video at the start of the next DP
+	 * frame (i.e. rising edge of the vblank).
+	 * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+	 * register has no effect on the enable transition! HW always
+	 * guarantees VID_STREAM enable at the start of the next frame, and
+	 * this is not programmable.
+	 */
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
+}
+
+void enc1_stream_encoder_set_avmute(
+	struct stream_encoder *enc,
+	bool enable)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	unsigned int value = enable ?
1 : 0; + + REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, value); +} + + +#define DP_SEC_AUD_N__DP_SEC_AUD_N__DEFAULT 0x8000 +#define DP_SEC_TIMESTAMP__DP_SEC_TIMESTAMP_MODE__AUTO_CALC 1 + +#include "include/audio_types.h" + +/** +* speakersToChannels +* +* @brief +* translate speakers to channels +* +* FL - Front Left +* FR - Front Right +* RL - Rear Left +* RR - Rear Right +* RC - Rear Center +* FC - Front Center +* FLC - Front Left Center +* FRC - Front Right Center +* RLC - Rear Left Center +* RRC - Rear Right Center +* LFE - Low Freq Effect +* +* FC +* FLC FRC +* FL FR +* +* LFE +* () +* +* +* RL RR +* RLC RRC +* RC +* +* ch 8 7 6 5 4 3 2 1 +* 0b00000011 - - - - - - FR FL +* 0b00000111 - - - - - LFE FR FL +* 0b00001011 - - - - FC - FR FL +* 0b00001111 - - - - FC LFE FR FL +* 0b00010011 - - - RC - - FR FL +* 0b00010111 - - - RC - LFE FR FL +* 0b00011011 - - - RC FC - FR FL +* 0b00011111 - - - RC FC LFE FR FL +* 0b00110011 - - RR RL - - FR FL +* 0b00110111 - - RR RL - LFE FR FL +* 0b00111011 - - RR RL FC - FR FL +* 0b00111111 - - RR RL FC LFE FR FL +* 0b01110011 - RC RR RL - - FR FL +* 0b01110111 - RC RR RL - LFE FR FL +* 0b01111011 - RC RR RL FC - FR FL +* 0b01111111 - RC RR RL FC LFE FR FL +* 0b11110011 RRC RLC RR RL - - FR FL +* 0b11110111 RRC RLC RR RL - LFE FR FL +* 0b11111011 RRC RLC RR RL FC - FR FL +* 0b11111111 RRC RLC RR RL FC LFE FR FL +* 0b11000011 FRC FLC - - - - FR FL +* 0b11000111 FRC FLC - - - LFE FR FL +* 0b11001011 FRC FLC - - FC - FR FL +* 0b11001111 FRC FLC - - FC LFE FR FL +* 0b11010011 FRC FLC - RC - - FR FL +* 0b11010111 FRC FLC - RC - LFE FR FL +* 0b11011011 FRC FLC - RC FC - FR FL +* 0b11011111 FRC FLC - RC FC LFE FR FL +* 0b11110011 FRC FLC RR RL - - FR FL +* 0b11110111 FRC FLC RR RL - LFE FR FL +* 0b11111011 FRC FLC RR RL FC - FR FL +* 0b11111111 FRC FLC RR RL FC LFE FR FL +* +* @param +* speakers - speaker information as it comes from CEA audio block +*/ +/* translate speakers to channels */ + +union audio_cea_channels { + uint8_t all; + struct audio_cea_channels_bits { + uint32_t FL:1; + uint32_t FR:1; + uint32_t LFE:1; + uint32_t FC:1; + uint32_t RL_RC:1; + uint32_t RR:1; + uint32_t RC_RLC_FLC:1; + uint32_t RRC_FRC:1; + } channels; +}; + +struct audio_clock_info { + /* pixel clock frequency*/ + uint32_t pixel_clock_in_10khz; + /* N - 32KHz audio */ + uint32_t n_32khz; + /* CTS - 32KHz audio*/ + uint32_t cts_32khz; + uint32_t n_44khz; + uint32_t cts_44khz; + uint32_t n_48khz; + uint32_t cts_48khz; +}; + +/* 25.2MHz/1.001*/ +/* 25.2MHz/1.001*/ +/* 25.2MHz*/ +/* 27MHz */ +/* 27MHz*1.001*/ +/* 27MHz*1.001*/ +/* 54MHz*/ +/* 54MHz*1.001*/ +/* 74.25MHz/1.001*/ +/* 74.25MHz*/ +/* 148.5MHz/1.001*/ +/* 148.5MHz*/ + +static const struct audio_clock_info audio_clock_info_table[16] = { + {2517, 4576, 28125, 7007, 31250, 6864, 28125}, + {2518, 4576, 28125, 7007, 31250, 6864, 28125}, + {2520, 4096, 25200, 6272, 28000, 6144, 25200}, + {2700, 4096, 27000, 6272, 30000, 6144, 27000}, + {2702, 4096, 27027, 6272, 30030, 6144, 27027}, + {2703, 4096, 27027, 6272, 30030, 6144, 27027}, + {5400, 4096, 54000, 6272, 60000, 6144, 54000}, + {5405, 4096, 54054, 6272, 60060, 6144, 54054}, + {7417, 11648, 210937, 17836, 234375, 11648, 140625}, + {7425, 4096, 74250, 6272, 82500, 6144, 74250}, + {14835, 11648, 421875, 8918, 234375, 5824, 140625}, + {14850, 4096, 148500, 6272, 165000, 6144, 148500}, + {29670, 5824, 421875, 4459, 234375, 5824, 281250}, + {29700, 3072, 222750, 4704, 247500, 5120, 247500}, + {59340, 5824, 843750, 8918, 937500, 5824, 562500}, + {59400, 3072, 445500, 9408, 990000, 6144, 
594000} +}; + +static const struct audio_clock_info audio_clock_info_table_36bpc[14] = { + {2517, 9152, 84375, 7007, 48875, 9152, 56250}, + {2518, 9152, 84375, 7007, 48875, 9152, 56250}, + {2520, 4096, 37800, 6272, 42000, 6144, 37800}, + {2700, 4096, 40500, 6272, 45000, 6144, 40500}, + {2702, 8192, 81081, 6272, 45045, 8192, 54054}, + {2703, 8192, 81081, 6272, 45045, 8192, 54054}, + {5400, 4096, 81000, 6272, 90000, 6144, 81000}, + {5405, 4096, 81081, 6272, 90090, 6144, 81081}, + {7417, 11648, 316406, 17836, 351562, 11648, 210937}, + {7425, 4096, 111375, 6272, 123750, 6144, 111375}, + {14835, 11648, 632812, 17836, 703125, 11648, 421875}, + {14850, 4096, 222750, 6272, 247500, 6144, 222750}, + {29670, 5824, 632812, 8918, 703125, 5824, 421875}, + {29700, 4096, 445500, 4704, 371250, 5120, 371250} +}; + +static const struct audio_clock_info audio_clock_info_table_48bpc[14] = { + {2517, 4576, 56250, 7007, 62500, 6864, 56250}, + {2518, 4576, 56250, 7007, 62500, 6864, 56250}, + {2520, 4096, 50400, 6272, 56000, 6144, 50400}, + {2700, 4096, 54000, 6272, 60000, 6144, 54000}, + {2702, 4096, 54054, 6267, 60060, 8192, 54054}, + {2703, 4096, 54054, 6272, 60060, 8192, 54054}, + {5400, 4096, 108000, 6272, 120000, 6144, 108000}, + {5405, 4096, 108108, 6272, 120120, 6144, 108108}, + {7417, 11648, 421875, 17836, 468750, 11648, 281250}, + {7425, 4096, 148500, 6272, 165000, 6144, 148500}, + {14835, 11648, 843750, 8918, 468750, 11648, 281250}, + {14850, 4096, 297000, 6272, 330000, 6144, 297000}, + {29670, 5824, 843750, 4459, 468750, 5824, 562500}, + {29700, 3072, 445500, 4704, 495000, 5120, 495000} + + +}; + +static union audio_cea_channels speakers_to_channels( + struct audio_speaker_flags speaker_flags) +{ + union audio_cea_channels cea_channels = {0}; + + /* these are one to one */ + cea_channels.channels.FL = speaker_flags.FL_FR; + cea_channels.channels.FR = speaker_flags.FL_FR; + cea_channels.channels.LFE = speaker_flags.LFE; + cea_channels.channels.FC = speaker_flags.FC; + + /* if Rear Left and Right exist move RC speaker to channel 7 + * otherwise to channel 5 + */ + if (speaker_flags.RL_RR) { + cea_channels.channels.RL_RC = speaker_flags.RL_RR; + cea_channels.channels.RR = speaker_flags.RL_RR; + cea_channels.channels.RC_RLC_FLC = speaker_flags.RC; + } else { + cea_channels.channels.RL_RC = speaker_flags.RC; + } + + /* FRONT Left Right Center and REAR Left Right Center are exclusive */ + if (speaker_flags.FLC_FRC) { + cea_channels.channels.RC_RLC_FLC = speaker_flags.FLC_FRC; + cea_channels.channels.RRC_FRC = speaker_flags.FLC_FRC; + } else { + cea_channels.channels.RC_RLC_FLC = speaker_flags.RLC_RRC; + cea_channels.channels.RRC_FRC = speaker_flags.RLC_RRC; + } + + return cea_channels; +} + +static uint32_t calc_max_audio_packets_per_line( + const struct audio_crtc_info *crtc_info) +{ + uint32_t max_packets_per_line; + + max_packets_per_line = + crtc_info->h_total - crtc_info->h_active; + + if (crtc_info->pixel_repetition) + max_packets_per_line *= crtc_info->pixel_repetition; + + /* for other hdmi features */ + max_packets_per_line -= 58; + /* for Control Period */ + max_packets_per_line -= 16; + /* Number of Audio Packets per Line */ + max_packets_per_line /= 32; + + return max_packets_per_line; +} + +static void get_audio_clock_info( + enum dc_color_depth color_depth, + uint32_t crtc_pixel_clock_in_khz, + uint32_t actual_pixel_clock_in_khz, + struct audio_clock_info *audio_clock_info) +{ + const struct audio_clock_info *clock_info; + uint32_t index; + uint32_t crtc_pixel_clock_in_10khz = 
crtc_pixel_clock_in_khz / 10;
+	uint32_t audio_array_size;
+
+	switch (color_depth) {
+	case COLOR_DEPTH_161616:
+		clock_info = audio_clock_info_table_48bpc;
+		audio_array_size = ARRAY_SIZE(
+				audio_clock_info_table_48bpc);
+		break;
+	case COLOR_DEPTH_121212:
+		clock_info = audio_clock_info_table_36bpc;
+		audio_array_size = ARRAY_SIZE(
+				audio_clock_info_table_36bpc);
+		break;
+	default:
+		clock_info = audio_clock_info_table;
+		audio_array_size = ARRAY_SIZE(
+				audio_clock_info_table);
+		break;
+	}
+
+	if (clock_info != NULL) {
+		/* search for an exact pixel clock match in the table */
+		for (index = 0; index < audio_array_size; index++) {
+			if (clock_info[index].pixel_clock_in_10khz >
+					crtc_pixel_clock_in_10khz)
+				break; /* no match */
+			else if (clock_info[index].pixel_clock_in_10khz ==
+					crtc_pixel_clock_in_10khz) {
+				/* match found */
+				*audio_clock_info = clock_info[index];
+				return;
+			}
+		}
+	}
+
+	/* not found */
+	if (actual_pixel_clock_in_khz == 0)
+		actual_pixel_clock_in_khz = crtc_pixel_clock_in_khz;
+
+	/* See the HDMI spec: use the table entry under the "Other"
+	 * pixel clock.
+	 */
+	audio_clock_info->pixel_clock_in_10khz =
+			actual_pixel_clock_in_khz / 10;
+	audio_clock_info->cts_32khz = actual_pixel_clock_in_khz;
+	audio_clock_info->cts_44khz = actual_pixel_clock_in_khz;
+	audio_clock_info->cts_48khz = actual_pixel_clock_in_khz;
+
+	audio_clock_info->n_32khz = 4096;
+	audio_clock_info->n_44khz = 6272;
+	audio_clock_info->n_48khz = 6144;
+}
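The N/CTS pairs in the tables consumed by get_audio_clock_info satisfy the HDMI audio clock regeneration relation 128 * f_s = f_TMDS * N / CTS. A quick check against the 27 MHz row for 48 kHz audio (N = 6144, CTS = 27000):

	#include <assert.h>

	int main(void)
	{
		unsigned long long f_tmds_hz = 27000000ULL; /* table row {2700, ...} */
		unsigned long long n = 6144ULL, cts = 27000ULL;

		assert(f_tmds_hz * n / cts == 128ULL * 48000ULL); /* both 6144000 */
		return 0;
	}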
+
+static void enc1_se_audio_setup(
+	struct stream_encoder *enc,
+	unsigned int az_inst,
+	struct audio_info *audio_info)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	uint32_t speakers = 0;
+	uint32_t channels = 0;
+
+	ASSERT(audio_info);
+	if (audio_info == NULL)
+		/* This should not happen; guard against it so we don't
+		 * get a BSOD
+		 */
+		return;
+
+	speakers = audio_info->flags.info.ALLSPEAKERS;
+	channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
+
+	/* setup the audio stream source select (audio -> dig mapping) */
+	REG_SET(AFMT_AUDIO_SRC_CONTROL, 0, AFMT_AUDIO_SRC_SELECT, az_inst);
+
+	/* channel allocation */
+	REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, channels);
+}
+
+static void enc1_se_setup_hdmi_audio(
+	struct stream_encoder *enc,
+	const struct audio_crtc_info *crtc_info)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	struct audio_clock_info audio_clock_info = {0};
+	uint32_t max_packets_per_line;
+
+	/* For now still do the calculation, although this field is ignored
+	 * when HDMI_PACKET_GEN_VERSION is set to 1 above
+	 */
+	max_packets_per_line = calc_max_audio_packets_per_line(crtc_info);
+
+	/* HDMI_AUDIO_PACKET_CONTROL */
+	REG_UPDATE_2(HDMI_AUDIO_PACKET_CONTROL,
+			HDMI_AUDIO_PACKETS_PER_LINE, max_packets_per_line,
+			HDMI_AUDIO_DELAY_EN, 1);
+
+	/* AFMT_AUDIO_PACKET_CONTROL */
+	REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+
+	/* AFMT_AUDIO_PACKET_CONTROL2 */
+	REG_UPDATE_2(AFMT_AUDIO_PACKET_CONTROL2,
+			AFMT_AUDIO_LAYOUT_OVRD, 0,
+			AFMT_60958_OSF_OVRD, 0);
+
+	/* HDMI_ACR_PACKET_CONTROL */
+	REG_UPDATE_3(HDMI_ACR_PACKET_CONTROL,
+			HDMI_ACR_AUTO_SEND, 1,
+			HDMI_ACR_SOURCE, 0,
+			HDMI_ACR_AUDIO_PRIORITY, 0);
+
+	/* Program audio clock sample/regeneration parameters */
+	get_audio_clock_info(crtc_info->color_depth,
+			crtc_info->requested_pixel_clock,
+			crtc_info->calculated_pixel_clock,
+			&audio_clock_info);
+	DC_LOG_HW_AUDIO(
+			"\n%s: Input: requested_pixel_clock = %d "
+			"calculated_pixel_clock = %d\n", __func__,
+			crtc_info->requested_pixel_clock,
+			crtc_info->calculated_pixel_clock);
+
+	/* HDMI_ACR_32_0__HDMI_ACR_CTS_32_MASK */
+	REG_UPDATE(HDMI_ACR_32_0, HDMI_ACR_CTS_32, audio_clock_info.cts_32khz);
+
+	/* HDMI_ACR_32_1__HDMI_ACR_N_32_MASK */
+	REG_UPDATE(HDMI_ACR_32_1, HDMI_ACR_N_32, audio_clock_info.n_32khz);
+
+	/* HDMI_ACR_44_0__HDMI_ACR_CTS_44_MASK */
+	REG_UPDATE(HDMI_ACR_44_0, HDMI_ACR_CTS_44, audio_clock_info.cts_44khz);
+
+	/* HDMI_ACR_44_1__HDMI_ACR_N_44_MASK */
+	REG_UPDATE(HDMI_ACR_44_1, HDMI_ACR_N_44, audio_clock_info.n_44khz);
+
+	/* HDMI_ACR_48_0__HDMI_ACR_CTS_48_MASK */
+	REG_UPDATE(HDMI_ACR_48_0, HDMI_ACR_CTS_48, audio_clock_info.cts_48khz);
+
+	/* HDMI_ACR_48_1__HDMI_ACR_N_48_MASK */
+	REG_UPDATE(HDMI_ACR_48_1, HDMI_ACR_N_48, audio_clock_info.n_48khz);
+
+	/* The video driver cannot know in advance which sample rate will
+	 * be used by the HD audio driver; the
+	 * HDMI_ACR_PACKET_CONTROL__HDMI_ACR_N_MULTIPLE field is
+	 * programmed later, in the interrupt callback.
+	 */
+
+	/* AFMT_60958_0__AFMT_60958_CS_CHANNEL_NUMBER_L_MASK &
+	 * AFMT_60958_0__AFMT_60958_CS_CLOCK_ACCURACY_MASK
+	 */
+	REG_UPDATE_2(AFMT_60958_0,
+			AFMT_60958_CS_CHANNEL_NUMBER_L, 1,
+			AFMT_60958_CS_CLOCK_ACCURACY, 0);
+
+	/* AFMT_60958_1 AFMT_60958_CS_CHANNEL_NUMBER_R */
+	REG_UPDATE(AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+
+	/* AFMT_60958_2: keep these settings until the
+	 * programming guide comes out
+	 */
+	REG_UPDATE_6(AFMT_60958_2,
+			AFMT_60958_CS_CHANNEL_NUMBER_2, 3,
+			AFMT_60958_CS_CHANNEL_NUMBER_3, 4,
+			AFMT_60958_CS_CHANNEL_NUMBER_4, 5,
+			AFMT_60958_CS_CHANNEL_NUMBER_5, 6,
+			AFMT_60958_CS_CHANNEL_NUMBER_6, 7,
+			AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+}
+
+static void enc1_se_setup_dp_audio(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* --- DP Audio packet configurations --- */
+
+	/* ATP Configuration */
+	REG_SET(DP_SEC_AUD_N, 0,
+			DP_SEC_AUD_N, DP_SEC_AUD_N__DP_SEC_AUD_N__DEFAULT);
+
+	/* Async/auto-calc timestamp mode */
+	REG_SET(DP_SEC_TIMESTAMP, 0, DP_SEC_TIMESTAMP_MODE,
+			DP_SEC_TIMESTAMP__DP_SEC_TIMESTAMP_MODE__AUTO_CALC);
+
+	/* --- The following are the registers
+	 * copied from the SetupHDMI path ---
+	 */
+
+	/* AFMT_AUDIO_PACKET_CONTROL */
+	REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+
+	/* AFMT_AUDIO_PACKET_CONTROL2 */
+	/* Program the ATP and AIP next */
+	REG_UPDATE_2(AFMT_AUDIO_PACKET_CONTROL2,
+			AFMT_AUDIO_LAYOUT_OVRD, 0,
+			AFMT_60958_OSF_OVRD, 0);
+
+	/* AFMT_INFOFRAME_CONTROL0 */
+	REG_UPDATE(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+
+	/* AFMT_60958_0__AFMT_60958_CS_CLOCK_ACCURACY_MASK */
+	REG_UPDATE(AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, 0);
+}
+
+static void enc1_se_enable_audio_clock(
+	struct stream_encoder *enc,
+	bool enable)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (REG(AFMT_CNTL) == 0)
+		return; /* DCE8/10 does not have this register */
+
+	REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, !!enable);
+
+	/* wait for the AFMT clock to turn on;
+	 * expectation: this should complete in 1-2 reads
+	 *
+	 * REG_WAIT(AFMT_CNTL, AFMT_AUDIO_CLOCK_ON, !!enable, 1, 10);
+	 *
+	 * TODO: waiting for clock_on does not work well; a HW programming
+	 * sequence may be needed. But audio seems to work normally even
+	 * without waiting for the clock_on status change.
+	 */
+}
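The commented-out REG_WAIT above would poll AFMT_AUDIO_CLOCK_ON at a 1 us interval for up to 10 tries. As a sketch, the generic poll-with-timeout pattern it stands for looks like this (hypothetical helper, not a driver API; the accessor and delay callbacks stand in for register reads and udelay):

	#include <stdbool.h>
	#include <stdint.h>

	static bool poll_field_eq(uint32_t (*read_field)(void), uint32_t want,
				  unsigned int delay_us, unsigned int max_tries,
				  void (*delay)(unsigned int))
	{
		unsigned int i;

		for (i = 0; i < max_tries; i++) {
			if (read_field() == want)
				return true; /* field reached expected value */
			delay(delay_us);
		}
		return false; /* timed out */
	}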
+
+static void enc1_se_enable_dp_audio(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* Enable Audio packets */
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1);
+
+	/* Program the ATP and AIP next */
+	REG_UPDATE_2(DP_SEC_CNTL,
+			DP_SEC_ATP_ENABLE, 1,
+			DP_SEC_AIP_ENABLE, 1);
+
+	/* Program STREAM_ENABLE after all the other enables. */
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+static void enc1_se_disable_dp_audio(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t value = 0;
+
+	/* Disable Audio packets */
+	REG_UPDATE_5(DP_SEC_CNTL,
+			DP_SEC_ASP_ENABLE, 0,
+			DP_SEC_ATP_ENABLE, 0,
+			DP_SEC_AIP_ENABLE, 0,
+			DP_SEC_ACM_ENABLE, 0,
+			DP_SEC_STREAM_ENABLE, 0);
+
+	/* This register is shared with the encoder info frame. Therefore we
+	 * need to keep the master enabled if at least one of the fields is
+	 * not 0.
+	 */
+	value = REG_READ(DP_SEC_CNTL);
+	if (value != 0)
+		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_se_audio_mute_control(
+	struct stream_encoder *enc,
+	bool mute)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, !mute);
+}
+
+void enc1_se_dp_audio_setup(
+	struct stream_encoder *enc,
+	unsigned int az_inst,
+	struct audio_info *info)
+{
+	enc1_se_audio_setup(enc, az_inst, info);
+}
+
+void enc1_se_dp_audio_enable(
+	struct stream_encoder *enc)
+{
+	enc1_se_enable_audio_clock(enc, true);
+	enc1_se_setup_dp_audio(enc);
+	enc1_se_enable_dp_audio(enc);
+}
+
+void enc1_se_dp_audio_disable(
+	struct stream_encoder *enc)
+{
+	enc1_se_disable_dp_audio(enc);
+	enc1_se_enable_audio_clock(enc, false);
+}
+
+void enc1_se_hdmi_audio_setup(
+	struct stream_encoder *enc,
+	unsigned int az_inst,
+	struct audio_info *info,
+	struct audio_crtc_info *audio_crtc_info)
+{
+	enc1_se_enable_audio_clock(enc, true);
+	enc1_se_setup_hdmi_audio(enc, audio_crtc_info);
+	enc1_se_audio_setup(enc, az_inst, info);
+}
+
+void enc1_se_hdmi_audio_disable(
+	struct stream_encoder *enc)
+{
+	enc1_se_enable_audio_clock(enc, false);
+}
+
+void enc1_setup_stereo_sync(
+	struct stream_encoder *enc,
+	int tg_inst, bool enable)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	REG_UPDATE(DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, tg_inst);
+	REG_UPDATE(DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, !enable);
+}
+
+static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
+	.dp_set_stream_attribute =
+		enc1_stream_encoder_dp_set_stream_attribute,
+	.hdmi_set_stream_attribute =
+		enc1_stream_encoder_hdmi_set_stream_attribute,
+	.dvi_set_stream_attribute =
+		enc1_stream_encoder_dvi_set_stream_attribute,
+	.set_mst_bandwidth =
+		enc1_stream_encoder_set_mst_bandwidth,
+	.update_hdmi_info_packets =
+		enc1_stream_encoder_update_hdmi_info_packets,
+	.stop_hdmi_info_packets =
+		enc1_stream_encoder_stop_hdmi_info_packets,
+	.update_dp_info_packets =
+		enc1_stream_encoder_update_dp_info_packets,
+	.stop_dp_info_packets =
+		enc1_stream_encoder_stop_dp_info_packets,
+	.dp_blank =
+		enc1_stream_encoder_dp_blank,
+	.dp_unblank =
+		enc1_stream_encoder_dp_unblank,
+	.audio_mute_control = enc1_se_audio_mute_control,
+
+	.dp_audio_setup = enc1_se_dp_audio_setup,
+	.dp_audio_enable = enc1_se_dp_audio_enable,
+	.dp_audio_disable = enc1_se_dp_audio_disable,
+
+	.hdmi_audio_setup = enc1_se_hdmi_audio_setup,
+	.hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+	.setup_stereo_sync = enc1_setup_stereo_sync,
+	.set_avmute = enc1_stream_encoder_set_avmute,
+};
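With the table above in place, everything outside this file drives the encoder through stream_encoder_funcs. A hypothetical wiring sketch (the allocation flow and the stream_enc_regs/se_shift/se_mask names are assumptions, modeled on how DC resource files typically construct encoders):

	/* hypothetical: allocate, construct, then call through the vtable */
	struct dcn10_stream_encoder *enc1 =
		kzalloc(sizeof(*enc1), GFP_KERNEL);

	if (enc1) {
		dcn10_stream_encoder_construct(enc1, ctx, ctx->dc_bios,
					       ENGINE_ID_DIGA,
					       &stream_enc_regs[0],
					       &se_shift, &se_mask);
		enc1->base.funcs->dp_blank(&enc1->base);
	}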
+
+void dcn10_stream_encoder_construct(
+	struct dcn10_stream_encoder *enc1,
+	struct dc_context *ctx,
+	struct dc_bios *bp,
+	enum engine_id eng_id,
+	const struct dcn10_stream_enc_registers *regs,
+	const struct dcn10_stream_encoder_shift *se_shift,
+	const struct dcn10_stream_encoder_mask *se_mask)
+{
+	enc1->base.funcs = &dcn10_str_enc_funcs;
+	enc1->base.ctx = ctx;
+	enc1->base.id = eng_id;
+	enc1->base.bp = bp;
+	enc1->regs = regs;
+	enc1->se_shift = se_shift;
+	enc1->se_mask = se_mask;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
new file mode 100644
index 000000000000..6b3e4ded155b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -0,0 +1,524 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ * + * Authors: AMD + * + */ + +#ifndef __DC_STREAM_ENCODER_DCN10_H__ +#define __DC_STREAM_ENCODER_DCN10_H__ + +#include "stream_encoder.h" + +#define DCN10STRENC_FROM_STRENC(stream_encoder)\ + container_of(stream_encoder, struct dcn10_stream_encoder, base) + +#define SE_COMMON_DCN_REG_LIST(id) \ + SRI(AFMT_CNTL, DIG, id), \ + SRI(AFMT_GENERIC_0, DIG, id), \ + SRI(AFMT_GENERIC_1, DIG, id), \ + SRI(AFMT_GENERIC_2, DIG, id), \ + SRI(AFMT_GENERIC_3, DIG, id), \ + SRI(AFMT_GENERIC_4, DIG, id), \ + SRI(AFMT_GENERIC_5, DIG, id), \ + SRI(AFMT_GENERIC_6, DIG, id), \ + SRI(AFMT_GENERIC_7, DIG, id), \ + SRI(AFMT_GENERIC_HDR, DIG, id), \ + SRI(AFMT_INFOFRAME_CONTROL0, DIG, id), \ + SRI(AFMT_VBI_PACKET_CONTROL, DIG, id), \ + SRI(AFMT_VBI_PACKET_CONTROL1, DIG, id), \ + SRI(AFMT_AUDIO_PACKET_CONTROL, DIG, id), \ + SRI(AFMT_AUDIO_PACKET_CONTROL2, DIG, id), \ + SRI(AFMT_AUDIO_SRC_CONTROL, DIG, id), \ + SRI(AFMT_60958_0, DIG, id), \ + SRI(AFMT_60958_1, DIG, id), \ + SRI(AFMT_60958_2, DIG, id), \ + SRI(DIG_FE_CNTL, DIG, id), \ + SRI(HDMI_CONTROL, DIG, id), \ + SRI(HDMI_DB_CONTROL, DIG, id), \ + SRI(HDMI_GC, DIG, id), \ + SRI(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \ + SRI(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \ + SRI(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \ + SRI(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \ + SRI(HDMI_INFOFRAME_CONTROL0, DIG, id), \ + SRI(HDMI_INFOFRAME_CONTROL1, DIG, id), \ + SRI(HDMI_VBI_PACKET_CONTROL, DIG, id), \ + SRI(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\ + SRI(HDMI_ACR_PACKET_CONTROL, DIG, id),\ + SRI(HDMI_ACR_32_0, DIG, id),\ + SRI(HDMI_ACR_32_1, DIG, id),\ + SRI(HDMI_ACR_44_0, DIG, id),\ + SRI(HDMI_ACR_44_1, DIG, id),\ + SRI(HDMI_ACR_48_0, DIG, id),\ + SRI(HDMI_ACR_48_1, DIG, id),\ + SRI(DP_DB_CNTL, DP, id), \ + SRI(DP_MSA_MISC, DP, id), \ + SRI(DP_MSA_COLORIMETRY, DP, id), \ + SRI(DP_MSA_TIMING_PARAM1, DP, id), \ + SRI(DP_MSA_TIMING_PARAM2, DP, id), \ + SRI(DP_MSA_TIMING_PARAM3, DP, id), \ + SRI(DP_MSA_TIMING_PARAM4, DP, id), \ + SRI(DP_MSE_RATE_CNTL, DP, id), \ + SRI(DP_MSE_RATE_UPDATE, DP, id), \ + SRI(DP_PIXEL_FORMAT, DP, id), \ + SRI(DP_SEC_CNTL, DP, id), \ + SRI(DP_STEER_FIFO, DP, id), \ + SRI(DP_VID_M, DP, id), \ + SRI(DP_VID_N, DP, id), \ + SRI(DP_VID_STREAM_CNTL, DP, id), \ + SRI(DP_VID_TIMING, DP, id), \ + SRI(DP_SEC_AUD_N, DP, id), \ + SRI(DP_SEC_TIMESTAMP, DP, id) + +#define SE_DCN_REG_LIST(id)\ + SE_COMMON_DCN_REG_LIST(id) + + +struct dcn10_stream_enc_registers { + uint32_t AFMT_CNTL; + uint32_t AFMT_AVI_INFO0; + uint32_t AFMT_AVI_INFO1; + uint32_t AFMT_AVI_INFO2; + uint32_t AFMT_AVI_INFO3; + uint32_t AFMT_GENERIC_0; + uint32_t AFMT_GENERIC_1; + uint32_t AFMT_GENERIC_2; + uint32_t AFMT_GENERIC_3; + uint32_t AFMT_GENERIC_4; + uint32_t AFMT_GENERIC_5; + uint32_t AFMT_GENERIC_6; + uint32_t AFMT_GENERIC_7; + uint32_t AFMT_GENERIC_HDR; + uint32_t AFMT_INFOFRAME_CONTROL0; + uint32_t AFMT_VBI_PACKET_CONTROL; + uint32_t AFMT_VBI_PACKET_CONTROL1; + uint32_t AFMT_AUDIO_PACKET_CONTROL; + uint32_t AFMT_AUDIO_PACKET_CONTROL2; + uint32_t AFMT_AUDIO_SRC_CONTROL; + uint32_t AFMT_60958_0; + uint32_t AFMT_60958_1; + uint32_t AFMT_60958_2; + uint32_t DIG_FE_CNTL; + uint32_t DP_MSE_RATE_CNTL; + uint32_t DP_MSE_RATE_UPDATE; + uint32_t DP_PIXEL_FORMAT; + uint32_t DP_SEC_CNTL; + uint32_t DP_STEER_FIFO; + uint32_t DP_VID_M; + uint32_t DP_VID_N; + uint32_t DP_VID_STREAM_CNTL; + uint32_t DP_VID_TIMING; + uint32_t DP_SEC_AUD_N; + uint32_t DP_SEC_TIMESTAMP; + uint32_t HDMI_CONTROL; + uint32_t HDMI_GC; + uint32_t HDMI_GENERIC_PACKET_CONTROL0; + uint32_t HDMI_GENERIC_PACKET_CONTROL1; + uint32_t 
HDMI_GENERIC_PACKET_CONTROL2; + uint32_t HDMI_GENERIC_PACKET_CONTROL3; + uint32_t HDMI_GENERIC_PACKET_CONTROL4; + uint32_t HDMI_GENERIC_PACKET_CONTROL5; + uint32_t HDMI_INFOFRAME_CONTROL0; + uint32_t HDMI_INFOFRAME_CONTROL1; + uint32_t HDMI_VBI_PACKET_CONTROL; + uint32_t HDMI_AUDIO_PACKET_CONTROL; + uint32_t HDMI_ACR_PACKET_CONTROL; + uint32_t HDMI_ACR_32_0; + uint32_t HDMI_ACR_32_1; + uint32_t HDMI_ACR_44_0; + uint32_t HDMI_ACR_44_1; + uint32_t HDMI_ACR_48_0; + uint32_t HDMI_ACR_48_1; + uint32_t DP_DB_CNTL; + uint32_t DP_MSA_MISC; + uint32_t DP_MSA_COLORIMETRY; + uint32_t DP_MSA_TIMING_PARAM1; + uint32_t DP_MSA_TIMING_PARAM2; + uint32_t DP_MSA_TIMING_PARAM3; + uint32_t DP_MSA_TIMING_PARAM4; + uint32_t HDMI_DB_CONTROL; +}; + + +#define SE_SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\ + SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB0, mask_sh),\ + SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB1, mask_sh),\ + SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB2, mask_sh),\ + SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB3, mask_sh),\ + SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\ + SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\ + SE_SF(DIG0_AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\ + SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\ + SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\ + SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\ + SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\ + SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_START, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, mask_sh),\ + SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_LAYOUT_OVRD, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, 
AFMT_60958_OSF_OVRD, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\ + SE_SF(DIG0_AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, mask_sh),\ + SE_SF(DIG0_AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, mask_sh),\ + SE_SF(DIG0_AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, mask_sh),\ + SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\ + SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, mask_sh),\ + SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, TMDS_PIXEL_ENCODING, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, TMDS_COLOR_FORMAT, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_LOCK_STATUS, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\ 
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\ + SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\ + SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\ + SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\ + SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh) + +#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\ + SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh) + +#define SE_COMMON_MASK_SH_LIST_DCN10(mask_sh)\ + SE_COMMON_MASK_SH_LIST_SOC(mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_LINE, mask_sh) + + +#define SE_REG_FIELD_LIST_DCN1_0(type) \ + type AFMT_GENERIC_INDEX;\ + type AFMT_GENERIC_HB0;\ + type AFMT_GENERIC_HB1;\ + type AFMT_GENERIC_HB2;\ + type AFMT_GENERIC_HB3;\ + type AFMT_GENERIC_LOCK_STATUS;\ + type AFMT_GENERIC_CONFLICT;\ + type AFMT_GENERIC_CONFLICT_CLR;\ + type AFMT_GENERIC0_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC1_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC0_FRAME_UPDATE;\ + type AFMT_GENERIC1_FRAME_UPDATE;\ + type AFMT_GENERIC2_FRAME_UPDATE;\ + type AFMT_GENERIC3_FRAME_UPDATE;\ + type AFMT_GENERIC4_FRAME_UPDATE;\ + type AFMT_GENERIC5_FRAME_UPDATE;\ + type AFMT_GENERIC6_FRAME_UPDATE;\ + type AFMT_GENERIC7_FRAME_UPDATE;\ + type HDMI_GENERIC0_CONT;\ + type HDMI_GENERIC0_SEND;\ + type HDMI_GENERIC0_LINE;\ + type HDMI_GENERIC1_CONT;\ + type HDMI_GENERIC1_SEND;\ + type HDMI_GENERIC1_LINE;\ + type HDMI_GENERIC2_CONT;\ + type HDMI_GENERIC2_SEND;\ + type HDMI_GENERIC2_LINE;\ + type HDMI_GENERIC3_CONT;\ + type HDMI_GENERIC3_SEND;\ + type HDMI_GENERIC3_LINE;\ + type HDMI_GENERIC4_CONT;\ + type HDMI_GENERIC4_SEND;\ + type HDMI_GENERIC4_LINE;\ + type HDMI_GENERIC5_CONT;\ + type HDMI_GENERIC5_SEND;\ + type HDMI_GENERIC5_LINE;\ + type HDMI_GENERIC6_CONT;\ + type HDMI_GENERIC6_SEND;\ + type HDMI_GENERIC6_LINE;\ + type HDMI_GENERIC7_CONT;\ + type HDMI_GENERIC7_SEND;\ + type HDMI_GENERIC7_LINE;\ + type DP_PIXEL_ENCODING;\ + type DP_COMPONENT_DEPTH;\ + type HDMI_PACKET_GEN_VERSION;\ + type HDMI_KEEPOUT_MODE;\ + type HDMI_DEEP_COLOR_ENABLE;\ + type HDMI_CLOCK_CHANNEL_RATE;\ + type HDMI_DEEP_COLOR_DEPTH;\ + type HDMI_GC_CONT;\ + type HDMI_GC_SEND;\ + type HDMI_NULL_SEND;\ + type HDMI_DATA_SCRAMBLE_EN;\ + type 
HDMI_AUDIO_INFO_SEND;\ + type AFMT_AUDIO_INFO_UPDATE;\ + type HDMI_AUDIO_INFO_LINE;\ + type HDMI_GC_AVMUTE;\ + type DP_MSE_RATE_X;\ + type DP_MSE_RATE_Y;\ + type DP_MSE_RATE_UPDATE_PENDING;\ + type DP_SEC_GSP0_ENABLE;\ + type DP_SEC_STREAM_ENABLE;\ + type DP_SEC_GSP1_ENABLE;\ + type DP_SEC_GSP2_ENABLE;\ + type DP_SEC_GSP3_ENABLE;\ + type DP_SEC_GSP4_ENABLE;\ + type DP_SEC_GSP5_ENABLE;\ + type DP_SEC_GSP6_ENABLE;\ + type DP_SEC_GSP7_ENABLE;\ + type DP_SEC_MPG_ENABLE;\ + type DP_VID_STREAM_DIS_DEFER;\ + type DP_VID_STREAM_ENABLE;\ + type DP_VID_STREAM_STATUS;\ + type DP_STEER_FIFO_RESET;\ + type DP_VID_M_N_GEN_EN;\ + type DP_VID_N;\ + type DP_VID_M;\ + type DIG_START;\ + type AFMT_AUDIO_SRC_SELECT;\ + type AFMT_AUDIO_CHANNEL_ENABLE;\ + type HDMI_AUDIO_PACKETS_PER_LINE;\ + type HDMI_AUDIO_DELAY_EN;\ + type AFMT_60958_CS_UPDATE;\ + type AFMT_AUDIO_LAYOUT_OVRD;\ + type AFMT_60958_OSF_OVRD;\ + type HDMI_ACR_AUTO_SEND;\ + type HDMI_ACR_SOURCE;\ + type HDMI_ACR_AUDIO_PRIORITY;\ + type HDMI_ACR_CTS_32;\ + type HDMI_ACR_N_32;\ + type HDMI_ACR_CTS_44;\ + type HDMI_ACR_N_44;\ + type HDMI_ACR_CTS_48;\ + type HDMI_ACR_N_48;\ + type AFMT_60958_CS_CHANNEL_NUMBER_L;\ + type AFMT_60958_CS_CLOCK_ACCURACY;\ + type AFMT_60958_CS_CHANNEL_NUMBER_R;\ + type AFMT_60958_CS_CHANNEL_NUMBER_2;\ + type AFMT_60958_CS_CHANNEL_NUMBER_3;\ + type AFMT_60958_CS_CHANNEL_NUMBER_4;\ + type AFMT_60958_CS_CHANNEL_NUMBER_5;\ + type AFMT_60958_CS_CHANNEL_NUMBER_6;\ + type AFMT_60958_CS_CHANNEL_NUMBER_7;\ + type DP_SEC_AUD_N;\ + type DP_SEC_TIMESTAMP_MODE;\ + type DP_SEC_ASP_ENABLE;\ + type DP_SEC_ATP_ENABLE;\ + type DP_SEC_AIP_ENABLE;\ + type DP_SEC_ACM_ENABLE;\ + type AFMT_AUDIO_SAMPLE_SEND;\ + type AFMT_AUDIO_CLOCK_EN;\ + type TMDS_PIXEL_ENCODING;\ + type TMDS_COLOR_FORMAT;\ + type DIG_STEREOSYNC_SELECT;\ + type DIG_STEREOSYNC_GATE_EN;\ + type DP_DB_DISABLE;\ + type DP_MSA_MISC0;\ + type DP_MSA_HTOTAL;\ + type DP_MSA_VTOTAL;\ + type DP_MSA_HSTART;\ + type DP_MSA_VSTART;\ + type DP_MSA_HSYNCWIDTH;\ + type DP_MSA_HSYNCPOLARITY;\ + type DP_MSA_VSYNCWIDTH;\ + type DP_MSA_VSYNCPOLARITY;\ + type DP_MSA_HWIDTH;\ + type DP_MSA_VHEIGHT;\ + type HDMI_DB_DISABLE;\ + type DP_VID_N_MUL;\ + type DP_VID_M_DOUBLE_VALUE_EN + +struct dcn10_stream_encoder_shift { + SE_REG_FIELD_LIST_DCN1_0(uint8_t); +}; + +struct dcn10_stream_encoder_mask { + SE_REG_FIELD_LIST_DCN1_0(uint32_t); +}; + +struct dcn10_stream_encoder { + struct stream_encoder base; + const struct dcn10_stream_enc_registers *regs; + const struct dcn10_stream_encoder_shift *se_shift; + const struct dcn10_stream_encoder_mask *se_mask; +}; + +void dcn10_stream_encoder_construct( + struct dcn10_stream_encoder *enc1, + struct dc_context *ctx, + struct dc_bios *bp, + enum engine_id eng_id, + const struct dcn10_stream_enc_registers *regs, + const struct dcn10_stream_encoder_shift *se_shift, + const struct dcn10_stream_encoder_mask *se_mask); + +void enc1_update_generic_info_packet( + struct dcn10_stream_encoder *enc1, + uint32_t packet_index, + const struct dc_info_packet *info_packet); + +void enc1_stream_encoder_dp_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + enum dc_color_space output_color_space); + +void enc1_stream_encoder_hdmi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + int actual_pix_clk_khz, + bool enable_audio); + +void enc1_stream_encoder_dvi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + bool is_dual_link); + +void 
enc1_stream_encoder_set_mst_bandwidth( + struct stream_encoder *enc, + struct fixed31_32 avg_time_slots_per_mtp); + +void enc1_stream_encoder_update_dp_info_packets( + struct stream_encoder *enc, + const struct encoder_info_frame *info_frame); + +void enc1_stream_encoder_stop_dp_info_packets( + struct stream_encoder *enc); + +void enc1_stream_encoder_dp_blank( + struct stream_encoder *enc); + +void enc1_stream_encoder_dp_unblank( + struct stream_encoder *enc, + const struct encoder_unblank_param *param); + +void enc1_setup_stereo_sync( + struct stream_encoder *enc, + int tg_inst, bool enable); + +void enc1_stream_encoder_set_avmute( + struct stream_encoder *enc, + bool enable); + +void enc1_se_audio_mute_control( + struct stream_encoder *enc, + bool mute); + +void enc1_se_dp_audio_setup( + struct stream_encoder *enc, + unsigned int az_inst, + struct audio_info *info); + +void enc1_se_dp_audio_enable( + struct stream_encoder *enc); + +void enc1_se_dp_audio_disable( + struct stream_encoder *enc); + +void enc1_se_hdmi_audio_setup( + struct stream_encoder *enc, + unsigned int az_inst, + struct audio_info *info, + struct audio_crtc_info *audio_crtc_info); + +void enc1_se_hdmi_audio_disable( + struct stream_encoder *enc); + +#endif /* __DC_STREAM_ENCODER_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 22e7ee7dcd26..8eafe1af8a5e 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -341,6 +341,10 @@ bool dm_dmcu_set_pipe(struct dc_context *ctx, unsigned int controller_id); unsigned long long dm_get_timestamp(struct dc_context *ctx); +unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx, + unsigned long long current_time_stamp, + unsigned long long last_time_stamp); + /* * performance tracing */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index c109b2c34c8f..fd9d97aab071 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -26,75 +26,89 @@ #include "display_mode_lib.h" #include "dc_features.h" +static const struct _vcs_dpi_ip_params_st dcn1_0_ip = { + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs = 42, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 589824, + .max_line_buffer_lines = 12, + .IsLineBufferBppFixed = 0, + .LineBufferFixedBpp = -1, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .max_num_dpp = 4, + .max_num_wb = 2, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 4, + .max_vscl_ratio = 4, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 14, + .dppclk_delay_subtotal = 90, + .dispclk_delay_subtotal = 42, + .dcfclk_cstate_latency = 10, + .max_inter_dcn_tile_repeaters = 8, + .can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0, + .bug_forcing_LC_req_same_size_fixed = 0, +}; + +static const struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = { + .sr_exit_time_us = 9.0, 
+ .sr_enter_plus_exit_time_us = 11.0, + .urgent_latency_us = 4.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 80.0, + .max_request_size_bytes = 256, + .downspread_percent = 0.5, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 128, + .urgent_out_of_order_return_per_channel_bytes = 256, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 2, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 17.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, +}; + static void set_soc_bounding_box(struct _vcs_dpi_soc_bounding_box_st *soc, enum dml_project project) { - if (project == DML_PROJECT_RAVEN1) { - soc->sr_exit_time_us = 9.0; - soc->sr_enter_plus_exit_time_us = 11.0; - soc->urgent_latency_us = 4.0; - soc->writeback_latency_us = 12.0; - soc->ideal_dram_bw_after_urgent_percent = 80.0; - soc->max_request_size_bytes = 256; - soc->downspread_percent = 0.5; - soc->dram_page_open_time_ns = 50.0; - soc->dram_rw_turnaround_time_ns = 17.5; - soc->dram_return_buffer_per_channel_bytes = 8192; - soc->round_trip_ping_latency_dcfclk_cycles = 128; - soc->urgent_out_of_order_return_per_channel_bytes = 256; - soc->channel_interleave_bytes = 256; - soc->num_banks = 8; - soc->num_chans = 2; - soc->vmm_page_size_bytes = 4096; - soc->dram_clock_change_latency_us = 17.0; - soc->writeback_dram_clock_change_latency_us = 23.0; - soc->return_bus_width_bytes = 64; - } else { - BREAK_TO_DEBUGGER(); /* Invalid Project Specified */ + switch (project) { + case DML_PROJECT_RAVEN1: + *soc = dcn1_0_soc; + break; + default: + ASSERT(0); + break; } } static void set_ip_params(struct _vcs_dpi_ip_params_st *ip, enum dml_project project) { - if (project == DML_PROJECT_RAVEN1) { - ip->rob_buffer_size_kbytes = 64; - ip->det_buffer_size_kbytes = 164; - ip->dpte_buffer_size_in_pte_reqs = 42; - ip->dpp_output_buffer_pixels = 2560; - ip->opp_output_buffer_lines = 1; - ip->pixel_chunk_size_kbytes = 8; - ip->pte_enable = 1; - ip->pte_chunk_size_kbytes = 2; - ip->meta_chunk_size_kbytes = 2; - ip->writeback_chunk_size_kbytes = 2; - ip->line_buffer_size_bits = 589824; - ip->max_line_buffer_lines = 12; - ip->IsLineBufferBppFixed = 0; - ip->LineBufferFixedBpp = -1; - ip->writeback_luma_buffer_size_kbytes = 12; - ip->writeback_chroma_buffer_size_kbytes = 8; - ip->max_num_dpp = 4; - ip->max_num_wb = 2; - ip->max_dchub_pscl_bw_pix_per_clk = 4; - ip->max_pscl_lb_bw_pix_per_clk = 2; - ip->max_lb_vscl_bw_pix_per_clk = 4; - ip->max_vscl_hscl_bw_pix_per_clk = 4; - ip->max_hscl_ratio = 4; - ip->max_vscl_ratio = 4; - ip->hscl_mults = 4; - ip->vscl_mults = 4; - ip->max_hscl_taps = 8; - ip->max_vscl_taps = 8; - ip->dispclk_ramp_margin_percent = 1; - ip->underscan_factor = 1.10; - ip->min_vblank_lines = 14; - ip->dppclk_delay_subtotal = 90; - ip->dispclk_delay_subtotal = 42; - ip->dcfclk_cstate_latency = 10; - ip->max_inter_dcn_tile_repeaters = 8; - ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0; - ip->bug_forcing_LC_req_same_size_fixed = 0; - } else { - BREAK_TO_DEBUGGER(); /* Invalid Project Specified */ + switch (project) { + case DML_PROJECT_RAVEN1: + *ip = dcn1_0_ip; + break; + default: + ASSERT(0); + break; } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 09affa16cc43..ce750edc1e5f 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -215,8 +215,8 @@ struct writeback_st { int wb_vtaps_luma; int wb_htaps_chroma; int wb_vtaps_chroma; - int wb_hratio; - int wb_vratio; + double wb_hratio; + double wb_vratio; }; struct _vcs_dpi_display_output_params_st { @@ -224,6 +224,7 @@ struct _vcs_dpi_display_output_params_st { int output_bpp; int dsc_enable; int wb_enable; + int num_active_wb; int opp_input_bpc; int output_type; int output_format; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c index 87b580fa4bc9..61fe484da1a0 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c @@ -75,6 +75,9 @@ bool dal_hw_factory_init( return true; case DCE_VERSION_11_0: case DCE_VERSION_11_2: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case DCE_VERSION_11_22: +#endif dal_hw_factory_dce110_init(factory); return true; case DCE_VERSION_12_0: diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c index 0ae8ace25739..910ae2b7bf64 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c @@ -72,6 +72,9 @@ bool dal_hw_translate_init( case DCE_VERSION_10_0: case DCE_VERSION_11_0: case DCE_VERSION_11_2: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case DCE_VERSION_11_22: +#endif dal_hw_translate_dce110_init(translate); return true; case DCE_VERSION_12_0: diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c index abd0095ced30..b7256f595052 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c @@ -527,7 +527,7 @@ static void construct( REG_GET(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, &xtal_ref_div); if (xtal_ref_div == 0) { - DC_LOG_WARNING("Invalid base timer divider\n", + DC_LOG_WARNING("Invalid base timer divider [%s]\n", __func__); xtal_ref_div = 2; } diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c index 5cbf6626b8d4..c3d7c320fdba 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c @@ -83,6 +83,9 @@ struct i2caux *dal_i2caux_create( case DCE_VERSION_8_3: return dal_i2caux_dce80_create(ctx); case DCE_VERSION_11_2: +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + case DCE_VERSION_11_22: +#endif return dal_i2caux_dce112_create(ctx); case DCE_VERSION_11_0: return dal_i2caux_dce110_create(ctx); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 8c51ad70cace..a94942d4e66b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -95,11 +95,6 @@ struct resource_funcs { struct link_encoder *(*link_enc_create)( const struct encoder_init_data *init); - enum dc_status (*validate_guaranteed)( - struct dc *dc, - struct dc_stream_state *stream, - struct dc_state *context); - bool (*validate_bandwidth)( struct dc *dc, struct dc_state *context); @@ -250,6 +245,7 @@ struct dce_bw_output { bool all_displays_in_sync; struct dce_watermarks urgent_wm_ns[MAX_PIPES]; struct dce_watermarks stutter_exit_wm_ns[MAX_PIPES]; + struct dce_watermarks stutter_entry_wm_ns[MAX_PIPES]; struct 
dce_watermarks nbp_state_change_wm_ns[MAX_PIPES]; int sclk_khz; int sclk_deep_sleep_khz; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h index a9bfe9ff8ce6..933ea7a1e18b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h @@ -42,6 +42,10 @@ enum bw_calcs_version { BW_CALCS_VERSION_CARRIZO, BW_CALCS_VERSION_POLARIS10, BW_CALCS_VERSION_POLARIS11, + BW_CALCS_VERSION_POLARIS12, +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + BW_CALCS_VERSION_VEGAM, +#endif BW_CALCS_VERSION_STONEY, BW_CALCS_VERSION_VEGA10 }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h new file mode 100644 index 000000000000..02f757dd70d4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -0,0 +1,64 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DAL_DCHUBBUB_H__ +#define __DAL_DCHUBBUB_H__ + + +enum dcc_control { + dcc_control__256_256_xxx, + dcc_control__128_128_xxx, + dcc_control__256_64_64, +}; + +enum segment_order { + segment_order__na, + segment_order__contiguous, + segment_order__non_contiguous, +}; + + +struct hubbub_funcs { + void (*update_dchub)( + struct hubbub *hubbub, + struct dchub_init_data *dh_data); + + bool (*get_dcc_compression_cap)(struct hubbub *hubbub, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output); + + bool (*dcc_support_swizzle)( + enum swizzle_mode_values swizzle, + unsigned int bytes_per_element, + enum segment_order *segment_order_horz, + enum segment_order *segment_order_vert); + + bool (*dcc_support_pixel_format)( + enum surface_pixel_format format, + unsigned int *bytes_per_element); +}; + + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 99995608b620..582458f028f8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -44,7 +44,23 @@ struct dpp_grph_csc_adjustment { enum graphics_gamut_adjust_type gamut_adjust_type; }; +struct dcn_dpp_state { + uint32_t igam_lut_mode; + uint32_t igam_input_format; + uint32_t dgam_lut_mode; + uint32_t rgam_lut_mode; + uint32_t gamut_remap_mode; + uint32_t gamut_remap_c11_c12; + uint32_t gamut_remap_c13_c14; + uint32_t gamut_remap_c21_c22; + uint32_t gamut_remap_c23_c24; + uint32_t gamut_remap_c31_c32; + uint32_t gamut_remap_c33_c34; +}; + struct dpp_funcs { + void (*dpp_read_state)(struct dpp *dpp, struct dcn_dpp_state *s); + void (*dpp_reset)(struct dpp *dpp); void (*dpp_set_scaler)(struct dpp *dpp, @@ -117,7 +133,7 @@ struct dpp_funcs { struct dpp *dpp_base, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space); void (*dpp_full_bypass)(struct dpp *dpp_base); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 9ced254e652c..331f8ff57ed7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -56,7 +56,6 @@ struct hubp { bool power_gated; }; - struct hubp_funcs { void (*hubp_setup)( struct hubp *hubp, @@ -121,6 +120,7 @@ struct hubp_funcs { void (*hubp_clk_cntl)(struct hubp *hubp, bool enable); void (*hubp_vtg_sel)(struct hubp *hubp, uint32_t otg_inst); + void (*hubp_read_state)(struct hubp *hubp); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index b22158190262..cf7433ebf91a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -140,11 +140,6 @@ enum opp_regamma { OPP_REGAMMA_USER }; -struct csc_transform { - uint16_t matrix[12]; - bool enable_adjustment; -}; - struct dc_bias_and_scale { uint16_t scale_red; uint16_t bias_red; @@ -191,4 +186,9 @@ enum controller_dp_test_pattern { CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA }; +enum dc_lut_mode { + LUT_BYPASS, + LUT_RAM_A, + LUT_RAM_B +}; #endif /* __DAL_HW_SHARED_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h index 2109eac20a3d..b2fa4c4cd920 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h @@ -87,7 +87,7 @@ 
struct ipp_funcs { struct input_pixel_processor *ipp, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space); /* DCE function to setup IPP. TODO: see if we can consolidate to setup */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index 54d8a1386142..cf6df2e7beb2 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -149,6 +149,7 @@ struct link_encoder_funcs { bool connect); void (*enable_hpd)(struct link_encoder *enc); void (*disable_hpd)(struct link_encoder *enc); + bool (*is_dig_enabled)(struct link_encoder *enc); void (*destroy)(struct link_encoder **enc); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 3e1e7e6a8792..47f1dc5a43b7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -104,6 +104,7 @@ struct mem_input_funcs { struct mem_input *mem_input, struct dce_watermarks nbp, struct dce_watermarks stutter, + struct dce_watermarks stutter_enter, struct dce_watermarks urgent, uint32_t total_dest_line_time_ns); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 23a8d5e53a89..caf74e3c836f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -105,7 +105,24 @@ struct mpc { struct mpcc mpcc_array[MAX_MPCC]; }; +struct mpcc_state { + uint32_t opp_id; + uint32_t dpp_id; + uint32_t bot_mpcc_id; + uint32_t mode; + uint32_t alpha_mode; + uint32_t pre_multiplied_alpha; + uint32_t overlap_only; + uint32_t idle; + uint32_t busy; +}; + struct mpc_funcs { + void (*read_mpcc_state)( + struct mpc *mpc, + int mpcc_inst, + struct mpcc_state *s); + /* * Insert DPP into MPC tree based on specified blending position. 
* Only used for planes that are part of blending chain for OPP output diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index b5db1692393c..cfa7ec9517ae 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -29,31 +29,40 @@ #define STREAM_ENCODER_H_ #include "audio_types.h" +#include "hw_shared.h" struct dc_bios; struct dc_context; struct dc_crtc_timing; -struct encoder_info_packet { - bool valid; - uint8_t hb0; - uint8_t hb1; - uint8_t hb2; - uint8_t hb3; - uint8_t sb[32]; +enum dp_pixel_encoding_type { + DP_PIXEL_ENCODING_TYPE_RGB444 = 0x00000000, + DP_PIXEL_ENCODING_TYPE_YCBCR422 = 0x00000001, + DP_PIXEL_ENCODING_TYPE_YCBCR444 = 0x00000002, + DP_PIXEL_ENCODING_TYPE_RGB_WIDE_GAMUT = 0x00000003, + DP_PIXEL_ENCODING_TYPE_Y_ONLY = 0x00000004, + DP_PIXEL_ENCODING_TYPE_YCBCR420 = 0x00000005 +}; + +enum dp_component_depth { + DP_COMPONENT_PIXEL_DEPTH_6BPC = 0x00000000, + DP_COMPONENT_PIXEL_DEPTH_8BPC = 0x00000001, + DP_COMPONENT_PIXEL_DEPTH_10BPC = 0x00000002, + DP_COMPONENT_PIXEL_DEPTH_12BPC = 0x00000003, + DP_COMPONENT_PIXEL_DEPTH_16BPC = 0x00000004 }; struct encoder_info_frame { /* auxiliary video information */ - struct encoder_info_packet avi; - struct encoder_info_packet gamut; - struct encoder_info_packet vendor; + struct dc_info_packet avi; + struct dc_info_packet gamut; + struct dc_info_packet vendor; /* source product description */ - struct encoder_info_packet spd; + struct dc_info_packet spd; /* video stream configuration */ - struct encoder_info_packet vsc; + struct dc_info_packet vsc; /* HDR Static MetaData */ - struct encoder_info_packet hdrsmd; + struct dc_info_packet hdrsmd; }; struct encoder_unblank_param { @@ -147,6 +156,7 @@ struct stream_encoder_funcs { void (*set_avmute)( struct stream_encoder *enc, bool enable); + }; #endif /* STREAM_ENCODER_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 3217b5bf6c7a..69cb0a105300 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -140,6 +140,9 @@ struct timing_generator_funcs { void (*program_timing)(struct timing_generator *tg, const struct dc_crtc_timing *timing, bool use_vbios); + void (*program_vline_interrupt)(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + unsigned long long vsync_delta); bool (*enable_crtc)(struct timing_generator *tg); bool (*disable_crtc)(struct timing_generator *tg); bool (*is_counter_moving)(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index c5b3623bcbd9..fecc80c47c26 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -252,7 +252,7 @@ struct transform_funcs { struct transform *xfm_base, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space); void (*ipp_full_bypass)(struct transform *xfm_base); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index e764cbad881b..29abf3ecb39c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ 
b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -32,6 +32,8 @@ #include "inc/hw/link_encoder.h" #include "core_status.h" +#define EDP_BACKLIGHT_RAMP_DISABLE_LEVEL 0xFFFFFFFF + enum pipe_gating_control { PIPE_GATING_CONTROL_DISABLE = 0, PIPE_GATING_CONTROL_ENABLE, @@ -63,6 +65,7 @@ struct dchub_init_data; struct dc_static_screen_events; struct resource_pool; struct resource_context; +struct stream_resource; struct hw_sequencer_funcs { @@ -93,6 +96,12 @@ struct hw_sequencer_funcs { enum dc_color_space colorspace, uint16_t *matrix); + void (*program_output_csc)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + enum dc_color_space colorspace, + uint16_t *matrix, + int opp_id); + void (*update_plane_addr)( const struct dc *dc, struct pipe_ctx *pipe_ctx); @@ -154,6 +163,11 @@ struct hw_sequencer_funcs { struct dc *dc, struct pipe_ctx *pipe, bool lock); + void (*blank_pixel_data)( + struct dc *dc, + struct stream_resource *stream_res, + struct dc_stream_state *stream, + bool blank); void (*set_bandwidth)( struct dc *dc, @@ -169,7 +183,7 @@ struct hw_sequencer_funcs { void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx, int num_pipes, const struct dc_static_screen_events *events); - enum dc_status (*prog_pixclk_crtc_otg)( + enum dc_status (*enable_stream_timing)( struct pipe_ctx *pipe_ctx, struct dc_state *context, struct dc *dc); @@ -201,6 +215,7 @@ struct hw_sequencer_funcs { void (*set_cursor_position)(struct pipe_ctx *pipe); void (*set_cursor_attribute)(struct pipe_ctx *pipe); + }; void color_space_to_black_color( diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index 77eb72874e90..3306e7b0b3e3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -183,6 +183,36 @@ FN(reg_name, f4), v4, \ FN(reg_name, f5), v5) +#define REG_GET_6(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6) \ + generic_reg_get6(CTX, REG(reg_name), \ + FN(reg_name, f1), v1, \ + FN(reg_name, f2), v2, \ + FN(reg_name, f3), v3, \ + FN(reg_name, f4), v4, \ + FN(reg_name, f5), v5, \ + FN(reg_name, f6), v6) + +#define REG_GET_7(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6, f7, v7) \ + generic_reg_get7(CTX, REG(reg_name), \ + FN(reg_name, f1), v1, \ + FN(reg_name, f2), v2, \ + FN(reg_name, f3), v3, \ + FN(reg_name, f4), v4, \ + FN(reg_name, f5), v5, \ + FN(reg_name, f6), v6, \ + FN(reg_name, f7), v7) + +#define REG_GET_8(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6, f7, v7, f8, v8) \ + generic_reg_get8(CTX, REG(reg_name), \ + FN(reg_name, f1), v1, \ + FN(reg_name, f2), v2, \ + FN(reg_name, f3), v3, \ + FN(reg_name, f4), v4, \ + FN(reg_name, f5), v5, \ + FN(reg_name, f6), v6, \ + FN(reg_name, f7), v7, \ + FN(reg_name, f8), v8) + /* macro to poll and wait for a register field to read back given value */ #define REG_WAIT(reg_name, field, val, delay_between_poll_us, max_try) \ @@ -389,4 +419,30 @@ uint32_t generic_reg_get5(const struct dc_context *ctx, uint32_t addr, uint8_t shift4, uint32_t mask4, uint32_t *field_value4, uint8_t shift5, uint32_t mask5, uint32_t *field_value5); +uint32_t generic_reg_get6(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, 
uint32_t *field_value6); + +uint32_t generic_reg_get7(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6, + uint8_t shift7, uint32_t mask7, uint32_t *field_value7); + +uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6, + uint8_t shift7, uint32_t mask7, uint32_t *field_value7, + uint8_t shift8, uint32_t mask8, uint32_t *field_value8); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_REG_HELPER_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 5467332faf7b..640a647f4611 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -139,10 +139,6 @@ bool resource_validate_attach_surfaces( struct dc_state *context, const struct resource_pool *pool); -void validate_guaranteed_copy_streams( - struct dc_state *context, - int max_streams); - void resource_validate_ctx_update_pointer_after_copy( const struct dc_state *src_ctx, struct dc_state *dst_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index a506c2e939f5..cc3b1bc6cedd 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -135,6 +135,13 @@ enum dc_irq_source { DC_IRQ_SOURCE_VBLANK5, DC_IRQ_SOURCE_VBLANK6, + DC_IRQ_SOURCE_DC1_VLINE0, + DC_IRQ_SOURCE_DC2_VLINE0, + DC_IRQ_SOURCE_DC3_VLINE0, + DC_IRQ_SOURCE_DC4_VLINE0, + DC_IRQ_SOURCE_DC5_VLINE0, + DC_IRQ_SOURCE_DC6_VLINE0, + DAL_IRQ_SOURCES_NUMBER }; diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 9b0a04f99ac8..1b987b6a347d 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -98,7 +98,14 @@ (eChipRev < VI_POLARIS11_M_A0)) #define ASIC_REV_IS_POLARIS11_M(eChipRev) ((eChipRev >= VI_POLARIS11_M_A0) && \ (eChipRev < VI_POLARIS12_V_A0)) +#if defined(CONFIG_DRM_AMD_DC_VEGAM) +#define VI_VEGAM_A0 110 +#define ASIC_REV_IS_POLARIS12_V(eChipRev) ((eChipRev >= VI_POLARIS12_V_A0) && \ + (eChipRev < VI_VEGAM_A0)) +#define ASIC_REV_IS_VEGAM(eChipRev) (eChipRev >= VI_VEGAM_A0) +#else #define ASIC_REV_IS_POLARIS12_V(eChipRev) (eChipRev >= VI_POLARIS12_V_A0) +#endif /* DCE11 */ #define CZ_CARRIZO_A0 0x01 diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h index fa543965feb5..5b1f8cef0c22 100644 --- a/drivers/gpu/drm/amd/display/include/dal_types.h +++ b/drivers/gpu/drm/amd/display/include/dal_types.h @@ -40,6 +40,9 @@ enum dce_version { DCE_VERSION_10_0, DCE_VERSION_11_0, DCE_VERSION_11_2, +#if defined(CONFIG_DRM_AMD_DC_VEGAM) + DCE_VERSION_11_22, +#endif DCE_VERSION_12_0, DCE_VERSION_MAX, DCN_VERSION_1_0, diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h 
b/drivers/gpu/drm/amd/display/include/fixed31_32.h index 0de258622c12..16cbdb43d856 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -26,8 +26,6 @@ #ifndef __DAL_FIXED31_32_H__ #define __DAL_FIXED31_32_H__ -#include "os_types.h" - #define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 /* @@ -44,7 +42,7 @@ */ struct fixed31_32 { - int64_t value; + long long value; }; /* @@ -73,15 +71,15 @@ static const struct fixed31_32 dal_fixed31_32_ln2_div_2 = { 1488522236LL }; * result = numerator / denominator */ struct fixed31_32 dal_fixed31_32_from_fraction( - int64_t numerator, - int64_t denominator); + long long numerator, + long long denominator); /* * @brief * result = arg */ -struct fixed31_32 dal_fixed31_32_from_int_nonconst(int64_t arg); -static inline struct fixed31_32 dal_fixed31_32_from_int(int64_t arg) +struct fixed31_32 dal_fixed31_32_from_int_nonconst(long long arg); +static inline struct fixed31_32 dal_fixed31_32_from_int(long long arg) { if (__builtin_constant_p(arg)) { struct fixed31_32 res; @@ -213,7 +211,7 @@ static inline struct fixed31_32 dal_fixed31_32_clamp( */ struct fixed31_32 dal_fixed31_32_shl( struct fixed31_32 arg, - uint8_t shift); + unsigned char shift); /* * @brief @@ -221,7 +219,7 @@ struct fixed31_32 dal_fixed31_32_shl( */ static inline struct fixed31_32 dal_fixed31_32_shr( struct fixed31_32 arg, - uint8_t shift) + unsigned char shift) { struct fixed31_32 res; res.value = arg.value >> shift; @@ -246,7 +244,7 @@ struct fixed31_32 dal_fixed31_32_add( * result = arg1 + arg2 */ static inline struct fixed31_32 dal_fixed31_32_add_int(struct fixed31_32 arg1, - int32_t arg2) + int arg2) { return dal_fixed31_32_add(arg1, dal_fixed31_32_from_int(arg2)); @@ -265,7 +263,7 @@ struct fixed31_32 dal_fixed31_32_sub( * result = arg1 - arg2 */ static inline struct fixed31_32 dal_fixed31_32_sub_int(struct fixed31_32 arg1, - int32_t arg2) + int arg2) { return dal_fixed31_32_sub(arg1, dal_fixed31_32_from_int(arg2)); @@ -291,7 +289,7 @@ struct fixed31_32 dal_fixed31_32_mul( * result = arg1 * arg2 */ static inline struct fixed31_32 dal_fixed31_32_mul_int(struct fixed31_32 arg1, - int32_t arg2) + int arg2) { return dal_fixed31_32_mul(arg1, dal_fixed31_32_from_int(arg2)); @@ -309,7 +307,7 @@ struct fixed31_32 dal_fixed31_32_sqr( * result = arg1 / arg2 */ static inline struct fixed31_32 dal_fixed31_32_div_int(struct fixed31_32 arg1, - int64_t arg2) + long long arg2) { return dal_fixed31_32_from_fraction(arg1.value, dal_fixed31_32_from_int(arg2).value); @@ -434,21 +432,21 @@ struct fixed31_32 dal_fixed31_32_pow( * @brief * result = floor(arg) := greatest integer lower than or equal to arg */ -int32_t dal_fixed31_32_floor( +int dal_fixed31_32_floor( struct fixed31_32 arg); /* * @brief * result = round(arg) := integer nearest to arg */ -int32_t dal_fixed31_32_round( +int dal_fixed31_32_round( struct fixed31_32 arg); /* * @brief * result = ceil(arg) := lowest integer greater than or equal to arg */ -int32_t dal_fixed31_32_ceil( +int dal_fixed31_32_ceil( struct fixed31_32 arg); /* the following two function are used in scaler hw programming to convert fixed @@ -457,20 +455,20 @@ int32_t dal_fixed31_32_ceil( * fractional */ -uint32_t dal_fixed31_32_u2d19( +unsigned int dal_fixed31_32_u2d19( struct fixed31_32 arg); -uint32_t dal_fixed31_32_u0d19( +unsigned int dal_fixed31_32_u0d19( struct fixed31_32 arg); -uint32_t dal_fixed31_32_clamp_u0d14( +unsigned int dal_fixed31_32_clamp_u0d14( struct fixed31_32 arg); -uint32_t 
dal_fixed31_32_clamp_u0d10( +unsigned int dal_fixed31_32_clamp_u0d10( struct fixed31_32 arg); -int32_t dal_fixed31_32_s4d19( +int dal_fixed31_32_s4d19( struct fixed31_32 arg); #endif diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 427796bdc14a..b608a0830801 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -29,39 +29,39 @@ #include "os_types.h" #define MAX_NAME_LEN 32 -#define DC_LOG_ERROR(a, ...) dm_logger_write(DC_LOGGER, LOG_ERROR, a, ## __VA_ARGS__) -#define DC_LOG_WARNING(a, ...) dm_logger_write(DC_LOGGER, LOG_WARNING, a, ## __VA_ARGS__) -#define DC_LOG_DEBUG(a, ...) dm_logger_write(DC_LOGGER, LOG_DEBUG, a, ## __VA_ARGS__) -#define DC_LOG_DC(a, ...) dm_logger_write(DC_LOGGER, LOG_DC, a, ## __VA_ARGS__) -#define DC_LOG_DTN(a, ...) dm_logger_write(DC_LOGGER, LOG_DTN, a, ## __VA_ARGS__) -#define DC_LOG_SURFACE(a, ...) dm_logger_write(DC_LOGGER, LOG_SURFACE, a, ## __VA_ARGS__) -#define DC_LOG_HW_HOTPLUG(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_HOTPLUG, a, ## __VA_ARGS__) -#define DC_LOG_HW_LINK_TRAINING(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_LINK_TRAINING, a, ## __VA_ARGS__) -#define DC_LOG_HW_SET_MODE(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_SET_MODE, a, ## __VA_ARGS__) -#define DC_LOG_HW_RESUME_S3(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_RESUME_S3, a, ## __VA_ARGS__) -#define DC_LOG_HW_AUDIO(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_AUDIO, a, ## __VA_ARGS__) -#define DC_LOG_HW_HPD_IRQ(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_HPD_IRQ, a, ## __VA_ARGS__) -#define DC_LOG_MST(a, ...) dm_logger_write(DC_LOGGER, LOG_MST, a, ## __VA_ARGS__) -#define DC_LOG_SCALER(a, ...) dm_logger_write(DC_LOGGER, LOG_SCALER, a, ## __VA_ARGS__) -#define DC_LOG_BIOS(a, ...) dm_logger_write(DC_LOGGER, LOG_BIOS, a, ## __VA_ARGS__) -#define DC_LOG_BANDWIDTH_CALCS(a, ...) dm_logger_write(DC_LOGGER, LOG_BANDWIDTH_CALCS, a, ## __VA_ARGS__) -#define DC_LOG_BANDWIDTH_VALIDATION(a, ...) dm_logger_write(DC_LOGGER, LOG_BANDWIDTH_VALIDATION, a, ## __VA_ARGS__) -#define DC_LOG_I2C_AUX(a, ...) dm_logger_write(DC_LOGGER, LOG_I2C_AUX, a, ## __VA_ARGS__) -#define DC_LOG_SYNC(a, ...) dm_logger_write(DC_LOGGER, LOG_SYNC, a, ## __VA_ARGS__) -#define DC_LOG_BACKLIGHT(a, ...) dm_logger_write(DC_LOGGER, LOG_BACKLIGHT, a, ## __VA_ARGS__) -#define DC_LOG_FEATURE_OVERRIDE(a, ...) dm_logger_write(DC_LOGGER, LOG_FEATURE_OVERRIDE, a, ## __VA_ARGS__) -#define DC_LOG_DETECTION_EDID_PARSER(a, ...) dm_logger_write(DC_LOGGER, LOG_DETECTION_EDID_PARSER, a, ## __VA_ARGS__) -#define DC_LOG_DETECTION_DP_CAPS(a, ...) dm_logger_write(DC_LOGGER, LOG_DETECTION_DP_CAPS, a, ## __VA_ARGS__) -#define DC_LOG_RESOURCE(a, ...) dm_logger_write(DC_LOGGER, LOG_RESOURCE, a, ## __VA_ARGS__) -#define DC_LOG_DML(a, ...) dm_logger_write(DC_LOGGER, LOG_DML, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_MODE_SET(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_MODE_SET, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_DETECTION(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_DETECTION, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_LINK_TRAINING(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_LINK_TRAINING, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_LINK_LOSS(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_LINK_LOSS, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_UNDERFLOW(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_UNDERFLOW, a, ## __VA_ARGS__) -#define DC_LOG_IF_TRACE(a, ...) 
dm_logger_write(DC_LOGGER, LOG_IF_TRACE, a, ## __VA_ARGS__) -#define DC_LOG_PERF_TRACE(a, ...) dm_logger_write(DC_LOGGER, LOG_PERF_TRACE, a, ## __VA_ARGS__) +#define DC_LOG_ERROR(...) DRM_ERROR(__VA_ARGS__) +#define DC_LOG_WARNING(...) DRM_WARN(__VA_ARGS__) +#define DC_LOG_DEBUG(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DC(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DTN(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_SURFACE(...) pr_debug("[SURFACE]:"__VA_ARGS__) +#define DC_LOG_HW_HOTPLUG(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_HW_LINK_TRAINING(...) pr_debug("[HW_LINK_TRAINING]:"__VA_ARGS__) +#define DC_LOG_HW_SET_MODE(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_HW_RESUME_S3(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_HW_AUDIO(...) pr_debug("[HW_AUDIO]:"__VA_ARGS__) +#define DC_LOG_HW_HPD_IRQ(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_MST(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_SCALER(...) pr_debug("[SCALER]:"__VA_ARGS__) +#define DC_LOG_BIOS(...) pr_debug("[BIOS]:"__VA_ARGS__) +#define DC_LOG_BANDWIDTH_CALCS(...) pr_debug("[BANDWIDTH_CALCS]:"__VA_ARGS__) +#define DC_LOG_BANDWIDTH_VALIDATION(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_I2C_AUX(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_SYNC(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_BACKLIGHT(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_FEATURE_OVERRIDE(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DETECTION_EDID_PARSER(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DETECTION_DP_CAPS(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_RESOURCE(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DML(...) pr_debug("[DML]:"__VA_ARGS__) +#define DC_LOG_EVENT_MODE_SET(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_EVENT_DETECTION(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_EVENT_LINK_TRAINING(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_EVENT_LINK_LOSS(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_EVENT_UNDERFLOW(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_IF_TRACE(...) pr_debug("[IF_TRACE]:"__VA_ARGS__) +#define DC_LOG_PERF_TRACE(...) DRM_DEBUG_KMS(__VA_ARGS__) struct dal_logger; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index b3747a019deb..15e5b72e6e00 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -185,14 +185,14 @@ struct dividers { static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4) { - static const int32_t numerator01[] = { 31308, 180000}; - static const int32_t numerator02[] = { 12920, 4500}; - static const int32_t numerator03[] = { 55, 99}; - static const int32_t numerator04[] = { 55, 99}; - static const int32_t numerator05[] = { 2400, 2200}; + static const int32_t numerator01[] = { 31308, 180000}; + static const int32_t numerator02[] = { 12920, 4500}; + static const int32_t numerator03[] = { 55, 99}; + static const int32_t numerator04[] = { 55, 99}; + static const int32_t numerator05[] = { 2400, 2200}; - uint32_t i = 0; - uint32_t index = is_2_4 == true ? 0:1; + uint32_t i = 0; + uint32_t index = is_2_4 == true ? 
0:1; do { coefficients->a0[i] = dal_fixed31_32_from_fraction( @@ -691,7 +691,7 @@ static void build_degamma(struct pwl_float_data_ex *curve, } } -static bool scale_gamma(struct pwl_float_data *pwl_rgb, +static void scale_gamma(struct pwl_float_data *pwl_rgb, const struct dc_gamma *ramp, struct dividers dividers) { @@ -752,11 +752,9 @@ static bool scale_gamma(struct pwl_float_data *pwl_rgb, dividers.divider3); rgb->b = dal_fixed31_32_mul(rgb_last->b, dividers.divider3); - - return true; } -static bool scale_gamma_dx(struct pwl_float_data *pwl_rgb, +static void scale_gamma_dx(struct pwl_float_data *pwl_rgb, const struct dc_gamma *ramp, struct dividers dividers) { @@ -818,8 +816,71 @@ static bool scale_gamma_dx(struct pwl_float_data *pwl_rgb, pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g); pwl_rgb[i].b = dal_fixed31_32_sub(dal_fixed31_32_mul_int( pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b); +} - return true; +/* todo: all these scale_gamma functions are inherently the same but + * take different structures as params or different format for ramp + * values. We could probably implement it in a more generic fashion + */ +static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb, + const struct regamma_ramp *ramp, + struct dividers dividers) +{ + unsigned short max_driver = 0xFFFF; + unsigned short max_os = 0xFF00; + unsigned short scaler = max_os; + uint32_t i; + struct pwl_float_data *rgb = pwl_rgb; + struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1; + + i = 0; + do { + if (ramp->gamma[i] > max_os || + ramp->gamma[i + 256] > max_os || + ramp->gamma[i + 512] > max_os) { + scaler = max_driver; + break; + } + i++; + } while (i != GAMMA_RGB_256_ENTRIES); + + i = 0; + do { + rgb->r = dal_fixed31_32_from_fraction( + ramp->gamma[i], scaler); + rgb->g = dal_fixed31_32_from_fraction( + ramp->gamma[i + 256], scaler); + rgb->b = dal_fixed31_32_from_fraction( + ramp->gamma[i + 512], scaler); + + ++rgb; + ++i; + } while (i != GAMMA_RGB_256_ENTRIES); + + rgb->r = dal_fixed31_32_mul(rgb_last->r, + dividers.divider1); + rgb->g = dal_fixed31_32_mul(rgb_last->g, + dividers.divider1); + rgb->b = dal_fixed31_32_mul(rgb_last->b, + dividers.divider1); + + ++rgb; + + rgb->r = dal_fixed31_32_mul(rgb_last->r, + dividers.divider2); + rgb->g = dal_fixed31_32_mul(rgb_last->g, + dividers.divider2); + rgb->b = dal_fixed31_32_mul(rgb_last->b, + dividers.divider2); + + ++rgb; + + rgb->r = dal_fixed31_32_mul(rgb_last->r, + dividers.divider3); + rgb->g = dal_fixed31_32_mul(rgb_last->g, + dividers.divider3); + rgb->b = dal_fixed31_32_mul(rgb_last->b, + dividers.divider3); } /* @@ -949,7 +1010,7 @@ static inline void copy_rgb_regamma_to_coordinates_x( uint32_t i = 0; const struct pwl_float_data_ex *rgb_regamma = rgb_ex; - while (i <= hw_points_num) { + while (i <= hw_points_num + 1) { coords->regamma_y_red = rgb_regamma->r; coords->regamma_y_green = rgb_regamma->g; coords->regamma_y_blue = rgb_regamma->b; @@ -1002,6 +1063,102 @@ static bool calculate_interpolated_hardware_curve( return true; } +/* The "old" interpolation uses a complicated scheme to build an array of + * coefficients while also using an array of 0-255 normalized to 0-1 + * Then there's another loop using both of the above + new scaled user ramp + * and we concatenate them. It also searches for points of interpolation and + * uses enums for positions. 
+ *
+ * This function uses a different approach:
+ * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255
+ * To find index for hwX, we notice the following:
+ * i/255 <= hwX < (i+1)/255 <=> i <= 255*hwX < i+1
+ * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT
+ *
+ * Once the index is known, combined Y is simply:
+ * user_ramp(index) + (255*hwX - index) * (user_ramp(index+1) - user_ramp(index))
+ * e.g. 255*hwX = 127.5 gives index = 127 and weight 0.5, so the result is
+ * halfway between user_ramp(127) and user_ramp(128).
+ *
+ * We should switch to this method in all cases, it's simpler and faster
+ * ToDo one day - for now this only applies to ADL regamma to avoid regression
+ * for regular use cases (sRGB and PQ)
+ */
+static void interpolate_user_regamma(uint32_t hw_points_num,
+		struct pwl_float_data *rgb_user,
+		bool apply_degamma,
+		struct dc_transfer_func_distributed_points *tf_pts)
+{
+	uint32_t i;
+	uint32_t color = 0;
+	int32_t index;
+	int32_t index_next;
+	struct fixed31_32 *tf_point;
+	struct fixed31_32 hw_x;
+	struct fixed31_32 norm_factor =
+			dal_fixed31_32_from_int_nonconst(255);
+	struct fixed31_32 norm_x;
+	struct fixed31_32 index_f;
+	struct fixed31_32 lut1;
+	struct fixed31_32 lut2;
+	struct fixed31_32 delta_lut;
+	struct fixed31_32 delta_index;
+
+	i = 0;
+	/* fixed_pt library has problems handling too small values */
+	while (i != 32) {
+		tf_pts->red[i] = dal_fixed31_32_zero;
+		tf_pts->green[i] = dal_fixed31_32_zero;
+		tf_pts->blue[i] = dal_fixed31_32_zero;
+		++i;
+	}
+	while (i <= hw_points_num + 1) {
+		for (color = 0; color < 3; color++) {
+			if (color == 0)
+				tf_point = &tf_pts->red[i];
+			else if (color == 1)
+				tf_point = &tf_pts->green[i];
+			else
+				tf_point = &tf_pts->blue[i];
+
+			if (apply_degamma) {
+				if (color == 0)
+					hw_x = coordinates_x[i].regamma_y_red;
+				else if (color == 1)
+					hw_x = coordinates_x[i].regamma_y_green;
+				else
+					hw_x = coordinates_x[i].regamma_y_blue;
+			} else
+				hw_x = coordinates_x[i].x;
+
+			norm_x = dal_fixed31_32_mul(norm_factor, hw_x);
+			index = dal_fixed31_32_floor(norm_x);
+			if (index < 0 || index > 255)
+				continue;
+
+			index_f = dal_fixed31_32_from_int_nonconst(index);
+			index_next = (index == 255) ?
index : index + 1; + + if (color == 0) { + lut1 = rgb_user[index].r; + lut2 = rgb_user[index_next].r; + } else if (color == 1) { + lut1 = rgb_user[index].g; + lut2 = rgb_user[index_next].g; + } else { + lut1 = rgb_user[index].b; + lut2 = rgb_user[index_next].b; + } + + // we have everything now, so interpolate + delta_lut = dal_fixed31_32_sub(lut2, lut1); + delta_index = dal_fixed31_32_sub(norm_x, index_f); + + *tf_point = dal_fixed31_32_add(lut1, + dal_fixed31_32_mul(delta_index, delta_lut)); + } + ++i; + } +} + static void build_new_custom_resulted_curve( uint32_t hw_points_num, struct dc_transfer_func_distributed_points *tf_pts) @@ -1025,6 +1182,29 @@ static void build_new_custom_resulted_curve( } } +static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma, + uint32_t hw_points_num) +{ + uint32_t i; + + struct gamma_coefficients coeff; + struct pwl_float_data_ex *rgb = rgb_regamma; + const struct hw_x_point *coord_x = coordinates_x; + + build_coefficients(&coeff, true); + + i = 0; + while (i != hw_points_num + 1) { + rgb->r = translate_from_linear_space_ex( + coord_x->x, &coeff, 0); + rgb->g = rgb->r; + rgb->b = rgb->r; + ++coord_x; + ++rgb; + ++i; + } +} + static bool map_regamma_hw_to_x_user( const struct dc_gamma *ramp, struct pixel_gamma_point *coeff128, @@ -1062,6 +1242,7 @@ static bool map_regamma_hw_to_x_user( } } + /* this should be named differently, all it does is clamp to 0-1 */ build_new_custom_resulted_curve(hw_points_num, tf_pts); return true; @@ -1168,6 +1349,113 @@ rgb_user_alloc_fail: return ret; } +bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, + const struct regamma_lut *regamma) +{ + struct gamma_coefficients coeff; + const struct hw_x_point *coord_x = coordinates_x; + uint32_t i = 0; + + do { + coeff.a0[i] = dal_fixed31_32_from_fraction( + regamma->coeff.A0[i], 10000000); + coeff.a1[i] = dal_fixed31_32_from_fraction( + regamma->coeff.A1[i], 1000); + coeff.a2[i] = dal_fixed31_32_from_fraction( + regamma->coeff.A2[i], 1000); + coeff.a3[i] = dal_fixed31_32_from_fraction( + regamma->coeff.A3[i], 1000); + coeff.user_gamma[i] = dal_fixed31_32_from_fraction( + regamma->coeff.gamma[i], 1000); + + ++i; + } while (i != 3); + + i = 0; + /* fixed_pt library has problems handling too small values */ + while (i != 32) { + output_tf->tf_pts.red[i] = dal_fixed31_32_zero; + output_tf->tf_pts.green[i] = dal_fixed31_32_zero; + output_tf->tf_pts.blue[i] = dal_fixed31_32_zero; + ++coord_x; + ++i; + } + while (i != MAX_HW_POINTS + 1) { + output_tf->tf_pts.red[i] = translate_from_linear_space_ex( + coord_x->x, &coeff, 0); + output_tf->tf_pts.green[i] = translate_from_linear_space_ex( + coord_x->x, &coeff, 1); + output_tf->tf_pts.blue[i] = translate_from_linear_space_ex( + coord_x->x, &coeff, 2); + ++coord_x; + ++i; + } + + // this function just clamps output to 0-1 + build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts); + output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + + return true; +} + +bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, + const struct regamma_lut *regamma) +{ + struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; + struct dividers dividers; + + struct pwl_float_data *rgb_user = NULL; + struct pwl_float_data_ex *rgb_regamma = NULL; + bool ret = false; + + if (regamma == NULL) + return false; + + output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + + rgb_user = kzalloc(sizeof(*rgb_user) * (GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS), + GFP_KERNEL); + if (!rgb_user) + goto 
rgb_user_alloc_fail;
+
+	rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS),
+			GFP_KERNEL);
+	if (!rgb_regamma)
+		goto rgb_regamma_alloc_fail;
+
+	dividers.divider1 = dal_fixed31_32_from_fraction(3, 2);
+	dividers.divider2 = dal_fixed31_32_from_int(2);
+	dividers.divider3 = dal_fixed31_32_from_fraction(5, 2);
+
+	scale_user_regamma_ramp(rgb_user, &regamma->ramp, dividers);
+
+	if (regamma->flags.bits.applyDegamma == 1) {
+		apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS);
+		copy_rgb_regamma_to_coordinates_x(coordinates_x,
+				MAX_HW_POINTS, rgb_regamma);
+	}
+
+	interpolate_user_regamma(MAX_HW_POINTS, rgb_user,
+			regamma->flags.bits.applyDegamma, tf_pts);
+
+	// no custom HDR curves!
+	tf_pts->end_exponent = 0;
+	tf_pts->x_point_at_y1_red = 1;
+	tf_pts->x_point_at_y1_green = 1;
+	tf_pts->x_point_at_y1_blue = 1;
+
+	// this function just clamps output to 0-1
+	build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts);
+
+	ret = true;
+
+	kfree(rgb_regamma);
+rgb_regamma_alloc_fail:
+	kfree(rgb_user);
+rgb_user_alloc_fail:
+	return ret;
+}
+
 bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 		const struct dc_gamma *ramp, bool mapUserRamp)
 {
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index b7f9bc27d101..b64048991a95 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -32,6 +32,47 @@ struct dc_transfer_func_distributed_points;
 struct dc_rgb_fixed;
 enum dc_transfer_func_predefined;
 
+/* For SetRegamma ADL interface support
+ * Must match escape type
+ */
+union regamma_flags {
+	unsigned int raw;
+	struct {
+		unsigned int gammaRampArray :1;    // RegammaRamp is in use
+		unsigned int gammaFromEdid :1;     //gamma from edid is in use
+		unsigned int gammaFromEdidEx :1;   //gamma from edid is in use, but only for Display Id 1.2
+		unsigned int gammaFromUser :1;     //user custom gamma is used
+		unsigned int coeffFromUser :1;     //coeff. A0-A3 from user is in use
+		unsigned int coeffFromEdid :1;     //coeff. A0-A3 from edid is in use
+		unsigned int applyDegamma :1;      //flag for additional degamma correction in driver
+		unsigned int gammaPredefinedSRGB :1;           //flag for SRGB gamma
+		unsigned int gammaPredefinedPQ :1;             //flag for PQ gamma
+		unsigned int gammaPredefinedPQ2084Interim :1;  //flag for PQ gamma, lower max nits
+		unsigned int gammaPredefined36 :1;             //flag for 3.6 gamma
+		unsigned int gammaPredefinedReset :1;          //flag to return to previous gamma
+	} bits;
+};
+
+struct regamma_ramp {
+	unsigned short gamma[256*3]; // gamma ramp packed in same way as OS windows: r, g & b
+};
+
+struct regamma_coeff {
+	int gamma[3];
+	int A0[3];
+	int A1[3];
+	int A2[3];
+	int A3[3];
+};
+
+struct regamma_lut {
+	union regamma_flags flags;
+	union {
+		struct regamma_ramp ramp;
+		struct regamma_coeff coeff;
+	};
+};
+
 void setup_x_points_distribution(void);
 void precompute_pq(void);
 void precompute_de_pq(void);
@@ -45,9 +86,14 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *output_tf,
 bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
 		struct dc_transfer_func_distributed_points *points);
 
-bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
+bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
 		struct dc_transfer_func_distributed_points *points);
 
+bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
+		const struct regamma_lut *regamma);
+
+bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
+		const struct regamma_lut *regamma);
 
 #endif /* COLOR_MOD_COLOR_GAMMA_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/stats/stats.c b/drivers/gpu/drm/amd/display/modules/stats/stats.c
index 041f87b73d5f..48e02197919f 100644
--- a/drivers/gpu/drm/amd/display/modules/stats/stats.c
+++ b/drivers/gpu/drm/amd/display/modules/stats/stats.c
@@ -115,18 +115,22 @@ struct mod_stats *mod_stats_create(struct dc *dc)
 			&reg_data, sizeof(unsigned int), &flag))
 		core_stats->enabled = reg_data;
 
-	core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT;
-	if (dm_read_persistent_data(dc->ctx, NULL, NULL,
-			DAL_STATS_ENTRIES_REGKEY,
-			&reg_data, sizeof(unsigned int), &flag)) {
-		if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX)
-			core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX;
-		else
-			core_stats->entries = reg_data;
-	}
+	if (core_stats->enabled) {
+		core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT;
+		if (dm_read_persistent_data(dc->ctx, NULL, NULL,
+				DAL_STATS_ENTRIES_REGKEY,
+				&reg_data, sizeof(unsigned int), &flag)) {
+			if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX)
+				core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX;
+			else
+				core_stats->entries = reg_data;
+		}
 
-	core_stats->time = kzalloc(sizeof(struct stats_time_cache) * core_stats->entries,
-			GFP_KERNEL);
+		core_stats->time = kzalloc(sizeof(struct stats_time_cache) * core_stats->entries,
+				GFP_KERNEL);
+	} else {
+		core_stats->entries = 0;
+	}
 
 	if (core_stats->time == NULL)
 		goto fail_construct;
@@ -187,7 +191,7 @@ void mod_stats_dump(struct mod_stats *mod_stats)
 
 	for (int i = 0; i < core_stats->index && i < core_stats->entries; i++) {
 		dm_logger_write(logger, LOG_PROFILING,
-				"%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n",
+				"%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
 				time[i].render_time_in_us,
 				time[i].avg_render_time_in_us_last_ten,
 				time[i].min_window,
@@ -227,7 +231,7 @@ void mod_stats_reset_data(struct mod_stats *mod_stats)
 	memset(core_stats->time, 0,
 			sizeof(struct stats_time_cache) * core_stats->entries);
 
-	core_stats->index = 0;
+
core_stats->index = 1; } void mod_stats_update_flip(struct mod_stats *mod_stats, @@ -250,7 +254,7 @@ void mod_stats_update_flip(struct mod_stats *mod_stats, time[index].flip_timestamp_in_ns = timestamp_in_ns; time[index].render_time_in_us = - timestamp_in_ns - time[index - 1].flip_timestamp_in_ns; + (timestamp_in_ns - time[index - 1].flip_timestamp_in_ns) / 1000; if (index >= 10) { for (unsigned int i = 0; i < 10; i++) @@ -261,10 +265,12 @@ void mod_stats_update_flip(struct mod_stats *mod_stats, if (time[index].num_vsync_between_flips > 0) time[index].vsync_to_flip_time_in_us = - timestamp_in_ns - time[index].vupdate_timestamp_in_ns; + (timestamp_in_ns - + time[index].vupdate_timestamp_in_ns) / 1000; else time[index].vsync_to_flip_time_in_us = - timestamp_in_ns - time[index - 1].vupdate_timestamp_in_ns; + (timestamp_in_ns - + time[index - 1].vupdate_timestamp_in_ns) / 1000; core_stats->index++; } @@ -275,6 +281,8 @@ void mod_stats_update_vupdate(struct mod_stats *mod_stats, struct core_stats *core_stats = NULL; struct stats_time_cache *time = NULL; unsigned int index = 0; + unsigned int num_vsyncs = 0; + unsigned int prev_vsync_in_ns = 0; if (mod_stats == NULL) return; @@ -286,14 +294,27 @@ void mod_stats_update_vupdate(struct mod_stats *mod_stats, time = core_stats->time; index = core_stats->index; + num_vsyncs = time[index].num_vsync_between_flips; + + if (num_vsyncs < MOD_STATS_NUM_VSYNCS) { + if (num_vsyncs == 0) { + prev_vsync_in_ns = + time[index - 1].vupdate_timestamp_in_ns; + + time[index].flip_to_vsync_time_in_us = + (timestamp_in_ns - + time[index - 1].flip_timestamp_in_ns) / + 1000; + } else { + prev_vsync_in_ns = + time[index].vupdate_timestamp_in_ns; + } - time[index].vupdate_timestamp_in_ns = timestamp_in_ns; - if (time[index].num_vsync_between_flips < MOD_STATS_NUM_VSYNCS) - time[index].v_sync_time_in_us[time[index].num_vsync_between_flips] = - timestamp_in_ns - time[index - 1].vupdate_timestamp_in_ns; - time[index].flip_to_vsync_time_in_us = - timestamp_in_ns - time[index - 1].flip_timestamp_in_ns; + time[index].v_sync_time_in_us[num_vsyncs] = + (timestamp_in_ns - prev_vsync_in_ns) / 1000; + } + time[index].vupdate_timestamp_in_ns = timestamp_in_ns; time[index].num_vsync_between_flips++; } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 9fa3aaef3f33..33de33016bda 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -109,6 +109,26 @@ enum amd_powergating_state { #define AMD_PG_SUPPORT_GFX_PIPELINE (1 << 12) #define AMD_PG_SUPPORT_MMHUB (1 << 13) +enum PP_FEATURE_MASK { + PP_SCLK_DPM_MASK = 0x1, + PP_MCLK_DPM_MASK = 0x2, + PP_PCIE_DPM_MASK = 0x4, + PP_SCLK_DEEP_SLEEP_MASK = 0x8, + PP_POWER_CONTAINMENT_MASK = 0x10, + PP_UVD_HANDSHAKE_MASK = 0x20, + PP_SMC_VOLTAGE_CONTROL_MASK = 0x40, + PP_VBI_TIME_SUPPORT_MASK = 0x80, + PP_ULV_MASK = 0x100, + PP_ENABLE_GFX_CG_THRU_SMU = 0x200, + PP_CLOCK_STRETCH_MASK = 0x400, + PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800, + PP_SOCCLK_DPM_MASK = 0x1000, + PP_DCEFCLK_DPM_MASK = 0x2000, + PP_OVERDRIVE_MASK = 0x4000, + PP_GFXOFF_MASK = 0x8000, + PP_ACG_MASK = 0x10000, +}; + struct amd_ip_funcs { /* Name of IP block */ char *name; diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h index 4ccf9681c45d..721c61171045 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h @@ -3895,6 +3895,10 @@ 
#define mmCM0_CM_MEM_PWR_CTRL_BASE_IDX 2 #define mmCM0_CM_MEM_PWR_STATUS 0x0d33 #define mmCM0_CM_MEM_PWR_STATUS_BASE_IDX 2 +#define mmCM0_CM_TEST_DEBUG_INDEX 0x0d35 +#define mmCM0_CM_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmCM0_CM_TEST_DEBUG_DATA 0x0d36 +#define mmCM0_CM_TEST_DEBUG_DATA_BASE_IDX 2 // addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec @@ -4367,7 +4371,10 @@ #define mmCM1_CM_MEM_PWR_CTRL_BASE_IDX 2 #define mmCM1_CM_MEM_PWR_STATUS 0x0e4e #define mmCM1_CM_MEM_PWR_STATUS_BASE_IDX 2 - +#define mmCM1_CM_TEST_DEBUG_INDEX 0x0e50 +#define mmCM1_CM_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmCM1_CM_TEST_DEBUG_DATA 0x0e51 +#define mmCM1_CM_TEST_DEBUG_DATA_BASE_IDX 2 // addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec // base address: 0x399c @@ -4839,7 +4846,10 @@ #define mmCM2_CM_MEM_PWR_CTRL_BASE_IDX 2 #define mmCM2_CM_MEM_PWR_STATUS 0x0f69 #define mmCM2_CM_MEM_PWR_STATUS_BASE_IDX 2 - +#define mmCM2_CM_TEST_DEBUG_INDEX 0x0f6b +#define mmCM2_CM_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmCM2_CM_TEST_DEBUG_DATA 0x0f6c +#define mmCM2_CM_TEST_DEBUG_DATA_BASE_IDX 2 // addressBlock: dce_dc_dpp2_dispdec_dpp_dcperfmon_dc_perfmon_dispdec // base address: 0x3e08 @@ -5311,7 +5321,10 @@ #define mmCM3_CM_MEM_PWR_CTRL_BASE_IDX 2 #define mmCM3_CM_MEM_PWR_STATUS 0x1084 #define mmCM3_CM_MEM_PWR_STATUS_BASE_IDX 2 - +#define mmCM3_CM_TEST_DEBUG_INDEX 0x1086 +#define mmCM3_CM_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmCM3_CM_TEST_DEBUG_DATA 0x1087 +#define mmCM3_CM_TEST_DEBUG_DATA_BASE_IDX 2 // addressBlock: dce_dc_dpp3_dispdec_dpp_dcperfmon_dc_perfmon_dispdec // base address: 0x4274 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h index e2a2f114bd8e..e7c0cad41081 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h @@ -14049,6 +14049,14 @@ #define CM0_CM_MEM_PWR_STATUS__RGAM_MEM_PWR_STATE__SHIFT 0x2 #define CM0_CM_MEM_PWR_STATUS__SHARED_MEM_PWR_STATE_MASK 0x00000003L #define CM0_CM_MEM_PWR_STATUS__RGAM_MEM_PWR_STATE_MASK 0x0000000CL +//CM0_CM_TEST_DEBUG_INDEX +#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0 +#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8 +#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL +#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L +//CM0_CM_TEST_DEBUG_DATA +#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0 +#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL // addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h new file mode 100644 index 000000000000..9e19e723081b --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _df_1_7_DEFAULT_HEADER +#define _df_1_7_DEFAULT_HEADER + +#define mmFabricConfigAccessControl_DEFAULT 0x00000000 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h new file mode 100644 index 000000000000..2b305dd021e8 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _df_1_7_OFFSET_HEADER +#define _df_1_7_OFFSET_HEADER + +#define mmFabricConfigAccessControl 0x0410 +#define mmFabricConfigAccessControl_BASE_IDX 0 + +#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc +#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 + +#define mmDF_CS_AON0_DramBaseAddress0 0x0044 +#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h new file mode 100644 index 000000000000..2ba849798924 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_1_7_SH_MASK_HEADER
+#define _df_1_7_SH_MASK_HEADER
+
+/* FabricConfigAccessControl */
+#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0
+#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1
+#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10
+#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L
+#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L
+#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L
+
+/* DF_PIE_AON0_DfGlobalClkGater */
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL
+
+/* DF_CS_AON0_DramBaseAddress0 */
+#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0
+#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1
+#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4
+#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8
+#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
+#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
+#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
+#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
+#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
+#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index f696bbb643ef..7931502fa54f 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -632,6 +632,13 @@ typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2
   ULONG ulReserved;
 }COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2;
 
+typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3
+{
+  COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 ulClock;
+  USHORT usMclk_fcw_frac;     //fractional divider of fcw = usMclk_fcw_frac/65536
+  USHORT usMclk_fcw_int;      //integer divider of fcw
+}COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3;
+
 //Input parameter of DynamicMemorySettingsTable
 //when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag = COMPUTE_MEMORY_PLL_PARAM
 typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index f2814ae7ecdd..a69deb3a2ac0 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -42,20 +42,6 @@ enum cgs_ind_reg {
CGS_IND_REG__AUDIO_ENDPT }; -/** - * enum cgs_engine - Engines that can be statically power-gated - */ -enum cgs_engine { - CGS_ENGINE__UVD, - CGS_ENGINE__VCE, - CGS_ENGINE__VP8, - CGS_ENGINE__ACP_DMA, - CGS_ENGINE__ACP_DSP0, - CGS_ENGINE__ACP_DSP1, - CGS_ENGINE__ISP, - /* ... */ -}; - /* * enum cgs_ucode_id - Firmware types for different IPs */ @@ -76,17 +62,6 @@ enum cgs_ucode_id { CGS_UCODE_ID_MAXIMUM, }; -/* - * enum cgs_resource_type - GPU resource type - */ -enum cgs_resource_type { - CGS_RESOURCE_TYPE_MMIO = 0, - CGS_RESOURCE_TYPE_FB, - CGS_RESOURCE_TYPE_IO, - CGS_RESOURCE_TYPE_DOORBELL, - CGS_RESOURCE_TYPE_ROM, -}; - /** * struct cgs_firmware_info - Firmware information */ @@ -104,17 +79,6 @@ struct cgs_firmware_info { bool is_kicker; }; -struct cgs_mode_info { - uint32_t refresh_rate; - uint32_t vblank_time_us; -}; - -struct cgs_display_info { - uint32_t display_count; - uint32_t active_display_mask; - struct cgs_mode_info *mode_info; -}; - typedef unsigned long cgs_handle_t; /** @@ -170,119 +134,18 @@ typedef void (*cgs_write_ind_register_t)(struct cgs_device *cgs_device, enum cgs #define CGS_WREG32_FIELD_IND(device, space, reg, field, val) \ cgs_write_ind_register(device, space, ix##reg, (cgs_read_ind_register(device, space, ix##reg) & ~CGS_REG_FIELD_MASK(reg, field)) | (val) << CGS_REG_FIELD_SHIFT(reg, field)) -/** - * cgs_get_pci_resource() - provide access to a device resource (PCI BAR) - * @cgs_device: opaque device handle - * @resource_type: Type of Resource (MMIO, IO, ROM, FB, DOORBELL) - * @size: size of the region - * @offset: offset from the start of the region - * @resource_base: base address (not including offset) returned - * - * Return: 0 on success, -errno otherwise - */ -typedef int (*cgs_get_pci_resource_t)(struct cgs_device *cgs_device, - enum cgs_resource_type resource_type, - uint64_t size, - uint64_t offset, - uint64_t *resource_base); - -/** - * cgs_atom_get_data_table() - Get a pointer to an ATOM BIOS data table - * @cgs_device: opaque device handle - * @table: data table index - * @size: size of the table (output, may be NULL) - * @frev: table format revision (output, may be NULL) - * @crev: table content revision (output, may be NULL) - * - * Return: Pointer to start of the table, or NULL on failure - */ -typedef const void *(*cgs_atom_get_data_table_t)( - struct cgs_device *cgs_device, unsigned table, - uint16_t *size, uint8_t *frev, uint8_t *crev); - -/** - * cgs_atom_get_cmd_table_revs() - Get ATOM BIOS command table revisions - * @cgs_device: opaque device handle - * @table: data table index - * @frev: table format revision (output, may be NULL) - * @crev: table content revision (output, may be NULL) - * - * Return: 0 on success, -errno otherwise - */ -typedef int (*cgs_atom_get_cmd_table_revs_t)(struct cgs_device *cgs_device, unsigned table, - uint8_t *frev, uint8_t *crev); - -/** - * cgs_atom_exec_cmd_table() - Execute an ATOM BIOS command table - * @cgs_device: opaque device handle - * @table: command table index - * @args: arguments - * - * Return: 0 on success, -errno otherwise - */ -typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device, - unsigned table, void *args); - -/** - * cgs_get_firmware_info - Get the firmware information from core driver - * @cgs_device: opaque device handle - * @type: the firmware type - * @info: returend firmware information - * - * Return: 0 on success, -errno otherwise - */ typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info); 
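The CGS accessors that survive this trim, like cgs_get_firmware_info above, are still dispatched through a per-device function-pointer table (the cgs_ops struct below) via the CGS_CALL wrapper macros. A minimal stand-alone sketch of that dispatch pattern follows; all names in it are illustrative, since CGS_CALL's actual definition sits outside this hunk.

/* Stand-alone sketch of an ops-table dispatch in the CGS style.
 * Names here are illustrative, not the real CGS definitions. */
#include <stdio.h>

struct demo_device;

struct demo_ops {
	int (*get_firmware_info)(struct demo_device *dev, int type);
};

struct demo_device {
	const struct demo_ops *ops;
};

/* Analogous to a CGS_CALL(func, dev, ...) wrapper: look the callback
 * up in the device's ops table and forward the arguments. */
#define DEMO_CALL(func, dev, ...) ((dev)->ops->func((dev), ##__VA_ARGS__))

static int demo_get_firmware_info(struct demo_device *dev, int type)
{
	printf("firmware info requested for ucode type %d\n", type);
	return 0;
}

int main(void)
{
	static const struct demo_ops ops = {
		.get_firmware_info = demo_get_firmware_info,
	};
	struct demo_device dev = { .ops = &ops };

	/* Expands to dev.ops->get_firmware_info(&dev, 3). */
	return DEMO_CALL(get_firmware_info, &dev, 3);
}

The removed atom_*, powergating, and display-info callbacks followed the same pattern; dropping them shrinks the table to the MMIO and firmware-info entries the powerplay code still uses.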
-typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device, - enum cgs_ucode_id type); - -typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_powergating_state state); - -typedef int(*cgs_set_clockgating_state)(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state); - -typedef int(*cgs_get_active_displays_info)( - struct cgs_device *cgs_device, - struct cgs_display_info *info); - -typedef int (*cgs_notify_dpm_enabled)(struct cgs_device *cgs_device, bool enabled); - -typedef int (*cgs_is_virtualization_enabled_t)(void *cgs_device); - -typedef int (*cgs_enter_safe_mode)(struct cgs_device *cgs_device, bool en); - -typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock); - struct cgs_ops { /* MMIO access */ cgs_read_register_t read_register; cgs_write_register_t write_register; cgs_read_ind_register_t read_ind_register; cgs_write_ind_register_t write_ind_register; - /* PCI resources */ - cgs_get_pci_resource_t get_pci_resource; - /* ATOM BIOS */ - cgs_atom_get_data_table_t atom_get_data_table; - cgs_atom_get_cmd_table_revs_t atom_get_cmd_table_revs; - cgs_atom_exec_cmd_table_t atom_exec_cmd_table; /* Firmware Info */ cgs_get_firmware_info get_firmware_info; - cgs_rel_firmware rel_firmware; - /* cg pg interface*/ - cgs_set_powergating_state set_powergating_state; - cgs_set_clockgating_state set_clockgating_state; - /* display manager */ - cgs_get_active_displays_info get_active_displays_info; - /* notify dpm enabled */ - cgs_notify_dpm_enabled notify_dpm_enabled; - cgs_is_virtualization_enabled_t is_virtualization_enabled; - cgs_enter_safe_mode enter_safe_mode; - cgs_lock_grbm_idx lock_grbm_idx; }; struct cgs_os_ops; /* To be define in OS-specific CGS header */ @@ -309,40 +172,7 @@ struct cgs_device #define cgs_write_ind_register(dev,space,index,value) \ CGS_CALL(write_ind_register,dev,space,index,value) -#define cgs_atom_get_data_table(dev,table,size,frev,crev) \ - CGS_CALL(atom_get_data_table,dev,table,size,frev,crev) -#define cgs_atom_get_cmd_table_revs(dev,table,frev,crev) \ - CGS_CALL(atom_get_cmd_table_revs,dev,table,frev,crev) -#define cgs_atom_exec_cmd_table(dev,table,args) \ - CGS_CALL(atom_exec_cmd_table,dev,table,args) - #define cgs_get_firmware_info(dev, type, info) \ CGS_CALL(get_firmware_info, dev, type, info) -#define cgs_rel_firmware(dev, type) \ - CGS_CALL(rel_firmware, dev, type) -#define cgs_set_powergating_state(dev, block_type, state) \ - CGS_CALL(set_powergating_state, dev, block_type, state) -#define cgs_set_clockgating_state(dev, block_type, state) \ - CGS_CALL(set_clockgating_state, dev, block_type, state) -#define cgs_notify_dpm_enabled(dev, enabled) \ - CGS_CALL(notify_dpm_enabled, dev, enabled) - -#define cgs_get_active_displays_info(dev, info) \ - CGS_CALL(get_active_displays_info, dev, info) - -#define cgs_get_pci_resource(cgs_device, resource_type, size, offset, \ - resource_base) \ - CGS_CALL(get_pci_resource, cgs_device, resource_type, size, offset, \ - resource_base) - -#define cgs_is_virtualization_enabled(cgs_device) \ - CGS_CALL(is_virtualization_enabled, cgs_device) - -#define cgs_enter_safe_mode(cgs_device, en) \ - CGS_CALL(enter_safe_mode, cgs_device, en) - -#define cgs_lock_grbm_idx(cgs_device, lock) \ - CGS_CALL(lock_grbm_idx, cgs_device, lock) - #endif /* _CGS_COMMON_H */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 237289a72bb7..5733fbee07f7 
100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -100,6 +100,21 @@ struct kgd2kfd_shared_resources { /* Bit n == 1 means Queue n is available for KFD */ DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); + /* Doorbell assignments (SOC15 and later chips only). Only + * specific doorbells are routed to each SDMA engine. Others + * are routed to IH and VCN. They are not usable by the CP. + * + * Any doorbell number D that satisfies the following condition + * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val + * + * KFD currently uses 1024 (= 0x3ff) doorbells per process. If + * doorbells 0x0f0-0x0f7 and 0x2f0-0x2f7 are reserved, that means + * mask would be set to 0x1f8 and val set to 0x0f0. + */ + unsigned int sdma_doorbell[2][2]; + unsigned int reserved_doorbell_mask; + unsigned int reserved_doorbell_val; + /* Base address of doorbell aperture. */ phys_addr_t doorbell_physical_address; @@ -173,8 +188,6 @@ struct tile_config { * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp * scheduling mode. Only used for no cp scheduling mode. * - * @init_pipeline: Initialized the compute pipelines. - * * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp * scheduling mode. * @@ -274,9 +287,6 @@ struct kfd2kgd_calls { int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); - int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); - int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, @@ -382,6 +392,10 @@ struct kfd2kgd_calls { * * @resume: Notifies amdkfd about a resume action done to a kgd device * + * @quiesce_mm: Quiesce all user queue access to specified MM address space + * + * @resume_mm: Resume user queue access to specified MM address space + * * @schedule_evict_and_restore_process: Schedules work queue that will prepare * for safe eviction of KFD BOs that belong to the specified process.
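/*
 * The reservation rule above is a plain mask/match test. A sketch of the
 * check (the helper name is illustrative only): with the example values from
 * the comment, mask 0x1f8 and val 0x0f0, it returns true exactly for
 * doorbells 0x0f0-0x0f7 and 0x2f0-0x2f7 within the 0x000-0x3ff per-process
 * range.
 */
static inline bool doorbell_is_reserved(unsigned int d,
					const struct kgd2kfd_shared_resources *res)
{
	return (d & res->reserved_doorbell_mask) == res->reserved_doorbell_val;
}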
* @@ -399,6 +413,8 @@ struct kgd2kfd_calls { void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); void (*suspend)(struct kfd_dev *kfd); int (*resume)(struct kfd_dev *kfd); + int (*quiesce_mm)(struct mm_struct *mm); + int (*resume_mm)(struct mm_struct *mm); int (*schedule_evict_and_restore_process)(struct mm_struct *mm, struct dma_fence *fence); }; diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 5c840c022b52..06f08f34a110 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -94,6 +94,7 @@ enum pp_clock_type { PP_PCIE, OD_SCLK, OD_MCLK, + OD_RANGE, }; enum amd_pp_sensors { @@ -149,13 +150,6 @@ struct pp_states_info { uint32_t states[16]; }; -struct pp_gpu_power { - uint32_t vddc_power; - uint32_t vddci_power; - uint32_t max_gpu_power; - uint32_t average_gpu_power; -}; - #define PP_GROUP_MASK 0xF0000000 #define PP_GROUP_SHIFT 28 @@ -246,11 +240,6 @@ struct amd_pm_funcs { int (*load_firmware)(void *handle); int (*wait_for_fw_loading_complete)(void *handle); int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id); - int (*notify_smu_memory_info)(void *handle, uint32_t virtual_addr_low, - uint32_t virtual_addr_hi, - uint32_t mc_addr_low, - uint32_t mc_addr_hi, - uint32_t size); int (*set_power_limit)(void *handle, uint32_t n); int (*get_power_limit)(void *handle, uint32_t *limit, bool default_limit); /* export to DC */ diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index 2fb25abaf7c8..ceaf4932258d 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -29,10 +29,10 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_rb_base; uint32_t sdmax_rlcx_rb_base_hi; uint32_t sdmax_rlcx_rb_rptr; + uint32_t sdmax_rlcx_rb_rptr_hi; uint32_t sdmax_rlcx_rb_wptr; + uint32_t sdmax_rlcx_rb_wptr_hi; uint32_t sdmax_rlcx_rb_wptr_poll_cntl; - uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; - uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; uint32_t sdmax_rlcx_rb_rptr_addr_hi; uint32_t sdmax_rlcx_rb_rptr_addr_lo; uint32_t sdmax_rlcx_ib_cntl; @@ -44,29 +44,29 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_skip_cntl; uint32_t sdmax_rlcx_context_status; uint32_t sdmax_rlcx_doorbell; - uint32_t sdmax_rlcx_virtual_addr; - uint32_t sdmax_rlcx_ape1_cntl; + uint32_t sdmax_rlcx_status; uint32_t sdmax_rlcx_doorbell_log; - uint32_t reserved_22; - uint32_t reserved_23; - uint32_t reserved_24; - uint32_t reserved_25; - uint32_t reserved_26; - uint32_t reserved_27; - uint32_t reserved_28; - uint32_t reserved_29; - uint32_t reserved_30; - uint32_t reserved_31; - uint32_t reserved_32; - uint32_t reserved_33; - uint32_t reserved_34; - uint32_t reserved_35; - uint32_t reserved_36; - uint32_t reserved_37; - uint32_t reserved_38; - uint32_t reserved_39; - uint32_t reserved_40; - uint32_t reserved_41; + uint32_t sdmax_rlcx_watermark; + uint32_t sdmax_rlcx_doorbell_offset; + uint32_t sdmax_rlcx_csa_addr_lo; + uint32_t sdmax_rlcx_csa_addr_hi; + uint32_t sdmax_rlcx_ib_sub_remain; + uint32_t sdmax_rlcx_preempt; + uint32_t sdmax_rlcx_dummy_reg; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; + uint32_t sdmax_rlcx_rb_aql_cntl; + uint32_t sdmax_rlcx_minor_ptr_update; + uint32_t sdmax_rlcx_midcmd_data0; + uint32_t sdmax_rlcx_midcmd_data1; + uint32_t sdmax_rlcx_midcmd_data2; + uint32_t sdmax_rlcx_midcmd_data3; + uint32_t sdmax_rlcx_midcmd_data4; + uint32_t sdmax_rlcx_midcmd_data5; + 
uint32_t sdmax_rlcx_midcmd_data6; + uint32_t sdmax_rlcx_midcmd_data7; + uint32_t sdmax_rlcx_midcmd_data8; + uint32_t sdmax_rlcx_midcmd_cntl; uint32_t reserved_42; uint32_t reserved_43; uint32_t reserved_44; diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 7e8ad30d98e2..b493369e6d0f 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -25,30 +25,16 @@ #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/slab.h> +#include <linux/firmware.h> #include "amd_shared.h" #include "amd_powerplay.h" #include "power_state.h" #include "amdgpu.h" #include "hwmgr.h" -#define PP_DPM_DISABLED 0xCCCC - -static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id, - enum amd_pm_state_type *user_state); static const struct amd_pm_funcs pp_dpm_funcs; -static inline int pp_check(struct pp_hwmgr *hwmgr) -{ - if (hwmgr == NULL || hwmgr->smumgr_funcs == NULL) - return -EINVAL; - - if (hwmgr->pm_en == 0 || hwmgr->hwmgr_func == NULL) - return PP_DPM_DISABLED; - - return 0; -} - static int amd_powerplay_create(struct amdgpu_device *adev) { struct pp_hwmgr *hwmgr; @@ -61,19 +47,21 @@ static int amd_powerplay_create(struct amdgpu_device *adev) return -ENOMEM; hwmgr->adev = adev; - hwmgr->pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false; + hwmgr->not_vf = !amdgpu_sriov_vf(adev); + hwmgr->pm_en = (amdgpu_dpm && hwmgr->not_vf) ? true : false; hwmgr->device = amdgpu_cgs_create_device(adev); mutex_init(&hwmgr->smu_lock); hwmgr->chip_family = adev->family; hwmgr->chip_id = adev->asic_type; - hwmgr->feature_mask = amdgpu_pp_feature_mask; + hwmgr->feature_mask = adev->powerplay.pp_feature; + hwmgr->display_config = &adev->pm.pm_display_cfg; adev->powerplay.pp_handle = hwmgr; adev->powerplay.pp_funcs = &pp_dpm_funcs; return 0; } -static int amd_powerplay_destroy(struct amdgpu_device *adev) +static void amd_powerplay_destroy(struct amdgpu_device *adev) { struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; @@ -82,8 +70,6 @@ static int amd_powerplay_destroy(struct amdgpu_device *adev) kfree(hwmgr); hwmgr = NULL; - - return 0; } static int pp_early_init(void *handle) @@ -109,18 +95,9 @@ static int pp_sw_init(void *handle) struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret >= 0) { - if (hwmgr->smumgr_funcs->smu_init == NULL) - return -EINVAL; - - ret = hwmgr->smumgr_funcs->smu_init(hwmgr); + ret = hwmgr_sw_init(hwmgr); - phm_register_irq_handlers(hwmgr); - - pr_debug("amdgpu: powerplay sw initialized\n"); - } + pr_debug("powerplay sw init %s\n", ret ? 
"failed" : "successfully"); return ret; } @@ -129,16 +106,14 @@ static int pp_sw_fini(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; - ret = pp_check(hwmgr); - if (ret >= 0) { - if (hwmgr->smumgr_funcs->smu_fini != NULL) - hwmgr->smumgr_funcs->smu_fini(hwmgr); - } + hwmgr_sw_fini(hwmgr); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; amdgpu_ucode_fini_bo(adev); + } return 0; } @@ -152,55 +127,76 @@ static int pp_hw_init(void *handle) if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) amdgpu_ucode_init_bo(adev); - ret = pp_check(hwmgr); + ret = hwmgr_hw_init(hwmgr); - if (ret >= 0) { - if (hwmgr->smumgr_funcs->start_smu == NULL) - return -EINVAL; + if (ret) + pr_err("powerplay hw init failed\n"); - if (hwmgr->smumgr_funcs->start_smu(hwmgr)) { - pr_err("smc start failed\n"); - hwmgr->smumgr_funcs->smu_fini(hwmgr); - return -EINVAL; - } - if (ret == PP_DPM_DISABLED) - goto exit; - ret = hwmgr_hw_init(hwmgr); - if (ret) - goto exit; - } return ret; -exit: - hwmgr->pm_en = 0; - cgs_notify_dpm_enabled(hwmgr->device, false); - return 0; - } static int pp_hw_fini(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; - ret = pp_check(hwmgr); - if (ret == 0) - hwmgr_hw_fini(hwmgr); + hwmgr_hw_fini(hwmgr); return 0; } +static void pp_reserve_vram_for_smu(struct amdgpu_device *adev) +{ + int r = -EINVAL; + void *cpu_ptr = NULL; + uint64_t gpu_addr; + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + + if (amdgpu_bo_create_kernel(adev, adev->pm.smu_prv_buffer_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->pm.smu_prv_buffer, + &gpu_addr, + &cpu_ptr)) { + DRM_ERROR("amdgpu: failed to create smu prv buffer\n"); + return; + } + + if (hwmgr->hwmgr_func->notify_cac_buffer_info) + r = hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr, + lower_32_bits((unsigned long)cpu_ptr), + upper_32_bits((unsigned long)cpu_ptr), + lower_32_bits(gpu_addr), + upper_32_bits(gpu_addr), + adev->pm.smu_prv_buffer_size); + + if (r) { + amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL); + adev->pm.smu_prv_buffer = NULL; + DRM_ERROR("amdgpu: failed to notify SMU buffer address\n"); + } +} + static int pp_late_init(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; - - ret = pp_check(hwmgr); + int ret; - if (ret == 0) - pp_dpm_dispatch_tasks(hwmgr, + if (hwmgr && hwmgr->pm_en) { + mutex_lock(&hwmgr->smu_lock); + hwmgr_handle_task(hwmgr, AMD_PP_TASK_COMPLETE_INIT, NULL); + mutex_unlock(&hwmgr->smu_lock); + } + if (adev->pm.smu_prv_buffer_size != 0) + pp_reserve_vram_for_smu(adev); + + if (hwmgr->hwmgr_func->gfx_off_control && + (hwmgr->feature_mask & PP_GFXOFF_MASK)) { + ret = hwmgr->hwmgr_func->gfx_off_control(hwmgr, true); + if (ret) + pr_err("gfx off enabling failed!\n"); + } return 0; } @@ -209,6 +205,8 @@ static void pp_late_fini(void *handle) { struct amdgpu_device *adev = handle; + if (adev->pm.smu_prv_buffer) + amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL); amd_powerplay_destroy(adev); } @@ -233,12 +231,18 @@ static int pp_set_powergating_state(void *handle, { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; + int ret; - ret = pp_check(hwmgr); + if (!hwmgr || !hwmgr->pm_en) + return 0; - if (ret) - return ret; + if 
(hwmgr->hwmgr_func->gfx_off_control) { + /* Enable/disable GFX off through SMU */ + ret = hwmgr->hwmgr_func->gfx_off_control(hwmgr, + state == AMD_PG_STATE_GATE); + if (ret) + pr_err("gfx off control failed!\n"); + } if (hwmgr->hwmgr_func->enable_per_cu_power_gating == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -254,38 +258,16 @@ static int pp_suspend(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; - ret = pp_check(hwmgr); - if (ret == 0) - hwmgr_hw_suspend(hwmgr); - return 0; + return hwmgr_suspend(hwmgr); } static int pp_resume(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret; - - ret = pp_check(hwmgr); - - if (ret < 0) - return ret; - - if (hwmgr->smumgr_funcs->start_smu == NULL) - return -EINVAL; - - if (hwmgr->smumgr_funcs->start_smu(hwmgr)) { - pr_err("smc start failed\n"); - hwmgr->smumgr_funcs->smu_fini(hwmgr); - return -EINVAL; - } - - if (ret == PP_DPM_DISABLED) - return 0; - return hwmgr_hw_resume(hwmgr); + return hwmgr_resume(hwmgr); } static int pp_set_clockgating_state(void *handle, @@ -334,12 +316,9 @@ static int pp_dpm_fw_loading_complete(void *handle) static int pp_set_clockgating_by_smu(void *handle, uint32_t msg_id) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->update_clock_gatings == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -362,10 +341,10 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr, if (*level & profile_mode_mask) { hwmgr->saved_dpm_level = hwmgr->dpm_level; hwmgr->en_umd_pstate = true; - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); } @@ -375,10 +354,10 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr, if (*level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT) *level = hwmgr->saved_dpm_level; hwmgr->en_umd_pstate = false; - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_GATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); } @@ -389,12 +368,9 @@ static int pp_dpm_force_performance_level(void *handle, enum amd_dpm_forced_level level) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (level == hwmgr->dpm_level) return 0; @@ -412,13 +388,10 @@ static enum amd_dpm_forced_level pp_dpm_get_performance_level( void *handle) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; enum amd_dpm_forced_level level; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); level = hwmgr->dpm_level; @@ -429,13 +402,10 @@ static enum amd_dpm_forced_level pp_dpm_get_performance_level( static uint32_t pp_dpm_get_sclk(void *handle, bool low) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; uint32_t clk = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return 0; if (hwmgr->hwmgr_func->get_sclk == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -450,13 
+420,10 @@ static uint32_t pp_dpm_get_sclk(void *handle, bool low) static uint32_t pp_dpm_get_mclk(void *handle, bool low) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; uint32_t clk = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return 0; if (hwmgr->hwmgr_func->get_mclk == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -471,11 +438,8 @@ static uint32_t pp_dpm_get_mclk(void *handle, bool low) static void pp_dpm_powergate_vce(void *handle, bool gate) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - ret = pp_check(hwmgr); - - if (ret) + if (!hwmgr || !hwmgr->pm_en) return; if (hwmgr->hwmgr_func->powergate_vce == NULL) { @@ -490,11 +454,8 @@ static void pp_dpm_powergate_vce(void *handle, bool gate) static void pp_dpm_powergate_uvd(void *handle, bool gate) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) + if (!hwmgr || !hwmgr->pm_en) return; if (hwmgr->hwmgr_func->powergate_uvd == NULL) { @@ -512,10 +473,8 @@ static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id, int ret = 0; struct pp_hwmgr *hwmgr = handle; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); ret = hwmgr_handle_task(hwmgr, task_id, user_state); @@ -528,15 +487,9 @@ static enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle) { struct pp_hwmgr *hwmgr = handle; struct pp_power_state *state; - int ret = 0; enum amd_pm_state_type pm_type; - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (hwmgr->current_ps == NULL) + if (!hwmgr || !hwmgr->pm_en || !hwmgr->current_ps) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -568,11 +521,8 @@ static enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle) static void pp_dpm_set_fan_control_mode(void *handle, uint32_t mode) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) + if (!hwmgr || !hwmgr->pm_en) return; if (hwmgr->hwmgr_func->set_fan_control_mode == NULL) { @@ -587,13 +537,10 @@ static void pp_dpm_set_fan_control_mode(void *handle, uint32_t mode) static uint32_t pp_dpm_get_fan_control_mode(void *handle) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; uint32_t mode = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return 0; if (hwmgr->hwmgr_func->get_fan_control_mode == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -610,10 +557,8 @@ static int pp_dpm_set_fan_speed_percent(void *handle, uint32_t percent) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_fan_speed_percent == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -630,10 +575,8 @@ static int pp_dpm_get_fan_speed_percent(void *handle, uint32_t *speed) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->get_fan_speed_percent == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -651,10 +594,8 @@ static int pp_dpm_get_fan_speed_rpm(void *handle, uint32_t *rpm) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->get_fan_speed_rpm == NULL) return -EINVAL; @@ -670,16 +611,10 @@ static int pp_dpm_get_pp_num_states(void *handle, { struct 
pp_hwmgr *hwmgr = handle; int i; - int ret = 0; memset(data, 0, sizeof(*data)); - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (hwmgr->ps == NULL) + if (!hwmgr || !hwmgr->pm_en ||!hwmgr->ps) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -713,15 +648,9 @@ static int pp_dpm_get_pp_num_states(void *handle, static int pp_dpm_get_pp_table(void *handle, char **table) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; int size = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (!hwmgr->soft_pp_table) + if (!hwmgr || !hwmgr->pm_en ||!hwmgr->soft_pp_table) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -736,10 +665,6 @@ static int amd_powerplay_reset(void *handle) struct pp_hwmgr *hwmgr = handle; int ret; - ret = pp_check(hwmgr); - if (ret) - return ret; - ret = hwmgr_hw_fini(hwmgr); if (ret) return ret; @@ -754,40 +679,38 @@ static int amd_powerplay_reset(void *handle) static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; + int ret = -ENOMEM; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); if (!hwmgr->hardcode_pp_table) { hwmgr->hardcode_pp_table = kmemdup(hwmgr->soft_pp_table, hwmgr->soft_pp_table_size, GFP_KERNEL); - if (!hwmgr->hardcode_pp_table) { - mutex_unlock(&hwmgr->smu_lock); - return -ENOMEM; - } + if (!hwmgr->hardcode_pp_table) + goto err; } memcpy(hwmgr->hardcode_pp_table, buf, size); hwmgr->soft_pp_table = hwmgr->hardcode_pp_table; - mutex_unlock(&hwmgr->smu_lock); ret = amd_powerplay_reset(handle); if (ret) - return ret; + goto err; if (hwmgr->hwmgr_func->avfs_control) { ret = hwmgr->hwmgr_func->avfs_control(hwmgr, false); if (ret) - return ret; + goto err; } - + mutex_unlock(&hwmgr->smu_lock); return 0; +err: + mutex_unlock(&hwmgr->smu_lock); + return ret; } static int pp_dpm_force_clock_level(void *handle, @@ -796,10 +719,8 @@ static int pp_dpm_force_clock_level(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->force_clock_level == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -820,10 +741,8 @@ static int pp_dpm_print_clock_levels(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->print_clock_levels == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -840,10 +759,8 @@ static int pp_dpm_get_sclk_od(void *handle) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->get_sclk_od == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -860,10 +777,8 @@ static int pp_dpm_set_sclk_od(void *handle, uint32_t value) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_sclk_od == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -881,10 +796,8 @@ static int pp_dpm_get_mclk_od(void *handle) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->get_mclk_od == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -901,10 +814,8 @@ static int pp_dpm_set_mclk_od(void 
*handle, uint32_t value) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_mclk_od == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -922,11 +833,7 @@ static int pp_dpm_read_sensor(void *handle, int idx, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (value == NULL) + if (!hwmgr || !hwmgr->pm_en || !value) return -EINVAL; switch (idx) { @@ -948,14 +855,11 @@ static struct amd_vce_state* pp_dpm_get_vce_clock_state(void *handle, unsigned idx) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) + if (!hwmgr || !hwmgr->pm_en) return NULL; - if (hwmgr && idx < hwmgr->num_vce_state_tables) + if (idx < hwmgr->num_vce_state_tables) return &hwmgr->vce_states[idx]; return NULL; } @@ -964,7 +868,7 @@ static int pp_get_power_profile_mode(void *handle, char *buf) { struct pp_hwmgr *hwmgr = handle; - if (!buf || pp_check(hwmgr)) + if (!hwmgr || !hwmgr->pm_en || !buf) return -EINVAL; if (hwmgr->hwmgr_func->get_power_profile_mode == NULL) { @@ -980,12 +884,12 @@ static int pp_set_power_profile_mode(void *handle, long *input, uint32_t size) struct pp_hwmgr *hwmgr = handle; int ret = -EINVAL; - if (pp_check(hwmgr)) - return -EINVAL; + if (!hwmgr || !hwmgr->pm_en) + return ret; if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) { pr_info("%s was not implemented.\n", __func__); - return -EINVAL; + return ret; } mutex_lock(&hwmgr->smu_lock); if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) @@ -998,7 +902,7 @@ static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint3 { struct pp_hwmgr *hwmgr = handle; - if (pp_check(hwmgr)) + if (!hwmgr || !hwmgr->pm_en) return -EINVAL; if (hwmgr->hwmgr_func->odn_edit_dpm_table == NULL) { @@ -1016,7 +920,7 @@ static int pp_dpm_switch_power_profile(void *handle, long workload; uint32_t index; - if (pp_check(hwmgr)) + if (!hwmgr || !hwmgr->pm_en) return -EINVAL; if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) { @@ -1048,46 +952,12 @@ static int pp_dpm_switch_power_profile(void *handle, return 0; } -static int pp_dpm_notify_smu_memory_info(void *handle, - uint32_t virtual_addr_low, - uint32_t virtual_addr_hi, - uint32_t mc_addr_low, - uint32_t mc_addr_hi, - uint32_t size) -{ - struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (hwmgr->hwmgr_func->notify_cac_buffer_info == NULL) { - pr_info("%s was not implemented.\n", __func__); - return -EINVAL; - } - - mutex_lock(&hwmgr->smu_lock); - - ret = hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr, virtual_addr_low, - virtual_addr_hi, mc_addr_low, mc_addr_hi, - size); - - mutex_unlock(&hwmgr->smu_lock); - - return ret; -} - static int pp_set_power_limit(void *handle, uint32_t limit) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_power_limit == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -1104,20 +974,14 @@ static int pp_set_power_limit(void *handle, uint32_t limit) hwmgr->hwmgr_func->set_power_limit(hwmgr, limit); hwmgr->power_limit = limit; mutex_unlock(&hwmgr->smu_lock); - return ret; + return 0; } static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) - return 
ret; - - if (limit == NULL) + if (!hwmgr || !hwmgr->pm_en ||!limit) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1129,19 +993,16 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit) mutex_unlock(&hwmgr->smu_lock); - return ret; + return 0; } static int pp_display_configuration_change(void *handle, const struct amd_pp_display_configuration *display_config) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); phm_store_dal_configuration_data(hwmgr, display_config); @@ -1155,12 +1016,7 @@ static int pp_get_display_power_level(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (output == NULL) + if (!hwmgr || !hwmgr->pm_en ||!output) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1177,10 +1033,8 @@ static int pp_get_current_clocks(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1225,10 +1079,8 @@ static int pp_get_clock_by_type(void *handle, enum amd_pp_clock_type type, struc struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (clocks == NULL) return -EINVAL; @@ -1246,11 +1098,7 @@ static int pp_get_clock_by_type_with_latency(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (!clocks) + if (!hwmgr || !hwmgr->pm_en ||!clocks) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1266,11 +1114,7 @@ static int pp_get_clock_by_type_with_voltage(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (!clocks) + if (!hwmgr || !hwmgr->pm_en ||!clocks) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1287,11 +1131,7 @@ static int pp_set_watermarks_for_clocks_ranges(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (!wm_with_clock_ranges) + if (!hwmgr || !hwmgr->pm_en ||!wm_with_clock_ranges) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1308,11 +1148,7 @@ static int pp_display_clock_voltage_request(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (!clock) + if (!hwmgr || !hwmgr->pm_en ||!clock) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1328,12 +1164,7 @@ static int pp_get_display_mode_validation_clocks(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (clocks == NULL) + if (!hwmgr || !hwmgr->pm_en ||!clocks) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1348,12 +1179,9 @@ static int pp_get_display_mode_validation_clocks(void *handle, static int pp_set_mmhub_powergating_by_smu(void *handle) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_mmhub_powergating_by_smu == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -1390,7 +1218,6 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .get_vce_clock_state = pp_dpm_get_vce_clock_state, .switch_power_profile = pp_dpm_switch_power_profile, .set_clockgating_by_smu = pp_set_clockgating_by_smu, - 
.notify_smu_memory_info = pp_dpm_notify_smu_memory_info, .get_power_profile_mode = pp_get_power_profile_mode, .set_power_profile_mode = pp_set_power_profile_mode, .odn_edit_dpm_table = pp_odn_edit_dpm_table, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index ae2e9339dd6b..e411012b3dcb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -75,8 +75,7 @@ int phm_set_power_state(struct pp_hwmgr *hwmgr, int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr) { - int ret = 1; - bool enabled; + int ret = -EINVAL; PHM_FUNC_CHECK(hwmgr); if (smum_is_dpm_running(hwmgr)) { @@ -87,17 +86,12 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr) if (NULL != hwmgr->hwmgr_func->dynamic_state_management_enable) ret = hwmgr->hwmgr_func->dynamic_state_management_enable(hwmgr); - enabled = ret == 0; - - cgs_notify_dpm_enabled(hwmgr->device, enabled); - return ret; } int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr) { - int ret = -1; - bool enabled; + int ret = -EINVAL; PHM_FUNC_CHECK(hwmgr); @@ -109,10 +103,6 @@ int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr) if (hwmgr->hwmgr_func->dynamic_state_management_disable) ret = hwmgr->hwmgr_func->dynamic_state_management_disable(hwmgr); - enabled = ret == 0 ? false : true; - - cgs_notify_dpm_enabled(hwmgr->device, enabled); - return ret; } @@ -275,13 +265,11 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, if (display_config == NULL) return -EINVAL; - hwmgr->display_config = *display_config; - if (NULL != hwmgr->hwmgr_func->set_deep_sleep_dcefclk) - hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, hwmgr->display_config.min_dcef_deep_sleep_set_clk); + hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk); - for (index = 0; index < hwmgr->display_config.num_path_including_non_display; index++) { - if (hwmgr->display_config.displays[index].controller_id != 0) + for (index = 0; index < display_config->num_path_including_non_display; index++) { + if (display_config->displays[index].controller_id != 0) number_of_active_display++; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 42982055b161..71b42331f185 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -40,6 +40,7 @@ extern const struct pp_smumgr_func iceland_smu_funcs; extern const struct pp_smumgr_func tonga_smu_funcs; extern const struct pp_smumgr_func fiji_smu_funcs; extern const struct pp_smumgr_func polaris10_smu_funcs; +extern const struct pp_smumgr_func vegam_smu_funcs; extern const struct pp_smumgr_func vega10_smu_funcs; extern const struct pp_smumgr_func vega12_smu_funcs; extern const struct pp_smumgr_func smu10_smu_funcs; @@ -76,7 +77,7 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr) int hwmgr_early_init(struct pp_hwmgr *hwmgr) { - if (hwmgr == NULL) + if (!hwmgr) return -EINVAL; hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT; @@ -95,7 +96,8 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) hwmgr->smumgr_funcs = &ci_smu_funcs; ci_set_asic_special_caps(hwmgr); hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK | - PP_ENABLE_GFX_CG_THRU_SMU); + PP_ENABLE_GFX_CG_THRU_SMU | + PP_GFXOFF_MASK); hwmgr->pp_table_version = PP_TABLE_V0; hwmgr->od_enabled = false; smu7_init_function_pointers(hwmgr); @@ -103,9 +105,11 @@ int
hwmgr_early_init(struct pp_hwmgr *hwmgr) case AMDGPU_FAMILY_CZ: hwmgr->od_enabled = false; hwmgr->smumgr_funcs = &smu8_smu_funcs; + hwmgr->feature_mask &= ~PP_GFXOFF_MASK; smu8_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_VI: + hwmgr->feature_mask &= ~PP_GFXOFF_MASK; switch (hwmgr->chip_id) { case CHIP_TOPAZ: hwmgr->smumgr_funcs = &iceland_smu_funcs; @@ -133,12 +137,18 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) polaris_set_asic_special_caps(hwmgr); hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK); break; + case CHIP_VEGAM: + hwmgr->smumgr_funcs = &vegam_smu_funcs; + polaris_set_asic_special_caps(hwmgr); + hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK); + break; default: return -EINVAL; } smu7_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_AI: + hwmgr->feature_mask &= ~PP_GFXOFF_MASK; switch (hwmgr->chip_id) { case CHIP_VEGA10: hwmgr->smumgr_funcs = &vega10_smu_funcs; @@ -170,22 +180,58 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) return 0; } +int hwmgr_sw_init(struct pp_hwmgr *hwmgr) +{ + if (!hwmgr|| !hwmgr->smumgr_funcs || !hwmgr->smumgr_funcs->smu_init) + return -EINVAL; + + phm_register_irq_handlers(hwmgr); + + return hwmgr->smumgr_funcs->smu_init(hwmgr); +} + + +int hwmgr_sw_fini(struct pp_hwmgr *hwmgr) +{ + if (hwmgr && hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->smu_fini) + hwmgr->smumgr_funcs->smu_fini(hwmgr); + + return 0; +} + int hwmgr_hw_init(struct pp_hwmgr *hwmgr) { int ret = 0; - if (hwmgr == NULL) + if (!hwmgr || !hwmgr->smumgr_funcs) return -EINVAL; - if (hwmgr->pptable_func == NULL || - hwmgr->pptable_func->pptable_init == NULL || - hwmgr->hwmgr_func->backend_init == NULL) - return -EINVAL; + if (hwmgr->smumgr_funcs->start_smu) { + ret = hwmgr->smumgr_funcs->start_smu(hwmgr); + if (ret) { + pr_err("smc start failed\n"); + return -EINVAL; + } + } + + if (!hwmgr->pm_en) + return 0; + + if (!hwmgr->pptable_func || + !hwmgr->pptable_func->pptable_init || + !hwmgr->hwmgr_func->backend_init) { + hwmgr->pm_en = false; + pr_info("dpm not supported \n"); + return 0; + } ret = hwmgr->pptable_func->pptable_init(hwmgr); if (ret) goto err; + ((struct amdgpu_device *)hwmgr->adev)->pm.no_fan = + hwmgr->thermal_controller.fanInfo.bNoFan; + ret = hwmgr->hwmgr_func->backend_init(hwmgr); if (ret) goto err1; @@ -206,6 +252,8 @@ int hwmgr_hw_init(struct pp_hwmgr *hwmgr) if (ret) goto err2; + ((struct amdgpu_device *)hwmgr->adev)->pm.dpm_enabled = true; + return 0; err2: if (hwmgr->hwmgr_func->backend_fini) @@ -214,14 +262,13 @@ err1: if (hwmgr->pptable_func->pptable_fini) hwmgr->pptable_func->pptable_fini(hwmgr); err: - pr_err("amdgpu: powerplay initialization failed\n"); return ret; } int hwmgr_hw_fini(struct pp_hwmgr *hwmgr) { - if (hwmgr == NULL) - return -EINVAL; + if (!hwmgr || !hwmgr->pm_en) + return 0; phm_stop_thermal_controller(hwmgr); psm_set_boot_states(hwmgr); @@ -236,12 +283,12 @@ int hwmgr_hw_fini(struct pp_hwmgr *hwmgr) return psm_fini_power_state_table(hwmgr); } -int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr) +int hwmgr_suspend(struct pp_hwmgr *hwmgr) { int ret = 0; - if (hwmgr == NULL) - return -EINVAL; + if (!hwmgr || !hwmgr->pm_en) + return 0; phm_disable_smc_firmware_ctf(hwmgr); ret = psm_set_boot_states(hwmgr); @@ -255,13 +302,23 @@ int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr) return ret; } -int hwmgr_hw_resume(struct pp_hwmgr *hwmgr) +int hwmgr_resume(struct pp_hwmgr *hwmgr) { int ret = 0; - if (hwmgr == NULL) + if (!hwmgr) return -EINVAL; + if (hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->start_smu) { + if 
(hwmgr->smumgr_funcs->start_smu(hwmgr)) { + pr_err("smc start failed\n"); + return -EINVAL; + } + } + + if (!hwmgr->pm_en) + return 0; + ret = phm_setup_asic(hwmgr); if (ret) return ret; @@ -270,9 +327,6 @@ int hwmgr_hw_resume(struct pp_hwmgr *hwmgr) if (ret) return ret; ret = phm_start_thermal_controller(hwmgr); - if (ret) - return ret; - ret |= psm_set_performance_states(hwmgr); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c index 0f2851b5b368..308bff2b5d1d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c @@ -46,7 +46,7 @@ int psm_init_power_state_table(struct pp_hwmgr *hwmgr) sizeof(struct pp_power_state); if (table_entries == 0 || size == 0) { - pr_warn("Please check whether power state management is suppported on this asic\n"); + pr_warn("Please check whether power state management is supported on this asic\n"); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c index c6febbf0bf69..cf99c5eaf080 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c @@ -23,7 +23,8 @@ #include "pp_debug.h" #include <linux/module.h> #include <linux/slab.h> - +#include <linux/delay.h> +#include "atom.h" #include "ppatomctrl.h" #include "atombios.h" #include "cgs_common.h" @@ -128,7 +129,6 @@ static int atomctrl_set_mc_reg_address_table( return 0; } - int atomctrl_initialize_mc_reg_table( struct pp_hwmgr *hwmgr, uint8_t module_index, @@ -141,7 +141,7 @@ int atomctrl_initialize_mc_reg_table( u16 size; vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); if (module_index >= vram_info->ucNumOfVRAMModule) { @@ -174,6 +174,8 @@ int atomctrl_set_engine_dram_timings_rv770( uint32_t engine_clock, uint32_t memory_clock) { + struct amdgpu_device *adev = hwmgr->adev; + SET_ENGINE_CLOCK_PS_ALLOCATION engine_clock_parameters; /* They are both in 10KHz Units. */ @@ -184,9 +186,10 @@ int atomctrl_set_engine_dram_timings_rv770( /* in 10 khz units.*/ engine_clock_parameters.sReserved.ulClock = cpu_to_le32(memory_clock & SET_CLOCK_FREQ_MASK); - return cgs_atom_exec_cmd_table(hwmgr->device, + + return amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings), - &engine_clock_parameters); + (uint32_t *)&engine_clock_parameters); } /** @@ -203,7 +206,7 @@ static ATOM_VOLTAGE_OBJECT_INFO *get_voltage_info_table(void *device) union voltage_object_info *voltage_info; voltage_info = (union voltage_object_info *) - cgs_atom_get_data_table(device, index, + smu_atom_get_data_table(device, index, &size, &frev, &crev); if (voltage_info != NULL) @@ -247,16 +250,16 @@ int atomctrl_get_memory_pll_dividers_si( pp_atomctrl_memory_clock_param *mpll_param, bool strobe_mode) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 mpll_parameters; int result; mpll_parameters.ulClock = cpu_to_le32(clock_value); mpll_parameters.ucInputFlag = (uint8_t)((strobe_mode) ? 
1 : 0); - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam), - &mpll_parameters); + (uint32_t *)&mpll_parameters); if (0 == result) { mpll_param->mpll_fb_divider.clk_frac = @@ -295,14 +298,15 @@ int atomctrl_get_memory_pll_dividers_si( int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr, uint32_t clock_value, pp_atomctrl_memory_clock_param *mpll_param) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2 mpll_parameters; int result; mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value); - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam), - &mpll_parameters); + (uint32_t *)&mpll_parameters); if (!result) mpll_param->mpll_post_divider = @@ -311,19 +315,49 @@ int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr, return result; } +int atomctrl_get_memory_pll_dividers_ai(struct pp_hwmgr *hwmgr, + uint32_t clock_value, + pp_atomctrl_memory_clock_param_ai *mpll_param) +{ + struct amdgpu_device *adev = hwmgr->adev; + COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3 mpll_parameters = {0}; + int result; + + mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value); + + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, + GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam), + (uint32_t *)&mpll_parameters); + + /* VEGAM's mpll takes some time to finish computing */ + udelay(10); + + if (!result) { + mpll_param->ulMclk_fcw_int = + le16_to_cpu(mpll_parameters.usMclk_fcw_int); + mpll_param->ulMclk_fcw_frac = + le16_to_cpu(mpll_parameters.usMclk_fcw_frac); + mpll_param->ulClock = + le32_to_cpu(mpll_parameters.ulClock.ulClock); + mpll_param->ulPostDiv = mpll_parameters.ulClock.ucPostDiv; + } + + return result; +} + int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr, uint32_t clock_value, pp_atomctrl_clock_dividers_kong *dividers) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 pll_parameters; int result; pll_parameters.ulClock = cpu_to_le32(clock_value); - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL), - &pll_parameters); + (uint32_t *)&pll_parameters); if (0 == result) { dividers->pll_post_divider = pll_parameters.ucPostDiv; @@ -338,16 +372,16 @@ int atomctrl_get_engine_pll_dividers_vi( uint32_t clock_value, pp_atomctrl_clock_dividers_vi *dividers) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters; int result; pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value); pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK; - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL), - &pll_patameters); + (uint32_t *)&pll_patameters); if (0 == result) { dividers->pll_post_divider = @@ -375,16 +409,16 @@ int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr, uint32_t clock_value, pp_atomctrl_clock_dividers_ai *dividers) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7 pll_patameters; int result; pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value); pll_patameters.ulClock.ucPostDiv =
COMPUTE_GPUCLK_INPUT_FLAG_SCLK; - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL), - &pll_patameters); + (uint32_t *)&pll_patameters); if (0 == result) { dividers->usSclk_fcw_frac = le16_to_cpu(pll_patameters.usSclk_fcw_frac); @@ -407,6 +441,7 @@ int atomctrl_get_dfs_pll_dividers_vi( uint32_t clock_value, pp_atomctrl_clock_dividers_vi *dividers) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters; int result; @@ -414,10 +449,9 @@ int atomctrl_get_dfs_pll_dividers_vi( pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK; - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL), - &pll_patameters); + (uint32_t *)&pll_patameters); if (0 == result) { dividers->pll_post_divider = @@ -452,7 +486,7 @@ uint32_t atomctrl_get_reference_clock(struct pp_hwmgr *hwmgr) uint32_t clock; fw_info = (ATOM_FIRMWARE_INFO *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); @@ -476,7 +510,7 @@ bool atomctrl_is_voltage_controlled_by_gpio_v3( uint8_t voltage_mode) { ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info = - (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device); + (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev); bool ret; PP_ASSERT_WITH_CODE((NULL != voltage_info), @@ -495,7 +529,7 @@ int atomctrl_get_voltage_table_v3( pp_atomctrl_voltage_table *voltage_table) { ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info = - (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device); + (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev); const ATOM_VOLTAGE_OBJECT_V3 *voltage_object; unsigned int i; @@ -572,7 +606,7 @@ static ATOM_GPIO_PIN_LUT *get_gpio_lookup_table(void *device) void *table_address; table_address = (ATOM_GPIO_PIN_LUT *) - cgs_atom_get_data_table(device, + smu_atom_get_data_table(device, GetIndexIntoMasterTable(DATA, GPIO_Pin_LUT), &size, &frev, &crev); @@ -592,7 +626,7 @@ bool atomctrl_get_pp_assign_pin( { bool bRet = false; ATOM_GPIO_PIN_LUT *gpio_lookup_table = - get_gpio_lookup_table(hwmgr->device); + get_gpio_lookup_table(hwmgr->adev); PP_ASSERT_WITH_CODE((NULL != gpio_lookup_table), "Could not find GPIO lookup Table in BIOS.", return false); @@ -613,7 +647,7 @@ int atomctrl_calculate_voltage_evv_on_sclk( bool debug) { ATOM_ASIC_PROFILING_INFO_V3_4 *getASICProfilingInfo; - + struct amdgpu_device *adev = hwmgr->adev; EFUSE_LINEAR_FUNC_PARAM sRO_fuse; EFUSE_LINEAR_FUNC_PARAM sCACm_fuse; EFUSE_LINEAR_FUNC_PARAM sCACb_fuse; @@ -640,7 +674,7 @@ int atomctrl_calculate_voltage_evv_on_sclk( int result; getASICProfilingInfo = (ATOM_ASIC_PROFILING_INFO_V3_4 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo), NULL, NULL, NULL); @@ -706,9 +740,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -727,9 +761,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( 
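/*
 * Each ReadEfuseValue call in atomctrl_calculate_voltage_evv_on_sclk()
 * follows one shape, the same one atomctrl_read_efuse() uses later in this
 * patch: fill in fuse index, bit shift and bit length, execute the command
 * table, then mask the returned value. A condensed sketch; read_one_fuse and
 * its parameters are illustrative names, not part of the patch.
 */
static int read_one_fuse(struct pp_hwmgr *hwmgr, uint16_t index,
			 uint8_t shift, uint8_t length, uint32_t mask,
			 uint32_t *out)
{
	struct amdgpu_device *adev = hwmgr->adev;
	READ_EFUSE_VALUE_PARAMETER efuse_param;
	int result;

	efuse_param.sEfuse.usEfuseIndex = cpu_to_le16(index);
	efuse_param.sEfuse.ucBitShift = shift;
	efuse_param.sEfuse.ucBitLength = length;

	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
			(uint32_t *)&efuse_param);
	if (!result)
		*out = le32_to_cpu(efuse_param.ulEfuseValue) & mask;

	return result;
}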
sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -747,9 +781,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sInput_FuseValues.ucBitLength = sCACb_fuse.ucEfuseLength; sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -768,9 +802,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -790,9 +824,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -811,9 +845,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sInput_FuseValues.ucBitLength = sKv_b_fuse.ucEfuseLength; sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -842,9 +876,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -1053,8 +1087,9 @@ int atomctrl_get_voltage_evv_on_sclk( uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage) { - int result; + struct amdgpu_device *adev = hwmgr->adev; GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space; + int result; get_voltage_info_param_space.ucVoltageType = voltage_type; @@ -1065,9 +1100,9 @@ int atomctrl_get_voltage_evv_on_sclk( get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk); - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, GetVoltageInfo), - &get_voltage_info_param_space); + (uint32_t *)&get_voltage_info_param_space); if (0 != result) return result; @@ -1088,9 +1123,10 @@ int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr, uint16_t virtual_voltage_id, uint16_t *voltage) { + struct amdgpu_device *adev = hwmgr->adev; + GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space; int result; int entry_id; - GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space; /* search for leakage voltage ID 0xff01 ~ 0xff08 and sckl */ for (entry_id = 0; entry_id < hwmgr->dyn_state.vddc_dependency_on_sclk->count; entry_id++) { @@ -1111,9 +1147,9 @@ int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr, 
get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[entry_id].clk); - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, GetVoltageInfo), - &get_voltage_info_param_space); + (uint32_t *)&get_voltage_info_param_space); if (0 != result) return result; @@ -1135,7 +1171,7 @@ uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr) u16 size; fw_info = (ATOM_COMMON_TABLE_HEADER *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); @@ -1167,7 +1203,7 @@ static ATOM_ASIC_INTERNAL_SS_INFO *asic_internal_ss_get_ss_table(void *device) u16 size; table = (ATOM_ASIC_INTERNAL_SS_INFO *) - cgs_atom_get_data_table(device, + smu_atom_get_data_table(device, GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info), &size, &frev, &crev); @@ -1188,7 +1224,7 @@ static int asic_internal_ss_get_ss_asignment(struct pp_hwmgr *hwmgr, memset(ssEntry, 0x00, sizeof(pp_atomctrl_internal_ss_info)); - table = asic_internal_ss_get_ss_table(hwmgr->device); + table = asic_internal_ss_get_ss_table(hwmgr->adev); if (NULL == table) return -1; @@ -1260,9 +1296,10 @@ int atomctrl_get_engine_clock_spread_spectrum( ASIC_INTERNAL_ENGINE_SS, engine_clock, ssInfo); } -int atomctrl_read_efuse(void *device, uint16_t start_index, +int atomctrl_read_efuse(struct pp_hwmgr *hwmgr, uint16_t start_index, uint16_t end_index, uint32_t mask, uint32_t *efuse) { + struct amdgpu_device *adev = hwmgr->adev; int result; READ_EFUSE_VALUE_PARAMETER efuse_param; @@ -1272,9 +1309,9 @@ int atomctrl_read_efuse(void *device, uint16_t start_index, efuse_param.sEfuse.ucBitLength = (uint8_t) ((end_index - start_index) + 1); - result = cgs_atom_exec_cmd_table(device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &efuse_param); + (uint32_t *)&efuse_param); if (!result) *efuse = le32_to_cpu(efuse_param.ulEfuseValue) & mask; @@ -1284,6 +1321,7 @@ int atomctrl_read_efuse(void *device, uint16_t start_index, int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, uint8_t level) { + struct amdgpu_device *adev = hwmgr->adev; DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1 memory_clock_parameters; int result; @@ -1293,10 +1331,9 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, ADJUST_MC_SETTING_PARAM; memory_clock_parameters.asDPMMCReg.ucMclkDPMState = level; - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings), - &memory_clock_parameters); + (uint32_t *)&memory_clock_parameters); return result; } @@ -1304,7 +1341,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage) { - + struct amdgpu_device *adev = hwmgr->adev; int result; GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3 get_voltage_info_param_space; @@ -1313,9 +1350,9 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_ get_voltage_info_param_space.usVoltageLevel = cpu_to_le16(virtual_voltage_Id); get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk); - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = 
@@ -1284,6 +1321,7 @@ int atomctrl_read_efuse(void *device, uint16_t start_index,
 int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
			      uint8_t level)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
	DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1 memory_clock_parameters;
	int result;

@@ -1293,10 +1331,9 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
		ADJUST_MC_SETTING_PARAM;
	memory_clock_parameters.asDPMMCReg.ucMclkDPMState = level;

-	result = cgs_atom_exec_cmd_table
-		(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
		GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings),
-		&memory_clock_parameters);
+		(uint32_t *)&memory_clock_parameters);

	return result;
 }
@@ -1304,7 +1341,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
 int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
				uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage)
 {
-
+	struct amdgpu_device *adev = hwmgr->adev;
	int result;
	GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3 get_voltage_info_param_space;

@@ -1313,9 +1350,9 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_
	get_voltage_info_param_space.usVoltageLevel = cpu_to_le16(virtual_voltage_Id);
	get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk);

-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
-			&get_voltage_info_param_space);
+			(uint32_t *)&get_voltage_info_param_space);

	if (0 != result)
		return result;
@@ -1334,7 +1371,7 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr
	u16 size;

	ATOM_SMU_INFO_V2_1 *psmu_info =
-		(ATOM_SMU_INFO_V2_1 *)cgs_atom_get_data_table(hwmgr->device,
+		(ATOM_SMU_INFO_V2_1 *)smu_atom_get_data_table(hwmgr->adev,
			GetIndexIntoMasterTable(DATA, SMU_Info),
			&size, &frev, &crev);

@@ -1362,7 +1399,7 @@ int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
		return -EINVAL;

	profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *)
-			cgs_atom_get_data_table(hwmgr->device,
+			smu_atom_get_data_table(hwmgr->adev,
					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
					NULL, NULL, NULL);
	if (!profile)
@@ -1402,7 +1439,7 @@ int atomctrl_get_svi2_info(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
		uint16_t *load_line)
 {
	ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info =
-		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device);
+		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev);

	const ATOM_VOLTAGE_OBJECT_V3 *voltage_object;

@@ -1421,16 +1458,17 @@ int atomctrl_get_svi2_info(struct pp_hwmgr *hwmgr, uint8_t voltage_type,

 int atomctrl_get_leakage_id_from_efuse(struct pp_hwmgr *hwmgr, uint16_t *virtual_voltage_id)
 {
-	int result;
+	struct amdgpu_device *adev = hwmgr->adev;
	SET_VOLTAGE_PS_ALLOCATION allocation;
	SET_VOLTAGE_PARAMETERS_V1_3 *voltage_parameters =
			(SET_VOLTAGE_PARAMETERS_V1_3 *)&allocation.sASICSetVoltage;
+	int result;

	voltage_parameters->ucVoltageMode = ATOM_GET_LEAKAGE_ID;

-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
			GetIndexIntoMasterTable(COMMAND, SetVoltage),
-			voltage_parameters);
+			(uint32_t *)voltage_parameters);

	*virtual_voltage_id = voltage_parameters->usVoltageLevel;

@@ -1453,7 +1491,7 @@ int atomctrl_get_leakage_vddc_base_on_leakage(struct pp_hwmgr *hwmgr,
	ix = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo);

	profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
-			cgs_atom_get_data_table(hwmgr->device,
+			smu_atom_get_data_table(hwmgr->adev,
					ix,
					NULL, NULL, NULL);
	if (!profile)
@@ -1498,3 +1536,31 @@

	return 0;
 }
+
+void atomctrl_get_voltage_range(struct pp_hwmgr *hwmgr, uint32_t *max_vddc,
+			uint32_t *min_vddc)
+{
+	void *profile;
+
+	profile = smu_atom_get_data_table(hwmgr->adev,
+					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
+					NULL, NULL, NULL);
+
+	if (profile) {
+		switch (hwmgr->chip_id) {
+		case CHIP_TONGA:
+		case CHIP_FIJI:
+			*max_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_3 *)profile)->ulMaxVddc/4);
+			*min_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_3 *)profile)->ulMinVddc/4);
+			break;
+		case CHIP_POLARIS11:
+		case CHIP_POLARIS10:
+		case CHIP_POLARIS12:
+			*max_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_6 *)profile)->ulMaxVddc/100);
+			*min_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_6 *)profile)->ulMinVddc/100);
+			break;
+		default:
+			return;
+		}
+	}
+}
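atomctrl_get_voltage_range() above casts one profiling-table pointer to a per-ASIC versioned layout and applies a per-version scale factor. A sketch of that dispatch in plain C; the struct layouts, field names, and the millivolt units are stand-in assumptions, not the real ATOM_ASIC_PROFILING_INFO_V3_3/V3_6 definitions:

#include <stdint.h>
#include <stdio.h>

struct profile_v3_3 { uint32_t max_vddc_q4; };    /* Tonga/Fiji: assumed 0.25 mV units */
struct profile_v3_6 { uint32_t max_vddc_q100; };  /* Polaris: assumed 0.01 mV units */

static uint32_t max_vddc_mv(const void *profile, int is_polaris)
{
	if (is_polaris)
		return ((const struct profile_v3_6 *)profile)->max_vddc_q100 / 100;
	return ((const struct profile_v3_3 *)profile)->max_vddc_q4 / 4;
}

int main(void)
{
	struct profile_v3_6 p = { 115000 };	/* encodes 1150 mV under the assumption above */
	printf("max vddc = %u mV\n", max_vddc_mv(&p, 1));
	return 0;
}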
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index c44a92064cf1..3ee54f182943 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -146,6 +146,14 @@ struct pp_atomctrl_memory_clock_param {
 };
 typedef struct pp_atomctrl_memory_clock_param pp_atomctrl_memory_clock_param;

+struct pp_atomctrl_memory_clock_param_ai {
+	uint32_t ulClock;
+	uint32_t ulPostDiv;
+	uint16_t ulMclk_fcw_frac;
+	uint16_t ulMclk_fcw_int;
+};
+typedef struct pp_atomctrl_memory_clock_param_ai pp_atomctrl_memory_clock_param_ai;
+
 struct pp_atomctrl_internal_ss_info {
	uint32_t speed_spectrum_percentage;			/* in 1/100 percentage */
	uint32_t speed_spectrum_rate;				/* in KHz */
@@ -295,10 +303,12 @@ extern bool atomctrl_is_voltage_controlled_by_gpio_v3(struct pp_hwmgr *hwmgr, ui
 extern int atomctrl_get_voltage_table_v3(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint8_t voltage_mode, pp_atomctrl_voltage_table *voltage_table);
 extern int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr,
		uint32_t clock_value, pp_atomctrl_memory_clock_param *mpll_param);
+extern int atomctrl_get_memory_pll_dividers_ai(struct pp_hwmgr *hwmgr,
+		uint32_t clock_value, pp_atomctrl_memory_clock_param_ai *mpll_param);
 extern int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
		uint32_t clock_value,
		pp_atomctrl_clock_dividers_kong *dividers);
-extern int atomctrl_read_efuse(void *device, uint16_t start_index,
+extern int atomctrl_read_efuse(struct pp_hwmgr *hwmgr, uint16_t start_index,
		uint16_t end_index, uint32_t mask, uint32_t *efuse);
 extern int atomctrl_calculate_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
		uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage, uint16_t dpm_level, bool debug);
@@ -320,5 +330,8 @@ extern int atomctrl_get_leakage_vddc_base_on_leakage(struct pp_hwmgr *hwmgr,
					uint16_t virtual_voltage_id,
					uint16_t efuse_voltage_id);
 extern int atomctrl_get_leakage_id_from_efuse(struct pp_hwmgr *hwmgr, uint16_t *virtual_voltage_id);
+
+extern void atomctrl_get_voltage_range(struct pp_hwmgr *hwmgr, uint32_t *max_vddc,
+			uint32_t *min_vddc);
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index ad42caac033e..c97b0e5ba43b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -23,9 +23,9 @@

 #include "ppatomfwctrl.h"
 #include "atomfirmware.h"
+#include "atom.h"
 #include "pp_debug.h"

-
 static const union atom_voltage_object_v4 *pp_atomfwctrl_lookup_voltage_type_v4(
		const struct atom_voltage_objects_info_v4_1 *voltage_object_info_table,
		uint8_t voltage_type, uint8_t voltage_mode)
@@ -38,35 +38,34 @@ static const union atom_voltage_object_v4 *pp_atomfwctrl_lookup_voltage_type_v4(

	while (offset < size) {
		const union atom_voltage_object_v4 *voltage_object =
-			(const union atom_voltage_object_v4 *)(start + offset);
+			(const union atom_voltage_object_v4 *)(start + offset);

-		if (voltage_type == voltage_object->gpio_voltage_obj.header.voltage_type &&
-		    voltage_mode == voltage_object->gpio_voltage_obj.header.voltage_mode)
-			return voltage_object;
+		if (voltage_type == voltage_object->gpio_voltage_obj.header.voltage_type &&
+		    voltage_mode == voltage_object->gpio_voltage_obj.header.voltage_mode)
+			return voltage_object;

-		offset += le16_to_cpu(voltage_object->gpio_voltage_obj.header.object_size);
+		offset += le16_to_cpu(voltage_object->gpio_voltage_obj.header.object_size);

-	}
+	}

-	return NULL;
+	return NULL;
 }

 static struct atom_voltage_objects_info_v4_1 *pp_atomfwctrl_get_voltage_info_table(
		struct pp_hwmgr *hwmgr)
 {
-	const void *table_address;
-	uint16_t idx;
+	const void *table_address;
+	uint16_t idx;

-	idx = GetIndexIntoMasterDataTable(voltageobject_info);
-	table_address =	cgs_atom_get_data_table(hwmgr->device,
-			idx, NULL, NULL, NULL);
+	idx = GetIndexIntoMasterDataTable(voltageobject_info);
+	table_address = smu_atom_get_data_table(hwmgr->adev,
+			idx, NULL, NULL, NULL);

-	PP_ASSERT_WITH_CODE(
-		table_address,
-		"Error retrieving BIOS Table Address!",
-		return NULL);
+	PP_ASSERT_WITH_CODE(table_address,
+			"Error retrieving BIOS Table Address!",
+			return NULL);

-	return (struct atom_voltage_objects_info_v4_1 *)table_address;
+	return (struct atom_voltage_objects_info_v4_1 *)table_address;
 }

 /**
@@ -167,7 +166,7 @@ static struct atom_gpio_pin_lut_v2_1 *pp_atomfwctrl_get_gpio_lookup_table(
	uint16_t idx;

	idx = GetIndexIntoMasterDataTable(gpio_pin_lut);
-	table_address = cgs_atom_get_data_table(hwmgr->device,
+	table_address = smu_atom_get_data_table(hwmgr->adev,
			idx, NULL, NULL, NULL);
	PP_ASSERT_WITH_CODE(table_address,
			"Error retrieving BIOS Table Address!",
@@ -248,28 +247,30 @@ int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr,
		uint32_t clock_type, uint32_t clock_value,
		struct pp_atomfwctrl_clock_dividers_soc15 *dividers)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
	struct compute_gpu_clock_input_parameter_v1_8 pll_parameters;
	struct compute_gpu_clock_output_parameter_v1_8 *pll_output;
-	int result;
	uint32_t idx;

	pll_parameters.gpuclock_10khz = (uint32_t)clock_value;
	pll_parameters.gpu_clock_type = clock_type;

	idx = GetIndexIntoMasterCmdTable(computegpuclockparam);
-	result = cgs_atom_exec_cmd_table(hwmgr->device, idx, &pll_parameters);
-
-	if (!result) {
-		pll_output = (struct compute_gpu_clock_output_parameter_v1_8 *)
-				&pll_parameters;
-		dividers->ulClock = le32_to_cpu(pll_output->gpuclock_10khz);
-		dividers->ulDid = le32_to_cpu(pll_output->dfs_did);
-		dividers->ulPll_fb_mult = le32_to_cpu(pll_output->pll_fb_mult);
-		dividers->ulPll_ss_fbsmult = le32_to_cpu(pll_output->pll_ss_fbsmult);
-		dividers->usPll_ss_slew_frac = le16_to_cpu(pll_output->pll_ss_slew_frac);
-		dividers->ucPll_ss_enable = pll_output->pll_ss_enable;
-	}
-	return result;
+
+	if (amdgpu_atom_execute_table(
+		adev->mode_info.atom_context, idx, (uint32_t *)&pll_parameters))
+		return -EINVAL;
+
+	pll_output = (struct compute_gpu_clock_output_parameter_v1_8 *)
+			&pll_parameters;
+	dividers->ulClock = le32_to_cpu(pll_output->gpuclock_10khz);
+	dividers->ulDid = le32_to_cpu(pll_output->dfs_did);
+	dividers->ulPll_fb_mult = le32_to_cpu(pll_output->pll_fb_mult);
+	dividers->ulPll_ss_fbsmult = le32_to_cpu(pll_output->pll_ss_fbsmult);
+	dividers->usPll_ss_slew_frac = le16_to_cpu(pll_output->pll_ss_slew_frac);
+	dividers->ucPll_ss_enable = pll_output->pll_ss_enable;
+
+	return 0;
 }

 int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
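The rework above keeps the AtomBIOS calling convention in which the interpreter overwrites its single argument buffer in place, so the caller reads the output struct back out of the same storage. A self-contained mock of that convention; the types and the executor are made up for the demo, only the buffer-reuse pattern mirrors the driver:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct clk_input  { uint32_t clock_10khz; uint32_t clock_type; };
struct clk_output { uint32_t clock_10khz; uint32_t divider; };

static int mock_execute_table(uint32_t *args)	/* stands in for the interpreter */
{
	struct clk_input in;
	struct clk_output out;

	memcpy(&in, args, sizeof(in));
	out.clock_10khz = in.clock_10khz;
	out.divider = in.clock_type ? 4 : 2;
	memcpy(args, &out, sizeof(out));	/* result replaces the input in place */
	return 0;
}

int main(void)
{
	uint32_t buf[2];
	struct clk_input params = { 150000, 1 };	/* 1.5 GHz request, 10 kHz units */
	struct clk_output result;

	memcpy(buf, &params, sizeof(params));
	if (mock_execute_table(buf))
		return 1;
	memcpy(&result, buf, sizeof(result));
	printf("clock=%u (10 kHz units) divider=%u\n", result.clock_10khz, result.divider);
	return 0;
}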
@@ -283,7 +284,7 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,

	idx = GetIndexIntoMasterDataTable(asic_profiling_info);
	profile = (struct atom_asic_profiling_info_v4_1 *)
-			cgs_atom_get_data_table(hwmgr->device,
+			smu_atom_get_data_table(hwmgr->adev,
					idx, NULL, NULL, NULL);

	if (!profile)
@@ -467,7 +468,7 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr,

	idx = GetIndexIntoMasterDataTable(smu_info);
	info = (struct atom_smu_info_v3_1 *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
				idx, NULL, NULL, NULL);

	if (!info) {
@@ -487,8 +488,9 @@
	return 0;
 }

-int pp_atomfwctrl__get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency)
+int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
	struct atom_get_smu_clock_info_parameters_v3_1 parameters;
	struct atom_get_smu_clock_info_output_parameters_v3_1 *output;
	uint32_t ix;
@@ -497,13 +499,13 @@ int pp_atomfwctrl__get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLK
	parameters.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ;

	ix = GetIndexIntoMasterCmdTable(getsmuclockinfo);
-	if (!cgs_atom_exec_cmd_table(hwmgr->device, ix, &parameters)) {
-		output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&parameters;
-		*frequency = output->atom_smu_outputclkfreq.smu_clock_freq_hz / 10000;
-	} else {
-		pr_info("Error execute_table getsmuclockinfo!");
-		return -1;
-	}
+
+	if (amdgpu_atom_execute_table(
+		adev->mode_info.atom_context, ix, (uint32_t *)&parameters))
+		return -EINVAL;
+
+	output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&parameters;
+	*frequency = output->atom_smu_outputclkfreq.smu_clock_freq_hz / 10000;

	return 0;
 }
@@ -513,11 +515,10 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
 {
	struct atom_firmware_info_v3_1 *info = NULL;
	uint16_t ix;
-	uint32_t frequency = 0;

	ix = GetIndexIntoMasterDataTable(firmwareinfo);
	info = (struct atom_firmware_info_v3_1 *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
				ix, NULL, NULL, NULL);

	if (!info) {
@@ -536,12 +537,6 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
	boot_values->ulSocClk = 0;
	boot_values->ulDCEFClk = 0;

-	if (!pp_atomfwctrl__get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_SOCCLK_ID, &frequency))
-		boot_values->ulSocClk = frequency;
-
-	if (!pp_atomfwctrl__get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCEFCLK_ID, &frequency))
-		boot_values->ulDCEFClk = frequency;
-
	return 0;
 }

@@ -553,7 +548,7 @@ int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,

	ix = GetIndexIntoMasterDataTable(smc_dpm_info);
	info = (struct atom_smc_dpm_info_v4_1 *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
				ix, NULL, NULL, NULL);
	if (!info) {
		pr_info("Error retrieving BIOS Table Address!");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index 8df1e84f27c9..fe10aa4db5e6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -230,6 +230,8 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
			struct pp_atomfwctrl_bios_boot_up_values *boot_values);
 int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,
			struct pp_atomfwctrl_smc_dpm_parameters *param);
+int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr,
+			BIOS_CLKID id, uint32_t *frequency);

 #endif
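powerplay carries clocks in 10 kHz units while the SMU reports Hz; the /10000 in pp_atomfwctrl_get_clk_information_by_clkid() above is that conversion. A quick standalone check of the arithmetic (plain C, not driver code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t hz = 960000000;		/* 960 MHz as the SMU would report it */
	uint32_t freq_10khz = hz / 10000;	/* 96000 in the driver's unit */
	printf("%u Hz -> %u x 10 kHz -> %u MHz\n", hz, freq_10khz, freq_10khz / 100);
	return 0;
}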
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
index c9eecce5683f..f0d48b183d22 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
@@ -141,7 +141,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr)

	if (!table_address) {
		table_address = (ATOM_Tonga_POWERPLAYTABLE *)
-				cgs_atom_get_data_table(hwmgr->device,
+				smu_atom_get_data_table(hwmgr->adev,
						index, &size, &frev, &crev);
		hwmgr->soft_pp_table = table_address;	/*Cache the result in RAM.*/
		hwmgr->soft_pp_table_size = size;
@@ -728,6 +728,32 @@ static int get_mm_clock_voltage_table(
	return 0;
 }

+static int get_gpio_table(struct pp_hwmgr *hwmgr,
+		struct phm_ppt_v1_gpio_table **pp_tonga_gpio_table,
+		const ATOM_Tonga_GPIO_Table *atom_gpio_table)
+{
+	uint32_t table_size;
+	struct phm_ppt_v1_gpio_table *pp_gpio_table;
+	struct phm_ppt_v1_information *pp_table_information =
+		(struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+	table_size = sizeof(struct phm_ppt_v1_gpio_table);
+	pp_gpio_table = kzalloc(table_size, GFP_KERNEL);
+	if (!pp_gpio_table)
+		return -ENOMEM;
+
+	if (pp_table_information->vdd_dep_on_sclk->count <
+			atom_gpio_table->ucVRHotTriggeredSclkDpmIndex)
+		PP_ASSERT_WITH_CODE(false,
+			"SCLK DPM index for VRHot cannot exceed the total sclk level count!",);
+	else
+		pp_gpio_table->vrhot_triggered_sclk_dpm_index =
+				atom_gpio_table->ucVRHotTriggeredSclkDpmIndex;
+
+	*pp_tonga_gpio_table = pp_gpio_table;
+
+	return 0;
+}
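get_gpio_table() above caches a VRHot trigger index read from BIOS data only after checking it against the SCLK DPM table. A standalone sketch of that guard; the names are illustrative, not the kernel's, and the sketch uses a strict >= comparison where the kernel's count < index check would still accept an index equal to the count:

#include <stdint.h>
#include <stdio.h>

static int cache_vrhot_index(uint32_t dpm_level_count, uint8_t bios_index,
			     uint32_t *cached)
{
	if (bios_index >= dpm_level_count) {	/* reject out-of-range BIOS data */
		fprintf(stderr, "VRHot SCLK DPM index %u exceeds level count %u\n",
			bios_index, dpm_level_count);
		return -1;
	}
	*cached = bios_index;
	return 0;
}

int main(void)
{
	uint32_t idx;

	if (cache_vrhot_index(8, 3, &idx) == 0)
		printf("cached index %u\n", idx);
	return 0;
}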
 /**
  * Private Function used during initialization.
  * Initialize clock voltage dependency
@@ -761,11 +787,15 @@ static int init_clock_voltage_dependency(
	const PPTable_Generic_SubTable_Header *pcie_table =
		(const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) +
		le16_to_cpu(powerplay_table->usPCIETableOffset));
+	const ATOM_Tonga_GPIO_Table *gpio_table =
+		(const ATOM_Tonga_GPIO_Table *)(((unsigned long) powerplay_table) +
+		le16_to_cpu(powerplay_table->usGPIOTableOffset));

	pp_table_information->vdd_dep_on_sclk = NULL;
	pp_table_information->vdd_dep_on_mclk = NULL;
	pp_table_information->mm_dep_table = NULL;
	pp_table_information->pcie_table = NULL;
+	pp_table_information->gpio_table = NULL;

	if (powerplay_table->usMMDependencyTableOffset != 0)
		result = get_mm_clock_voltage_table(hwmgr,
@@ -810,6 +840,10 @@ static int init_clock_voltage_dependency(
		result = get_valid_clk(hwmgr, &pp_table_information->valid_sclk_values,
				pp_table_information->vdd_dep_on_sclk);

+	if (!result && gpio_table)
+		result = get_gpio_table(hwmgr, &pp_table_information->gpio_table,
+				gpio_table);
+
	return result;
 }
@@ -1116,6 +1150,9 @@ static int pp_tables_v1_0_uninitialize(struct pp_hwmgr *hwmgr)
	kfree(pp_table_information->pcie_table);
	pp_table_information->pcie_table = NULL;

+	kfree(pp_table_information->gpio_table);
+	pp_table_information->gpio_table = NULL;
+
	kfree(hwmgr->pptable);
	hwmgr->pptable = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index 36ca7c419c90..ce64dfabd34b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -837,7 +837,7 @@ static const ATOM_PPLIB_POWERPLAYTABLE *get_powerplay_table(
		hwmgr->soft_pp_table = &soft_dummy_pp_table[0];
		hwmgr->soft_pp_table_size = sizeof(soft_dummy_pp_table);
	} else {
-		table_addr = cgs_atom_get_data_table(hwmgr->device,
+		table_addr = smu_atom_get_data_table(hwmgr->adev,
				GetIndexIntoMasterTable(DATA, PowerPlayInfo),
				&size, &frev, &crev);
		hwmgr->soft_pp_table = table_addr;
@@ -1058,7 +1058,7 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr,
		return 0;

	/* We assume here that fw_info is unchanged if this call fails.*/
-	fw_info = cgs_atom_get_data_table(hwmgr->device,
+	fw_info = smu_atom_get_data_table(hwmgr->adev,
			 GetIndexIntoMasterTable(DATA, FirmwareInfo),
			 &size, &frev, &crev);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 10253b89b3d8..2f69bfa478a7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -34,7 +34,7 @@
 #include "rv_ppsmc.h"
 #include "smu10_hwmgr.h"
 #include "power_state.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"

 #define SMU10_MAX_DEEPSLEEP_DIVIDER_ID     5
 #define SMU10_MINIMUM_ENGINE_CLOCK         800   /* 8Mhz, the low boundary of engine clock allowed on this chip */
@@ -42,6 +42,13 @@
 #define SMU10_DISPCLK_BYPASS_THRESHOLD     10000 /* 100Mhz */
 #define SMC_RAM_END                     0x40000

+#define mmPWR_MISC_CNTL_STATUS					0x0183
+#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
+
 static const unsigned long SMU10_Magic = (unsigned long) PHM_Rv_Magic;

@@ -74,11 +81,15 @@ static int smu10_initialize_dpm_defaults(struct pp_hwmgr *hwmgr)
	smu10_data->thermal_auto_throttling_treshold = 0;
	smu10_data->is_nb_dpm_enabled = 1;
	smu10_data->dpm_flags = 1;
-	smu10_data->gfx_off_controled_by_driver = false;
	smu10_data->need_min_deep_sleep_dcefclk = true;
	smu10_data->num_active_display = 0;
	smu10_data->deep_sleep_dcefclk = 0;

+	if (hwmgr->feature_mask & PP_GFXOFF_MASK)
+		smu10_data->gfx_off_controled_by_driver = true;
+	else
+		smu10_data->gfx_off_controled_by_driver = false;
+
	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
			PHM_PlatformCaps_SclkDeepSleep);
@@ -161,7 +172,7 @@ static int smu10_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input)
	struct PP_Clocks clocks = {0};
	struct pp_display_clock_request clock_req;

-	clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk;
+	clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
	clock_req.clock_type = amd_pp_dcf_clock;
	clock_req.clock_freq_in_khz = clocks.dcefClock * 10;
@@ -206,12 +217,18 @@ static int smu10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input
 static int smu10_init_power_gate_state(struct pp_hwmgr *hwmgr)
 {
	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;

	smu10_data->vcn_power_gated = true;
	smu10_data->isp_tileA_power_gated = true;
	smu10_data->isp_tileB_power_gated = true;

-	return 0;
+	if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
+		return smum_send_msg_to_smc_with_parameter(hwmgr,
+							   PPSMC_MSG_SetGfxCGPG,
+							   true);
+	else
+		return 0;
 }

@@ -237,13 +254,31 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
	return smu10_reset_cc6_data(hwmgr);
 }

+static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
+
+	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
+	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
+	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
+		return true;
+
+	return false;
+}
+
 static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
 {
	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);

-	if (smu10_data->gfx_off_controled_by_driver)
+	if (smu10_data->gfx_off_controled_by_driver) {
		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);

+		/* confirm gfx is back to "on" state */
+		while (!smu10_is_gfx_on(hwmgr))
+			msleep(1);
+	}
+
	return 0;
 }

@@ -267,6 +302,14 @@ static int smu10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
	return smu10_enable_gfx_off(hwmgr);
 }

+static int smu10_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable)
+{
+	if (enable)
+		return smu10_enable_gfx_off(hwmgr);
+	else
+		return smu10_disable_gfx_off(hwmgr);
+}
+
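smu10_is_gfx_on() above decodes a two-bit field: PWR_MISC_CNTL_STATUS packs the GFXOFF state into bits [2:1] (mask 0x6, shift 1, per the defines this patch adds), and the encoded value 0x2 is treated as "GFX is on". A standalone decode of the same field, with the mask/shift values copied from the patch:

#include <stdint.h>
#include <stdio.h>

#define GFXOFF_STATUS_SHIFT	0x1
#define GFXOFF_STATUS_MASK	0x00000006u

static int gfx_is_on(uint32_t reg)
{
	return (reg & GFXOFF_STATUS_MASK) == (0x2u << GFXOFF_STATUS_SHIFT);
}

int main(void)
{
	printf("reg=0x05 -> %s\n", gfx_is_on(0x05) ? "on" : "off");	/* field is 0x2 */
	printf("reg=0x03 -> %s\n", gfx_is_on(0x03) ? "on" : "off");	/* field is 0x1 */
	return 0;
}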
 static int smu10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
				struct pp_power_state  *prequest_ps,
			const struct pp_power_state *pcurrent_ps)
@@ -340,7 +383,7 @@ static int smu10_get_clock_voltage_dependency_table(struct pp_hwmgr *hwmgr,

 static int smu10_populate_clock_table(struct pp_hwmgr *hwmgr)
 {
-	int result;
+	uint32_t result;

	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
	DpmClocks_t  *table = &(smu10_data->clock_table);
@@ -386,11 +429,11 @@ static int smu10_populate_clock_table(struct pp_hwmgr *hwmgr)

	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency);
	result = smum_get_argument(hwmgr);
-	smu10_data->gfx_min_freq_limit = result * 100;
+	smu10_data->gfx_min_freq_limit = result / 10 * 1000;

	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency);
	result = smum_get_argument(hwmgr);
-	smu10_data->gfx_max_freq_limit = result * 100;
+	smu10_data->gfx_max_freq_limit = result / 10 * 1000;

	return 0;
 }
@@ -436,8 +479,8 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)

	hwmgr->platform_descriptor.minimumClocksReductionPercentage = 50;

-	hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK;
-	hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK;
+	hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK * 100;
+	hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK * 100;

	return result;
 }
@@ -472,6 +515,8 @@ static int smu10_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
				enum amd_dpm_forced_level level)
 {
+	struct smu10_hwmgr *data = hwmgr->backend;
+
	if (hwmgr->smu_version < 0x1E3700) {
		pr_info("smu firmware version too old, can not set dpm level\n");
		return 0;
@@ -482,7 +527,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetHardMinGfxClk,
-						SMU10_UMD_PSTATE_PEAK_GFXCLK);
+						data->gfx_max_freq_limit/100);
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetHardMinFclkByFreq,
						SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -495,7 +540,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,

		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetSoftMaxGfxClk,
-						SMU10_UMD_PSTATE_PEAK_GFXCLK);
+						data->gfx_max_freq_limit/100);
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetSoftMaxFclkByFreq,
						SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -509,10 +554,10 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetHardMinGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetSoftMaxGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		smum_send_msg_to_smc_with_parameter(hwmgr,
@@ -552,7 +597,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
	case AMD_DPM_FORCED_LEVEL_AUTO:
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetHardMinGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetHardMinFclkByFreq,
						SMU10_UMD_PSTATE_MIN_FCLK);
@@ -565,7 +610,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,

		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetSoftMaxGfxClk,
-						SMU10_UMD_PSTATE_PEAK_GFXCLK);
+						data->gfx_max_freq_limit/100);
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetSoftMaxFclkByFreq,
						SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -579,10 +624,10 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
	case AMD_DPM_FORCED_LEVEL_LOW:
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetHardMinGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetSoftMaxGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
		smum_send_msg_to_smc_with_parameter(hwmgr,
						PPSMC_MSG_SetHardMinFclkByFreq,
						SMU10_UMD_PSTATE_MIN_FCLK);
@@ -699,6 +744,16 @@ static int smu10_set_cpu_power_state(struct pp_hwmgr *hwmgr)
 static int smu10_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time,
			bool cc6_disable, bool pstate_disable, bool pstate_switch_disable)
 {
+	struct smu10_hwmgr *data = (struct smu10_hwmgr *)(hwmgr->backend);
+
+	if (separation_time != data->separation_time ||
+			cc6_disable != data->cc6_disable ||
+			pstate_disable != data->pstate_disable) {
+		data->separation_time = separation_time;
+		data->cc6_disable = cc6_disable;
+		data->pstate_disable = pstate_disable;
+		data->cc6_setting_changed = true;
+	}
	return 0;
 }

@@ -711,6 +766,51 @@ static int smu10_get_dal_power_level(struct pp_hwmgr *hwmgr,
 static int smu10_force_clock_level(struct pp_hwmgr *hwmgr,
			enum pp_clock_type type, uint32_t mask)
 {
+	struct smu10_hwmgr *data = hwmgr->backend;
+	struct smu10_voltage_dependency_table *mclk_table =
+					data->clock_vol_info.vdd_dep_on_fclk;
+	uint32_t low, high;
+
+	low = mask ? (ffs(mask) - 1) : 0;
+	high = mask ? (fls(mask) - 1) : 0;
+
+	switch (type) {
+	case PP_SCLK:
+		if (low > 2 || high > 2) {
+			pr_info("Currently sclk only support 3 levels on RV\n");
+			return -EINVAL;
+		}
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinGfxClk,
+						low == 2 ? data->gfx_max_freq_limit/100 :
+						low == 1 ? SMU10_UMD_PSTATE_GFXCLK :
+						data->gfx_min_freq_limit/100);
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxGfxClk,
+						high == 0 ? data->gfx_min_freq_limit/100 :
+						high == 1 ? SMU10_UMD_PSTATE_GFXCLK :
+						data->gfx_max_freq_limit/100);
+		break;
+
+	case PP_MCLK:
+		if (low > mclk_table->count - 1 || high > mclk_table->count - 1)
+			return -EINVAL;
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinFclkByFreq,
+						mclk_table->entries[low].clk/100);
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxFclkByFreq,
+						mclk_table->entries[high].clk/100);
+		break;
+
+	case PP_PCIE:
+	default:
+		break;
+	}
	return 0;
 }
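smu10_force_clock_level() above reduces a level bitmask from userspace to a [low, high] pair with ffs()/fls(). The kernel supplies those helpers; a portable demo of the same decoding with hand-rolled equivalents (bit positions are 1-based, hence the -1):

#include <stdint.h>
#include <stdio.h>

static int ffs32(uint32_t x) { int n = 1; if (!x) return 0; while (!(x & 1u)) { x >>= 1; n++; } return n; }
static int fls32(uint32_t x) { int n = 0; while (x) { x >>= 1; n++; } return n; }

int main(void)
{
	uint32_t mask = 0x6;	/* userspace asked for levels 1 and 2 */
	uint32_t low  = mask ? (uint32_t)(ffs32(mask) - 1) : 0;
	uint32_t high = mask ? (uint32_t)(fls32(mask) - 1) : 0;

	printf("low=%u high=%u\n", low, high);	/* prints low=1 high=2 */
	return 0;
}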
"*" : ""); break; case PP_MCLK: smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency); @@ -947,9 +1056,8 @@ static int smu10_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simpl static int smu10_thermal_get_temperature(struct pp_hwmgr *hwmgr) { - uint32_t reg_offset = soc15_get_register_offset(THM_HWID, 0, - mmTHM_TCON_CUR_TMP_BASE_IDX, mmTHM_TCON_CUR_TMP); - uint32_t reg_value = cgs_read_register(hwmgr->device, reg_offset); + struct amdgpu_device *adev = hwmgr->adev; + uint32_t reg_value = RREG32_SOC15(THM, 0, mmTHM_TCON_CUR_TMP); int cur_temp = (reg_value & THM_TCON_CUR_TMP__CUR_TEMP_MASK) >> THM_TCON_CUR_TMP__CUR_TEMP__SHIFT; @@ -993,6 +1101,25 @@ static int smu10_read_sensor(struct pp_hwmgr *hwmgr, int idx, return ret; } +static int smu10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr, + struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges) +{ + struct smu10_hwmgr *data = hwmgr->backend; + Watermarks_t *table = &(data->water_marks_table); + int result = 0; + + smu_set_watermarks_for_clocks_ranges(table,wm_with_clock_ranges); + smum_smc_table_manager(hwmgr, (uint8_t *)table, (uint16_t)SMU10_WMTABLE, false); + data->water_marks_exist = true; + return result; +} + +static int smu10_smus_notify_pwe(struct pp_hwmgr *hwmgr) +{ + + return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SetRccPfcPmeRestoreRegister); +} + static int smu10_set_mmhub_powergating_by_smu(struct pp_hwmgr *hwmgr) { return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerGateMmHub); @@ -1022,6 +1149,7 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .get_current_shallow_sleep_clocks = smu10_get_current_shallow_sleep_clocks, .get_clock_by_type_with_latency = smu10_get_clock_by_type_with_latency, .get_clock_by_type_with_voltage = smu10_get_clock_by_type_with_voltage, + .set_watermarks_for_clocks_ranges = smu10_set_watermarks_for_clocks_ranges, .get_max_high_clocks = smu10_get_max_high_clocks, .read_sensor = smu10_read_sensor, .set_active_display_count = smu10_set_active_display_count, @@ -1032,6 +1160,8 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .power_state_set = smu10_set_power_state_tasks, .dynamic_state_management_disable = smu10_disable_dpm_tasks, .set_mmhub_powergating_by_smu = smu10_set_mmhub_powergating_by_smu, + .smus_notify_pwe = smu10_smus_notify_pwe, + .gfx_off_control = smu10_gfx_off_control, }; int smu10_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h index 175c3a592b6c..1fb296a996f3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h @@ -290,6 +290,7 @@ struct smu10_hwmgr { bool vcn_dpg_mode; bool gfx_off_controled_by_driver; + bool water_marks_exist; Watermarks_t water_marks_table; struct smu10_clock_voltage_information clock_vol_info; DpmClocks_t clock_table; @@ -310,11 +311,9 @@ int smu10_init_function_pointers(struct pp_hwmgr *hwmgr); #define SMU10_UMD_PSTATE_FCLK 933 #define SMU10_UMD_PSTATE_VCE 0x03C00320 -#define SMU10_UMD_PSTATE_PEAK_GFXCLK 1100 #define SMU10_UMD_PSTATE_PEAK_SOCCLK 757 #define SMU10_UMD_PSTATE_PEAK_FCLK 1200 -#define SMU10_UMD_PSTATE_MIN_GFXCLK 200 #define SMU10_UMD_PSTATE_MIN_FCLK 400 #define SMU10_UMD_PSTATE_MIN_SOCCLK 200 #define SMU10_UMD_PSTATE_MIN_VCE 0x0190012C diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index f4cbaee4e2ca..6d72a5600917 100644 --- 
a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -147,20 +147,20 @@ void smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) data->uvd_power_gated = bgate; if (bgate) { - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_GATE); smu7_update_uvd_dpm(hwmgr, true); smu7_powerdown_uvd(hwmgr); } else { smu7_powerup_uvd(hwmgr); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_UNGATE); smu7_update_uvd_dpm(hwmgr, false); @@ -175,20 +175,20 @@ void smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) data->vce_power_gated = bgate; if (bgate) { - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); smu7_update_vce_dpm(hwmgr, true); smu7_powerdown_vce(hwmgr); } else { smu7_powerup_vce(hwmgr); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_UNGATE); smu7_update_vce_dpm(hwmgr, false); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 18b5b2ff47fe..8eb3f5176646 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -61,10 +61,6 @@ #define SMC_CG_IND_START 0xc0030000 #define SMC_CG_IND_END 0xc0040000 -#define VOLTAGE_SCALE 4 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 - #define MEM_FREQ_LOW_LATENCY 25000 #define MEM_FREQ_HIGH_LATENCY 80000 @@ -88,6 +84,14 @@ static const struct profile_mode_setting smu7_profiling[6] = {0, 0, 0, 0, 0, 0, 0, 0}, }; +#define PPSMC_MSG_SetVBITimeout_VEGAM ((uint16_t) 0x310) + +#define ixPWR_SVI2_PLANE1_LOAD 0xC0200280 +#define PWR_SVI2_PLANE1_LOAD__PSI1_MASK 0x00000020L +#define PWR_SVI2_PLANE1_LOAD__PSI0_EN_MASK 0x00000040L +#define PWR_SVI2_PLANE1_LOAD__PSI1__SHIFT 0x00000005 +#define PWR_SVI2_PLANE1_LOAD__PSI0_EN__SHIFT 0x00000006 + /** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. 
*/ enum DPM_EVENT_SRC { DPM_EVENT_SRC_ANALOG = 0, @@ -169,6 +173,13 @@ static int smu7_get_current_pcie_lane_number(struct pp_hwmgr *hwmgr) */ static int smu7_enable_smc_voltage_controller(struct pp_hwmgr *hwmgr) { + if (hwmgr->chip_id == CHIP_VEGAM) { + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, PWR_SVI2_PLANE1_LOAD, PSI1, 0); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, PWR_SVI2_PLANE1_LOAD, PSI0_EN, 0); + } + if (hwmgr->feature_mask & PP_SMC_VOLTAGE_CONTROL_MASK) smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Voltage_Cntl_Enable); @@ -798,32 +809,6 @@ static int smu7_setup_dpm_tables_v1(struct pp_hwmgr *hwmgr) return 0; } -static int smu7_get_voltage_dependency_table( - const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table, - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table) -{ - uint8_t i = 0; - PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count), - "Voltage Lookup Table empty", - return -EINVAL); - - dep_table->count = allowed_dep_table->count; - for (i=0; i<dep_table->count; i++) { - dep_table->entries[i].clk = allowed_dep_table->entries[i].clk; - dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd; - dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset; - dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc; - dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx; - dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci; - dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd; - dep_table->entries[i].phases = allowed_dep_table->entries[i].phases; - dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable; - dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset; - } - - return 0; -} - static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -851,7 +836,7 @@ static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr) entries[i].vddc = dep_sclk_table->entries[i].vddc; } - smu7_get_voltage_dependency_table(dep_sclk_table, + smu_get_voltage_dependency_table_ppt_v1(dep_sclk_table, (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk)); odn_table->odn_memory_clock_dpm_levels.num_of_pl = @@ -863,12 +848,39 @@ static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr) entries[i].vddc = dep_mclk_table->entries[i].vddc; } - smu7_get_voltage_dependency_table(dep_mclk_table, + smu_get_voltage_dependency_table_ppt_v1(dep_mclk_table, (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_mclk)); return 0; } +static void smu7_setup_voltage_range_from_vbios(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t min_vddc, max_vddc; + + if (!table_info) + return; + + dep_sclk_table = table_info->vdd_dep_on_sclk; + + atomctrl_get_voltage_range(hwmgr, &max_vddc, &min_vddc); + + if (min_vddc == 0 || min_vddc > 2000 + || min_vddc > dep_sclk_table->entries[0].vddc) + min_vddc = dep_sclk_table->entries[0].vddc; + + if (max_vddc == 0 || max_vddc > 2000 + || max_vddc < dep_sclk_table->entries[dep_sclk_table->count-1].vddc) + max_vddc = dep_sclk_table->entries[dep_sclk_table->count-1].vddc; + + data->odn_dpm_table.min_vddc = min_vddc; + 
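smu7_setup_voltage_range_from_vbios() above accepts a VBIOS-reported OD voltage bound only if it is nonzero, at most 2000 mV, and consistent with the sclk dependency table; otherwise the table's own first/last vddc wins. The same clamp, pulled out as plain standalone functions for a quick check:

#include <stdint.h>
#include <stdio.h>

static uint32_t pick_min_vddc(uint32_t vbios_min, uint32_t first_entry_vddc)
{
	if (vbios_min == 0 || vbios_min > 2000 || vbios_min > first_entry_vddc)
		return first_entry_vddc;
	return vbios_min;
}

static uint32_t pick_max_vddc(uint32_t vbios_max, uint32_t last_entry_vddc)
{
	if (vbios_max == 0 || vbios_max > 2000 || vbios_max < last_entry_vddc)
		return last_entry_vddc;
	return vbios_max;
}

int main(void)
{
	/* VBIOS reports no minimum and a plausible maximum */
	printf("min=%u max=%u\n", pick_min_vddc(0, 800), pick_max_vddc(1300, 1150));
	return 0;
}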
+
 static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 {
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -887,8 +899,10 @@ static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
			sizeof(struct smu7_dpm_table));

	/* initialize ODN table */
-	if (hwmgr->od_enabled)
+	if (hwmgr->od_enabled) {
+		smu7_setup_voltage_range_from_vbios(hwmgr);
		smu7_odn_initial_default_setting(hwmgr);
+	}

	return 0;
 }
@@ -966,6 +980,22 @@ static int smu7_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
	return 0;
 }

+static int smu7_disable_sclk_vce_handshake(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t soft_register_value = 0;
+	uint32_t handshake_disables_offset = data->soft_regs_start
+				+ smum_get_offsetof(hwmgr,
+					SMU_SoftRegisters, HandshakeDisables);
+
+	soft_register_value = cgs_read_ind_register(hwmgr->device,
+				CGS_IND_REG__SMC, handshake_disables_offset);
+	soft_register_value |= SMU7_VCE_SCLK_HANDSHAKE_DISABLE;
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			handshake_disables_offset, soft_register_value);
+	return 0;
+}
+
 static int smu7_disable_handshake_uvd(struct pp_hwmgr *hwmgr)
 {
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -989,6 +1019,9 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)

	/* enable SCLK dpm */
	if (!data->sclk_dpm_key_disabled)
+		if (hwmgr->chip_id == CHIP_VEGAM)
+			smu7_disable_sclk_vce_handshake(hwmgr);
+
		PP_ASSERT_WITH_CODE(
		(0 == smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DPM_Enable)),
		"Failed to enable SCLK DPM during DPM Start Function!",
@@ -998,13 +1031,15 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
	if (0 == data->mclk_dpm_key_disabled) {
		if (!(hwmgr->feature_mask & PP_UVD_HANDSHAKE_MASK))
			smu7_disable_handshake_uvd(hwmgr);
+
		PP_ASSERT_WITH_CODE(
				(0 == smum_send_msg_to_smc(hwmgr,
						PPSMC_MSG_MCLKDPM_Enable)),
				"Failed to enable MCLK DPM during DPM Start Function!",
				return -EINVAL);

-		PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
+		if (hwmgr->chip_family != CHIP_VEGAM)
+			PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);

		if (hwmgr->chip_family == AMDGPU_FAMILY_CI) {
@@ -1020,8 +1055,13 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x5);
			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_CPL_CNTL, 0x100005);
			udelay(10);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400005);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400005);
+			if (hwmgr->chip_id == CHIP_VEGAM) {
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400009);
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400009);
+			} else {
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400005);
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400005);
+			}
			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_CPL_CNTL, 0x500005);
		}
	}
@@ -1230,7 +1270,7 @@ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr)

		tmp_result = smu7_construct_voltage_tables(hwmgr);
		PP_ASSERT_WITH_CODE((0 == tmp_result),
-				"Failed to contruct voltage tables!",
+				"Failed to construct voltage tables!",
				result = tmp_result);
	}
	smum_initialize_mc_reg_table(hwmgr);
@@ -1262,10 +1302,12 @@ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
	PP_ASSERT_WITH_CODE((0 == tmp_result),
			"Failed to process firmware header!", result = tmp_result);

-	tmp_result = smu7_initial_switch_from_arbf0_to_f1(hwmgr);
-	PP_ASSERT_WITH_CODE((0 == tmp_result),
-			"Failed to initialize switch from ArbF0 to F1!",
-			result = tmp_result);
+	if (hwmgr->chip_id != CHIP_VEGAM) {
+		tmp_result = smu7_initial_switch_from_arbf0_to_f1(hwmgr);
+		PP_ASSERT_WITH_CODE((0 == tmp_result),
+				"Failed to initialize switch from ArbF0 to F1!",
+				result = tmp_result);
+	}

	result = smu7_setup_default_dpm_tables(hwmgr);
	PP_ASSERT_WITH_CODE(0 == result,
@@ -2755,6 +2797,9 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
	case CHIP_POLARIS12:
		switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
		break;
+	case CHIP_VEGAM:
+		switch_limit_us = 30;
+		break;
	default:
		switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
		break;
@@ -2778,8 +2823,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
	struct PP_Clocks minimum_clocks = {0};
	bool disable_mclk_switching;
	bool disable_mclk_switching_for_frame_lock;
-	struct cgs_display_info info = {0};
-	struct cgs_mode_info mode_info = {0};
	const struct phm_clock_and_voltage_limits *max_limits;
	uint32_t i;
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -2788,7 +2831,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
	int32_t count;
	int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0;

-	info.mode_info = &mode_info;
	data->battery_state = (PP_StateUILabel_Battery ==
			request_ps->classification.ui_label);
@@ -2810,10 +2852,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
		}
	}

-	cgs_get_active_displays_info(hwmgr->device, &info);
-
-	minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
-	minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+	minimum_clocks.engineClock = hwmgr->display_config->min_core_set_clock;
+	minimum_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;

	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
			PHM_PlatformCaps_StablePState)) {
@@ -2844,12 +2884,12 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);

-	if (info.display_count == 0)
+	if (hwmgr->display_config->num_display == 0)
		disable_mclk_switching = false;
	else
-		disable_mclk_switching = ((1 < info.display_count) ||
+		disable_mclk_switching = ((1 < hwmgr->display_config->num_display) ||
					  disable_mclk_switching_for_frame_lock ||
-					  smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us));
+					  smu7_vblank_too_short(hwmgr, hwmgr->display_config->min_vblank_time));

	sclk = smu7_ps->performance_levels[0].engine_clock;
	mclk = smu7_ps->performance_levels[0].memory_clock;
@@ -2958,8 +2998,7 @@ static int smu7_dpm_patch_boot_state(struct pp_hwmgr *hwmgr,
	/* First retrieve the Boot clocks and VDDC from the firmware info table.
	 * We assume here that fw_info is unchanged if this call fails.
	 */
-	fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)cgs_atom_get_data_table(
-			hwmgr->device, index,
+	fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)smu_atom_get_data_table(hwmgr->adev, index,
			&size, &frev, &crev);
	if (!fw_info)
		/* During a test, there is no firmware info table. */
@@ -3367,34 +3406,35 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr,
	return 0;
 }

-static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr,
-		struct pp_gpu_power *query)
+static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query)
 {
-	PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
-			PPSMC_MSG_PmStatusLogStart),
-			"Failed to start pm status log!",
-			return -1);
+	int i;
+	u32 tmp = 0;

-	/* Sampling period from 50ms to 4sec */
-	msleep_interruptible(200);
+	if (!query)
+		return -EINVAL;

-	PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
-			PPSMC_MSG_PmStatusLogSample),
-			"Failed to sample pm status log!",
-			return -1);
+	smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0);
+	tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+	*query = tmp;

-	query->vddc_power = cgs_read_ind_register(hwmgr->device,
-			CGS_IND_REG__SMC,
-			ixSMU_PM_STATUS_40);
-	query->vddci_power = cgs_read_ind_register(hwmgr->device,
-			CGS_IND_REG__SMC,
-			ixSMU_PM_STATUS_49);
-	query->max_gpu_power = cgs_read_ind_register(hwmgr->device,
-			CGS_IND_REG__SMC,
-			ixSMU_PM_STATUS_94);
-	query->average_gpu_power = cgs_read_ind_register(hwmgr->device,
-			CGS_IND_REG__SMC,
-			ixSMU_PM_STATUS_95);
+	if (tmp != 0)
+		return 0;
+
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+							ixSMU_PM_STATUS_94, 0);
+
+	for (i = 0; i < 10; i++) {
+		mdelay(1);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogSample);
+		tmp = cgs_read_ind_register(hwmgr->device,
+						CGS_IND_REG__SMC,
+						ixSMU_PM_STATUS_94);
+		if (tmp != 0)
+			break;
+	}
+	*query = tmp;

	return 0;
 }
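The reworked smu7_get_gpu_power() above queries the SMC once and, if the fast path returns 0, kicks the status log and polls a scratch value a bounded number of times (10 passes with 1 ms delays in the kernel). The shape of that loop, with the register read mocked so the sketch runs standalone:

#include <stdint.h>
#include <stdio.h>

static uint32_t mock_read_status(void)
{
	static int calls;
	return ++calls >= 4 ? 137 : 0;	/* value shows up on the 4th read */
}

int main(void)
{
	uint32_t power = 0;
	int i;

	for (i = 0; i < 10; i++) {	/* bounded, unlike an open-ended busy wait */
		/* mdelay(1) sits here in the kernel */
		power = mock_read_status();
		if (power != 0)
			break;
	}
	printf("sampled power word after %d tries: %u\n", i + 1, power);
	return 0;
}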
@@ -3447,10 +3487,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
		*size = 4;
		return 0;
	case AMDGPU_PP_SENSOR_GPU_POWER:
-		if (*size < sizeof(struct pp_gpu_power))
-			return -EINVAL;
-		*size = sizeof(struct pp_gpu_power);
-		return smu7_get_gpu_power(hwmgr, (struct pp_gpu_power *)value);
+		return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
	case AMDGPU_PP_SENSOR_VDDGFX:
		if ((data->vr_config & 0xff) == 0x2)
			val_vid = PHM_READ_INDIRECT_FIELD(hwmgr->device,
@@ -3481,7 +3518,6 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
			[smu7_ps->performance_level_count - 1].memory_clock;
	struct PP_Clocks min_clocks = {0};
	uint32_t i;
-	struct cgs_display_info info = {0};

	for (i = 0; i < sclk_table->count; i++) {
		if (sclk == sclk_table->dpm_levels[i].value)
@@ -3508,9 +3544,8 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
		if (i >= mclk_table->count)
			data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;

-	cgs_get_active_displays_info(hwmgr->device, &info);

-	if (data->display_timing.num_existing_displays != info.display_count)
+	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
		data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK;

	return 0;
@@ -3813,9 +3848,14 @@ static int smu7_notify_smc_display(struct pp_hwmgr *hwmgr)
 {
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);

-	if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK)
-		smum_send_msg_to_smc_with_parameter(hwmgr,
-			(PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+	if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK) {
+		if (hwmgr->chip_id == CHIP_VEGAM)
+			smum_send_msg_to_smc_with_parameter(hwmgr,
+					(PPSMC_Msg)PPSMC_MSG_SetVBITimeout_VEGAM, data->frame_time_x2);
+		else
+			smum_send_msg_to_smc_with_parameter(hwmgr,
+					(PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+	}

	return (smum_send_msg_to_smc(hwmgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ?  0 : -EINVAL;
 }
@@ -3909,15 +3949,8 @@ smu7_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display)
 static int
 smu7_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr)
 {
-	uint32_t num_active_displays = 0;
-	struct cgs_display_info info = {0};
-
-	info.mode_info = NULL;
-	cgs_get_active_displays_info(hwmgr->device, &info);
-
-	num_active_displays = info.display_count;
-
-	if (num_active_displays > 1 && hwmgr->display_config.multi_monitor_in_sync != true)
+	if (hwmgr->display_config->num_display > 1 &&
+			!hwmgr->display_config->multi_monitor_in_sync)
		smu7_notify_smc_display_change(hwmgr, false);

	return 0;
@@ -3932,33 +3965,24 @@ smu7_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr)
 static int smu7_program_display_gap(struct pp_hwmgr *hwmgr)
 {
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
-	uint32_t num_active_displays = 0;
	uint32_t display_gap = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL);
	uint32_t display_gap2;
	uint32_t pre_vbi_time_in_us;
	uint32_t frame_time_in_us;
-	uint32_t ref_clock;
-	uint32_t refresh_rate = 0;
-	struct cgs_display_info info = {0};
-	struct cgs_mode_info mode_info = {0};
-
-	info.mode_info = &mode_info;
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	num_active_displays = info.display_count;
+	uint32_t ref_clock, refresh_rate;

-	display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (num_active_displays > 0) ? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE);
+	display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (hwmgr->display_config->num_display > 0) ? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE);
	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL, display_gap);

	ref_clock =  amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
-
-	refresh_rate = mode_info.refresh_rate;
+	refresh_rate = hwmgr->display_config->vrefresh;

	if (0 == refresh_rate)
		refresh_rate = 60;

	frame_time_in_us = 1000000 / refresh_rate;

-	pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us;
+	pre_vbi_time_in_us = frame_time_in_us - 200 - hwmgr->display_config->min_vblank_time;

	data->frame_time_x2 = frame_time_in_us * 2 / 100;

@@ -4038,17 +4062,14 @@ smu7_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmgr)
 {
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
	bool is_update_required = false;
-	struct cgs_display_info info = {0, 0, NULL};
-
-	cgs_get_active_displays_info(hwmgr->device, &info);

-	if (data->display_timing.num_existing_displays != info.display_count)
+	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
		is_update_required = true;

	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) {
-		if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr &&
+		if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr &&
			(data->display_timing.min_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK ||
-			hwmgr->display_config.min_core_set_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK))
+			hwmgr->display_config->min_core_set_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK))
			is_update_required = true;
	}
	return is_update_required;
@@ -4103,7 +4124,7 @@ static int smu7_check_states_equal(struct pp_hwmgr *hwmgr,
	return 0;
 }

-static int smu7_upload_mc_firmware(struct pp_hwmgr *hwmgr)
+static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr)
 {
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -4182,13 +4203,9 @@ static int smu7_read_clock_registers(struct pp_hwmgr *hwmgr)
 static int smu7_get_memory_type(struct pp_hwmgr *hwmgr)
 {
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
-	uint32_t temp;
-
-	temp = cgs_read_register(hwmgr->device, mmMC_SEQ_MISC0);
+	struct amdgpu_device *adev = hwmgr->adev;

-	data->is_memory_gddr5 = (MC_SEQ_MISC0_GDDR5_VALUE ==
-			((temp & MC_SEQ_MISC0_GDDR5_MASK) >>
-			 MC_SEQ_MISC0_GDDR5_SHIFT));
+	data->is_memory_gddr5 = (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5);

	return 0;
 }
@@ -4236,7 +4253,7 @@ static int smu7_setup_asic_task(struct pp_hwmgr *hwmgr)
 {
	int tmp_result, result = 0;

-	smu7_upload_mc_firmware(hwmgr);
+	smu7_check_mc_firmware(hwmgr);

	tmp_result = smu7_read_clock_registers(hwmgr);
	PP_ASSERT_WITH_CODE((0 == tmp_result),
@@ -4371,22 +4388,36 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
		break;
	case OD_SCLK:
		if (hwmgr->od_enabled) {
-			size = sprintf(buf, "%s: \n", "OD_SCLK");
+			size = sprintf(buf, "%s:\n", "OD_SCLK");
			for (i = 0; i < odn_sclk_table->num_of_pl; i++)
-				size += sprintf(buf + size, "%d: %10uMhz %10u mV\n",
-					i, odn_sclk_table->entries[i].clock / 100,
+				size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+					i, odn_sclk_table->entries[i].clock/100,
					odn_sclk_table->entries[i].vddc);
		}
		break;
	case OD_MCLK:
		if (hwmgr->od_enabled) {
-			size = sprintf(buf, "%s: \n", "OD_MCLK");
+			size = sprintf(buf, "%s:\n", "OD_MCLK");
			for (i = 0; i < odn_mclk_table->num_of_pl; i++)
-				size += sprintf(buf + size, "%d: %10uMhz %10u mV\n",
-					i, odn_mclk_table->entries[i].clock / 100,
+				size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+					i, odn_mclk_table->entries[i].clock/100,
					odn_mclk_table->entries[i].vddc);
		}
		break;
+	case OD_RANGE:
+		if (hwmgr->od_enabled) {
+			size = sprintf(buf, "%s:\n", "OD_RANGE");
+			size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
+				data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+			size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
+				data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+			size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
+				data->odn_dpm_table.min_vddc,
+				data->odn_dpm_table.max_vddc);
+		}
+		break;
	default:
		break;
	}
@@ -4670,36 +4701,27 @@ static bool smu7_check_clk_voltage_valid(struct pp_hwmgr *hwmgr,
 {
	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);

-	struct phm_ppt_v1_information *table_info =
-			(struct phm_ppt_v1_information *)(hwmgr->pptable);
-	uint32_t min_vddc;
-	struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table;
-
-	if (table_info == NULL)
-		return false;
-
-	dep_sclk_table = table_info->vdd_dep_on_sclk;
-	min_vddc = dep_sclk_table->entries[0].vddc;
-
-	if (voltage < min_vddc || voltage > 2000) {
-		pr_info("OD voltage is out of range [%d - 2000] mV\n", min_vddc);
+	if (voltage < data->odn_dpm_table.min_vddc || voltage > data->odn_dpm_table.max_vddc) {
+		pr_info("OD voltage is out of range [%d - %d] mV\n",
+						data->odn_dpm_table.min_vddc,
+						data->odn_dpm_table.max_vddc);
		return false;
	}

	if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
-		if (data->vbios_boot_state.sclk_bootup_value > clk ||
+		if (data->golden_dpm_table.sclk_table.dpm_levels[0].value > clk ||
			hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) {
			pr_info("OD engine clock is out of range [%d - %d] MHz\n",
-				data->vbios_boot_state.sclk_bootup_value,
-				hwmgr->platform_descriptor.overdriveLimit.engineClock / 100);
+				data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
			return false;
		}
	} else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
-		if (data->vbios_boot_state.mclk_bootup_value > clk ||
+		if (data->golden_dpm_table.mclk_table.dpm_levels[0].value > clk ||
			hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) {
			pr_info("OD memory clock is out of range [%d - %d] MHz\n",
-				data->vbios_boot_state.mclk_bootup_value/100,
-				hwmgr->platform_descriptor.overdriveLimit.memoryClock / 100);
+				data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
			return false;
		}
	} else {
@@ -4748,10 +4770,6 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
			return;
		}
	}
-	if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
-		data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
-		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-	}

	dep_table = table_info->vdd_dep_on_sclk;
	odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk);
@@ -4761,9 +4779,9 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
			return;
		}
	}
-	if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
+	if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
		data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
-		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK;
	}
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
index b8d0bb378595..c91e75db6a8e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
@@ -184,6 +184,8 @@ struct smu7_odn_dpm_table {
	struct smu7_odn_clock_voltage_dependency_table	vdd_dependency_on_sclk;
	struct smu7_odn_clock_voltage_dependency_table	vdd_dependency_on_mclk;
	uint32_t					odn_mclk_min_limit;
+	uint32_t					min_vddc;
+	uint32_t					max_vddc;
 };

 struct profile_mode_setting {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
index d9e92e306535..99b29ff45d91 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
@@ -623,6 +623,190 @@ static const struct gpu_pt_config_reg DIDTConfig_Polaris11_Kicker[] = {
	{   0xFFFFFFFF  }	/* End of list */
 };

+static const struct gpu_pt_config_reg GCCACConfig_VegaM[] =
+{
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+//      Offset                             Mask                                                Shift                                               Value       Type
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+	// DIDT_SQ
+	//
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00060013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00860013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01060013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01860013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02060013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02860013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x03060013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x03860013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x04060013, GPU_CONFIGREG_GC_CAC_IND },
+
+	// DIDT_TD
+	//
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x000E0013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x008E0013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x010E0013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x018E0013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x020E0013, GPU_CONFIGREG_GC_CAC_IND },
+
+	// DIDT_TCP
+	//
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00100013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00900013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01100013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01900013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02100013, GPU_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02900013, GPU_CONFIGREG_GC_CAC_IND },
+
+	{   0xFFFFFFFF  }	// End of list
+};
+
+static const struct gpu_pt_config_reg DIDTConfig_VegaM[] =
+{
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+//      Offset                             Mask                                                Shift                                               Value       Type
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+	// DIDT_SQ
+	//
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT,                  0x0073,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT,                  0x00ab,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0084,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT,                  0x005a,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0067,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT,                  0x0084,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0027,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0046,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT,                 0x00aa,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MIN_POWER_MASK,                      DIDT_SQ_CTRL1__MIN_POWER__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MAX_POWER_MASK,                      DIDT_SQ_CTRL1__MAX_POWER__SHIFT,                    0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__UNUSED_0_MASK,                    DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3853,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_0_MASK,                       DIDT_SQ_CTRL2__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x005a,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_1_MASK,                       DIDT_SQ_CTRL2__UNUSED_1__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_2_MASK,                       DIDT_SQ_CTRL2__UNUSED_2__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__UNUSED_0_MASK,                  DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x3853,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x3153,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__PHASE_OFFSET_MASK,                   DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__UNUSED_0_MASK,                       DIDT_SQ_CTRL0__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	// DIDT_TD
+	//
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT,                  0x000a,
GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT1_MASK, DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT2_MASK, DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT, 0x0017, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT3_MASK, DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT, 0x002f, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT4_MASK, DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT, 0x0046, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT5_MASK, DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT, 0x005d, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT6_MASK, DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT7_MASK, DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MIN_POWER_MASK, DIDT_TD_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MAX_POWER_MASK, DIDT_TD_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL_OCP, DIDT_TD_CTRL_OCP__UNUSED_0_MASK, DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL_OCP, DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0x00ff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3fff, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_0_MASK, DIDT_TD_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x000f, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_1_MASK, DIDT_TD_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_2_MASK, DIDT_TD_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__UNUSED_0_MASK, DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x0dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x0dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__UNUSED_0_MASK, DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, 
GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK, DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__USE_REF_CLOCK_MASK, DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__PHASE_OFFSET_MASK, DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK, DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0009, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0009, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__UNUSED_0_MASK, DIDT_TD_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + // DIDT_TCP + // + { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT, 0x0004, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT, 0x0037, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT, 0x00ff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT, 0x0054, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_CTRL1, DIDT_TCP_CTRL1__MIN_POWER_MASK, DIDT_TCP_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL1, DIDT_TCP_CTRL1__MAX_POWER_MASK, DIDT_TCP_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_CTRL_OCP, DIDT_TCP_CTRL_OCP__UNUSED_0_MASK, DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL_OCP, DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_0_MASK, DIDT_TCP_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x0032, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_1_MASK, DIDT_TCP_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_2_MASK, DIDT_TCP_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, 
DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT,0x01aa, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__UNUSED_0_MASK, DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK, DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK, DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK, DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__PHASE_OFFSET_MASK, DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK, DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__UNUSED_0_MASK, DIDT_TCP_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { 0xFFFFFFFF } // End of list +}; static int smu7_enable_didt(struct pp_hwmgr *hwmgr, const bool enable) { uint32_t en = enable ? 
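The VegaM GC_CAC and DIDT tables above use the same encoding as the Polaris tables earlier in this file: each gpu_pt_config_reg entry is an (offset, mask, shift, value, type) tuple, and an offset of 0xFFFFFFFF terminates the list. Entries with a full 0xFFFFFFFF mask and zero shift (the GC_CAC sequence) degenerate to plain writes, while the DIDT entries update individual bit fields. smu7_program_pt_config_registers(), which consumes these tables in the hunk below under RLC safe mode and grbm_idx_mutex, can reasonably be modeled as a read-modify-write walk; the sketch stubs out the indirect register I/O and every name in it is hypothetical:

  #include <stdint.h>
  #include <stdio.h>

  struct pt_config_reg {
  	uint32_t offset, mask, shift, value;
  };

  /* Stand-ins for the DIDT/GC_CAC indirect register accessors. */
  static uint32_t regs[0x200];
  static uint32_t reg_read(uint32_t off) { return regs[off & 0x1ff]; }
  static void reg_write(uint32_t off, uint32_t v) { regs[off & 0x1ff] = v; }

  /* Walk until the 0xFFFFFFFF sentinel; clear the field, then insert
   * value << shift. A full mask with shift 0 is just a plain write. */
  static void program_config_registers(const struct pt_config_reg *cfg)
  {
  	for (; cfg->offset != 0xFFFFFFFF; cfg++) {
  		uint32_t data = reg_read(cfg->offset);

  		data &= ~cfg->mask;
  		data |= (cfg->value << cfg->shift) & cfg->mask;
  		reg_write(cfg->offset, data);
  	}
  }

  int main(void)
  {
  	/* two made-up fields in one register, WEIGHT0/WEIGHT1 style */
  	static const struct pt_config_reg table[] = {
  		{ 0x100, 0x000000FF, 0, 0x73 },
  		{ 0x100, 0x0000FF00, 8, 0xAB },
  		{ 0xFFFFFFFF }
  	};

  	program_config_registers(table);
  	printf("reg 0x100 = 0x%08X\n", reg_read(0x100)); /* 0x0000AB73 */
  	return 0;
  }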
1 : 0; @@ -740,8 +924,8 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) PP_CAP(PHM_PlatformCaps_TDRamping) || PP_CAP(PHM_PlatformCaps_TCPRamping)) { - cgs_enter_safe_mode(hwmgr->device, true); - cgs_lock_grbm_idx(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); + mutex_lock(&adev->grbm_idx_mutex); value = 0; value2 = cgs_read_register(hwmgr->device, mmGRBM_GFX_INDEX); for (count = 0; count < num_se; count++) { @@ -768,6 +952,11 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris12); PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); + } else if (hwmgr->chip_id == CHIP_VEGAM) { + result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_VegaM); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); + result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_VegaM); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); } } cgs_write_register(hwmgr->device, mmGRBM_GFX_INDEX, value2); @@ -781,8 +970,8 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == result), "Failed to enable DPM DIDT.", return result); } - cgs_lock_grbm_idx(hwmgr->device, false); - cgs_enter_safe_mode(hwmgr->device, false); + mutex_unlock(&adev->grbm_idx_mutex); + adev->gfx.rlc.funcs->exit_safe_mode(adev); } return 0; @@ -791,13 +980,14 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) int smu7_disable_didt_config(struct pp_hwmgr *hwmgr) { int result; + struct amdgpu_device *adev = hwmgr->adev; if (PP_CAP(PHM_PlatformCaps_SQRamping) || PP_CAP(PHM_PlatformCaps_DBRamping) || PP_CAP(PHM_PlatformCaps_TDRamping) || PP_CAP(PHM_PlatformCaps_TCPRamping)) { - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); result = smu7_enable_didt(hwmgr, false); PP_ASSERT_WITH_CODE((result == 0), @@ -809,7 +999,7 @@ int smu7_disable_didt_config(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == result), "Failed to disable DPM DIDT.", return result); } - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); } return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c index 7b26607c646a..50690c72b2ea 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c @@ -314,8 +314,7 @@ static int smu8_get_system_info_data(struct pp_hwmgr *hwmgr) uint8_t frev, crev; uint16_t size; - info = (ATOM_INTEGRATED_SYSTEM_INFO_V1_9 *) cgs_atom_get_data_table( - hwmgr->device, + info = (ATOM_INTEGRATED_SYSTEM_INFO_V1_9 *)smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, IntegratedSystemInfo), &size, &frev, &crev); @@ -694,7 +693,7 @@ static int smu8_update_sclk_limit(struct pp_hwmgr *hwmgr) else data->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk; - clock = hwmgr->display_config.min_core_set_clock; + clock = hwmgr->display_config->min_core_set_clock; if (clock == 0) pr_debug("min_core_set_clock not set\n"); @@ -749,7 +748,7 @@ static int smu8_set_deep_sleep_sclk_threshold(struct pp_hwmgr *hwmgr) { if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) { - uint32_t clks = hwmgr->display_config.min_core_set_clock_in_sr; + uint32_t clks = hwmgr->display_config->min_core_set_clock_in_sr; if (clks == 0) clks = SMU8_MIN_DEEP_SLEEP_SCLK; @@ 
-1041,25 +1040,21 @@ static int smu8_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct smu8_hwmgr *data = hwmgr->backend; struct PP_Clocks clocks = {0, 0, 0, 0}; bool force_high; - uint32_t num_of_active_displays = 0; - struct cgs_display_info info = {0}; smu8_ps->need_dfs_bypass = true; data->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label); - clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ? - hwmgr->display_config.min_mem_set_clock : + clocks.memoryClock = hwmgr->display_config->min_mem_set_clock != 0 ? + hwmgr->display_config->min_mem_set_clock : data->sys_info.nbp_memory_clock[1]; - cgs_get_active_displays_info(hwmgr->device, &info); - num_of_active_displays = info.display_count; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk; force_high = (clocks.memoryClock > data->sys_info.nbp_memory_clock[SMU8_NUM_NBPMEMORYCLOCK - 1]) - || (num_of_active_displays >= 3); + || (hwmgr->display_config->num_display >= 3); smu8_ps->action = smu8_current_ps->action; @@ -1897,20 +1892,20 @@ static void smu8_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) data->uvd_power_gated = bgate; if (bgate) { - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_GATE); smu8_dpm_update_uvd_dpm(hwmgr, true); smu8_dpm_powerdown_uvd(hwmgr); } else { smu8_dpm_powerup_uvd(hwmgr); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_UNGATE); smu8_dpm_update_uvd_dpm(hwmgr, false); @@ -1923,12 +1918,10 @@ static void smu8_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) struct smu8_hwmgr *data = hwmgr->backend; if (bgate) { - cgs_set_powergating_state( - hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_GATE); - cgs_set_clockgating_state( - hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); smu8_enable_disable_vce_dpm(hwmgr, false); @@ -1937,12 +1930,10 @@ static void smu8_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) } else { smu8_dpm_powerup_vce(hwmgr); data->vce_power_gated = false; - cgs_set_clockgating_state( - hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state( - hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_UNGATE); smu8_dpm_update_vce_dpm(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c index 598122854ab5..93a3d022ba47 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c @@ -24,6 +24,7 @@ #include "pp_debug.h" #include "ppatomctrl.h" #include "ppsmc.h" +#include "atom.h" uint8_t convert_to_vid(uint16_t vddc) { @@ -608,3 +609,100 @@ int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr) return 0; } + +void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size, + uint8_t *frev, uint8_t *crev) +{ 
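The new smu_atom_get_data_table() helper opening here replaces cgs_atom_get_data_table(): it asks the ATOM BIOS parser for a data table's header and, on success, returns a pointer directly into the BIOS image at the reported offset, along with the table's size and format/content revisions so callers such as smu8_get_system_info_data() above can sanity-check the layout before trusting it. A sketch of the shape of that lookup, with parse_data_header() standing in for amdgpu_atom_parse_data_header() and everything else hypothetical:

  #include <stdint.h>
  #include <stdio.h>

  struct atom_ctx { const uint8_t *bios; };

  /* Stand-in for amdgpu_atom_parse_data_header(): nonzero on success,
   * reporting the table's offset into the BIOS, size and revision. */
  static int parse_data_header(struct atom_ctx *ctx, uint32_t table,
  			     uint16_t *size, uint8_t *frev, uint8_t *crev,
  			     uint16_t *data_start)
  {
  	(void)ctx; (void)table;
  	*size = 0x40; *frev = 1; *crev = 9; *data_start = 0x100;
  	return 1;
  }

  static const void *get_data_table(struct atom_ctx *ctx, uint32_t table,
  				  uint16_t *size, uint8_t *frev, uint8_t *crev)
  {
  	uint16_t data_start;

  	if (parse_data_header(ctx, table, size, frev, crev, &data_start))
  		return ctx->bios + data_start;	/* table lives inside the image */
  	return NULL;
  }

  int main(void)
  {
  	static uint8_t bios[0x200];
  	struct atom_ctx ctx = { bios };
  	uint16_t size;
  	uint8_t frev, crev;
  	const uint8_t *info = get_data_table(&ctx, 30, &size, &frev, &crev);

  	printf("table at +0x%tx, size %u, rev %u.%u\n",
  	       info - bios, size, frev, crev);
  	return 0;
  }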
+ struct amdgpu_device *adev = dev; + uint16_t data_start; + + if (amdgpu_atom_parse_data_header( + adev->mode_info.atom_context, table, size, + frev, crev, &data_start)) + return (uint8_t *)adev->mode_info.atom_context->bios + + data_start; + + return NULL; +} + +int smu_get_voltage_dependency_table_ppt_v1( + const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table) +{ + uint8_t i = 0; + PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count), + "Voltage Lookup Table empty", + return -EINVAL); + + dep_table->count = allowed_dep_table->count; + for (i=0; i<dep_table->count; i++) { + dep_table->entries[i].clk = allowed_dep_table->entries[i].clk; + dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd; + dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset; + dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc; + dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx; + dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci; + dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd; + dep_table->entries[i].phases = allowed_dep_table->entries[i].phases; + dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable; + dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset; + } + + return 0; +} + +int smu_set_watermarks_for_clocks_ranges(void *wt_table, + struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges) +{ + uint32_t i; + struct watermarks *table = wt_table; + + if (!table || !wm_with_clock_ranges) + return -EINVAL; + + if (wm_with_clock_ranges->num_wm_sets_dmif > 4 || wm_with_clock_ranges->num_wm_sets_mcif > 4) + return -EINVAL; + + for (i = 0; i < wm_with_clock_ranges->num_wm_sets_dmif; i++) { + table->WatermarkRow[1][i].MinClock = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_dcefclk_in_khz) / + 100); + table->WatermarkRow[1][i].MaxClock = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_dcefclk_in_khz) / + 100); + table->WatermarkRow[1][i].MinUclk = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_memclk_in_khz) / + 100); + table->WatermarkRow[1][i].MaxUclk = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_memclk_in_khz) / + 100); + table->WatermarkRow[1][i].WmSetting = (uint8_t) + wm_with_clock_ranges->wm_sets_dmif[i].wm_set_id; + } + + for (i = 0; i < wm_with_clock_ranges->num_wm_sets_mcif; i++) { + table->WatermarkRow[0][i].MinClock = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_socclk_in_khz) / + 100); + table->WatermarkRow[0][i].MaxClock = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_socclk_in_khz) / + 100); + table->WatermarkRow[0][i].MinUclk = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_memclk_in_khz) / + 100); + table->WatermarkRow[0][i].MaxUclk = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_memclk_in_khz) / + 100); + table->WatermarkRow[0][i].WmSetting = (uint8_t) + wm_with_clock_ranges->wm_sets_mcif[i].wm_set_id; + } + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h index d37d16e4b613..916cc01e7652 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h @@ -26,10 +26,27 @@ struct pp_atomctrl_voltage_table; struct pp_hwmgr; 
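smu_set_watermarks_for_clocks_ranges() above is the common helper the vega10 code further below now delegates to. It packs DAL's kHz clock ranges into the SMU's little-endian uint16 fields in units of 100 kHz (the /100), caps each set at the four rows per bank declared in struct watermarks just below, and files DMIF sets under WatermarkRow[1] and MCIF sets under WatermarkRow[0]. Worked example: a 600 MHz DCEF floor arrives as 600000 kHz, divides to 6000, and is stored as cpu_to_le16(6000); note the uint16_t cast silently truncates anything above 6553500 kHz. A standalone sketch of the packing, using glibc's htole16()/le16toh() as stand-ins for the kernel's cpu_to_le16()/le16_to_cpu():

  #include <endian.h>
  #include <stdint.h>
  #include <stdio.h>

  struct wm_row { uint16_t min_clock, max_clock; };

  /* Pack one DAL clock range (kHz) into le16 fields in 100 kHz units. */
  static struct wm_row pack_range(uint32_t min_khz, uint32_t max_khz)
  {
  	struct wm_row row = {
  		.min_clock = htole16((uint16_t)(min_khz / 100)),
  		.max_clock = htole16((uint16_t)(max_khz / 100)),
  	};
  	return row;
  }

  int main(void)
  {
  	/* 600 MHz .. 1200 MHz DCEF range -> 6000 .. 12000 */
  	struct wm_row row = pack_range(600000, 1200000);

  	printf("min=%u max=%u (100 kHz units)\n",
  	       le16toh(row.min_clock), le16toh(row.max_clock));
  	return 0;
  }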
struct phm_ppt_v1_voltage_lookup_table; +struct Watermarks_t; +struct pp_wm_sets_with_clock_ranges_soc15; uint8_t convert_to_vid(uint16_t vddc); uint16_t convert_to_vddc(uint8_t vid); +struct watermark_row_generic_t { + uint16_t MinClock; + uint16_t MaxClock; + uint16_t MinUclk; + uint16_t MaxUclk; + + uint8_t WmSetting; + uint8_t Padding[3]; +}; + +struct watermarks { + struct watermark_row_generic_t WatermarkRow[2][4]; + uint32_t padding[7]; +}; + extern int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr, uint32_t index, uint32_t value, uint32_t mask); @@ -82,6 +99,16 @@ int phm_irq_process(struct amdgpu_device *adev, int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr); +void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size, + uint8_t *frev, uint8_t *crev); + +int smu_get_voltage_dependency_table_ppt_v1( + const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table); + +int smu_set_watermarks_for_clocks_ranges(void *wt_table, + struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges); + #define PHM_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT #define PHM_FIELD_MASK(reg, field) reg##__##field##_MASK diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 7cbb56ba6fab..d156b7bb92ae 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -36,7 +36,7 @@ #include "smu9.h" #include "smu9_driver_if.h" #include "vega10_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "pppcielanes.h" #include "vega10_hwmgr.h" #include "vega10_processpptables.h" @@ -51,10 +51,6 @@ #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" -#define VOLTAGE_SCALE 4 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 - #define HBM_MEMORY_CHANNEL_WIDTH 128 static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; @@ -79,8 +75,6 @@ static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; #define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L #define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L #define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L -static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, - enum pp_clock_type type, uint32_t mask); static const ULONG PhwVega10_Magic = (ULONG)(PHM_VIslands_Magic); @@ -291,6 +285,48 @@ static int vega10_set_features_platform_caps(struct pp_hwmgr *hwmgr) return 0; } +static int vega10_odn_initial_default_setting(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct phm_ppt_v2_information *table_info = + (struct phm_ppt_v2_information *)(hwmgr->pptable); + struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct vega10_odn_vddc_lookup_table *od_lookup_table; + struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table[3]; + struct phm_ppt_v1_clock_voltage_dependency_table *od_table[3]; + uint32_t i; + + od_lookup_table = &odn_table->vddc_lookup_table; + vddc_lookup_table = table_info->vddc_lookup_table; + + for (i = 0; i < vddc_lookup_table->count; i++) + od_lookup_table->entries[i].us_vdd = vddc_lookup_table->entries[i].us_vdd; + + od_lookup_table->count = vddc_lookup_table->count; + + dep_table[0] = table_info->vdd_dep_on_sclk; + dep_table[1] = table_info->vdd_dep_on_mclk; + dep_table[2] = 
table_info->vdd_dep_on_socclk; + od_table[0] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_sclk; + od_table[1] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_mclk; + od_table[2] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_socclk; + + for (i = 0; i < 3; i++) + smu_get_voltage_dependency_table_ppt_v1(dep_table[i], od_table[i]); + + if (odn_table->max_vddc == 0 || odn_table->max_vddc > 2000) + odn_table->max_vddc = dep_table[0]->entries[dep_table[0]->count - 1].vddc; + if (odn_table->min_vddc == 0 || odn_table->min_vddc > 2000) + odn_table->min_vddc = dep_table[0]->entries[0].vddc; + + i = od_table[2]->count - 1; + od_table[2]->entries[i].clk = hwmgr->platform_descriptor.overdriveLimit.memoryClock; + od_table[2]->entries[i].vddc = odn_table->max_vddc; + + return 0; +} + static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) { struct vega10_hwmgr *data = hwmgr->backend; @@ -427,7 +463,6 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) /* ACG firmware has major version 5 */ if ((hwmgr->smu_version & 0xff000000) == 0x5000000) data->smu_features[GNLD_ACG].supported = true; - if (data->registry_data.didt_support) data->smu_features[GNLD_DIDT].supported = true; @@ -754,7 +789,6 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) uint32_t config_telemetry = 0; struct pp_atomfwctrl_voltage_table vol_table; struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; data = kzalloc(sizeof(struct vega10_hwmgr), GFP_KERNEL); if (data == NULL) @@ -860,10 +894,7 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) advanceFanControlParameters.usFanPWMMinLimit * hwmgr->thermal_controller.fanInfo.ulMaxRPM / 100; - reg = soc15_get_register_offset(DF_HWID, 0, - mmDF_CS_AON0_DramBaseAddress0_BASE_IDX, - mmDF_CS_AON0_DramBaseAddress0); - data->mem_channels = (cgs_read_register(hwmgr->device, reg) & + data->mem_channels = (RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0) & DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK) >> DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; PP_ASSERT_WITH_CODE(data->mem_channels < ARRAY_SIZE(channel_number), @@ -1370,48 +1401,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) memcpy(&(data->golden_dpm_table), &(data->dpm_table), sizeof(struct vega10_dpm_table)); - if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) || - PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) { - data->odn_dpm_table.odn_core_clock_dpm_levels.num_of_pl = - data->dpm_table.gfx_table.count; - for (i = 0; i < data->dpm_table.gfx_table.count; i++) { - data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].clock = - data->dpm_table.gfx_table.dpm_levels[i].value; - data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].enabled = true; - } - - data->odn_dpm_table.vdd_dependency_on_sclk.count = - dep_gfx_table->count; - for (i = 0; i < dep_gfx_table->count; i++) { - data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].clk = - dep_gfx_table->entries[i].clk; - data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].vddInd = - dep_gfx_table->entries[i].vddInd; - data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].cks_enable = - dep_gfx_table->entries[i].cks_enable; - data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].cks_voffset = - dep_gfx_table->entries[i].cks_voffset; - } - - data->odn_dpm_table.odn_memory_clock_dpm_levels.num_of_pl = - data->dpm_table.mem_table.count; - for (i = 0; i < data->dpm_table.mem_table.count; i++) { - 
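vega10_odn_initial_default_setting() above seeds the overdrive (ODN) state from the stock powerplay tables: the VDDC lookup table and the sclk/mclk/socclk dependency tables are copied into editable shadows through smu_get_voltage_dependency_table_ppt_v1(), the user-editable VDDC window falls back to the sclk table's first and last entries whenever the AVFS-derived limits are zero or implausible (above 2000 mV), and the last socclk entry is stretched to the overdrive memory-clock limit. The block being removed below is the older per-field copying this replaces. The copy-and-clamp idea, simplified with made-up types and numbers:

  #include <stdint.h>
  #include <stdio.h>

  #define MAX_ENTRIES 8

  struct dep_entry { uint32_t clk; uint16_t vddc; };
  struct dep_table { uint32_t count; struct dep_entry entries[MAX_ENTRIES]; };

  struct odn_state {
  	struct dep_table dep;		/* shadow the user will edit */
  	uint32_t min_vddc, max_vddc;	/* from AVFS fuses; may be 0 */
  };

  static void odn_initial_default_setting(struct odn_state *odn,
  					const struct dep_table *stock)
  {
  	odn->dep = *stock;

  	if (odn->max_vddc == 0 || odn->max_vddc > 2000)
  		odn->max_vddc = stock->entries[stock->count - 1].vddc;
  	if (odn->min_vddc == 0 || odn->min_vddc > 2000)
  		odn->min_vddc = stock->entries[0].vddc;
  }

  int main(void)
  {
  	/* clocks in 10 kHz units: 852, 1138, 1536 MHz */
  	struct dep_table stock = { 3, {
  		{ 85200, 800 }, { 113800, 900 }, { 153600, 1000 } } };
  	struct odn_state odn = { .min_vddc = 0, .max_vddc = 2500 };

  	odn_initial_default_setting(&odn, &stock);
  	printf("OD VDDC window: %u - %u mV\n", odn.min_vddc, odn.max_vddc);
  	return 0;
  }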
data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].clock = - data->dpm_table.mem_table.dpm_levels[i].value; - data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].enabled = true; - } - - data->odn_dpm_table.vdd_dependency_on_mclk.count = dep_mclk_table->count; - for (i = 0; i < dep_mclk_table->count; i++) { - data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].clk = - dep_mclk_table->entries[i].clk; - data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].vddInd = - dep_mclk_table->entries[i].vddInd; - data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].vddci = - dep_mclk_table->entries[i].vddci; - } - } - return 0; } @@ -1514,18 +1503,18 @@ static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr, { struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_sclk = - table_info->vdd_dep_on_sclk; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_sclk; struct vega10_hwmgr *data = hwmgr->backend; struct pp_atomfwctrl_clock_dividers_soc15 dividers; uint32_t gfx_max_clock = hwmgr->platform_descriptor.overdriveLimit.engineClock; uint32_t i = 0; - if (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_VDDC) + if (hwmgr->od_enabled) dep_on_sclk = (struct phm_ppt_v1_clock_voltage_dependency_table *) - &(data->odn_dpm_table.vdd_dependency_on_sclk); + &(data->odn_dpm_table.vdd_dep_on_sclk); + else + dep_on_sclk = table_info->vdd_dep_on_sclk; PP_ASSERT_WITH_CODE(dep_on_sclk, "Invalid SOC_VDD-GFX_CLK Dependency Table!", @@ -1577,23 +1566,32 @@ static int vega10_populate_single_soc_level(struct pp_hwmgr *hwmgr, uint32_t soc_clock, uint8_t *current_soc_did, uint8_t *current_vol_index) { + struct vega10_hwmgr *data = hwmgr->backend; struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_soc = - table_info->vdd_dep_on_socclk; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_soc; struct pp_atomfwctrl_clock_dividers_soc15 dividers; uint32_t i; - PP_ASSERT_WITH_CODE(dep_on_soc, - "Invalid SOC_VDD-SOC_CLK Dependency Table!", - return -EINVAL); - for (i = 0; i < dep_on_soc->count; i++) { - if (dep_on_soc->entries[i].clk == soc_clock) - break; + if (hwmgr->od_enabled) { + dep_on_soc = (struct phm_ppt_v1_clock_voltage_dependency_table *) + &data->odn_dpm_table.vdd_dep_on_socclk; + for (i = 0; i < dep_on_soc->count; i++) { + if (dep_on_soc->entries[i].clk >= soc_clock) + break; + } + } else { + dep_on_soc = table_info->vdd_dep_on_socclk; + for (i = 0; i < dep_on_soc->count; i++) { + if (dep_on_soc->entries[i].clk == soc_clock) + break; + } } + PP_ASSERT_WITH_CODE(dep_on_soc->count > i, "Cannot find SOC_CLK in SOC_VDD-SOC_CLK Dependency Table", return -EINVAL); + PP_ASSERT_WITH_CODE(!pp_atomfwctrl_get_gpu_pll_dividers_vega10(hwmgr, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, soc_clock, &dividers), @@ -1602,22 +1600,6 @@ static int vega10_populate_single_soc_level(struct pp_hwmgr *hwmgr, *current_soc_did = (uint8_t)dividers.ulDid; *current_vol_index = (uint8_t)(dep_on_soc->entries[i].vddInd); - - return 0; -} - -uint16_t vega10_locate_vddc_given_clock(struct pp_hwmgr *hwmgr, - uint32_t clk, - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table) -{ - uint16_t i; - - for (i = 0; i < dep_table->count; i++) { - if (dep_table->entries[i].clk == clk) - return dep_table->entries[i].vddc; - } - - pr_info("[LocateVddcGivenClock] Cannot locate SOC Vddc for this clock!"); return
0; } @@ -1631,8 +1613,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) struct vega10_hwmgr *data = hwmgr->backend; struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = - table_info->vdd_dep_on_socclk; PPTable_t *pp_table = &(data->smc_state_table.pp_table); struct vega10_single_dpm_table *dpm_table = &(data->dpm_table.gfx_table); int result = 0; @@ -1663,11 +1643,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) dpm_table = &(data->dpm_table.soc_table); for (i = 0; i < dpm_table->count; i++) { - pp_table->SocVid[i] = - (uint8_t)convert_to_vid( - vega10_locate_vddc_given_clock(hwmgr, - dpm_table->dpm_levels[i].value, - dep_table)); result = vega10_populate_single_soc_level(hwmgr, dpm_table->dpm_levels[i].value, &(pp_table->SocclkDid[i]), @@ -1678,7 +1653,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) j = i - 1; while (i < NUM_SOCCLK_DPM_LEVELS) { - pp_table->SocVid[i] = pp_table->SocVid[j]; result = vega10_populate_single_soc_level(hwmgr, dpm_table->dpm_levels[j].value, &(pp_table->SocclkDid[i]), @@ -1691,6 +1665,32 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) return result; } +static void vega10_populate_vddc_soc_levels(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); + struct phm_ppt_v2_information *table_info = hwmgr->pptable; + struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table; + + uint8_t soc_vid = 0; + uint32_t i, max_vddc_level; + + if (hwmgr->od_enabled) + vddc_lookup_table = (struct phm_ppt_v1_voltage_lookup_table *)&data->odn_dpm_table.vddc_lookup_table; + else + vddc_lookup_table = table_info->vddc_lookup_table; + + max_vddc_level = vddc_lookup_table->count; + for (i = 0; i < max_vddc_level; i++) { + soc_vid = (uint8_t)convert_to_vid(vddc_lookup_table->entries[i].us_vdd); + pp_table->SocVid[i] = soc_vid; + } + while (i < MAX_REGULAR_DPM_NUMBER) { + pp_table->SocVid[i] = soc_vid; + i++; + } +} + /** * @brief Populates single SMC GFXCLK structure using the provided clock. * @@ -1705,25 +1705,25 @@ static int vega10_populate_single_memory_level(struct pp_hwmgr *hwmgr, struct vega10_hwmgr *data = hwmgr->backend; struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_mclk = - table_info->vdd_dep_on_mclk; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_mclk; struct pp_atomfwctrl_clock_dividers_soc15 dividers; uint32_t mem_max_clock = hwmgr->platform_descriptor.overdriveLimit.memoryClock; uint32_t i = 0; - if (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_VDDC) + if (hwmgr->od_enabled) dep_on_mclk = (struct phm_ppt_v1_clock_voltage_dependency_table *) - &data->odn_dpm_table.vdd_dependency_on_mclk; + &data->odn_dpm_table.vdd_dep_on_mclk; + else + dep_on_mclk = table_info->vdd_dep_on_mclk; PP_ASSERT_WITH_CODE(dep_on_mclk, "Invalid SOC_VDD-UCLK Dependency Table!", return -EINVAL); - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK) + if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK) { mem_clock = mem_clock > mem_max_clock ? 
mem_max_clock : mem_clock; - else { + } else { for (i = 0; i < dep_on_mclk->count; i++) { if (dep_on_mclk->entries[i].clk == mem_clock) break; @@ -2067,6 +2067,9 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) if (data->smu_features[GNLD_AVFS].supported) { result = pp_atomfwctrl_get_avfs_information(hwmgr, &avfs_params); if (!result) { + data->odn_dpm_table.max_vddc = avfs_params.ulMaxVddc; + data->odn_dpm_table.min_vddc = avfs_params.ulMinVddc; + pp_table->MinVoltageVid = (uint8_t) convert_to_vid((uint16_t)(avfs_params.ulMinVddc)); pp_table->MaxVoltageVid = (uint8_t) @@ -2345,6 +2348,22 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable) return 0; } +static int vega10_update_avfs(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + + if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { + vega10_avfs_enable(hwmgr, false); + } else if (data->need_update_dpm_table) { + vega10_avfs_enable(hwmgr, false); + vega10_avfs_enable(hwmgr, true); + } else { + vega10_avfs_enable(hwmgr, true); + } + + return 0; +} + static int vega10_populate_and_upload_avfs_fuse_override(struct pp_hwmgr *hwmgr) { int result = 0; @@ -2406,6 +2425,10 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) "Failed to setup default DPM tables!", return result); + /* initialize ODN table */ + if (hwmgr->od_enabled) + vega10_odn_initial_default_setting(hwmgr); + pp_atomfwctrl_get_voltage_table_v4(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2, &voltage_table); pp_table->MaxVidStep = voltage_table.max_vid_step; @@ -2452,6 +2475,8 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) "Failed to initialize Memory Level!", return result); + vega10_populate_vddc_soc_levels(hwmgr); + result = vega10_populate_all_display_clock_levels(hwmgr); PP_ASSERT_WITH_CODE(!result, "Failed to initialize Display Level!", @@ -2481,6 +2506,12 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) data->vbios_boot_state.mvddc = boot_up_values.usMvddc; data->vbios_boot_state.gfx_clock = boot_up_values.ulGfxClk; data->vbios_boot_state.mem_clock = boot_up_values.ulUClk; + pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, + SMU9_SYSPLL0_SOCCLK_ID, &boot_up_values.ulSocClk); + + pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, + SMU9_SYSPLL0_DCEFCLK_ID, &boot_up_values.ulDCEFClk); + data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk; data->vbios_boot_state.dcef_clock = boot_up_values.ulDCEFClk; if (0 != boot_up_values.usVddc) { @@ -2829,7 +2860,7 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) tmp_result = vega10_construct_voltage_tables(hwmgr); PP_ASSERT_WITH_CODE(!tmp_result, - "Failed to contruct voltage tables!", + "Failed to construct voltage tables!", result = tmp_result); tmp_result = vega10_init_smc_table(hwmgr); @@ -3028,7 +3059,6 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, bool disable_mclk_switching_for_frame_lock; bool disable_mclk_switching_for_vr; bool force_mclk_high; - struct cgs_display_info info = {0}; const struct phm_clock_and_voltage_limits *max_limits; uint32_t i; struct vega10_hwmgr *data = hwmgr->backend; @@ -3063,11 +3093,9 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, } } - cgs_get_active_displays_info(hwmgr->device, &info); - /* result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/ - minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock; - minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; + minimum_clocks.engineClock = 
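vega10_update_avfs() above encodes a three-way policy: a user voltage override (DPMTABLE_OD_UPDATE_VDDC) forces AVFS off, since adaptive voltage scaling would fight the manual setting; any other pending DPM-table change bounces AVFS off and back on so it re-evaluates against the new levels; and with nothing pending it is simply kept enabled. The same decision, modeled standalone (the flag value is an illustrative stand-in for the DPMTABLE_* bit):

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  #define OD_UPDATE_VDDC 0x8	/* illustrative value */

  static void avfs_enable(bool on) { printf("AVFS %s\n", on ? "on" : "off"); }

  static void update_avfs(uint32_t need_update_dpm_table)
  {
  	if (need_update_dpm_table & OD_UPDATE_VDDC) {
  		avfs_enable(false);		/* manual voltage: AVFS off */
  	} else if (need_update_dpm_table) {
  		avfs_enable(false);		/* retrain on the new tables */
  		avfs_enable(true);
  	} else {
  		avfs_enable(true);		/* steady state: keep it on */
  	}
  }

  int main(void)
  {
  	update_avfs(OD_UPDATE_VDDC);	/* off */
  	update_avfs(0x1);		/* off, then on */
  	update_avfs(0);			/* on */
  	return 0;
  }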
hwmgr->display_config->min_core_set_clock; + minimum_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock; if (PP_CAP(PHM_PlatformCaps_StablePState)) { stable_pstate_sclk_dpm_percentage = @@ -3107,10 +3135,10 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR); force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh); - if (info.display_count == 0) + if (hwmgr->display_config->num_display == 0) disable_mclk_switching = false; else - disable_mclk_switching = (info.display_count > 1) || + disable_mclk_switching = (hwmgr->display_config->num_display > 1) || disable_mclk_switching_for_frame_lock || disable_mclk_switching_for_vr || force_mclk_high; @@ -3171,87 +3199,11 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input) { - const struct phm_set_power_state_input *states = - (const struct phm_set_power_state_input *)input; - const struct vega10_power_state *vega10_ps = - cast_const_phw_vega10_power_state(states->pnew_state); struct vega10_hwmgr *data = hwmgr->backend; - struct vega10_single_dpm_table *sclk_table = - &(data->dpm_table.gfx_table); - uint32_t sclk = vega10_ps->performance_levels - [vega10_ps->performance_level_count - 1].gfx_clock; - struct vega10_single_dpm_table *mclk_table = - &(data->dpm_table.mem_table); - uint32_t mclk = vega10_ps->performance_levels - [vega10_ps->performance_level_count - 1].mem_clock; - struct PP_Clocks min_clocks = {0}; - uint32_t i; - struct cgs_display_info info = {0}; - - data->need_update_dpm_table = 0; - - if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) || - PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) { - for (i = 0; i < sclk_table->count; i++) { - if (sclk == sclk_table->dpm_levels[i].value) - break; - } - - if (!(data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_SCLK) && i >= sclk_table->count) { - /* Check SCLK in DAL's minimum clocks - * in case DeepSleep divider update is required. - */ - if (data->display_timing.min_clock_in_sr != - min_clocks.engineClockInSR && - (min_clocks.engineClockInSR >= - VEGA10_MINIMUM_ENGINE_CLOCK || - data->display_timing.min_clock_in_sr >= - VEGA10_MINIMUM_ENGINE_CLOCK)) - data->need_update_dpm_table |= DPMTABLE_UPDATE_SCLK; - } - - cgs_get_active_displays_info(hwmgr->device, &info); - - if (data->display_timing.num_existing_displays != - info.display_count) - data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK; - } else { - for (i = 0; i < sclk_table->count; i++) { - if (sclk == sclk_table->dpm_levels[i].value) - break; - } - - if (i >= sclk_table->count) - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; - else { - /* Check SCLK in DAL's minimum clocks - * in case DeepSleep divider update is required. 
- */ - if (data->display_timing.min_clock_in_sr != - min_clocks.engineClockInSR && - (min_clocks.engineClockInSR >= - VEGA10_MINIMUM_ENGINE_CLOCK || - data->display_timing.min_clock_in_sr >= - VEGA10_MINIMUM_ENGINE_CLOCK)) - data->need_update_dpm_table |= DPMTABLE_UPDATE_SCLK; - } - for (i = 0; i < mclk_table->count; i++) { - if (mclk == mclk_table->dpm_levels[i].value) - break; - } - - cgs_get_active_displays_info(hwmgr->device, &info); + if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) + data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK; - if (i >= mclk_table->count) - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; - - if (data->display_timing.num_existing_displays != - info.display_count || - i >= mclk_table->count) - data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK; - } return 0; } @@ -3259,194 +3211,29 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels( struct pp_hwmgr *hwmgr, const void *input) { int result = 0; - const struct phm_set_power_state_input *states = - (const struct phm_set_power_state_input *)input; - const struct vega10_power_state *vega10_ps = - cast_const_phw_vega10_power_state(states->pnew_state); struct vega10_hwmgr *data = hwmgr->backend; - uint32_t sclk = vega10_ps->performance_levels - [vega10_ps->performance_level_count - 1].gfx_clock; - uint32_t mclk = vega10_ps->performance_levels - [vega10_ps->performance_level_count - 1].mem_clock; - struct vega10_dpm_table *dpm_table = &data->dpm_table; - struct vega10_dpm_table *golden_dpm_table = - &data->golden_dpm_table; - uint32_t dpm_count, clock_percent; - uint32_t i; - if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) || - PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) { - - if (!data->need_update_dpm_table && - !data->apply_optimized_settings && - !data->apply_overdrive_next_settings_mask) - return 0; - - if (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_SCLK) { - for (dpm_count = 0; - dpm_count < dpm_table->gfx_table.count; - dpm_count++) { - dpm_table->gfx_table.dpm_levels[dpm_count].enabled = - data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].enabled; - dpm_table->gfx_table.dpm_levels[dpm_count].value = - data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].clock; - } - } - - if (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_MCLK) { - for (dpm_count = 0; - dpm_count < dpm_table->mem_table.count; - dpm_count++) { - dpm_table->mem_table.dpm_levels[dpm_count].enabled = - data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].enabled; - dpm_table->mem_table.dpm_levels[dpm_count].value = - data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].clock; - } - } - - if ((data->need_update_dpm_table & DPMTABLE_UPDATE_SCLK) || - data->apply_optimized_settings || - (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_SCLK)) { - result = vega10_populate_all_graphic_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate SCLK during PopulateNewDPMClocksStates Function!", - return result); - } - - if ((data->need_update_dpm_table & DPMTABLE_UPDATE_MCLK) || - (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_MCLK)){ - result = vega10_populate_all_memory_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate MCLK during PopulateNewDPMClocksStates Function!", - return result); - } - } else { - if (!data->need_update_dpm_table && - !data->apply_optimized_settings) - return 0; - - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_SCLK && - 
data->smu_features[GNLD_DPM_GFXCLK].supported) { - dpm_table-> - gfx_table.dpm_levels[dpm_table->gfx_table.count - 1]. - value = sclk; - if (hwmgr->od_enabled) { - /* Need to do calculation based on the golden DPM table - * as the Heatmap GPU Clock axis is also based on - * the default values - */ - PP_ASSERT_WITH_CODE( - golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value, - "Divide by 0!", - return -1); - - dpm_count = dpm_table->gfx_table.count < 2 ? - 0 : dpm_table->gfx_table.count - 2; - for (i = dpm_count; i > 1; i--) { - if (sclk > golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value) { - clock_percent = - ((sclk - golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value) * - 100) / - golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value; - - dpm_table->gfx_table.dpm_levels[i].value = - golden_dpm_table->gfx_table.dpm_levels[i].value + - (golden_dpm_table->gfx_table.dpm_levels[i].value * - clock_percent) / 100; - } else if (golden_dpm_table-> - gfx_table.dpm_levels[dpm_table->gfx_table.count-1].value > - sclk) { - clock_percent = - ((golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value - - sclk) * 100) / - golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count-1].value; - - dpm_table->gfx_table.dpm_levels[i].value = - golden_dpm_table->gfx_table.dpm_levels[i].value - - (golden_dpm_table->gfx_table.dpm_levels[i].value * - clock_percent) / 100; - } else - dpm_table->gfx_table.dpm_levels[i].value = - golden_dpm_table->gfx_table.dpm_levels[i].value; - } - } - } + if (!data->need_update_dpm_table) + return 0; - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK && - data->smu_features[GNLD_DPM_UCLK].supported) { - dpm_table-> - mem_table.dpm_levels[dpm_table->mem_table.count - 1]. - value = mclk; + if (data->need_update_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK + DPMTABLE_UPDATE_SOCCLK)) { + result = vega10_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to populate SCLK during PopulateNewDPMClocksStates Function!", + return result); + } - if (hwmgr->od_enabled) { - PP_ASSERT_WITH_CODE( - golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count - 1].value, - "Divide by 0!", - return -1); + if (data->need_update_dpm_table & + (DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) { + result = vega10_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to populate MCLK during PopulateNewDPMClocksStates Function!", + return result); + } - dpm_count = dpm_table->mem_table.count < 2 ? 
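Note how the simplified checks in this hunk test need_update_dpm_table against sums such as DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK + DPMTABLE_UPDATE_SOCCLK. Because the DPMTABLE_* flags are distinct powers of two, adding them is arithmetically identical to OR-ing them, although a bitwise or would state the intent more directly. A two-assert demonstration (the flag values are illustrative; the real constants live in the hwmgr headers):

  #include <assert.h>
  #include <stdint.h>

  #define OD_UPDATE_SCLK	0x01	/* illustrative values */
  #define UPDATE_SCLK	0x04
  #define UPDATE_SOCCLK	0x20

  int main(void)
  {
  	uint32_t need = UPDATE_SCLK;

  	/* distinct power-of-two flags: sum == bitwise or */
  	assert((OD_UPDATE_SCLK + UPDATE_SCLK + UPDATE_SOCCLK) ==
  	       (OD_UPDATE_SCLK | UPDATE_SCLK | UPDATE_SOCCLK));
  	assert(need & (OD_UPDATE_SCLK + UPDATE_SCLK + UPDATE_SOCCLK));
  	return 0;
  }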
- 0 : dpm_table->mem_table.count - 2; - for (i = dpm_count; i > 1; i--) { - if (mclk > golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value) { - clock_percent = ((mclk - - golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value) * - 100) / - golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value; - - dpm_table->mem_table.dpm_levels[i].value = - golden_dpm_table->mem_table.dpm_levels[i].value + - (golden_dpm_table->mem_table.dpm_levels[i].value * - clock_percent) / 100; - } else if (golden_dpm_table->mem_table.dpm_levels - [dpm_table->mem_table.count-1].value > mclk) { - clock_percent = ((golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value - mclk) * - 100) / - golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value; - - dpm_table->mem_table.dpm_levels[i].value = - golden_dpm_table->mem_table.dpm_levels[i].value - - (golden_dpm_table->mem_table.dpm_levels[i].value * - clock_percent) / 100; - } else - dpm_table->mem_table.dpm_levels[i].value = - golden_dpm_table->mem_table.dpm_levels[i].value; - } - } - } + vega10_populate_vddc_soc_levels(hwmgr); - if ((data->need_update_dpm_table & - (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK)) || - data->apply_optimized_settings) { - result = vega10_populate_all_graphic_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate SCLK during PopulateNewDPMClocksStates Function!", - return result); - } - - if (data->need_update_dpm_table & - (DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) { - result = vega10_populate_all_memory_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate MCLK during PopulateNewDPMClocksStates Function!", - return result); - } - } return result; } @@ -3742,8 +3529,9 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr, PP_ASSERT_WITH_CODE(!result, "Failed to upload PPtable!", return result); - data->apply_optimized_settings = false; - data->apply_overdrive_next_settings_mask = 0; + vega10_update_avfs(hwmgr); + + data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC; return 0; } @@ -3793,16 +3581,18 @@ static uint32_t vega10_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) } static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr, - struct pp_gpu_power *query) + uint32_t *query) { uint32_t value; + if (!query) + return -EINVAL; + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrPkgPwr); value = smum_get_argument(hwmgr); - /* power value is an integer */ - memset(query, 0, sizeof *query); - query->average_gpu_power = value << 8; + /* SMC returning actual watts, keep consistent with legacy asics, low 8 bit as 8 fractional bits */ + *query = value << 8; return 0; } @@ -3810,22 +3600,18 @@ static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr, static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, void *value, int *size) { - uint32_t sclk_idx, mclk_idx, activity_percent = 0; + struct amdgpu_device *adev = hwmgr->adev; + uint32_t sclk_mhz, mclk_idx, activity_percent = 0; struct vega10_hwmgr *data = hwmgr->backend; struct vega10_dpm_table *dpm_table = &data->dpm_table; int ret = 0; - uint32_t reg, val_vid; + uint32_t val_vid; switch (idx) { case AMDGPU_PP_SENSOR_GFX_SCLK: - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex); - sclk_idx = smum_get_argument(hwmgr); - if (sclk_idx < dpm_table->gfx_table.count) { - *((uint32_t *)value) = dpm_table->gfx_table.dpm_levels[sclk_idx].value; - *size = 4; - } else { - ret = -EINVAL; - } + 
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetAverageGfxclkActualFrequency); + sclk_mhz = smum_get_argument(hwmgr); + *((uint32_t *)value) = sclk_mhz * 100; break; case AMDGPU_PP_SENSOR_GFX_MCLK: smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex); @@ -3856,18 +3642,10 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; break; case AMDGPU_PP_SENSOR_GPU_POWER: - if (*size < sizeof(struct pp_gpu_power)) - ret = -EINVAL; - else { - *size = sizeof(struct pp_gpu_power); - ret = vega10_get_gpu_power(hwmgr, (struct pp_gpu_power *)value); - } + ret = vega10_get_gpu_power(hwmgr, (uint32_t *)value); break; case AMDGPU_PP_SENSOR_VDDGFX: - reg = soc15_get_register_offset(SMUIO_HWID, 0, - mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX, - mmSMUSVI0_PLANE0_CURRENTVID); - val_vid = (cgs_read_register(hwmgr->device, reg) & + val_vid = (RREG32_SOC15(SMUIO, 0, mmSMUSVI0_PLANE0_CURRENTVID) & SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK) >> SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT; *((uint32_t *)value) = (uint32_t)convert_to_vddc((uint8_t)val_vid); @@ -3956,26 +3734,18 @@ static int vega10_notify_smc_display_config_after_ps_adjustment( (struct phm_ppt_v2_information *)hwmgr->pptable; struct phm_ppt_v1_clock_voltage_dependency_table *mclk_table = table_info->vdd_dep_on_mclk; uint32_t idx; - uint32_t num_active_disps = 0; - struct cgs_display_info info = {0}; struct PP_Clocks min_clocks = {0}; uint32_t i; struct pp_display_clock_request clock_req; - info.mode_info = NULL; - - cgs_get_active_displays_info(hwmgr->device, &info); - - num_active_disps = info.display_count; - - if (num_active_disps > 1) + if (hwmgr->display_config->num_display > 1) vega10_notify_smc_display_change(hwmgr, false); else vega10_notify_smc_display_change(hwmgr, true); - min_clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk; - min_clocks.dcefClockInSR = hwmgr->display_config.min_dcef_deep_sleep_set_clk; - min_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; + min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk; + min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk; + min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock; for (i = 0; i < dpm_table->count; i++) { if (dpm_table->dpm_levels[i].value == min_clocks.dcefClock) @@ -4120,6 +3890,47 @@ static void vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) } } +static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, uint32_t mask) +{ + struct vega10_hwmgr *data = hwmgr->backend; + + switch (type) { + case PP_SCLK: + data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0; + data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0; + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), + "Failed to upload boot level to lowest!", + return -EINVAL); + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), + "Failed to upload dpm max level to highest!", + return -EINVAL); + break; + + case PP_MCLK: + data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0; + data->smc_state_table.mem_max_level = mask ? 
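vega10_force_clock_level (moved up here by the patch) turns the sysfs level bitmask into a [boot, max] window: ffs() picks the lowest set bit for the soft-minimum level and fls() the highest for the soft-maximum. A user-space sketch with GCC builtins standing in for the kernel helpers:

#include <stdint.h>
#include <stdio.h>

/* Kernel ffs()/fls() return 1-based bit indices; subtracting 1 gives
 * 0-based DPM level numbers, with 0 used for an empty mask. */
static unsigned int mask_boot_level(uint32_t mask)
{
    return mask ? (unsigned int)__builtin_ffs((int)mask) - 1 : 0;
}

static unsigned int mask_max_level(uint32_t mask)
{
    return mask ? 31u - (unsigned int)__builtin_clz(mask) : 0;
}

int main(void)
{
    uint32_t mask = 0x06; /* user forced DPM levels 1 and 2 */

    printf("boot=%u max=%u\n", mask_boot_level(mask), mask_max_level(mask));
    return 0; /* boot=1 max=2 */
}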
(fls(mask) - 1) : 0; + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), + "Failed to upload boot level to lowest!", + return -EINVAL); + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), + "Failed to upload dpm max level to highest!", + return -EINVAL); + + break; + + case PP_PCIE: + default: + break; + } + + return 0; +} + static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level) { @@ -4356,97 +4167,15 @@ static int vega10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr, struct vega10_hwmgr *data = hwmgr->backend; Watermarks_t *table = &(data->smc_state_table.water_marks_table); int result = 0; - uint32_t i; if (!data->registry_data.disable_water_mark) { - for (i = 0; i < wm_with_clock_ranges->num_wm_sets_dmif; i++) { - table->WatermarkRow[WM_DCEFCLK][i].MinClock = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_dcefclk_in_khz) / - 100); - table->WatermarkRow[WM_DCEFCLK][i].MaxClock = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_dcefclk_in_khz) / - 100); - table->WatermarkRow[WM_DCEFCLK][i].MinUclk = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_memclk_in_khz) / - 100); - table->WatermarkRow[WM_DCEFCLK][i].MaxUclk = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_memclk_in_khz) / - 100); - table->WatermarkRow[WM_DCEFCLK][i].WmSetting = (uint8_t) - wm_with_clock_ranges->wm_sets_dmif[i].wm_set_id; - } - - for (i = 0; i < wm_with_clock_ranges->num_wm_sets_mcif; i++) { - table->WatermarkRow[WM_SOCCLK][i].MinClock = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_socclk_in_khz) / - 100); - table->WatermarkRow[WM_SOCCLK][i].MaxClock = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_socclk_in_khz) / - 100); - table->WatermarkRow[WM_SOCCLK][i].MinUclk = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_memclk_in_khz) / - 100); - table->WatermarkRow[WM_SOCCLK][i].MaxUclk = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_memclk_in_khz) / - 100); - table->WatermarkRow[WM_SOCCLK][i].WmSetting = (uint8_t) - wm_with_clock_ranges->wm_sets_mcif[i].wm_set_id; - } + smu_set_watermarks_for_clocks_ranges(table, wm_with_clock_ranges); data->water_marks_bitmap = WaterMarksExist; } return result; } -static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, - enum pp_clock_type type, uint32_t mask) -{ - struct vega10_hwmgr *data = hwmgr->backend; - - switch (type) { - case PP_SCLK: - data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0; - data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0; - - PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), - "Failed to upload boot level to lowest!", - return -EINVAL); - - PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), - "Failed to upload dpm max level to highest!", - return -EINVAL); - break; - - case PP_MCLK: - data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0; - data->smc_state_table.mem_max_level = mask ? 
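The watermark loops removed above converted each kHz clock bound into a 16-bit little-endian SMU field in units of 100 kHz (the "/ 100" before cpu_to_le16); the shared smu_set_watermarks_for_clocks_ranges helper now performs the equivalent fill. A sketch of that conversion, with htole16() standing in for the kernel's cpu_to_le16():

#include <stdint.h>
#include <endian.h>
#include <stdio.h>

/* kHz -> little-endian uint16_t in 100 kHz units, as the removed
 * open-coded loops did with cpu_to_le16(..._in_khz / 100). */
static uint16_t wm_clock_field(uint32_t khz)
{
    return htole16((uint16_t)(khz / 100));
}

int main(void)
{
    printf("0x%04x\n", (unsigned int)wm_clock_field(600000)); /* 600 MHz -> 0x1770 */
    return 0;
}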
(fls(mask) - 1) : 0; - - PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), - "Failed to upload boot level to lowest!", - return -EINVAL); - - PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), - "Failed to upload dpm max level to highest!", - return -EINVAL); - - break; - - case PP_PCIE: - default: - break; - } - - return 0; -} - static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { @@ -4454,6 +4183,8 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table); struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table); struct vega10_pcie_table *pcie_table = &(data->dpm_table.pcie_table); + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL; + int i, now, size = 0; switch (type) { @@ -4492,6 +4223,40 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, (pcie_table->pcie_gen[i] == 2) ? "8.0GT/s, x16" : "", (i == now) ? "*" : ""); break; + case OD_SCLK: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s:\n", "OD_SCLK"); + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; + for (i = 0; i < podn_vdd_dep->count; i++) + size += sprintf(buf + size, "%d: %10uMhz %10umV\n", + i, podn_vdd_dep->entries[i].clk / 100, + podn_vdd_dep->entries[i].vddc); + } + break; + case OD_MCLK: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s:\n", "OD_MCLK"); + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; + for (i = 0; i < podn_vdd_dep->count; i++) + size += sprintf(buf + size, "%d: %10uMhz %10umV\n", + i, podn_vdd_dep->entries[i].clk/100, + podn_vdd_dep->entries[i].vddc); + } + break; + case OD_RANGE: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.gfx_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.engineClock/100); + size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.mem_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); + size += sprintf(buf + size, "VDDC: %7umV %11umV\n", + data->odn_dpm_table.min_vddc, + data->odn_dpm_table.max_vddc); + } + break; default: break; } @@ -4501,10 +4266,8 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, static int vega10_display_configuration_changed_task(struct pp_hwmgr *hwmgr) { struct vega10_hwmgr *data = hwmgr->backend; - int result = 0; - uint32_t num_turned_on_displays = 1; Watermarks_t *wm_table = &(data->smc_state_table.water_marks_table); - struct cgs_display_info info = {0}; + int result = 0; if ((data->water_marks_bitmap & WaterMarksExist) && !(data->water_marks_bitmap & WaterMarksLoaded)) { @@ -4514,10 +4277,8 @@ static int vega10_display_configuration_changed_task(struct pp_hwmgr *hwmgr) } if (data->water_marks_bitmap & WaterMarksLoaded) { - cgs_get_active_displays_info(hwmgr->device, &info); - num_turned_on_displays = info.display_count; smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_NumOfDisplays, num_turned_on_displays); + PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display); } return result; @@ -4603,15 +4364,12 @@ vega10_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmg { struct vega10_hwmgr *data = hwmgr->backend; bool is_update_required = false; - struct cgs_display_info info = {0, 0, NULL}; - cgs_get_active_displays_info(hwmgr->device, &info); - - if (data->display_timing.num_existing_displays != 
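The new OD_SCLK/OD_MCLK cases print entries[i].clk / 100 as MHz, which implies the ODN tables keep clocks in 10 kHz units; voltages are already in mV, and OD_RANGE applies the same scaling to the golden-table bounds. The same formatting, reduced to a standalone sketch:

#include <stdint.h>
#include <stdio.h>

struct od_entry { uint32_t clk; uint32_t vddc; }; /* 10 kHz units, mV */

static int print_od_sclk(char *buf, const struct od_entry *e, int count)
{
    int i, size = sprintf(buf, "%s:\n", "OD_SCLK");

    for (i = 0; i < count; i++)
        size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
                        i, e[i].clk / 100, e[i].vddc);
    return size;
}

int main(void)
{
    struct od_entry tbl[] = { { 85200, 800 }, { 139200, 1000 } };
    char buf[128];

    print_od_sclk(buf, tbl, 2);
    fputs(buf, stdout); /* 0:        852Mhz        800mV ... */
    return 0;
}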
info.display_count) + if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) is_update_required = true; if (PP_CAP(PHM_PlatformCaps_SclkDeepSleep)) { - if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr) + if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr) is_update_required = true; } @@ -4886,6 +4644,200 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui return 0; } + +static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + uint32_t clk, + uint32_t voltage) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct vega10_single_dpm_table *golden_table; + + if (voltage < odn_table->min_vddc || voltage > odn_table->max_vddc) { + pr_info("OD voltage is out of range [%d - %d] mV\n", odn_table->min_vddc, odn_table->max_vddc); + return false; + } + + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { + golden_table = &(data->golden_dpm_table.gfx_table); + if (golden_table->dpm_levels[0].value > clk || + hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) { + pr_info("OD engine clock is out of range [%d - %d] MHz\n", + golden_table->dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.engineClock/100); + return false; + } + } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) { + golden_table = &(data->golden_dpm_table.mem_table); + if (golden_table->dpm_levels[0].value > clk || + hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) { + pr_info("OD memory clock is out of range [%d - %d] MHz\n", + golden_table->dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); + return false; + } + } else { + return false; + } + + return true; +} + +static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct phm_ppt_v2_information *table_info = hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table; + struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table; + uint32_t i; + + dep_table = table_info->vdd_dep_on_mclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_mclk); + + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK; + return; + } + } + + dep_table = table_info->vdd_dep_on_sclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_sclk); + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK; + return; + } + } + + if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { + data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; + } +} + +static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct phm_ppt_v2_information *table_info = hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk; + struct vega10_single_dpm_table *dpm_table = 
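vega10_check_clk_voltage_valid accepts an OD point only if the voltage lies in [min_vddc, max_vddc] and the clock sits between the lowest golden DPM level and the overdriveLimit ceiling. The same range test, condensed around a simplified limits struct:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

struct od_limits {
    uint32_t min_vddc, max_vddc; /* mV */
    uint32_t min_clk, max_clk;   /* 10 kHz units */
};

static bool od_point_valid(const struct od_limits *lim,
                           uint32_t clk, uint32_t voltage)
{
    if (voltage < lim->min_vddc || voltage > lim->max_vddc) {
        printf("OD voltage is out of range [%u - %u] mV\n",
               lim->min_vddc, lim->max_vddc);
        return false;
    }
    if (clk < lim->min_clk || clk > lim->max_clk) {
        printf("OD clock is out of range [%u - %u] MHz\n",
               lim->min_clk / 100, lim->max_clk / 100);
        return false;
    }
    return true;
}

int main(void)
{
    struct od_limits lim = { 800, 1200, 85200, 165000 };

    return od_point_valid(&lim, 160000, 1100) ? 0 : 1;
}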
&data->golden_dpm_table.soc_table; + + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk = + &data->odn_dpm_table.vdd_dep_on_socclk; + struct vega10_odn_vddc_lookup_table *od_vddc_lookup_table = &data->odn_dpm_table.vddc_lookup_table; + + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep; + uint8_t i, j; + + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; + for (i = 0; i < podn_vdd_dep->count - 1; i++) + od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc; + if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc) + od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc; + } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) { + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; + for (i = 0; i < dpm_table->count; i++) { + for (j = 0; j < od_vddc_lookup_table->count; j++) { + if (od_vddc_lookup_table->entries[j].us_vdd > + podn_vdd_dep->entries[i].vddc) + break; + } + if (j == od_vddc_lookup_table->count) { + od_vddc_lookup_table->entries[j-1].us_vdd = + podn_vdd_dep->entries[i].vddc; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + } + podn_vdd_dep->entries[i].vddInd = j; + } + dpm_table = &data->dpm_table.soc_table; + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd && + dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) { + data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; + podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk; + dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; + } + } + if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk < + podn_vdd_dep->entries[dep_table->count-1].clk) { + data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; + podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk; + dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk; + } + if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd < + podn_vdd_dep->entries[dep_table->count-1].vddInd) { + data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; + podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd; + } + } +} + +static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_table; + struct vega10_single_dpm_table *dpm_table; + + uint32_t input_clk; + uint32_t input_vol; + uint32_t input_level; + uint32_t i; + + PP_ASSERT_WITH_CODE(input, "NULL user input for clock and voltage", + return -EINVAL); + + if (!hwmgr->od_enabled) { + pr_info("OverDrive feature not enabled\n"); + return -EINVAL; + } + + if (PP_OD_EDIT_SCLK_VDDC_TABLE == type) { + dpm_table = &data->dpm_table.gfx_table; + podn_vdd_dep_table = &data->odn_dpm_table.vdd_dep_on_sclk; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + } else if (PP_OD_EDIT_MCLK_VDDC_TABLE == type) { + dpm_table = &data->dpm_table.mem_table; + podn_vdd_dep_table = &data->odn_dpm_table.vdd_dep_on_mclk; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) { + 
memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table)); + vega10_odn_initial_default_setting(hwmgr); + return 0; + } else if (PP_OD_COMMIT_DPM_TABLE == type) { + vega10_check_dpm_table_updated(hwmgr); + return 0; + } else { + return -EINVAL; + } + + for (i = 0; i < size; i += 3) { + if (i + 3 > size || input[i] >= podn_vdd_dep_table->count) { + pr_info("invalid clock voltage input\n"); + return 0; + } + input_level = input[i]; + input_clk = input[i+1] * 100; + input_vol = input[i+2]; + + if (vega10_check_clk_voltage_valid(hwmgr, type, input_clk, input_vol)) { + dpm_table->dpm_levels[input_level].value = input_clk; + podn_vdd_dep_table->entries[input_level].clk = input_clk; + podn_vdd_dep_table->entries[input_level].vddc = input_vol; + } else { + return -EINVAL; + } + } + vega10_odn_update_soc_table(hwmgr, type); + return 0; +} + static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .backend_init = vega10_hwmgr_backend_init, .backend_fini = vega10_hwmgr_backend_fini, @@ -4944,6 +4896,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_power_profile_mode = vega10_get_power_profile_mode, .set_power_profile_mode = vega10_set_power_profile_mode, .set_power_limit = vega10_set_power_limit, + .odn_edit_dpm_table = vega10_odn_edit_dpm_table, }; int vega10_enable_smc_features(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h index 5339ea1f3dce..aadd6cbc7e85 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h @@ -282,15 +282,21 @@ struct vega10_registry_data { struct vega10_odn_clock_voltage_dependency_table { uint32_t count; - struct phm_ppt_v1_clock_voltage_dependency_record - entries[MAX_REGULAR_DPM_NUMBER]; + struct phm_ppt_v1_clock_voltage_dependency_record entries[MAX_REGULAR_DPM_NUMBER]; +}; + +struct vega10_odn_vddc_lookup_table { + uint32_t count; + struct phm_ppt_v1_voltage_lookup_record entries[MAX_REGULAR_DPM_NUMBER]; }; struct vega10_odn_dpm_table { - struct phm_odn_clock_levels odn_core_clock_dpm_levels; - struct phm_odn_clock_levels odn_memory_clock_dpm_levels; - struct vega10_odn_clock_voltage_dependency_table vdd_dependency_on_sclk; - struct vega10_odn_clock_voltage_dependency_table vdd_dependency_on_mclk; + struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_sclk; + struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_mclk; + struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_socclk; + struct vega10_odn_vddc_lookup_table vddc_lookup_table; + uint32_t max_vddc; + uint32_t min_vddc; }; struct vega10_odn_fan_table { @@ -301,8 +307,8 @@ struct vega10_odn_fan_table { }; struct vega10_hwmgr { - struct vega10_dpm_table dpm_table; - struct vega10_dpm_table golden_dpm_table; + struct vega10_dpm_table dpm_table; + struct vega10_dpm_table golden_dpm_table; struct vega10_registry_data registry_data; struct vega10_vbios_boot_state vbios_boot_state; struct vega10_mclk_latency_table mclk_latency_table; @@ -368,12 +374,8 @@ struct vega10_hwmgr { bool need_long_memory_training; /* Internal settings to apply the application power optimization parameters */ - bool apply_optimized_settings; uint32_t disable_dpm_mask; - /* ---- Overdrive next setting ---- */ - uint32_t apply_overdrive_next_settings_mask; - /* ---- SMU9 ---- */ struct smu_features smu_features[GNLD_FEATURES_MAX]; struct vega10_smc_state_table smc_state_table; diff --git 
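vega10_odn_edit_dpm_table consumes the pp_od_clk_voltage input as (level, clock in MHz, voltage in mV) triples and multiplies the MHz figure by 100 to reach the tables' 10 kHz units. A user-space sketch of the same walk; the table size is illustrative:

#include <stdio.h>

#define OD_TABLE_COUNT 8 /* illustrative podn_vdd_dep_table->count */

static int parse_od_triples(const long *input, unsigned int size)
{
    unsigned int i;

    for (i = 0; i + 3 <= size; i += 3) {
        long level = input[i];
        long clk = input[i + 1] * 100; /* MHz -> 10 kHz units */
        long vddc = input[i + 2];

        if (level < 0 || level >= OD_TABLE_COUNT)
            return -1;
        printf("level %ld: clk=%ld vddc=%ld mV\n", level, clk, vddc);
    }
    return 0;
}

int main(void)
{
    /* 'echo "0 852 800" > pp_od_clk_voltage' arrives as: */
    long input[] = { 0, 852, 800 };

    return parse_od_triples(input, 3);
}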
a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c index ba63faefc61f..a9efd8554fbc 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c @@ -27,7 +27,7 @@ #include "vega10_ppsmc.h" #include "vega10_inc.h" #include "pp_debug.h" -#include "pp_soc15.h" +#include "soc15_common.h" static const struct vega10_didt_config_reg SEDiDtTuningCtrlConfig_Vega10[] = { @@ -888,36 +888,36 @@ static void vega10_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable) if (PP_CAP(PHM_PlatformCaps_DiDtEDCEnable)) { if (PP_CAP(PHM_PlatformCaps_SQRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL, data); } if (PP_CAP(PHM_PlatformCaps_DBRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL, data); } if (PP_CAP(PHM_PlatformCaps_TDRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL, data); } if (PP_CAP(PHM_PlatformCaps_TCPRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL, data); } if (PP_CAP(PHM_PlatformCaps_DBRRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL, data); } } @@ -930,20 +930,18 @@ static void vega10_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable) static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int result; uint32_t num_se = 0, count, data; - struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; num_se = adev->gfx.config.max_shader_engines; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, 
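REG_SET_FIELD from soc15_common.h, which replaces the CGS wrappers throughout these hunks, composes the generated <REG>__<FIELD>_MASK and __SHIFT defines into a read-modify-write on the register value. What the macro does, in spirit; the field layout below is invented for illustration:

#include <stdint.h>
#include <stdio.h>

/* Invented layout standing in for e.g. DIDT_SQ_EDC_CTRL__EDC_EN. */
#define EDC_CTRL__EDC_EN_MASK  0x00000001u
#define EDC_CTRL__EDC_EN_SHIFT 0

static uint32_t reg_set_field(uint32_t val, uint32_t mask,
                              uint32_t shift, uint32_t v)
{
    return (val & ~mask) | ((v << shift) & mask);
}

int main(void)
{
    uint32_t data = 0xdead0000u;

    data = reg_set_field(data, EDC_CTRL__EDC_EN_MASK,
                         EDC_CTRL__EDC_EN_SHIFT, 1);
    printf("0x%08x\n", data); /* 0xdead0001 */
    return 0;
}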
mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); - cgs_write_register(hwmgr->device, reg, data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT); @@ -958,43 +956,43 @@ static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) if (0 != result) break; } - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); vega10_didt_set_mask(hwmgr, true); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } static int vega10_disable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) { - cgs_enter_safe_mode(hwmgr->device, true); + struct amdgpu_device *adev = hwmgr->adev; + + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int result; uint32_t num_se = 0, count, data; - struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; num_se = adev->gfx.config.max_shader_engines; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); - cgs_write_register(hwmgr->device, reg, data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT); @@ -1003,12 +1001,12 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) if (0 != result) break; } - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); vega10_didt_set_mask(hwmgr, true); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); vega10_program_gc_didt_config_registers(hwmgr, GCDiDtDroopCtrlConfig_vega10); if (PP_CAP(PHM_PlatformCaps_GCEDC)) @@ -1022,13 +1020,14 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t data; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); if (PP_CAP(PHM_PlatformCaps_GCEDC)) { data = 0x00000000; @@ -1043,20 +1042,18 @@ static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) static 
int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int result; uint32_t num_se = 0, count, data; - struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; num_se = adev->gfx.config.max_shader_engines; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); - cgs_write_register(hwmgr->device, reg, data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); result = vega10_program_didt_config_registers(hwmgr, SEDiDtWeightConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT); @@ -1067,46 +1064,46 @@ static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) if (0 != result) break; } - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); vega10_didt_set_mask(hwmgr, true); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr) { - cgs_enter_safe_mode(hwmgr->device, true); + struct amdgpu_device *adev = hwmgr->adev; + + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int result; uint32_t num_se = 0; uint32_t count, data; - struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; num_se = adev->gfx.config.max_shader_engines; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); - cgs_write_register(hwmgr->device, reg, data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlResetConfig_Vega10, VEGA10_CONFIGREG_DIDT); @@ -1115,12 +1112,12 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) if (0 != result) break; } - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); vega10_didt_set_mask(hwmgr, true); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); 
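Each converted routine here follows one pattern: take grbm_idx_mutex, steer GRBM_GFX_INDEX at a single shader engine per iteration (instance and SH broadcast bits plus the SE index), program the DIDT/EDC registers, then restore full broadcast by writing 0xE0000000. A sketch of that select/restore discipline with register I/O stubbed out; the bit positions follow the gfx9 GRBM_GFX_INDEX layout and are assumed here:

#include <stdint.h>
#include <stdio.h>

#define SE_BROADCAST       0x80000000u
#define SH_BROADCAST       0x20000000u
#define INSTANCE_BROADCAST 0x40000000u
#define SE_INDEX_SHIFT     16

static void wreg_grbm_gfx_index(uint32_t v)
{
    printf("GRBM_GFX_INDEX <- 0x%08x\n", v);
}

static void program_all_se(uint32_t num_se)
{
    uint32_t se;

    for (se = 0; se < num_se; se++) {
        wreg_grbm_gfx_index(INSTANCE_BROADCAST | SH_BROADCAST |
                            (se << SE_INDEX_SHIFT));
        /* ... program per-SE DIDT/EDC config registers here ... */
    }
    /* Restore broadcast to all SEs/SHs/instances: 0xE0000000. */
    wreg_grbm_gfx_index(SE_BROADCAST | SH_BROADCAST | INSTANCE_BROADCAST);
}

int main(void)
{
    program_all_se(4);
    return 0;
}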
vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCDroopCtrlConfig_vega10); @@ -1137,13 +1134,14 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t data; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); if (PP_CAP(PHM_PlatformCaps_GCEDC)) { data = 0x00000000; @@ -1158,15 +1156,14 @@ static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; int result; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + mutex_lock(&adev->grbm_idx_mutex); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT); @@ -1175,7 +1172,7 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index c61d0744860d..0768d259c07c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -52,7 +52,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr) if (!table_address) { table_address = (ATOM_Vega10_POWERPLAYTABLE *) - cgs_atom_get_data_table(hwmgr->device, index, + smu_atom_get_data_table(hwmgr->adev, index, &size, &frev, &crev); hwmgr->soft_pp_table = table_address; /*Cache the result in RAM.*/ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index 9f18226a56ea..aa044c1955fe 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -25,7 +25,7 @@ #include "vega10_hwmgr.h" #include "vega10_ppsmc.h" #include "vega10_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "pp_debug.h" static int vega10_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm) @@ -89,6 +89,7 @@ int vega10_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed) { + struct amdgpu_device *adev = hwmgr->adev; struct vega10_hwmgr *data = hwmgr->backend; uint32_t tach_period; uint32_t crystal_clock_freq; @@ -100,10 +101,8 @@ int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed) if (data->smu_features[GNLD_FAN_CONTROL].supported) { result = vega10_get_current_rpm(hwmgr, speed); } else { - uint32_t reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_TACH_STATUS_BASE_IDX, mmCG_TACH_STATUS); tach_period = - 
CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg), + REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_STATUS), CG_TACH_STATUS, TACH_PERIOD); @@ -127,26 +126,23 @@ int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed) */ int vega10_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode) { - uint32_t reg; - - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2); + struct amdgpu_device *adev = hwmgr->adev; if (hwmgr->fan_ctrl_is_in_default_mode) { hwmgr->fan_ctrl_default_mode = - CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg), + REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, FDO_PWM_MODE); hwmgr->tmin = - CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg), + REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, TMIN); hwmgr->fan_ctrl_is_in_default_mode = false; } - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, TMIN, 0)); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, FDO_PWM_MODE, mode)); return 0; @@ -159,18 +155,15 @@ int vega10_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode) */ int vega10_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr) { - uint32_t reg; - - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2); + struct amdgpu_device *adev = hwmgr->adev; if (!hwmgr->fan_ctrl_is_in_default_mode) { - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, FDO_PWM_MODE, hwmgr->fan_ctrl_default_mode)); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, TMIN, hwmgr->tmin << CG_FDO_CTRL2__TMIN__SHIFT)); hwmgr->fan_ctrl_is_in_default_mode = true; @@ -257,10 +250,10 @@ int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr) int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t speed) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t duty100; uint32_t duty; uint64_t tmp64; - uint32_t reg; if (hwmgr->thermal_controller.fanInfo.bNoFan) return 0; @@ -271,10 +264,7 @@ int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) vega10_fan_ctrl_stop_smc_fan_control(hwmgr); - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL1_BASE_IDX, mmCG_FDO_CTRL1); - - duty100 = CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg), + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), CG_FDO_CTRL1, FMAX_DUTY100); if (duty100 == 0) @@ -284,10 +274,8 @@ int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, do_div(tmp64, 100); duty = (uint32_t)tmp64; - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL0_BASE_IDX, mmCG_FDO_CTRL0); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0), CG_FDO_CTRL0, FDO_STATIC_DUTY, duty)); return vega10_fan_ctrl_set_static_mode(hwmgr, 
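vega10_fan_ctrl_set_fan_speed_percent reads the full-speed duty from CG_FDO_CTRL1.FMAX_DUTY100 and scales the requested percentage into that range through a 64-bit intermediate (the kernel's do_div). Equivalent sketch:

#include <stdint.h>
#include <stdio.h>

/* speed_pct in [0,100]; duty100 is CG_FDO_CTRL1.FMAX_DUTY100. */
static uint32_t percent_to_duty(uint32_t speed_pct, uint32_t duty100)
{
    uint64_t tmp64 = (uint64_t)speed_pct * duty100;

    return (uint32_t)(tmp64 / 100); /* do_div(tmp64, 100) in-kernel */
}

int main(void)
{
    printf("duty = %u\n", percent_to_duty(45, 255)); /* 114 */
    return 0;
}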
FDO_PWM_MODE_STATIC); @@ -317,10 +305,10 @@ int vega10_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr) */ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t tach_period; uint32_t crystal_clock_freq; int result = 0; - uint32_t reg; if (hwmgr->thermal_controller.fanInfo.bNoFan || (speed < hwmgr->thermal_controller.fanInfo.ulMinRPM) || @@ -333,10 +321,8 @@ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) if (!result) { crystal_clock_freq = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev); tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_TACH_STATUS_BASE_IDX, mmCG_TACH_STATUS); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_TACH_STATUS, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_STATUS), CG_TACH_STATUS, TACH_PERIOD, tach_period)); } @@ -350,13 +336,10 @@ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) */ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int temp; - uint32_t reg; - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_MULT_THERMAL_STATUS_BASE_IDX, mmCG_MULT_THERMAL_STATUS); - - temp = cgs_read_register(hwmgr->device, reg); + temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; @@ -379,11 +362,12 @@ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr) static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range) { + struct amdgpu_device *adev = hwmgr->adev; int low = VEGA10_THERMAL_MINIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; int high = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; - uint32_t val, reg; + uint32_t val; if (low < range->min) low = range->min; @@ -393,20 +377,17 @@ static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, if (low > high) return -EINVAL; - reg = soc15_get_register_offset(THM_HWID, 0, - mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL); - - val = cgs_read_register(hwmgr->device, reg); + val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); val &= (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK) & (~THM_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK) & (~THM_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK); - cgs_write_register(hwmgr->device, reg, val); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); return 0; } @@ -418,21 +399,17 @@ static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, */ static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr) { 
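The RPM path just above converts a target speed into a tachometer period counted in reference-clock cycles: amdgpu_asic_get_xclk reports the crystal clock in 10 kHz units (hence the * 10000), and the divisor 8 is taken verbatim from the code. A sketch using 64-bit math so the product cannot overflow:

#include <stdint.h>
#include <stdio.h>

/* xclk in 10 kHz units (what amdgpu_asic_get_xclk() returns). */
static uint32_t rpm_to_tach_period(uint32_t xclk, uint32_t rpm)
{
    return (uint32_t)(60ull * xclk * 10000ull / (8ull * rpm));
}

int main(void)
{
    /* 100 MHz crystal -> xclk = 10000; target 1500 RPM. */
    printf("TACH_PERIOD = %u\n", rpm_to_tach_period(10000, 1500));
    return 0; /* 500000 */
}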
- uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; if (hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution) { - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_TACH_CTRL_BASE_IDX, mmCG_TACH_CTRL); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_TACH_CTRL, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL), CG_TACH_CTRL, EDGE_PER_REV, hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution - 1)); } - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, TACH_PWM_RESP_RATE, 0x28)); return 0; @@ -445,9 +422,9 @@ static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr) */ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; struct vega10_hwmgr *data = hwmgr->backend; uint32_t val = 0; - uint32_t reg; if (data->smu_features[GNLD_FW_CTF].supported) { if (data->smu_features[GNLD_FW_CTF].enabled) @@ -465,8 +442,7 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr) val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT); val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT); - reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); - cgs_write_register(hwmgr->device, reg, val); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val); return 0; } @@ -477,8 +453,8 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr) */ int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; struct vega10_hwmgr *data = hwmgr->backend; - uint32_t reg; if (data->smu_features[GNLD_FW_CTF].supported) { if (!data->smu_features[GNLD_FW_CTF].enabled) @@ -493,8 +469,7 @@ int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr) data->smu_features[GNLD_FW_CTF].enabled = false; } - reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 200de46bd06b..782e2098824d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -34,7 +34,6 @@ #include "atomfirmware.h" #include "cgs_common.h" #include "vega12_inc.h" -#include "pp_soc15.h" #include "pppcielanes.h" #include "vega12_hwmgr.h" #include "vega12_processpptables.h" @@ -546,6 +545,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -565,6 +565,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -585,6 +586,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -605,6 +607,7 @@ static int 
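vega10_thermal_set_temperature_range (above) clamps the requested millidegree range, converts the bounds to whole degrees Celsius for the DIG_THERM_INTH/INTL fields, and unmasks the THM interrupt sources that enable_alert/disable_alert then toggle. A sketch of the clamp-and-convert step, assuming the usual PP_TEMPERATURE_UNITS_PER_CENTIGRADES = 1000 scaling and illustrative alert limits:

#include <stdio.h>

#define UNITS_PER_C 1000 /* assumed PP_TEMPERATURE_UNITS_PER_CENTIGRADES */
#define MIN_ALERT_C 0    /* illustrative VEGA10_THERMAL_MINIMUM_ALERT_TEMP */
#define MAX_ALERT_C 255  /* illustrative VEGA10_THERMAL_MAXIMUM_ALERT_TEMP */

static int range_to_thresholds(int range_min, int range_max,
                               int *inth, int *intl)
{
    int low = MIN_ALERT_C * UNITS_PER_C;
    int high = MAX_ALERT_C * UNITS_PER_C;

    if (low < range_min)
        low = range_min;
    if (high > range_max)
        high = range_max;
    if (low > high)
        return -1;

    *inth = high / UNITS_PER_C; /* DIG_THERM_INTH, degrees C */
    *intl = low / UNITS_PER_C;  /* DIG_THERM_INTL, degrees C */
    return 0;
}

int main(void)
{
    int inth, intl;

    if (!range_to_thresholds(0, 91000, &inth, &intl))
        printf("INTH=%d INTL=%d\n", inth, intl); /* 91, 0 */
    return 0;
}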
vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -625,6 +628,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -645,6 +649,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -666,6 +671,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -686,6 +692,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -706,6 +713,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -726,6 +734,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -992,15 +1001,55 @@ static uint32_t vega12_find_highest_dpm_level( static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr) { + struct vega12_hwmgr *data = hwmgr->backend; + if (data->smc_state_table.gfx_boot_level != + data->dpm_table.gfx_table.dpm_state.soft_min_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMinByFreq, + PPCLK_GFXCLK<<16 | data->dpm_table.gfx_table.dpm_levels[data->smc_state_table.gfx_boot_level].value); + data->dpm_table.gfx_table.dpm_state.soft_min_level = + data->smc_state_table.gfx_boot_level; + } + + if (data->smc_state_table.mem_boot_level != + data->dpm_table.mem_table.dpm_state.soft_min_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMinByFreq, + PPCLK_UCLK<<16 | data->dpm_table.mem_table.dpm_levels[data->smc_state_table.mem_boot_level].value); + data->dpm_table.mem_table.dpm_state.soft_min_level = + data->smc_state_table.mem_boot_level; + } + return 0; + } static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr) { + struct vega12_hwmgr *data = hwmgr->backend; + if (data->smc_state_table.gfx_max_level != + data->dpm_table.gfx_table.dpm_state.soft_max_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxByFreq, + /* add 1 to the value to align the resolution */ + PPCLK_GFXCLK<<16 | (data->dpm_table.gfx_table.dpm_levels[data->smc_state_table.gfx_max_level].value + 1)); + data->dpm_table.gfx_table.dpm_state.soft_max_level = + data->smc_state_table.gfx_max_level; + } + + if (data->smc_state_table.mem_max_level != + data->dpm_table.mem_table.dpm_state.soft_max_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxByFreq, + /* add 1 to the value to align the resolution */ + PPCLK_UCLK<<16 | (data->dpm_table.mem_table.dpm_levels[data->smc_state_table.mem_max_level].value + 1)); + data->dpm_table.mem_table.dpm_state.soft_max_level = + 
data->smc_state_table.mem_max_level; + } + return 0; } - int vega12_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable) { struct vega12_hwmgr *data = @@ -1064,8 +1113,7 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) return (mem_clk * 100); } -static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, - struct pp_gpu_power *query) +static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query) { #if 0 uint32_t value; @@ -1077,7 +1125,7 @@ static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, vega12_read_arg_from_smc(hwmgr, &value); /* power value is an integer */ - query->average_gpu_power = value << 8; + *query = value << 8; #endif return 0; } @@ -1186,12 +1234,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; break; case AMDGPU_PP_SENSOR_GPU_POWER: - if (*size < sizeof(struct pp_gpu_power)) - ret = -EINVAL; - else { - *size = sizeof(struct pp_gpu_power); - ret = vega12_get_gpu_power(hwmgr, (struct pp_gpu_power *)value); - } + ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value); + break; default: ret = -EINVAL; @@ -1260,23 +1304,18 @@ static int vega12_notify_smc_display_config_after_ps_adjustment( { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); - uint32_t num_active_disps = 0; - struct cgs_display_info info = {0}; struct PP_Clocks min_clocks = {0}; struct pp_display_clock_request clock_req; uint32_t clk_request; - info.mode_info = NULL; - cgs_get_active_displays_info(hwmgr->device, &info); - num_active_disps = info.display_count; - if (num_active_disps > 1) + if (hwmgr->display_config->num_display > 1) vega12_notify_smc_display_change(hwmgr, false); else vega12_notify_smc_display_change(hwmgr, true); - min_clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk; - min_clocks.dcefClockInSR = hwmgr->display_config.min_dcef_deep_sleep_set_clk; - min_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; + min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk; + min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk; + min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock; if (data->smu_features[GNLD_DPM_DCEFCLK].supported) { clock_req.clock_type = amd_pp_dcef_clock; @@ -1832,9 +1871,7 @@ static int vega12_display_configuration_changed_task(struct pp_hwmgr *hwmgr) { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); int result = 0; - uint32_t num_turned_on_displays = 1; Watermarks_t *wm_table = &(data->smc_state_table.water_marks_table); - struct cgs_display_info info = {0}; if ((data->water_marks_bitmap & WaterMarksExist) && !(data->water_marks_bitmap & WaterMarksLoaded)) { @@ -1846,12 +1883,9 @@ static int vega12_display_configuration_changed_task(struct pp_hwmgr *hwmgr) if ((data->water_marks_bitmap & WaterMarksExist) && data->smu_features[GNLD_DPM_DCEFCLK].supported && - data->smu_features[GNLD_DPM_SOCCLK].supported) { - cgs_get_active_displays_info(hwmgr->device, &info); - num_turned_on_displays = info.display_count; + data->smu_features[GNLD_DPM_SOCCLK].supported) smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_NumOfDisplays, num_turned_on_displays); - } + PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display); return result; } @@ -1894,15 +1928,12 @@ vega12_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmg { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); bool is_update_required = false; - struct cgs_display_info info = {0, 0, NULL}; - - 
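The vega12 soft-min/max uploads shown above pack the clock selector and a MHz frequency into one 32-bit SMC message argument: PPCLK_* in bits 31:16, frequency in bits 15:0, with the soft-max sent as value + 1 to align the resolution. A sketch of the packing; the PPCLK numeric IDs are illustrative:

#include <stdint.h>
#include <stdio.h>

enum ppclk { PPCLK_GFXCLK = 0, PPCLK_UCLK = 4 }; /* illustrative IDs */

static uint32_t pack_clk_freq(enum ppclk clk, uint16_t freq_mhz)
{
    return ((uint32_t)clk << 16) | freq_mhz;
}

int main(void)
{
    /* Soft-max for GFXCLK at 1536 MHz, sent as value + 1. */
    printf("0x%08x\n", pack_clk_freq(PPCLK_GFXCLK, 1536 + 1));
    return 0;
}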
cgs_get_active_displays_info(hwmgr->device, &info); - if (data->display_timing.num_existing_displays != info.display_count) + if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) is_update_required = true; if (data->registry_data.gfx_clk_deep_sleep_support) { - if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr) + if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr) is_update_required = true; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h index bc98b1df3b65..e81ded1ec198 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h @@ -33,7 +33,7 @@ #define WaterMarksExist 1 #define WaterMarksLoaded 2 -#define VG12_PSUEDO_NUM_GFXCLK_DPM_LEVELS 8 +#define VG12_PSUEDO_NUM_GFXCLK_DPM_LEVELS 16 #define VG12_PSUEDO_NUM_SOCCLK_DPM_LEVELS 8 #define VG12_PSUEDO_NUM_DCEFCLK_DPM_LEVELS 8 #define VG12_PSUEDO_NUM_UCLK_DPM_LEVELS 4 diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c index b34113f45904..888ddca902d8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c @@ -51,7 +51,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr) if (!table_address) { table_address = (ATOM_Vega12_POWERPLAYTABLE *) - cgs_atom_get_data_table(hwmgr->device, index, + smu_atom_get_data_table(hwmgr->adev, index, &size, &frev, &crev); hwmgr->soft_pp_table = table_address; /*Cache the result in RAM.*/ @@ -224,6 +224,11 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable ppsmc_pptable->AcgGfxclkSpreadPercent = smc_dpm_table.acggfxclkspreadpercent; ppsmc_pptable->AcgGfxclkSpreadFreq = smc_dpm_table.acggfxclkspreadfreq; + /* 0xFFFF will disable the ACG feature */ + if (!(hwmgr->feature_mask & PP_ACG_MASK)) { + ppsmc_pptable->AcgThresholdFreqHigh = 0xFFFF; + ppsmc_pptable->AcgThresholdFreqLow = 0xFFFF; + } return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c index df0fa815cd6e..cfd9e6ccb790 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c @@ -26,7 +26,7 @@ #include "vega12_smumgr.h" #include "vega12_ppsmc.h" #include "vega12_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "pp_debug.h" static int vega12_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm) @@ -147,13 +147,10 @@ int vega12_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr) */ int vega12_thermal_get_temperature(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int temp = 0; - uint32_t reg; - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_MULT_THERMAL_STATUS_BASE_IDX, mmCG_MULT_THERMAL_STATUS); - - temp = cgs_read_register(hwmgr->device, reg); + temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; @@ -175,11 +172,12 @@ int vega12_thermal_get_temperature(struct pp_hwmgr *hwmgr) static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range) { + struct amdgpu_device *adev = hwmgr->adev; int low = VEGA12_THERMAL_MINIMUM_ALERT_TEMP * 
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; int high = VEGA12_THERMAL_MAXIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; - uint32_t val, reg; + uint32_t val; if (low < range->min) low = range->min; @@ -189,18 +187,15 @@ static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, if (low > high) return -EINVAL; - reg = soc15_get_register_offset(THM_HWID, 0, - mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL); - - val = cgs_read_register(hwmgr->device, reg); + val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); - cgs_write_register(hwmgr->device, reg, val); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); return 0; } @@ -212,15 +207,14 @@ static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, */ static int vega12_thermal_enable_alert(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t val = 0; - uint32_t reg; val |= (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT); val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT); val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT); - reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); - cgs_write_register(hwmgr->device, reg, val); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val); return 0; } @@ -231,10 +225,9 @@ static int vega12_thermal_enable_alert(struct pp_hwmgr *hwmgr) */ int vega12_thermal_disable_alert(struct pp_hwmgr *hwmgr) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index 8b78bbecd1bc..9bb87857a20f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -377,11 +377,7 @@ struct phm_clocks { #define DPMTABLE_UPDATE_SCLK 0x00000004 #define DPMTABLE_UPDATE_MCLK 0x00000008 #define DPMTABLE_OD_UPDATE_VDDC 0x00000010 - -/* To determine if sclk and mclk are in overdrive state */ -#define SCLK_OVERDRIVE_ENABLED 0x00000001 -#define MCLK_OVERDRIVE_ENABLED 0x00000002 -#define VDDC_OVERDRIVE_ENABLED 0x00000010 +#define DPMTABLE_UPDATE_SOCCLK 0x00000020 struct phm_odn_performance_level { uint32_t clock; diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 17f811d181c8..3c321c7d9626 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -38,6 +38,8 @@ struct phm_fan_speed_info; struct pp_atomctrl_voltage_table; #define 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 8b78bbecd1bc..9bb87857a20f 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -377,11 +377,7 @@ struct phm_clocks {
 #define DPMTABLE_UPDATE_SCLK        0x00000004
 #define DPMTABLE_UPDATE_MCLK        0x00000008
 #define DPMTABLE_OD_UPDATE_VDDC     0x00000010
-
-/* To determine if sclk and mclk are in overdrive state */
-#define SCLK_OVERDRIVE_ENABLED      0x00000001
-#define MCLK_OVERDRIVE_ENABLED      0x00000002
-#define VDDC_OVERDRIVE_ENABLED      0x00000010
+#define DPMTABLE_UPDATE_SOCCLK      0x00000020
 
 struct phm_odn_performance_level {
 	uint32_t clock;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 17f811d181c8..3c321c7d9626 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -38,6 +38,8 @@ struct phm_fan_speed_info;
 struct pp_atomctrl_voltage_table;
 
 #define VOLTAGE_SCALE 4
+#define VOLTAGE_VID_OFFSET_SCALE1   625
+#define VOLTAGE_VID_OFFSET_SCALE2   100
 
 enum DISPLAY_GAP {
 	DISPLAY_GAP_VBLANK_OR_WM = 0,   /* Wait for vblank or MCHG watermark. */
@@ -64,24 +66,6 @@ struct vi_dpm_table {
 #define PCIE_PERF_REQ_GEN2         3
 #define PCIE_PERF_REQ_GEN3         4
 
-enum PP_FEATURE_MASK {
-	PP_SCLK_DPM_MASK = 0x1,
-	PP_MCLK_DPM_MASK = 0x2,
-	PP_PCIE_DPM_MASK = 0x4,
-	PP_SCLK_DEEP_SLEEP_MASK = 0x8,
-	PP_POWER_CONTAINMENT_MASK = 0x10,
-	PP_UVD_HANDSHAKE_MASK = 0x20,
-	PP_SMC_VOLTAGE_CONTROL_MASK = 0x40,
-	PP_VBI_TIME_SUPPORT_MASK = 0x80,
-	PP_ULV_MASK = 0x100,
-	PP_ENABLE_GFX_CG_THRU_SMU = 0x200,
-	PP_CLOCK_STRETCH_MASK = 0x400,
-	PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800,
-	PP_SOCCLK_DPM_MASK = 0x1000,
-	PP_DCEFCLK_DPM_MASK = 0x2000,
-	PP_OVERDRIVE_MASK = 0x4000,
-};
-
 enum PHM_BackEnd_Magic {
 	PHM_Dummy_Magic       = 0xAA5555AA,
 	PHM_RV770_Magic       = 0xDCBAABCD,
@@ -312,6 +296,7 @@ struct pp_hwmgr_func {
 	int (*display_clock_voltage_request)(struct pp_hwmgr *hwmgr,
 			struct pp_display_clock_request *clock);
 	int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
+	int (*gfx_off_control)(struct pp_hwmgr *hwmgr, bool enable);
 	int (*power_off_asic)(struct pp_hwmgr *hwmgr);
 	int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask);
 	int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
@@ -341,6 +326,7 @@ struct pp_hwmgr_func {
 					long *input, uint32_t size);
 	int (*set_power_limit)(struct pp_hwmgr *hwmgr, uint32_t n);
 	int (*set_mmhub_powergating_by_smu)(struct pp_hwmgr *hwmgr);
+	int (*smus_notify_pwe)(struct pp_hwmgr *hwmgr);
 };
 
 struct pp_table_func {
@@ -718,6 +704,7 @@ struct pp_hwmgr {
 	uint32_t chip_family;
 	uint32_t chip_id;
 	uint32_t smu_version;
+	bool not_vf;
 	bool pm_en;
 
 	struct mutex smu_lock;
@@ -764,7 +751,7 @@ struct pp_hwmgr {
 	struct pp_power_state *request_ps;
 	struct pp_power_state *boot_ps;
 	struct pp_power_state *uvd_ps;
-	struct amd_pp_display_configuration display_config;
+	const struct amd_pp_display_configuration *display_config;
 	uint32_t feature_mask;
 	bool avfs_supported;
 	/* UMD Pstate */
@@ -782,10 +769,13 @@ struct pp_hwmgr {
 };
 
 int hwmgr_early_init(struct pp_hwmgr *hwmgr);
+int hwmgr_sw_init(struct pp_hwmgr *hwmgr);
+int hwmgr_sw_fini(struct pp_hwmgr *hwmgr);
 int hwmgr_hw_init(struct pp_hwmgr *hwmgr);
 int hwmgr_hw_fini(struct pp_hwmgr *hwmgr);
-int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr);
-int hwmgr_hw_resume(struct pp_hwmgr *hwmgr);
+int hwmgr_suspend(struct pp_hwmgr *hwmgr);
+int hwmgr_resume(struct pp_hwmgr *hwmgr);
+
 int hwmgr_handle_task(struct pp_hwmgr *hwmgr,
 		enum amd_pp_task task_id,
 		enum amd_pm_state_type *user_state);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
index 426bff2aad2b..a2991fa2e6f8 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
@@ -75,13 +75,15 @@
 #define PPSMC_MSG_GetMinGfxclkFrequency         0x2C
 #define PPSMC_MSG_GetMaxGfxclkFrequency         0x2D
 #define PPSMC_MSG_SoftReset                     0x2E
+#define PPSMC_MSG_SetGfxCGPG                    0x2F
 #define PPSMC_MSG_SetSoftMaxGfxClk              0x30
 #define PPSMC_MSG_SetHardMinGfxClk              0x31
 #define PPSMC_MSG_SetSoftMaxSocclkByFreq        0x32
 #define PPSMC_MSG_SetSoftMaxFclkByFreq          0x33
 #define PPSMC_MSG_SetSoftMaxVcn                 0x34
 #define PPSMC_MSG_PowerGateMmHub                0x35
-#define PPSMC_Message_Count                     0x36
+#define PPSMC_MSG_SetRccPfcPmeRestoreRegister   0x36
+#define PPSMC_Message_Count                     0x37
 
 typedef uint16_t PPSMC_Result;
diff --git
a/drivers/gpu/drm/amd/powerplay/inc/smu75.h b/drivers/gpu/drm/amd/powerplay/inc/smu75.h new file mode 100644 index 000000000000..771523001533 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/smu75.h @@ -0,0 +1,760 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef SMU75_H +#define SMU75_H + +#pragma pack(push, 1) + +typedef struct { + uint32_t high; + uint32_t low; +} data_64_t; + +typedef struct { + data_64_t high; + data_64_t low; +} data_128_t; + +#define SMU__DGPU_ONLY + +#define SMU__NUM_SCLK_DPM_STATE 8 +#define SMU__NUM_MCLK_DPM_LEVELS 4 +#define SMU__NUM_LCLK_DPM_LEVELS 8 +#define SMU__NUM_PCIE_DPM_LEVELS 8 + +#define SMU7_CONTEXT_ID_SMC 1 +#define SMU7_CONTEXT_ID_VBIOS 2 + +#define SMU75_MAX_LEVELS_VDDC 16 +#define SMU75_MAX_LEVELS_VDDGFX 16 +#define SMU75_MAX_LEVELS_VDDCI 8 +#define SMU75_MAX_LEVELS_MVDD 4 + +#define SMU_MAX_SMIO_LEVELS 4 + +#define SMU75_MAX_LEVELS_GRAPHICS SMU__NUM_SCLK_DPM_STATE +#define SMU75_MAX_LEVELS_MEMORY SMU__NUM_MCLK_DPM_LEVELS +#define SMU75_MAX_LEVELS_GIO SMU__NUM_LCLK_DPM_LEVELS +#define SMU75_MAX_LEVELS_LINK SMU__NUM_PCIE_DPM_LEVELS +#define SMU75_MAX_LEVELS_UVD 8 +#define SMU75_MAX_LEVELS_VCE 8 +#define SMU75_MAX_LEVELS_ACP 8 +#define SMU75_MAX_LEVELS_SAMU 8 +#define SMU75_MAX_ENTRIES_SMIO 32 + +#define DPM_NO_LIMIT 0 +#define DPM_NO_UP 1 +#define DPM_GO_DOWN 2 +#define DPM_GO_UP 3 + +#define SMU7_FIRST_DPM_GRAPHICS_LEVEL 0 +#define SMU7_FIRST_DPM_MEMORY_LEVEL 0 + +#define GPIO_CLAMP_MODE_VRHOT 1 +#define GPIO_CLAMP_MODE_THERM 2 +#define GPIO_CLAMP_MODE_DC 4 + +#define SCRATCH_B_TARG_PCIE_INDEX_SHIFT 0 +#define SCRATCH_B_TARG_PCIE_INDEX_MASK (0x7<<SCRATCH_B_TARG_PCIE_INDEX_SHIFT) +#define SCRATCH_B_CURR_PCIE_INDEX_SHIFT 3 +#define SCRATCH_B_CURR_PCIE_INDEX_MASK (0x7<<SCRATCH_B_CURR_PCIE_INDEX_SHIFT) +#define SCRATCH_B_TARG_UVD_INDEX_SHIFT 6 +#define SCRATCH_B_TARG_UVD_INDEX_MASK (0x7<<SCRATCH_B_TARG_UVD_INDEX_SHIFT) +#define SCRATCH_B_CURR_UVD_INDEX_SHIFT 9 +#define SCRATCH_B_CURR_UVD_INDEX_MASK (0x7<<SCRATCH_B_CURR_UVD_INDEX_SHIFT) +#define SCRATCH_B_TARG_VCE_INDEX_SHIFT 12 +#define SCRATCH_B_TARG_VCE_INDEX_MASK (0x7<<SCRATCH_B_TARG_VCE_INDEX_SHIFT) +#define SCRATCH_B_CURR_VCE_INDEX_SHIFT 15 +#define SCRATCH_B_CURR_VCE_INDEX_MASK (0x7<<SCRATCH_B_CURR_VCE_INDEX_SHIFT) +#define SCRATCH_B_TARG_ACP_INDEX_SHIFT 18 +#define SCRATCH_B_TARG_ACP_INDEX_MASK (0x7<<SCRATCH_B_TARG_ACP_INDEX_SHIFT) +#define SCRATCH_B_CURR_ACP_INDEX_SHIFT 21 +#define 
SCRATCH_B_CURR_ACP_INDEX_MASK (0x7<<SCRATCH_B_CURR_ACP_INDEX_SHIFT) +#define SCRATCH_B_TARG_SAMU_INDEX_SHIFT 24 +#define SCRATCH_B_TARG_SAMU_INDEX_MASK (0x7<<SCRATCH_B_TARG_SAMU_INDEX_SHIFT) +#define SCRATCH_B_CURR_SAMU_INDEX_SHIFT 27 +#define SCRATCH_B_CURR_SAMU_INDEX_MASK (0x7<<SCRATCH_B_CURR_SAMU_INDEX_SHIFT) + +/* Virtualization Defines */ +#define CG_XDMA_MASK 0x1 +#define CG_XDMA_SHIFT 0 +#define CG_UVD_MASK 0x2 +#define CG_UVD_SHIFT 1 +#define CG_VCE_MASK 0x4 +#define CG_VCE_SHIFT 2 +#define CG_SAMU_MASK 0x8 +#define CG_SAMU_SHIFT 3 +#define CG_GFX_MASK 0x10 +#define CG_GFX_SHIFT 4 +#define CG_SDMA_MASK 0x20 +#define CG_SDMA_SHIFT 5 +#define CG_HDP_MASK 0x40 +#define CG_HDP_SHIFT 6 +#define CG_MC_MASK 0x80 +#define CG_MC_SHIFT 7 +#define CG_DRM_MASK 0x100 +#define CG_DRM_SHIFT 8 +#define CG_ROM_MASK 0x200 +#define CG_ROM_SHIFT 9 +#define CG_BIF_MASK 0x400 +#define CG_BIF_SHIFT 10 + +#if defined SMU__DGPU_ONLY +#define SMU75_DTE_ITERATIONS 5 +#define SMU75_DTE_SOURCES 3 +#define SMU75_DTE_SINKS 1 +#define SMU75_NUM_CPU_TES 0 +#define SMU75_NUM_GPU_TES 1 +#define SMU75_NUM_NON_TES 2 +#define SMU75_DTE_FAN_SCALAR_MIN 0x100 +#define SMU75_DTE_FAN_SCALAR_MAX 0x166 +#define SMU75_DTE_FAN_TEMP_MAX 93 +#define SMU75_DTE_FAN_TEMP_MIN 83 +#endif +#define SMU75_THERMAL_INPUT_LOOP_COUNT 2 +#define SMU75_THERMAL_CLAMP_MODE_COUNT 2 + +#define EXP_M1_1 93 +#define EXP_M2_1 195759 +#define EXP_B_1 111176531 + +#define EXP_M1_2 67 +#define EXP_M2_2 153720 +#define EXP_B_2 94415767 + +#define EXP_M1_3 48 +#define EXP_M2_3 119796 +#define EXP_B_3 79195279 + +#define EXP_M1_4 550 +#define EXP_M2_4 1484190 +#define EXP_B_4 1051432828 + +#define EXP_M1_5 394 +#define EXP_M2_5 1143049 +#define EXP_B_5 864288432 + +struct SMU7_HystController_Data { + uint16_t waterfall_up; + uint16_t waterfall_down; + uint16_t waterfall_limit; + uint16_t release_cnt; + uint16_t release_limit; + uint16_t spare; +}; + +typedef struct SMU7_HystController_Data SMU7_HystController_Data; + +struct SMU75_PIDController { + uint32_t Ki; + int32_t LFWindupUpperLim; + int32_t LFWindupLowerLim; + uint32_t StatePrecision; + uint32_t LfPrecision; + uint32_t LfOffset; + uint32_t MaxState; + uint32_t MaxLfFraction; + uint32_t StateShift; +}; + +typedef struct SMU75_PIDController SMU75_PIDController; + +struct SMU7_LocalDpmScoreboard { + uint32_t PercentageBusy; + + int32_t PIDError; + int32_t PIDIntegral; + int32_t PIDOutput; + + uint32_t SigmaDeltaAccum; + uint32_t SigmaDeltaOutput; + uint32_t SigmaDeltaLevel; + + uint32_t UtilizationSetpoint; + + uint8_t TdpClampMode; + uint8_t TdcClampMode; + uint8_t ThermClampMode; + uint8_t VoltageBusy; + + int8_t CurrLevel; + int8_t TargLevel; + uint8_t LevelChangeInProgress; + uint8_t UpHyst; + + uint8_t DownHyst; + uint8_t VoltageDownHyst; + uint8_t DpmEnable; + uint8_t DpmRunning; + + uint8_t DpmForce; + uint8_t DpmForceLevel; + uint8_t DisplayWatermark; + uint8_t McArbIndex; + + uint32_t MinimumPerfSclk; + + uint8_t AcpiReq; + uint8_t AcpiAck; + uint8_t GfxClkSlow; + uint8_t GpioClampMode; + + uint8_t EnableModeSwitchRLCNotification; + uint8_t EnabledLevelsChange; + uint8_t DteClampMode; + uint8_t FpsClampMode; + + uint16_t LevelResidencyCounters [SMU75_MAX_LEVELS_GRAPHICS]; + uint16_t LevelSwitchCounters [SMU75_MAX_LEVELS_GRAPHICS]; + + void (*TargetStateCalculator)(uint8_t); + void (*SavedTargetStateCalculator)(uint8_t); + + uint16_t AutoDpmInterval; + uint16_t AutoDpmRange; + + uint8_t FpsEnabled; + uint8_t MaxPerfLevel; + uint8_t AllowLowClkInterruptToHost; + uint8_t FpsRunning; + + uint32_t 
MaxAllowedFrequency; + + uint32_t FilteredSclkFrequency; + uint32_t LastSclkFrequency; + uint32_t FilteredSclkFrequencyCnt; + + uint8_t MinPerfLevel; +#ifdef SMU__FIRMWARE_SCKS_PRESENT__1 + uint8_t ScksClampMode; + uint8_t padding[2]; +#else + uint8_t padding[3]; +#endif + + uint16_t FpsAlpha; + uint16_t DeltaTime; + uint32_t CurrentFps; + uint32_t FilteredFps; + uint32_t FrameCount; + uint32_t FrameCountLast; + uint16_t FpsTargetScalar; + uint16_t FpsWaterfallLimitScalar; + uint16_t FpsAlphaScalar; + uint16_t spare8; + SMU7_HystController_Data HystControllerData; +}; + +typedef struct SMU7_LocalDpmScoreboard SMU7_LocalDpmScoreboard; + +#define SMU7_MAX_VOLTAGE_CLIENTS 12 + +typedef uint8_t (*VoltageChangeHandler_t)(uint16_t, uint8_t); + +#define VDDC_MASK 0x00007FFF +#define VDDC_SHIFT 0 +#define VDDCI_MASK 0x3FFF8000 +#define VDDCI_SHIFT 15 +#define PHASES_MASK 0xC0000000 +#define PHASES_SHIFT 30 + +typedef uint32_t SMU_VoltageLevel; + +struct SMU7_VoltageScoreboard { + SMU_VoltageLevel TargetVoltage; + uint16_t MaxVid; + uint8_t HighestVidOffset; + uint8_t CurrentVidOffset; + + uint16_t CurrentVddc; + uint16_t CurrentVddci; + + uint8_t ControllerBusy; + uint8_t CurrentVid; + uint8_t CurrentVddciVid; + uint8_t padding; + + SMU_VoltageLevel RequestedVoltage[SMU7_MAX_VOLTAGE_CLIENTS]; + SMU_VoltageLevel TargetVoltageState; + uint8_t EnabledRequest[SMU7_MAX_VOLTAGE_CLIENTS]; + + uint8_t padding2; + uint8_t padding3; + uint8_t ControllerEnable; + uint8_t ControllerRunning; + uint16_t CurrentStdVoltageHiSidd; + uint16_t CurrentStdVoltageLoSidd; + uint8_t OverrideVoltage; + uint8_t padding4; + uint8_t padding5; + uint8_t CurrentPhases; + + VoltageChangeHandler_t ChangeVddc; + VoltageChangeHandler_t ChangeVddci; + VoltageChangeHandler_t ChangePhase; + VoltageChangeHandler_t ChangeMvdd; + + VoltageChangeHandler_t functionLinks[6]; + + uint16_t * VddcFollower1; + int16_t Driver_OD_RequestedVidOffset1; + int16_t Driver_OD_RequestedVidOffset2; +}; + +typedef struct SMU7_VoltageScoreboard SMU7_VoltageScoreboard; + +#define SMU7_MAX_PCIE_LINK_SPEEDS 3 + +struct SMU7_PCIeLinkSpeedScoreboard { + uint8_t DpmEnable; + uint8_t DpmRunning; + uint8_t DpmForce; + uint8_t DpmForceLevel; + + uint8_t CurrentLinkSpeed; + uint8_t EnabledLevelsChange; + uint16_t AutoDpmInterval; + + uint16_t AutoDpmRange; + uint16_t AutoDpmCount; + + uint8_t DpmMode; + uint8_t AcpiReq; + uint8_t AcpiAck; + uint8_t CurrentLinkLevel; +}; + +typedef struct SMU7_PCIeLinkSpeedScoreboard SMU7_PCIeLinkSpeedScoreboard; + +#define SMU7_LKGE_LUT_NUM_OF_TEMP_ENTRIES 16 +#define SMU7_LKGE_LUT_NUM_OF_VOLT_ENTRIES 16 + +#define SMU7_SCALE_I 7 +#define SMU7_SCALE_R 12 + +struct SMU7_PowerScoreboard { + uint32_t GpuPower; + + uint32_t VddcPower; + uint32_t VddcVoltage; + uint32_t VddcCurrent; + + uint32_t VddciPower; + uint32_t VddciVoltage; + uint32_t VddciCurrent; + + uint32_t RocPower; + + uint16_t Telemetry_1_slope; + uint16_t Telemetry_2_slope; + int32_t Telemetry_1_offset; + int32_t Telemetry_2_offset; + + uint8_t MCLK_patch_flag; + uint8_t reserved[3]; +}; + +typedef struct SMU7_PowerScoreboard SMU7_PowerScoreboard; + +#define SMU7_SCLK_DPM_CONFIG_MASK 0x01 +#define SMU7_VOLTAGE_CONTROLLER_CONFIG_MASK 0x02 +#define SMU7_THERMAL_CONTROLLER_CONFIG_MASK 0x04 +#define SMU7_MCLK_DPM_CONFIG_MASK 0x08 +#define SMU7_UVD_DPM_CONFIG_MASK 0x10 +#define SMU7_VCE_DPM_CONFIG_MASK 0x20 +#define SMU7_ACP_DPM_CONFIG_MASK 0x40 +#define SMU7_SAMU_DPM_CONFIG_MASK 0x80 +#define SMU7_PCIEGEN_DPM_CONFIG_MASK 0x100 + +#define SMU7_ACP_MCLK_HANDSHAKE_DISABLE 
0x00000001 +#define SMU7_ACP_SCLK_HANDSHAKE_DISABLE 0x00000002 +#define SMU7_UVD_MCLK_HANDSHAKE_DISABLE 0x00000100 +#define SMU7_UVD_SCLK_HANDSHAKE_DISABLE 0x00000200 +#define SMU7_VCE_MCLK_HANDSHAKE_DISABLE 0x00010000 +#define SMU7_VCE_SCLK_HANDSHAKE_DISABLE 0x00020000 + +struct SMU75_SoftRegisters { + uint32_t RefClockFrequency; + uint32_t PmTimerPeriod; + uint32_t FeatureEnables; +#if defined (SMU__DGPU_ONLY) + uint32_t PreVBlankGap; + uint32_t VBlankTimeout; + uint32_t TrainTimeGap; + uint32_t MvddSwitchTime; + uint32_t LongestAcpiTrainTime; + uint32_t AcpiDelay; + uint32_t G5TrainTime; + uint32_t DelayMpllPwron; + uint32_t VoltageChangeTimeout; +#endif + uint32_t HandshakeDisables; + + uint8_t DisplayPhy1Config; + uint8_t DisplayPhy2Config; + uint8_t DisplayPhy3Config; + uint8_t DisplayPhy4Config; + + uint8_t DisplayPhy5Config; + uint8_t DisplayPhy6Config; + uint8_t DisplayPhy7Config; + uint8_t DisplayPhy8Config; + + uint32_t AverageGraphicsActivity; + uint32_t AverageMemoryActivity; + uint32_t AverageGioActivity; + + uint8_t SClkDpmEnabledLevels; + uint8_t MClkDpmEnabledLevels; + uint8_t LClkDpmEnabledLevels; + uint8_t PCIeDpmEnabledLevels; + + uint8_t UVDDpmEnabledLevels; + uint8_t SAMUDpmEnabledLevels; + uint8_t ACPDpmEnabledLevels; + uint8_t VCEDpmEnabledLevels; + + uint32_t DRAM_LOG_ADDR_H; + uint32_t DRAM_LOG_ADDR_L; + uint32_t DRAM_LOG_PHY_ADDR_H; + uint32_t DRAM_LOG_PHY_ADDR_L; + uint32_t DRAM_LOG_BUFF_SIZE; + uint32_t UlvEnterCount; + uint32_t UlvTime; + uint32_t UcodeLoadStatus; + uint32_t AllowMvddSwitch; + uint8_t Activity_Weight; + uint8_t Reserved8[3]; +}; + +typedef struct SMU75_SoftRegisters SMU75_SoftRegisters; + +struct SMU75_Firmware_Header { + uint32_t Digest[5]; + uint32_t Version; + uint32_t HeaderSize; + uint32_t Flags; + uint32_t EntryPoint; + uint32_t CodeSize; + uint32_t ImageSize; + + uint32_t Rtos; + uint32_t SoftRegisters; + uint32_t DpmTable; + uint32_t FanTable; + uint32_t CacConfigTable; + uint32_t CacStatusTable; + uint32_t mcRegisterTable; + uint32_t mcArbDramTimingTable; + uint32_t PmFuseTable; + uint32_t Globals; + uint32_t ClockStretcherTable; + uint32_t VftTable; + uint32_t Reserved1; + uint32_t AvfsCksOff_AvfsGbvTable; + uint32_t AvfsCksOff_BtcGbvTable; + uint32_t MM_AvfsTable; + uint32_t PowerSharingTable; + uint32_t AvfsTable; + uint32_t AvfsCksOffGbvTable; + uint32_t AvfsMeanNSigma; + uint32_t AvfsSclkOffsetTable; + uint32_t Reserved[12]; + uint32_t Signature; +}; + +typedef struct SMU75_Firmware_Header SMU75_Firmware_Header; + +#define SMU7_FIRMWARE_HEADER_LOCATION 0x20000 + +enum DisplayConfig { + PowerDown = 1, + DP54x4, + DP54x2, + DP54x1, + DP27x4, + DP27x2, + DP27x1, + HDMI297, + HDMI162, + LVDS, + DP324x4, + DP324x2, + DP324x1 +}; + +#define MC_BLOCK_COUNT 1 +#define CPL_BLOCK_COUNT 5 +#define SE_BLOCK_COUNT 15 +#define GC_BLOCK_COUNT 24 + +struct SMU7_Local_Cac { + uint8_t BlockId; + uint8_t SignalId; + uint8_t Threshold; + uint8_t Padding; +}; + +typedef struct SMU7_Local_Cac SMU7_Local_Cac; + +struct SMU7_Local_Cac_Table { + SMU7_Local_Cac CplLocalCac[CPL_BLOCK_COUNT]; + SMU7_Local_Cac McLocalCac[MC_BLOCK_COUNT]; + SMU7_Local_Cac SeLocalCac[SE_BLOCK_COUNT]; + SMU7_Local_Cac GcLocalCac[GC_BLOCK_COUNT]; +}; + +typedef struct SMU7_Local_Cac_Table SMU7_Local_Cac_Table; + +#pragma pack(pop) + +#define CG_SYS_BITMASK_FIRST_BIT 0 +#define CG_SYS_BITMASK_LAST_BIT 10 +#define CG_SYS_BIF_MGLS_SHIFT 0 +#define CG_SYS_ROM_SHIFT 1 +#define CG_SYS_MC_MGCG_SHIFT 2 +#define CG_SYS_MC_MGLS_SHIFT 3 +#define CG_SYS_SDMA_MGCG_SHIFT 4 +#define 
CG_SYS_SDMA_MGLS_SHIFT 5 +#define CG_SYS_DRM_MGCG_SHIFT 6 +#define CG_SYS_HDP_MGCG_SHIFT 7 +#define CG_SYS_HDP_MGLS_SHIFT 8 +#define CG_SYS_DRM_MGLS_SHIFT 9 +#define CG_SYS_BIF_MGCG_SHIFT 10 + +#define CG_SYS_BIF_MGLS_MASK 0x1 +#define CG_SYS_ROM_MASK 0x2 +#define CG_SYS_MC_MGCG_MASK 0x4 +#define CG_SYS_MC_MGLS_MASK 0x8 +#define CG_SYS_SDMA_MGCG_MASK 0x10 +#define CG_SYS_SDMA_MGLS_MASK 0x20 +#define CG_SYS_DRM_MGCG_MASK 0x40 +#define CG_SYS_HDP_MGCG_MASK 0x80 +#define CG_SYS_HDP_MGLS_MASK 0x100 +#define CG_SYS_DRM_MGLS_MASK 0x200 +#define CG_SYS_BIF_MGCG_MASK 0x400 + +#define CG_GFX_BITMASK_FIRST_BIT 16 +#define CG_GFX_BITMASK_LAST_BIT 24 + +#define CG_GFX_CGCG_SHIFT 16 +#define CG_GFX_CGLS_SHIFT 17 +#define CG_CPF_MGCG_SHIFT 18 +#define CG_RLC_MGCG_SHIFT 19 +#define CG_GFX_OTHERS_MGCG_SHIFT 20 +#define CG_GFX_3DCG_SHIFT 21 +#define CG_GFX_3DLS_SHIFT 22 +#define CG_GFX_RLC_LS_SHIFT 23 +#define CG_GFX_CP_LS_SHIFT 24 + +#define CG_GFX_CGCG_MASK 0x00010000 +#define CG_GFX_CGLS_MASK 0x00020000 +#define CG_CPF_MGCG_MASK 0x00040000 +#define CG_RLC_MGCG_MASK 0x00080000 +#define CG_GFX_OTHERS_MGCG_MASK 0x00100000 +#define CG_GFX_3DCG_MASK 0x00200000 +#define CG_GFX_3DLS_MASK 0x00400000 +#define CG_GFX_RLC_LS_MASK 0x00800000 +#define CG_GFX_CP_LS_MASK 0x01000000 + + +#define VRCONF_VDDC_MASK 0x000000FF +#define VRCONF_VDDC_SHIFT 0 +#define VRCONF_VDDGFX_MASK 0x0000FF00 +#define VRCONF_VDDGFX_SHIFT 8 +#define VRCONF_VDDCI_MASK 0x00FF0000 +#define VRCONF_VDDCI_SHIFT 16 +#define VRCONF_MVDD_MASK 0xFF000000 +#define VRCONF_MVDD_SHIFT 24 + +#define VR_MERGED_WITH_VDDC 0 +#define VR_SVI2_PLANE_1 1 +#define VR_SVI2_PLANE_2 2 +#define VR_SMIO_PATTERN_1 3 +#define VR_SMIO_PATTERN_2 4 +#define VR_STATIC_VOLTAGE 5 + +#define CLOCK_STRETCHER_MAX_ENTRIES 0x4 +#define CKS_LOOKUPTable_MAX_ENTRIES 0x4 + +#define CLOCK_STRETCHER_SETTING_DDT_MASK 0x01 +#define CLOCK_STRETCHER_SETTING_DDT_SHIFT 0x0 +#define CLOCK_STRETCHER_SETTING_STRETCH_AMOUNT_MASK 0x1E +#define CLOCK_STRETCHER_SETTING_STRETCH_AMOUNT_SHIFT 0x1 +#define CLOCK_STRETCHER_SETTING_ENABLE_MASK 0x80 +#define CLOCK_STRETCHER_SETTING_ENABLE_SHIFT 0x7 + +struct SMU_ClockStretcherDataTableEntry { + uint8_t minVID; + uint8_t maxVID; + + uint16_t setting; +}; +typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry; + +struct SMU_ClockStretcherDataTable { + SMU_ClockStretcherDataTableEntry ClockStretcherDataTableEntry[CLOCK_STRETCHER_MAX_ENTRIES]; +}; +typedef struct SMU_ClockStretcherDataTable SMU_ClockStretcherDataTable; + +struct SMU_CKS_LOOKUPTableEntry { + uint16_t minFreq; + uint16_t maxFreq; + + uint8_t setting; + uint8_t padding[3]; +}; +typedef struct SMU_CKS_LOOKUPTableEntry SMU_CKS_LOOKUPTableEntry; + +struct SMU_CKS_LOOKUPTable { + SMU_CKS_LOOKUPTableEntry CKS_LOOKUPTableEntry[CKS_LOOKUPTable_MAX_ENTRIES]; +}; +typedef struct SMU_CKS_LOOKUPTable SMU_CKS_LOOKUPTable; + +struct AgmAvfsData_t { + uint16_t avgPsmCount[28]; + uint16_t minPsmCount[28]; +}; +typedef struct AgmAvfsData_t AgmAvfsData_t; + +enum VFT_COLUMNS { + SCLK0, + SCLK1, + SCLK2, + SCLK3, + SCLK4, + SCLK5, + SCLK6, + SCLK7, + + NUM_VFT_COLUMNS +}; +enum { + SCS_FUSE_T0, + SCS_FUSE_T1, + NUM_SCS_FUSE_TEMPERATURE +}; +enum { + SCKS_ON, + SCKS_OFF, + NUM_SCKS_STATE_TYPES +}; + +#define VFT_TABLE_DEFINED + +#define TEMP_RANGE_MAXSTEPS 12 +struct VFT_CELL_t { + uint16_t Voltage; +}; + +typedef struct VFT_CELL_t VFT_CELL_t; +#ifdef SMU__FIRMWARE_SCKS_PRESENT__1 +struct SCS_CELL_t { + uint16_t PsmCnt[NUM_SCKS_STATE_TYPES]; +}; +typedef struct SCS_CELL_t SCS_CELL_t; 
+#endif + +struct VFT_TABLE_t { + VFT_CELL_t Cell[TEMP_RANGE_MAXSTEPS][NUM_VFT_COLUMNS]; + uint16_t AvfsGbv [NUM_VFT_COLUMNS]; + uint16_t BtcGbv [NUM_VFT_COLUMNS]; + int16_t Temperature [TEMP_RANGE_MAXSTEPS]; + +#ifdef SMU__FIRMWARE_SCKS_PRESENT__1 + SCS_CELL_t ScksCell[TEMP_RANGE_MAXSTEPS][NUM_VFT_COLUMNS]; +#endif + + uint8_t NumTemperatureSteps; + uint8_t padding[3]; +}; +typedef struct VFT_TABLE_t VFT_TABLE_t; + +#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2 +#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2 + +struct GB_VDROOP_TABLE_t { + int32_t a0; + int32_t a1; + int32_t a2; + uint32_t spare; +}; +typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t; + +struct SMU_QuadraticCoeffs { + int32_t m1; + int32_t b; + + int16_t m2; + uint8_t m1_shift; + uint8_t m2_shift; +}; +typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs; + +struct AVFS_Margin_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_Margin_t AVFS_Margin_t; + +struct AVFS_CksOff_Gbv_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t; + +struct AVFS_CksOff_AvfsGbv_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_CksOff_AvfsGbv_t AVFS_CksOff_AvfsGbv_t; + +struct AVFS_CksOff_BtcGbv_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_CksOff_BtcGbv_t AVFS_CksOff_BtcGbv_t; + +struct AVFS_meanNsigma_t { + uint32_t Aconstant[3]; + uint16_t DC_tol_sigma; + uint16_t Platform_mean; + uint16_t Platform_sigma; + uint16_t PSM_Age_CompFactor; + uint8_t Static_Voltage_Offset[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t; + +struct AVFS_Sclk_Offset_t { + uint16_t Sclk_Offset[8]; +}; +typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t; + +struct Power_Sharing_t { + uint32_t EnergyCounter; + uint32_t EngeryThreshold; + uint64_t AM_SCLK_CNT; + uint64_t AM_0_BUSY_CNT; +}; +typedef struct Power_Sharing_t Power_Sharing_t; + + +#endif + + diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h new file mode 100644 index 000000000000..b64e58a22ddf --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h @@ -0,0 +1,886 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef SMU75_DISCRETE_H +#define SMU75_DISCRETE_H + +#include "smu75.h" + +#pragma pack(push, 1) + +#define NUM_SCLK_RANGE 8 + +#define VCO_3_6 1 +#define VCO_2_4 3 + +#define POSTDIV_DIV_BY_1 0 +#define POSTDIV_DIV_BY_2 1 +#define POSTDIV_DIV_BY_4 2 +#define POSTDIV_DIV_BY_8 3 +#define POSTDIV_DIV_BY_16 4 + +struct sclkFcwRange_t { + uint8_t vco_setting; /* 1: 3-6GHz, 3: 2-4GHz */ + uint8_t postdiv; /* divide by 2^n */ + uint16_t fcw_pcc; + uint16_t fcw_trans_upper; + uint16_t fcw_trans_lower; +}; +typedef struct sclkFcwRange_t sclkFcwRange_t; + +struct SMIO_Pattern { + uint16_t Voltage; + uint8_t Smio; + uint8_t padding; +}; + +typedef struct SMIO_Pattern SMIO_Pattern; + +struct SMIO_Table { + SMIO_Pattern Pattern[SMU_MAX_SMIO_LEVELS]; +}; + +typedef struct SMIO_Table SMIO_Table; + +struct SMU_SclkSetting { + uint32_t SclkFrequency; + uint16_t Fcw_int; + uint16_t Fcw_frac; + uint16_t Pcc_fcw_int; + uint8_t PllRange; + uint8_t SSc_En; + uint16_t Sclk_slew_rate; + uint16_t Pcc_up_slew_rate; + uint16_t Pcc_down_slew_rate; + uint16_t Fcw1_int; + uint16_t Fcw1_frac; + uint16_t Sclk_ss_slew_rate; +}; +typedef struct SMU_SclkSetting SMU_SclkSetting; + +struct SMU75_Discrete_GraphicsLevel { + SMU_VoltageLevel MinVoltage; + + uint8_t pcieDpmLevel; + uint8_t DeepSleepDivId; + uint16_t ActivityLevel; + + uint32_t CgSpllFuncCntl3; + uint32_t CgSpllFuncCntl4; + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; + + uint8_t SclkDid; + uint8_t padding; + uint8_t EnabledForActivity; + uint8_t EnabledForThrottle; + uint8_t UpHyst; + uint8_t DownHyst; + uint8_t VoltageDownHyst; + uint8_t PowerThrottle; + + SMU_SclkSetting SclkSetting; + + uint8_t ScksStretchThreshVid[NUM_SCKS_STATE_TYPES]; + uint16_t Padding; +}; + +typedef struct SMU75_Discrete_GraphicsLevel SMU75_Discrete_GraphicsLevel; + +struct SMU75_Discrete_ACPILevel { + uint32_t Flags; + SMU_VoltageLevel MinVoltage; + uint32_t SclkFrequency; + uint8_t SclkDid; + uint8_t DisplayWatermark; + uint8_t DeepSleepDivId; + uint8_t padding; + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; + + SMU_SclkSetting SclkSetting; +}; + +typedef struct SMU75_Discrete_ACPILevel SMU75_Discrete_ACPILevel; + +struct SMU75_Discrete_Ulv { + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; + uint16_t VddcOffset; + uint8_t VddcOffsetVid; + uint8_t VddcPhase; + uint16_t BifSclkDfs; + uint16_t Reserved; +}; + +typedef struct SMU75_Discrete_Ulv SMU75_Discrete_Ulv; + +struct SMU75_Discrete_MemoryLevel { + SMU_VoltageLevel MinVoltage; + uint32_t MinMvdd; + + uint32_t MclkFrequency; + + uint8_t StutterEnable; + uint8_t EnabledForThrottle; + uint8_t EnabledForActivity; + uint8_t padding_0; + + uint8_t UpHyst; + uint8_t DownHyst; + uint8_t VoltageDownHyst; + uint8_t padding_1; + + uint16_t ActivityLevel; + uint8_t DisplayWatermark; + uint8_t padding_2; + + uint16_t Fcw_int; + uint16_t Fcw_frac; + uint8_t Postdiv; + uint8_t padding_3[3]; +}; + +typedef struct SMU75_Discrete_MemoryLevel SMU75_Discrete_MemoryLevel; + +struct SMU75_Discrete_LinkLevel { + uint8_t PcieGenSpeed; + uint8_t PcieLaneCount; + uint8_t EnabledForActivity; + uint8_t SPC; + uint32_t DownThreshold; + uint32_t UpThreshold; + uint16_t BifSclkDfs; + uint16_t Reserved; +}; + +typedef struct SMU75_Discrete_LinkLevel SMU75_Discrete_LinkLevel; + + +/* MC ARB DRAM Timing registers. 
*/ +struct SMU75_Discrete_MCArbDramTimingTableEntry { + uint32_t McArbDramTiming; + uint32_t McArbDramTiming2; + uint32_t McArbBurstTime; + uint32_t McArbRfshRate; + uint32_t McArbMisc3; +}; + +typedef struct SMU75_Discrete_MCArbDramTimingTableEntry SMU75_Discrete_MCArbDramTimingTableEntry; + +struct SMU75_Discrete_MCArbDramTimingTable { + SMU75_Discrete_MCArbDramTimingTableEntry entries[SMU__NUM_SCLK_DPM_STATE][SMU__NUM_MCLK_DPM_LEVELS]; +}; + +typedef struct SMU75_Discrete_MCArbDramTimingTable SMU75_Discrete_MCArbDramTimingTable; + +/* UVD VCLK/DCLK state (level) definition. */ +struct SMU75_Discrete_UvdLevel { + uint32_t VclkFrequency; + uint32_t DclkFrequency; + SMU_VoltageLevel MinVoltage; + uint8_t VclkDivider; + uint8_t DclkDivider; + uint8_t padding[2]; +}; + +typedef struct SMU75_Discrete_UvdLevel SMU75_Discrete_UvdLevel; + +/* Clocks for other external blocks (VCE, ACP, SAMU). */ +struct SMU75_Discrete_ExtClkLevel { + uint32_t Frequency; + SMU_VoltageLevel MinVoltage; + uint8_t Divider; + uint8_t padding[3]; +}; + +typedef struct SMU75_Discrete_ExtClkLevel SMU75_Discrete_ExtClkLevel; + +struct SMU75_Discrete_StateInfo { + uint32_t SclkFrequency; + uint32_t MclkFrequency; + uint32_t VclkFrequency; + uint32_t DclkFrequency; + uint32_t SamclkFrequency; + uint32_t AclkFrequency; + uint32_t EclkFrequency; + uint16_t MvddVoltage; + uint16_t padding16; + uint8_t DisplayWatermark; + uint8_t McArbIndex; + uint8_t McRegIndex; + uint8_t SeqIndex; + uint8_t SclkDid; + int8_t SclkIndex; + int8_t MclkIndex; + uint8_t PCIeGen; +}; + +typedef struct SMU75_Discrete_StateInfo SMU75_Discrete_StateInfo; + +struct SMU75_Discrete_DpmTable { + SMU75_PIDController GraphicsPIDController; + SMU75_PIDController MemoryPIDController; + SMU75_PIDController LinkPIDController; + + uint32_t SystemFlags; + + uint32_t VRConfig; + uint32_t SmioMask1; + uint32_t SmioMask2; + SMIO_Table SmioTable1; + SMIO_Table SmioTable2; + + uint32_t MvddLevelCount; + + uint8_t BapmVddcVidHiSidd [SMU75_MAX_LEVELS_VDDC]; + uint8_t BapmVddcVidLoSidd [SMU75_MAX_LEVELS_VDDC]; + uint8_t BapmVddcVidHiSidd2 [SMU75_MAX_LEVELS_VDDC]; + + uint8_t GraphicsDpmLevelCount; + uint8_t MemoryDpmLevelCount; + uint8_t LinkLevelCount; + uint8_t MasterDeepSleepControl; + + uint8_t UvdLevelCount; + uint8_t VceLevelCount; + uint8_t AcpLevelCount; + uint8_t SamuLevelCount; + + uint8_t ThermOutGpio; + uint8_t ThermOutPolarity; + uint8_t ThermOutMode; + uint8_t BootPhases; + + uint8_t VRHotLevel; + uint8_t LdoRefSel; + + uint8_t Reserved1[2]; + + uint16_t FanStartTemperature; + uint16_t FanStopTemperature; + + uint16_t MaxVoltage; + uint16_t Reserved2; + uint32_t Reserved; + + SMU75_Discrete_GraphicsLevel GraphicsLevel [SMU75_MAX_LEVELS_GRAPHICS]; + SMU75_Discrete_MemoryLevel MemoryACPILevel; + SMU75_Discrete_MemoryLevel MemoryLevel [SMU75_MAX_LEVELS_MEMORY]; + SMU75_Discrete_LinkLevel LinkLevel [SMU75_MAX_LEVELS_LINK]; + SMU75_Discrete_ACPILevel ACPILevel; + SMU75_Discrete_UvdLevel UvdLevel [SMU75_MAX_LEVELS_UVD]; + SMU75_Discrete_ExtClkLevel VceLevel [SMU75_MAX_LEVELS_VCE]; + SMU75_Discrete_ExtClkLevel AcpLevel [SMU75_MAX_LEVELS_ACP]; + SMU75_Discrete_ExtClkLevel SamuLevel [SMU75_MAX_LEVELS_SAMU]; + SMU75_Discrete_Ulv Ulv; + + uint8_t DisplayWatermark [SMU75_MAX_LEVELS_MEMORY][SMU75_MAX_LEVELS_GRAPHICS]; + + uint32_t SclkStepSize; + uint32_t Smio [SMU75_MAX_ENTRIES_SMIO]; + + uint8_t UvdBootLevel; + uint8_t VceBootLevel; + uint8_t AcpBootLevel; + uint8_t SamuBootLevel; + + uint8_t GraphicsBootLevel; + uint8_t GraphicsVoltageChangeEnable; + uint8_t 
GraphicsThermThrottleEnable; + uint8_t GraphicsInterval; + + uint8_t VoltageInterval; + uint8_t ThermalInterval; + uint16_t TemperatureLimitHigh; + + uint16_t TemperatureLimitLow; + uint8_t MemoryBootLevel; + uint8_t MemoryVoltageChangeEnable; + + uint16_t BootMVdd; + uint8_t MemoryInterval; + uint8_t MemoryThermThrottleEnable; + + uint16_t VoltageResponseTime; + uint16_t PhaseResponseTime; + + uint8_t PCIeBootLinkLevel; + uint8_t PCIeGenInterval; + uint8_t DTEInterval; + uint8_t DTEMode; + + uint8_t SVI2Enable; + uint8_t VRHotGpio; + uint8_t AcDcGpio; + uint8_t ThermGpio; + + uint16_t PPM_PkgPwrLimit; + uint16_t PPM_TemperatureLimit; + + uint16_t DefaultTdp; + uint16_t TargetTdp; + + uint16_t FpsHighThreshold; + uint16_t FpsLowThreshold; + + uint16_t BAPMTI_R [SMU75_DTE_ITERATIONS][SMU75_DTE_SOURCES][SMU75_DTE_SINKS]; + uint16_t BAPMTI_RC [SMU75_DTE_ITERATIONS][SMU75_DTE_SOURCES][SMU75_DTE_SINKS]; + + uint16_t TemperatureLimitEdge; + uint16_t TemperatureLimitHotspot; + + uint16_t BootVddc; + uint16_t BootVddci; + + uint16_t FanGainEdge; + uint16_t FanGainHotspot; + + uint32_t LowSclkInterruptThreshold; + uint32_t VddGfxReChkWait; + + uint8_t ClockStretcherAmount; + uint8_t Sclk_CKS_masterEn0_7; + uint8_t Sclk_CKS_masterEn8_15; + uint8_t DPMFreezeAndForced; + + uint8_t Sclk_voltageOffset[8]; + + SMU_ClockStretcherDataTable ClockStretcherDataTable; + SMU_CKS_LOOKUPTable CKS_LOOKUPTable; + + uint32_t CurrSclkPllRange; + sclkFcwRange_t SclkFcwRangeTable[NUM_SCLK_RANGE]; + + GB_VDROOP_TABLE_t BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES]; + SMU_QuadraticCoeffs AVFSGB_FUSE_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES]; +}; + +typedef struct SMU75_Discrete_DpmTable SMU75_Discrete_DpmTable; + +struct SMU75_Discrete_FanTable { + uint16_t FdoMode; + int16_t TempMin; + int16_t TempMed; + int16_t TempMax; + int16_t Slope1; + int16_t Slope2; + int16_t FdoMin; + int16_t HystUp; + int16_t HystDown; + int16_t HystSlope; + int16_t TempRespLim; + int16_t TempCurr; + int16_t SlopeCurr; + int16_t PwmCurr; + uint32_t RefreshPeriod; + int16_t FdoMax; + uint8_t TempSrc; + int8_t Padding; +}; + +typedef struct SMU75_Discrete_FanTable SMU75_Discrete_FanTable; + +#define SMU7_DISCRETE_GPIO_SCLK_DEBUG 4 +#define SMU7_DISCRETE_GPIO_SCLK_DEBUG_BIT (0x1 << SMU7_DISCRETE_GPIO_SCLK_DEBUG) + + + +struct SMU7_MclkDpmScoreboard { + uint32_t PercentageBusy; + + int32_t PIDError; + int32_t PIDIntegral; + int32_t PIDOutput; + + uint32_t SigmaDeltaAccum; + uint32_t SigmaDeltaOutput; + uint32_t SigmaDeltaLevel; + + uint32_t UtilizationSetpoint; + + uint8_t TdpClampMode; + uint8_t TdcClampMode; + uint8_t ThermClampMode; + uint8_t VoltageBusy; + + int8_t CurrLevel; + int8_t TargLevel; + uint8_t LevelChangeInProgress; + uint8_t UpHyst; + + uint8_t DownHyst; + uint8_t VoltageDownHyst; + uint8_t DpmEnable; + uint8_t DpmRunning; + + uint8_t DpmForce; + uint8_t DpmForceLevel; + uint8_t padding2; + uint8_t McArbIndex; + + uint32_t MinimumPerfMclk; + + uint8_t AcpiReq; + uint8_t AcpiAck; + uint8_t MclkSwitchInProgress; + uint8_t MclkSwitchCritical; + + uint8_t IgnoreVBlank; + uint8_t TargetMclkIndex; + uint8_t TargetMvddIndex; + uint8_t MclkSwitchResult; + + uint16_t VbiFailureCount; + uint8_t VbiWaitCounter; + uint8_t EnabledLevelsChange; + + uint16_t LevelResidencyCounters [SMU75_MAX_LEVELS_MEMORY]; + uint16_t LevelSwitchCounters [SMU75_MAX_LEVELS_MEMORY]; + + void (*TargetStateCalculator)(uint8_t); + void (*SavedTargetStateCalculator)(uint8_t); + + uint16_t AutoDpmInterval; + uint16_t AutoDpmRange; + + uint16_t VbiTimeoutCount; + 
uint16_t MclkSwitchingTime; + + uint8_t fastSwitch; + uint8_t Save_PIC_VDDGFX_EXIT; + uint8_t Save_PIC_VDDGFX_ENTER; + uint8_t VbiTimeout; + + uint32_t HbmTempRegBackup; +}; + +typedef struct SMU7_MclkDpmScoreboard SMU7_MclkDpmScoreboard; + +struct SMU7_UlvScoreboard { + uint8_t EnterUlv; + uint8_t ExitUlv; + uint8_t UlvActive; + uint8_t WaitingForUlv; + uint8_t UlvEnable; + uint8_t UlvRunning; + uint8_t UlvMasterEnable; + uint8_t padding; + uint32_t UlvAbortedCount; + uint32_t UlvTimeStamp; +}; + +typedef struct SMU7_UlvScoreboard SMU7_UlvScoreboard; + +struct VddgfxSavedRegisters { + uint32_t GPU_DBG[3]; + uint32_t MEC_BaseAddress_Hi; + uint32_t MEC_BaseAddress_Lo; + uint32_t THM_TMON0_CTRL2__RDIR_PRESENT; + uint32_t THM_TMON1_CTRL2__RDIR_PRESENT; + uint32_t CP_INT_CNTL; +}; + +typedef struct VddgfxSavedRegisters VddgfxSavedRegisters; + +struct SMU7_VddGfxScoreboard { + uint8_t VddGfxEnable; + uint8_t VddGfxActive; + uint8_t VPUResetOccured; + uint8_t padding; + + uint32_t VddGfxEnteredCount; + uint32_t VddGfxAbortedCount; + + uint32_t VddGfxVid; + + VddgfxSavedRegisters SavedRegisters; +}; + +typedef struct SMU7_VddGfxScoreboard SMU7_VddGfxScoreboard; + +struct SMU7_TdcLimitScoreboard { + uint8_t Enable; + uint8_t Running; + uint16_t Alpha; + uint32_t FilteredIddc; + uint32_t IddcLimit; + uint32_t IddcHyst; + SMU7_HystController_Data HystControllerData; +}; + +typedef struct SMU7_TdcLimitScoreboard SMU7_TdcLimitScoreboard; + +struct SMU7_PkgPwrLimitScoreboard { + uint8_t Enable; + uint8_t Running; + uint16_t Alpha; + uint32_t FilteredPkgPwr; + uint32_t Limit; + uint32_t Hyst; + uint32_t LimitFromDriver; + uint8_t PowerSharingEnabled; + uint8_t PowerSharingCounter; + uint8_t PowerSharingINTEnabled; + uint8_t GFXActivityCounterEnabled; + uint32_t EnergyCount; + uint32_t PSACTCount; + uint8_t RollOverRequired; + uint8_t RollOverCount; + uint8_t padding[2]; + SMU7_HystController_Data HystControllerData; +}; + +typedef struct SMU7_PkgPwrLimitScoreboard SMU7_PkgPwrLimitScoreboard; + +struct SMU7_BapmScoreboard { + uint32_t source_powers[SMU75_DTE_SOURCES]; + uint32_t source_powers_last[SMU75_DTE_SOURCES]; + int32_t entity_temperatures[SMU75_NUM_GPU_TES]; + int32_t initial_entity_temperatures[SMU75_NUM_GPU_TES]; + int32_t Limit; + int32_t Hyst; + int32_t therm_influence_coeff_table[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS * 2]; + int32_t therm_node_table[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS]; + uint16_t ConfigTDPPowerScalar; + uint16_t FanSpeedPowerScalar; + uint16_t OverDrivePowerScalar; + uint16_t OverDriveLimitScalar; + uint16_t FinalPowerScalar; + uint8_t VariantID; + uint8_t spare997; + + SMU7_HystController_Data HystControllerData; + + int32_t temperature_gradient_slope; + int32_t temperature_gradient; + uint32_t measured_temperature; +}; + + +typedef struct SMU7_BapmScoreboard SMU7_BapmScoreboard; + +struct SMU7_AcpiScoreboard { + uint32_t SavedInterruptMask[2]; + uint8_t LastACPIRequest; + uint8_t CgBifResp; + uint8_t RequestType; + uint8_t Padding; + SMU75_Discrete_ACPILevel D0Level; +}; + +typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard; + +struct SMU75_Discrete_PmFuses { + uint8_t BapmVddCVidHiSidd[8]; + + uint8_t BapmVddCVidLoSidd[8]; + + uint8_t VddCVid[8]; + + uint8_t SviLoadLineEn; + uint8_t SviLoadLineVddC; + uint8_t SviLoadLineTrimVddC; + uint8_t SviLoadLineOffsetVddC; + + uint16_t TDC_VDDC_PkgLimit; + uint8_t TDC_VDDC_ThrottleReleaseLimitPerc; + uint8_t TDC_MAWt; + + uint8_t TdcWaterfallCtl; + uint8_t LPMLTemperatureMin; + 
uint8_t LPMLTemperatureMax; + uint8_t Reserved; + + uint8_t LPMLTemperatureScaler[16]; + + int16_t FuzzyFan_ErrorSetDelta; + int16_t FuzzyFan_ErrorRateSetDelta; + int16_t FuzzyFan_PwmSetDelta; + uint16_t Reserved6; + + uint8_t GnbLPML[16]; + + uint8_t GnbLPMLMaxVid; + uint8_t GnbLPMLMinVid; + uint8_t Reserved1[2]; + + uint16_t BapmVddCBaseLeakageHiSidd; + uint16_t BapmVddCBaseLeakageLoSidd; + + uint16_t VFT_Temp[3]; + uint8_t Version; + uint8_t padding; + + SMU_QuadraticCoeffs VFT_ATE[3]; + + SMU_QuadraticCoeffs AVFS_GB; + SMU_QuadraticCoeffs ATE_ACBTC_GB; + + SMU_QuadraticCoeffs P2V; + + uint32_t PsmCharzFreq; + + uint16_t InversionVoltage; + uint16_t PsmCharzTemp; + + uint32_t EnabledAvfsModules; + + SMU_QuadraticCoeffs BtcGbv_CksOff; +}; + +typedef struct SMU75_Discrete_PmFuses SMU75_Discrete_PmFuses; + +struct SMU7_Discrete_Log_Header_Table { + uint32_t version; + uint32_t asic_id; + uint16_t flags; + uint16_t entry_size; + uint32_t total_size; + uint32_t num_of_entries; + uint8_t type; + uint8_t mode; + uint8_t filler_0[2]; + uint32_t filler_1[2]; +}; + +typedef struct SMU7_Discrete_Log_Header_Table SMU7_Discrete_Log_Header_Table; + +struct SMU7_Discrete_Log_Cntl { + uint8_t Enabled; + uint8_t Type; + uint8_t padding[2]; + uint32_t BufferSize; + uint32_t SamplesLogged; + uint32_t SampleSize; + uint32_t AddrL; + uint32_t AddrH; +}; + +typedef struct SMU7_Discrete_Log_Cntl SMU7_Discrete_Log_Cntl; + +#if defined SMU__DGPU_ONLY +#define CAC_ACC_NW_NUM_OF_SIGNALS 87 +#endif + + +struct SMU7_Discrete_Cac_Collection_Table { + uint32_t temperature; + uint32_t cac_acc_nw[CAC_ACC_NW_NUM_OF_SIGNALS]; +}; + +typedef struct SMU7_Discrete_Cac_Collection_Table SMU7_Discrete_Cac_Collection_Table; + +struct SMU7_Discrete_Cac_Verification_Table { + uint32_t VddcTotalPower; + uint32_t VddcLeakagePower; + uint32_t VddcConstantPower; + uint32_t VddcGfxDynamicPower; + uint32_t VddcUvdDynamicPower; + uint32_t VddcVceDynamicPower; + uint32_t VddcAcpDynamicPower; + uint32_t VddcPcieDynamicPower; + uint32_t VddcDceDynamicPower; + uint32_t VddcCurrent; + uint32_t VddcVoltage; + uint32_t VddciTotalPower; + uint32_t VddciLeakagePower; + uint32_t VddciConstantPower; + uint32_t VddciDynamicPower; + uint32_t Vddr1TotalPower; + uint32_t Vddr1LeakagePower; + uint32_t Vddr1ConstantPower; + uint32_t Vddr1DynamicPower; + uint32_t spare[4]; + uint32_t temperature; +}; + +typedef struct SMU7_Discrete_Cac_Verification_Table SMU7_Discrete_Cac_Verification_Table; + +struct SMU7_Discrete_Pm_Status_Table { + int32_t T_meas_max[SMU75_THERMAL_INPUT_LOOP_COUNT]; + int32_t T_meas_acc[SMU75_THERMAL_INPUT_LOOP_COUNT]; + + uint32_t I_calc_max; + uint32_t I_calc_acc; + uint32_t P_meas_acc; + uint32_t V_meas_load_acc; + uint32_t I_meas_acc; + uint32_t P_meas_acc_vddci; + uint32_t V_meas_load_acc_vddci; + uint32_t I_meas_acc_vddci; + + uint16_t Sclk_dpm_residency[8]; + uint16_t Uvd_dpm_residency[8]; + uint16_t Vce_dpm_residency[8]; + uint16_t Mclk_dpm_residency[4]; + + uint32_t P_roc_acc; + uint32_t PkgPwr_max; + uint32_t PkgPwr_acc; + uint32_t MclkSwitchingTime_max; + uint32_t MclkSwitchingTime_acc; + uint32_t FanPwm_acc; + uint32_t FanRpm_acc; + uint32_t Gfx_busy_acc; + uint32_t Mc_busy_acc; + uint32_t Fps_acc; + + uint32_t AccCnt; +}; + +typedef struct SMU7_Discrete_Pm_Status_Table SMU7_Discrete_Pm_Status_Table; + +struct SMU7_Discrete_AutoWattMan_Status_Table { + int32_t T_meas_acc[SMU75_THERMAL_INPUT_LOOP_COUNT]; + uint16_t Sclk_dpm_residency[8]; + uint16_t Mclk_dpm_residency[4]; + uint32_t TgpPwr_acc; + uint32_t Gfx_busy_acc; + 
uint32_t Mc_busy_acc; + uint32_t AccCnt; +}; + +typedef struct SMU7_Discrete_AutoWattMan_Status_Table SMU7_Discrete_AutoWattMan_Status_Table; + +#define SMU7_MAX_GFX_CU_COUNT 24 +#define SMU7_MIN_GFX_CU_COUNT 8 +#define SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_SHIFT 0 +#define SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_MASK (0xFFFF << SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_SHIFT) +#define SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_SHIFT 16 +#define SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_MASK (0xFFFF << SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_SHIFT) + +struct SMU7_GfxCuPgScoreboard { + uint8_t Enabled; + uint8_t WaterfallUp; + uint8_t WaterfallDown; + uint8_t WaterfallLimit; + uint8_t CurrMaxCu; + uint8_t TargMaxCu; + uint8_t ClampMode; + uint8_t Active; + uint8_t MaxSupportedCu; + uint8_t MinSupportedCu; + uint8_t PendingGfxCuHostInterrupt; + uint8_t LastFilteredMaxCuInteger; + uint16_t FilteredMaxCu; + uint16_t FilteredMaxCuAlpha; + uint16_t FilterResetCount; + uint16_t FilterResetCountLimit; + uint8_t ForceCu; + uint8_t ForceCuCount; + uint8_t AcModeMaxCu; + uint8_t DcModeMaxCu; +}; + +typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard; + +#define SMU7_SCLK_CAC 0x561 +#define SMU7_MCLK_CAC 0xF9 +#define SMU7_VCLK_CAC 0x2DE +#define SMU7_DCLK_CAC 0x2DE +#define SMU7_ECLK_CAC 0x25E +#define SMU7_ACLK_CAC 0x25E +#define SMU7_SAMCLK_CAC 0x25E +#define SMU7_DISPCLK_CAC 0x100 +#define SMU7_CAC_CONSTANT 0x2EE3430 +#define SMU7_CAC_CONSTANT_SHIFT 18 + +#define SMU7_VDDCI_MCLK_CONST 1765 +#define SMU7_VDDCI_MCLK_CONST_SHIFT 16 +#define SMU7_VDDCI_VDDCI_CONST 50958 +#define SMU7_VDDCI_VDDCI_CONST_SHIFT 14 +#define SMU7_VDDCI_CONST 11781 +#define SMU7_VDDCI_STROBE_PWR 1331 + +#define SMU7_VDDR1_CONST 693 +#define SMU7_VDDR1_CAC_WEIGHT 20 +#define SMU7_VDDR1_CAC_WEIGHT_SHIFT 19 +#define SMU7_VDDR1_STROBE_PWR 512 + +#define SMU7_AREA_COEFF_UVD 0xA78 +#define SMU7_AREA_COEFF_VCE 0x190A +#define SMU7_AREA_COEFF_ACP 0x22D1 +#define SMU7_AREA_COEFF_SAMU 0x534 + +#define SMU7_THERM_OUT_MODE_DISABLE 0x0 +#define SMU7_THERM_OUT_MODE_THERM_ONLY 0x1 +#define SMU7_THERM_OUT_MODE_THERM_VRHOT 0x2 + +#define SQ_Enable_MASK 0x1 +#define SQ_IR_MASK 0x2 +#define SQ_PCC_MASK 0x4 +#define SQ_EDC_MASK 0x8 + +#define TCP_Enable_MASK 0x100 +#define TCP_IR_MASK 0x200 +#define TCP_PCC_MASK 0x400 +#define TCP_EDC_MASK 0x800 + +#define TD_Enable_MASK 0x10000 +#define TD_IR_MASK 0x20000 +#define TD_PCC_MASK 0x40000 +#define TD_EDC_MASK 0x80000 + +#define DB_Enable_MASK 0x1000000 +#define DB_IR_MASK 0x2000000 +#define DB_PCC_MASK 0x4000000 +#define DB_EDC_MASK 0x8000000 + +#define SQ_Enable_SHIFT 0 +#define SQ_IR_SHIFT 1 +#define SQ_PCC_SHIFT 2 +#define SQ_EDC_SHIFT 3 + +#define TCP_Enable_SHIFT 8 +#define TCP_IR_SHIFT 9 +#define TCP_PCC_SHIFT 10 +#define TCP_EDC_SHIFT 11 + +#define TD_Enable_SHIFT 16 +#define TD_IR_SHIFT 17 +#define TD_PCC_SHIFT 18 +#define TD_EDC_SHIFT 19 + +#define DB_Enable_SHIFT 24 +#define DB_IR_SHIFT 25 +#define DB_PCC_SHIFT 26 +#define DB_EDC_SHIFT 27 + +#define PMFUSES_AVFSSIZE 104 + +#define BTCGB0_Vdroop_Enable_MASK 0x1 +#define BTCGB1_Vdroop_Enable_MASK 0x2 +#define AVFSGB0_Vdroop_Enable_MASK 0x4 +#define AVFSGB1_Vdroop_Enable_MASK 0x8 + +#define BTCGB0_Vdroop_Enable_SHIFT 0 +#define BTCGB1_Vdroop_Enable_SHIFT 1 +#define AVFSGB0_Vdroop_Enable_SHIFT 2 +#define AVFSGB1_Vdroop_Enable_SHIFT 3 + +#pragma pack(pop) + + +#endif + diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h index c3ed737ab951..715b5a168831 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h +++ 
b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
@@ -131,6 +131,7 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_RunAcgInOpenLoop               0x5E
 #define PPSMC_MSG_InitializeAcg                  0x5F
 #define PPSMC_MSG_GetCurrPkgPwr                  0x61
+#define PPSMC_MSG_GetAverageGfxclkActualFrequency 0x63
 #define PPSMC_MSG_SetPccThrottleLevel            0x67
 #define PPSMC_MSG_UpdatePkgPwrPidAlpha           0x68
 #define PPSMC_Message_Count                      0x69
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
index 958755075421..0a200406a1ec 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
@@ -26,7 +26,7 @@
 SMU_MGR = smumgr.o smu8_smumgr.o tonga_smumgr.o fiji_smumgr.o \
 	  polaris10_smumgr.o iceland_smumgr.o \
 	  smu7_smumgr.o vega10_smumgr.o smu10_smumgr.o ci_smumgr.o \
-	  vega12_smumgr.o
+	  vega12_smumgr.o vegam_smumgr.o
 
 AMD_PP_SMUMGR = $(addprefix $(AMD_PP_PATH)/smumgr/,$(SMU_MGR))
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 08d000140eca..2d4ec8ac3a08 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -61,9 +61,6 @@
 
 #define SMC_RAM_END 0x40000
 
-#define VOLTAGE_SCALE 4
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
 #define CISLAND_MINIMUM_ENGINE_CLOCK 800
 #define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5
 
@@ -211,9 +208,7 @@ static int ci_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
 {
 	int ret;
 
-	if (!ci_is_smc_ram_running(hwmgr))
-		return -EINVAL;
-
+	cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0);
 	cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg);
 
 	PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
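The ci_send_msg_to_smc() change drops the RAM-running guard and instead clears the response register before posting the message, so the subsequent wait cannot be satisfied by a stale acknowledgment from an earlier request. The resulting handshake, annotated:

	/* Sketch of the SMC mailbox handshake after this change:
	 * 1) clear SMC_RESP_0 so a previous ack cannot leak through,
	 * 2) post the message ID to SMC_MESSAGE_0,
	 * 3) spin until the SMC writes a non-zero response. */
	cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0);
	cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg);
	PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);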
@@ -1182,7 +1177,6 @@ static int ci_populate_single_memory_level(
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	int result = 0;
 	bool dll_state_on;
-	struct cgs_display_info info = {0};
 	uint32_t mclk_edc_wr_enable_threshold = 40000;
 	uint32_t mclk_edc_enable_threshold = 40000;
 	uint32_t mclk_strobe_mode_threshold = 40000;
@@ -1236,8 +1230,7 @@ static int ci_populate_single_memory_level(
 	/* default set to low watermark. Highest level will be set to high later.*/
 	memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	data->display_timing.num_existing_displays = info.display_count;
+	data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
 
 	/* stutter mode not support on ci */
 
@@ -2784,7 +2777,6 @@ static int ci_smu_fini(struct pp_hwmgr *hwmgr)
 {
 	kfree(hwmgr->smu_backend);
 	hwmgr->smu_backend = NULL;
-	cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index faef78321446..53df9405f43a 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -53,10 +53,7 @@
 
 #define FIJI_SMC_SIZE 0x20000
 
-#define VOLTAGE_SCALE 4
 #define POWERTUNE_DEFAULT_SET_MAX 1
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
 #define VDDC_VDDCI_DELTA 300
 
 #define MC_CG_ARB_FREQ_F1 0x0b
@@ -288,8 +285,7 @@ static int fiji_start_smu(struct pp_hwmgr *hwmgr)
 	struct fiji_smumgr *priv = (struct fiji_smumgr *)(hwmgr->smu_backend);
 
 	/* Only start SMC if SMC RAM is not running */
-	if (!(smu7_is_smc_ram_running(hwmgr)
-		|| cgs_is_virtualization_enabled(hwmgr->device))) {
+	if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
 		/* Check if SMU is running in protected mode */
 		if (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,
 				CGS_IND_REG__SMC,
@@ -307,13 +303,13 @@ static int fiji_start_smu(struct pp_hwmgr *hwmgr)
 	}
 
 	/* To initialize all clock gating before RLC loaded and running.*/
-	cgs_set_clockgating_state(hwmgr->device,
+	amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 			AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_GATE);
-	cgs_set_clockgating_state(hwmgr->device,
+	amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 			AMD_IP_BLOCK_TYPE_GMC, AMD_CG_STATE_GATE);
-	cgs_set_clockgating_state(hwmgr->device,
+	amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 			AMD_IP_BLOCK_TYPE_SDMA, AMD_CG_STATE_GATE);
-	cgs_set_clockgating_state(hwmgr->device,
+	amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 			AMD_IP_BLOCK_TYPE_COMMON, AMD_CG_STATE_GATE);
 
 	/* Setup SoftRegsStart here for register lookup in case
@@ -335,10 +331,10 @@ static bool fiji_is_hw_avfs_present(struct pp_hwmgr *hwmgr)
 	uint32_t efuse = 0;
 	uint32_t mask = (1 << ((AVFS_EN_MSB - AVFS_EN_LSB) + 1)) - 1;
 
-	if (cgs_is_virtualization_enabled(hwmgr->device))
-		return 0;
+	if (!hwmgr->not_vf)
+		return false;
 
-	if (!atomctrl_read_efuse(hwmgr->device, AVFS_EN_LSB, AVFS_EN_MSB,
+	if (!atomctrl_read_efuse(hwmgr, AVFS_EN_LSB, AVFS_EN_MSB,
 			mask, &efuse)) {
 		if (efuse)
 			return true;
@@ -989,11 +985,11 @@ static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
 
 	threshold = clock * data->fast_watermark_threshold / 100;
 
-	data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr;
+	data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
 		level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock,
-								hwmgr->display_config.min_core_set_clock_in_sr);
+								hwmgr->display_config->min_core_set_clock_in_sr);
 
 	/* Default to slow, highest DPM level will be
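fiji_populate_single_graphic_level() now reads the display configuration through the cached hwmgr->display_config pointer rather than a by-value copy, but the deep-sleep divider is still chosen so that the divided sclk stays above the display's minimum self-refresh clock. A hypothetical helper mirroring what smu7_get_sleep_divider_id_from_clock() does (divider = 2^id, largest id that still satisfies the display):

	/* Sketch only: walk divider IDs from largest to smallest and keep
	 * the first one whose divided clock still meets min_clock_in_sr,
	 * so deep sleep never starves the display during self refresh. */
	static uint8_t pick_sleep_divider_id(uint32_t clock,
			uint32_t min_clock_in_sr)
	{
		uint8_t id;

		for (id = 5 /* assumed max divider id: clock >> 5 */; id > 0; id--)
			if ((clock >> id) >= min_clock_in_sr)
				break;
		return id;
	}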
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
index d4bb934e7334..415f691c3fa9 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
@@ -60,10 +60,7 @@
 
 #define ICELAND_SMC_SIZE 0x20000
 
-#define VOLTAGE_SCALE 4
 #define POWERTUNE_DEFAULT_SET_MAX 1
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
 
 #define MC_CG_ARB_FREQ_F1 0x0b
 #define VDDC_VDDCI_DELTA 200
@@ -932,7 +929,7 @@ static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
 	graphic_level->PowerThrottle = 0;
 
 	data->display_timing.min_clock_in_sr =
-			hwmgr->display_config.min_core_set_clock_in_sr;
+			hwmgr->display_config->min_core_set_clock_in_sr;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_SclkDeepSleep))
@@ -1236,7 +1233,6 @@ static int iceland_populate_single_memory_level(
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	int result = 0;
 	bool dll_state_on;
-	struct cgs_display_info info = {0};
 	uint32_t mclk_edc_wr_enable_threshold = 40000;
 	uint32_t mclk_edc_enable_threshold = 40000;
 	uint32_t mclk_strobe_mode_threshold = 40000;
@@ -1283,8 +1279,7 @@ static int iceland_populate_single_memory_level(
 	/* default set to low watermark. Highest level will be set to high later.*/
 	memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	data->display_timing.num_existing_displays = info.display_count;
+	data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
 
 	/* stutter mode not support on iceland */
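As on ci and fiji, the active display count now comes from the cached display_config instead of a cgs_get_active_displays_info() query. Consumers read it the same way; a sketch of the kind of check that depends on it (the condition shown is illustrative, not iceland's actual logic):

	/* Sketch: display count sourced from the cached configuration. */
	data->display_timing.num_existing_displays =
			hwmgr->display_config->num_display;

	/* Hypothetical consumer: only allow mclk stutter mode when the
	 * display load is light enough for cheap memory-clock switches. */
	if (data->display_timing.num_existing_displays &&
	    data->display_timing.num_existing_displays <= 2)
		mem_level->StutterEnable = true;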
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 997a777dd35b..a8c6524f07e4 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -52,8 +52,6 @@
 #include "dce/dce_10_0_sh_mask.h"
 
 #define POLARIS10_SMC_SIZE 0x20000
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
 #define POWERTUNE_DEFAULT_SET_MAX 1
 #define VDDC_VDDCI_DELTA 200
 #define MC_CG_ARB_FREQ_F1 0x0b
@@ -295,25 +293,16 @@ static int polaris10_start_smu(struct pp_hwmgr *hwmgr)
 	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend);
 
 	/* Only start SMC if SMC RAM is not running */
-	if (!(smu7_is_smc_ram_running(hwmgr)
-		|| cgs_is_virtualization_enabled(hwmgr->device))) {
+	if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
 		smu_data->protected_mode = (uint8_t) (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,
 					CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE));
 		smu_data->smu7_data.security_hard_key = (uint8_t) (PHM_READ_VFPF_INDIRECT_FIELD(
 					hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_SEL));
 
 		/* Check if SMU is running in protected mode */
-		if (smu_data->protected_mode == 0) {
+		if (smu_data->protected_mode == 0)
 			result = polaris10_start_smu_in_non_protection_mode(hwmgr);
-		} else {
+		else
 			result = polaris10_start_smu_in_protection_mode(hwmgr);
 
-			/* If failed, try with different security Key. */
-			if (result != 0) {
-				smu_data->smu7_data.security_hard_key ^= 1;
-				cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
-				result = polaris10_start_smu_in_protection_mode(hwmgr);
-			}
-		}
-
 		if (result != 0)
 			PP_ASSERT_WITH_CODE(0, "Failed to load SMU ucode.",
 					return result);
@@ -951,11 +940,11 @@ static int polaris10_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
 	level->DownHyst = data->current_profile_setting.sclk_down_hyst;
 	level->VoltageDownHyst = 0;
 	level->PowerThrottle = 0;
 
-	data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr;
+	data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
 		level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock,
-								hwmgr->display_config.min_core_set_clock_in_sr);
+								hwmgr->display_config->min_core_set_clock_in_sr);
 
 	/* Default to slow, highest DPM level will be
 	 * set to PPSMC_DISPLAY_WATERMARK_LOW later.
@@ -1085,11 +1074,9 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	int result = 0;
-	struct cgs_display_info info = {0, 0, NULL};
 	uint32_t mclk_stutter_mode_threshold = 40000;
 	phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL;
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
 
 	if (hwmgr->od_enabled)
 		vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_mclk;
@@ -1115,7 +1102,7 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	mem_level->StutterEnable = false;
 	mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
-	data->display_timing.num_existing_displays = info.display_count;
+	data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
 
 	if (mclk_stutter_mode_threshold &&
 		(clock <= mclk_stutter_mode_threshold) &&
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
index bc53f2beda30..0a563f6fe9ea 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
@@ -23,7 +23,7 @@
 
 #include "smumgr.h"
 #include "smu10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 #include "smu10_smumgr.h"
 #include "ppatomctrl.h"
 #include "rv_ppsmc.h"
@@ -33,8 +33,6 @@
 
 #include "pp_debug.h"
 
-#define VOLTAGE_SCALE 4
-
 #define BUFFER_SIZE                 80000
 #define MAX_STRING_SIZE             15
 #define BUFFER_SIZETWO              131072
@@ -49,48 +47,41 @@
 
 static uint32_t smu10_wait_for_response(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t reg;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
+	reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
 
 	phm_wait_for_register_unequal(hwmgr, reg,
 			0, MP1_C2PMSG_90__CONTENT_MASK);
 
-	return cgs_read_register(hwmgr->device, reg);
+	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
 }
 
 static int smu10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
 		uint16_t msg)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
-	cgs_write_register(hwmgr->device, reg, msg);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
 
 	return 0;
 }
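smu10 talks to the MP1 firmware through three SMN scratch registers: C2PMSG_66 carries the message ID, C2PMSG_82 the optional argument, and C2PMSG_90 the response (zero while a request is in flight). A sketch of a full round-trip under that convention, as a hypothetical wrapper built from the helpers above:

	/* Sketch of the MP1 mailbox protocol: drain any previous request,
	 * mark the mailbox busy, stage the argument, then fire the message
	 * ID and wait for the firmware to post a non-zero response. */
	static int send_msg_with_arg(struct pp_hwmgr *hwmgr,
			uint16_t msg, uint32_t arg)
	{
		struct amdgpu_device *adev = hwmgr->adev;

		smu10_wait_for_response(hwmgr);
		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, arg);
		WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
		smu10_wait_for_response(hwmgr);
		return 0;
	}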
soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); + struct amdgpu_device *adev = hwmgr->adev; - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); } static int smu10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; smu10_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); smu10_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -104,17 +95,13 @@ static int smu10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) static int smu10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; smu10_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); - cgs_write_register(hwmgr->device, reg, parameter); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter); smu10_send_msg_to_smc_without_waiting(hwmgr, msg); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c index 0399c10d2be0..64d33b775906 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c @@ -167,24 +167,25 @@ int smu7_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { int ret; - if (!smu7_is_smc_ram_running(hwmgr)) - return -EINVAL; - - PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0); ret = PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP); - if (ret != 1) - pr_info("\n failed to send pre message %x ret is %d \n", msg, ret); + if (ret == 0xFE) + pr_debug("last message was not supported\n"); + else if (ret != 1) + pr_info("\n last message was failed ret is %d\n", ret); + cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0); cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg); PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0); ret = PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP); - if (ret != 1) + if (ret == 0xFE) + pr_debug("message %x was not supported\n", msg); + else if (ret != 1) pr_info("\n failed to send message %x ret is %d \n", msg, ret); return 0; @@ -199,10 +200,6 @@ int smu7_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg) int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - if (!smu7_is_smc_ram_running(hwmgr)) { - return -EINVAL; - } - PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0); cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, parameter); @@ -231,16 +228,6 @@ int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr) return 0; } -int smu7_wait_for_smc_inactive(struct pp_hwmgr *hwmgr) -{ - if (!smu7_is_smc_ram_running(hwmgr)) - return -EINVAL; - - PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, SMC_SYSCON_CLOCK_CNTL_0, cken, 0); - return 0; -} - - enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type) { enum cgs_ucode_id result = CGS_UCODE_ID_MAXIMUM; @@ -375,7 +362,7 @@ static int smu7_populate_single_firmware_entry(struct pp_hwmgr *hwmgr, entry->meta_data_addr_low = 0; /* digest need be excluded out */ - if 
(cgs_is_virtualization_enabled(hwmgr->device)) + if (!hwmgr->not_vf) info.image_size -= 20; entry->data_size_byte = info.image_size; entry->num_register_entries = 0; @@ -409,7 +396,7 @@ int smu7_request_smu_load_fw(struct pp_hwmgr *hwmgr) 0x0); if (hwmgr->chip_id > CHIP_TOPAZ) { /* add support for Topaz */ - if (!cgs_is_virtualization_enabled(hwmgr->device)) { + if (hwmgr->not_vf) { smu7_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SMU_DRAM_ADDR_HI, upper_32_bits(smu_data->smu_buffer.mc_addr)); @@ -467,7 +454,7 @@ int smu7_request_smu_load_fw(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(0 == smu7_populate_single_firmware_entry(hwmgr, UCODE_ID_SDMA1, &toc->entry[toc->num_entries++]), "Failed to Get Firmware Entry.", return -EINVAL); - if (cgs_is_virtualization_enabled(hwmgr->device)) + if (!hwmgr->not_vf) PP_ASSERT_WITH_CODE(0 == smu7_populate_single_firmware_entry(hwmgr, UCODE_ID_MEC_STORAGE, &toc->entry[toc->num_entries++]), "Failed to Get Firmware Entry.", return -EINVAL); @@ -608,7 +595,7 @@ int smu7_init(struct pp_hwmgr *hwmgr) smu_data->header = smu_data->header_buffer.kaddr; smu_data->header_buffer.mc_addr = mc_addr; - if (cgs_is_virtualization_enabled(hwmgr->device)) + if (!hwmgr->not_vf) return 0; smu_data->smu_buffer.data_size = 200*4096; @@ -643,13 +630,12 @@ int smu7_smu_fini(struct pp_hwmgr *hwmgr) &smu_data->header_buffer.mc_addr, &smu_data->header_buffer.kaddr); - if (!cgs_is_virtualization_enabled(hwmgr->device)) + if (hwmgr->not_vf) amdgpu_bo_free_kernel(&smu_data->smu_buffer.handle, &smu_data->smu_buffer.mc_addr, &smu_data->smu_buffer.kaddr); kfree(hwmgr->smu_backend); hwmgr->smu_backend = NULL; - cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h index 126d300259ba..39c9bfda0ab4 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h @@ -67,7 +67,6 @@ int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, int smu7_send_msg_to_smc_with_parameter_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter); int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr); -int smu7_wait_for_smc_inactive(struct pp_hwmgr *hwmgr); enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type); int smu7_read_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index c28b60aae5f8..ee236dfbf1d6 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -41,6 +41,7 @@ MODULE_FIRMWARE("amdgpu/polaris11_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin"); MODULE_FIRMWARE("amdgpu/polaris11_k_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_smc.bin"); +MODULE_FIRMWARE("amdgpu/vegam_smc.bin"); MODULE_FIRMWARE("amdgpu/vega10_smc.bin"); MODULE_FIRMWARE("amdgpu/vega10_acg_smc.bin"); MODULE_FIRMWARE("amdgpu/vega12_smc.bin"); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index b51d7468c3e7..782b19fc2e70 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -55,11 +55,7 @@ #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" - -#define VOLTAGE_SCALE 4 #define POWERTUNE_DEFAULT_SET_MAX 1 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define 
VOLTAGE_VID_OFFSET_SCALE2 100 #define MC_CG_ARB_FREQ_F1 0x0b #define VDDC_VDDCI_DELTA 200 @@ -199,8 +195,7 @@ static int tonga_start_smu(struct pp_hwmgr *hwmgr) int result; /* Only start SMC if SMC RAM is not running */ - if (!(smu7_is_smc_ram_running(hwmgr) || - cgs_is_virtualization_enabled(hwmgr->device))) { + if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) { /*Check if SMU is running in protected mode*/ if (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE)) { @@ -651,7 +646,7 @@ static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr, graphic_level->PowerThrottle = 0; data->display_timing.min_clock_in_sr = - hwmgr->display_config.min_core_set_clock_in_sr; + hwmgr->display_config->min_core_set_clock_in_sr; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) @@ -957,18 +952,17 @@ static int tonga_populate_single_memory_level( SMU72_Discrete_MemoryLevel *memory_level ) { - uint32_t mvdd = 0; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); - int result = 0; - bool dll_state_on; - struct cgs_display_info info = {0}; uint32_t mclk_edc_wr_enable_threshold = 40000; uint32_t mclk_stutter_mode_threshold = 30000; uint32_t mclk_edc_enable_threshold = 40000; uint32_t mclk_strobe_mode_threshold = 40000; phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; + int result = 0; + bool dll_state_on; + uint32_t mvdd = 0; if (hwmgr->od_enabled) vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_mclk; @@ -1009,8 +1003,7 @@ static int tonga_populate_single_memory_level( /* default set to low watermark. 
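The display-count plumbing changes the same way in every populate_single_memory_level() variant here: the cgs display query is dropped and the count is read from the hwmgr's cached display_config. Sketch of the substitution, assuming display_config is valid whenever the DPM tables are rebuilt:

    /* old: ask cgs for the active display list */
    struct cgs_display_info info = {0};
    cgs_get_active_displays_info(hwmgr->device, &info);
    data->display_timing.num_existing_displays = info.display_count;

    /* new: read the cached count directly */
    data->display_timing.num_existing_displays =
            hwmgr->display_config->num_display;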
Highest level will be set to high later.*/ memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - cgs_get_active_displays_info(hwmgr->device, &info); - data->display_timing.num_existing_displays = info.display_count; + data->display_timing.num_existing_displays = hwmgr->display_config->num_display; if ((mclk_stutter_mode_threshold != 0) && (memory_clock <= mclk_stutter_mode_threshold) && diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c index 4aafb043bcb0..e84669c448a3 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c @@ -23,7 +23,7 @@ #include "smumgr.h" #include "vega10_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "vega10_smumgr.h" #include "vega10_hwmgr.h" #include "vega10_ppsmc.h" @@ -35,8 +35,6 @@ #define AVFS_EN_MSB 1568 #define AVFS_EN_LSB 1568 -#define VOLTAGE_SCALE 4 - /* Microcode file is stored in this buffer */ #define BUFFER_SIZE 80000 #define MAX_STRING_SIZE 15 @@ -54,18 +52,13 @@ static bool vega10_is_smc_ram_running(struct pp_hwmgr *hwmgr) { - uint32_t mp1_fw_flags, reg; - - reg = soc15_get_register_offset(NBIF_HWID, 0, - mmPCIE_INDEX2_BASE_IDX, mmPCIE_INDEX2); + struct amdgpu_device *adev = hwmgr->adev; + uint32_t mp1_fw_flags; - cgs_write_register(hwmgr->device, reg, + WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2, (MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff))); - reg = soc15_get_register_offset(NBIF_HWID, 0, - mmPCIE_DATA2_BASE_IDX, mmPCIE_DATA2); - - mp1_fw_flags = cgs_read_register(hwmgr->device, reg); + mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2); if (mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) return true; @@ -81,11 +74,11 @@ static bool vega10_is_smc_ram_running(struct pp_hwmgr *hwmgr) */ static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t reg; uint32_t ret; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); + reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); ret = phm_wait_for_register_unequal(hwmgr, reg, 0, MP1_C2PMSG_90__CONTENT_MASK); @@ -93,7 +86,7 @@ static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr) if (ret) pr_err("No response from smu\n"); - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); } /* @@ -105,11 +98,9 @@ static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr) static int vega10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66); - cgs_write_register(hwmgr->device, reg, msg); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); return 0; } @@ -122,14 +113,12 @@ static int vega10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, */ static int vega10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; uint32_t ret; vega10_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); vega10_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -150,18 +139,14 @@ static int vega10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) static int 
vega10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; uint32_t ret; vega10_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); - cgs_write_register(hwmgr->device, reg, parameter); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter); vega10_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -174,12 +159,9 @@ static int vega10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, static int vega10_get_argument(struct pp_hwmgr *hwmgr) { - uint32_t reg; - - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); + struct amdgpu_device *adev = hwmgr->adev; - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); } static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c index 651a3f28734b..7d9b40e8b1bf 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c @@ -23,7 +23,7 @@ #include "smumgr.h" #include "vega12_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "vega12_smumgr.h" #include "vega12_ppsmc.h" #include "vega12/smu9_driver_if.h" @@ -44,18 +44,13 @@ static bool vega12_is_smc_ram_running(struct pp_hwmgr *hwmgr) { - uint32_t mp1_fw_flags, reg; + struct amdgpu_device *adev = hwmgr->adev; + uint32_t mp1_fw_flags; - reg = soc15_get_register_offset(NBIF_HWID, 0, - mmPCIE_INDEX2_BASE_IDX, mmPCIE_INDEX2); - - cgs_write_register(hwmgr->device, reg, + WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2, (MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff))); - reg = soc15_get_register_offset(NBIF_HWID, 0, - mmPCIE_DATA2_BASE_IDX, mmPCIE_DATA2); - - mp1_fw_flags = cgs_read_register(hwmgr->device, reg); + mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2); if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >> MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT) @@ -72,15 +67,15 @@ static bool vega12_is_smc_ram_running(struct pp_hwmgr *hwmgr) */ static uint32_t vega12_wait_for_response(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t reg; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); + reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); phm_wait_for_register_unequal(hwmgr, reg, 0, MP1_C2PMSG_90__CONTENT_MASK); - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); } /* @@ -92,11 +87,9 @@ static uint32_t vega12_wait_for_response(struct pp_hwmgr *hwmgr) int vega12_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66); - cgs_write_register(hwmgr->device, reg, msg); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); return 0; } @@ -109,13 +102,11 @@ int vega12_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, */ int vega12_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; 
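/*
 * Message handshake used by these helpers: C2PMSG_90 carries the SMU's
 * response, C2PMSG_82 the argument, C2PMSG_66 the message ID; a sender
 * waits out any previous response, clears C2PMSG_90, writes the argument
 * and then the message, and polls C2PMSG_90 again for a non-zero result.
 *
 * The mechanical rewrite in the smu10/vega10/vega12 hunks drops the
 * soc15_get_register_offset() + cgs_read/write_register() pair for
 * amdgpu's SOC15 macros, which is why each rewritten helper gains a
 * local adev. Sketch of the equivalence, assuming the macros resolve
 * offsets from the per-IP base tables hanging off adev:
 *
 *   old:  reg = soc15_get_register_offset(MP1_HWID, 0,
 *                 mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
 *         val = cgs_read_register(hwmgr->device, reg);
 *   new:  val = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
 *         WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 */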
vega12_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); vega12_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -135,17 +126,13 @@ int vega12_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) int vega12_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; vega12_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); - cgs_write_register(hwmgr->device, reg, parameter); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter); vega12_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -166,11 +153,9 @@ int vega12_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, int vega12_send_msg_to_smc_with_parameter_without_waiting( struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66); - cgs_write_register(hwmgr->device, reg, parameter); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, parameter); return vega12_send_msg_to_smc_without_waiting(hwmgr, msg); } @@ -183,12 +168,9 @@ int vega12_send_msg_to_smc_with_parameter_without_waiting( */ int vega12_read_arg_from_smc(struct pp_hwmgr *hwmgr, uint32_t *arg) { - uint32_t reg; - - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); + struct amdgpu_device *adev = hwmgr->adev; - *arg = cgs_read_register(hwmgr->device, reg); + *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c new file mode 100644 index 000000000000..c9a563399330 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -0,0 +1,2382 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
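 *
 * (The new vegam_smumgr.c below is an SMU75-format backend built on the
 * shared smu7 helpers. Its protected-mode bring-up, summarized from the
 * code that follows: hold the SMC in reset, upload the firmware image,
 * clear SMU_STATUS, ungate the SMC clock, release reset, wait for the
 * interrupts-enabled flag, fire the test message at offset 0x20000, wait
 * for SMU_DONE and then require SMU_PASS.)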
+ * + */ +#include "pp_debug.h" +#include "smumgr.h" +#include "smu_ucode_xfer_vi.h" +#include "vegam_smumgr.h" +#include "smu/smu_7_1_3_d.h" +#include "smu/smu_7_1_3_sh_mask.h" +#include "gmc/gmc_8_1_d.h" +#include "gmc/gmc_8_1_sh_mask.h" +#include "oss/oss_3_0_d.h" +#include "gca/gfx_8_0_d.h" +#include "bif/bif_5_0_d.h" +#include "bif/bif_5_0_sh_mask.h" +#include "ppatomctrl.h" +#include "cgs_common.h" +#include "smu7_ppsmc.h" + +#include "smu7_dyn_defaults.h" + +#include "smu7_hwmgr.h" +#include "hardwaremanager.h" +#include "ppatomctrl.h" +#include "atombios.h" +#include "pppcielanes.h" + +#include "dce/dce_11_2_d.h" +#include "dce/dce_11_2_sh_mask.h" + +#define PPVEGAM_TARGETACTIVITY_DFLT 50 + +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 +#define POWERTUNE_DEFAULT_SET_MAX 1 +#define VDDC_VDDCI_DELTA 200 +#define MC_CG_ARB_FREQ_F1 0x0b + +#define STRAP_ASIC_RO_LSB 2168 +#define STRAP_ASIC_RO_MSB 2175 + +#define PPSMC_MSG_ApplyAvfsCksOffVoltage ((uint16_t) 0x415) +#define PPSMC_MSG_EnableModeSwitchRLCNotification ((uint16_t) 0x305) + +static const struct vegam_pt_defaults +vegam_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { + /* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, + * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT */ + { 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, + { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61}, + { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } }, +}; + +static const sclkFcwRange_t Range_Table[NUM_SCLK_RANGE] = { + {VCO_2_4, POSTDIV_DIV_BY_16, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_16, 112, 224, 160}, + {VCO_2_4, POSTDIV_DIV_BY_8, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_8, 112, 224, 160}, + {VCO_2_4, POSTDIV_DIV_BY_4, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_4, 112, 216, 160}, + {VCO_2_4, POSTDIV_DIV_BY_2, 75, 160, 108}, + {VCO_3_6, POSTDIV_DIV_BY_2, 112, 216, 160} }; + +static int vegam_smu_init(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data; + + smu_data = kzalloc(sizeof(struct vegam_smumgr), GFP_KERNEL); + if (smu_data == NULL) + return -ENOMEM; + + hwmgr->smu_backend = smu_data; + + if (smu7_init(hwmgr)) { + kfree(smu_data); + return -EINVAL; + } + + return 0; +} + +static int vegam_start_smu_in_protection_mode(struct pp_hwmgr *hwmgr) +{ + int result = 0; + + /* Wait for smc boot up */ + /* PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(smumgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0) */ + + /* Assert reset */ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 1); + + result = smu7_upload_smu_firmware_image(hwmgr); + if (result != 0) + return result; + + /* Clear status */ + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixSMU_STATUS, 0); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, ck_disable, 0); + + /* De-assert reset */ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 0); + + + PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, RCU_UC_EVENTS, INTERRUPTS_ENABLED, 1); + + + /* Call Test SMU message with 0x20000 offset to trigger SMU start */ + smu7_send_msg_to_smc_offset(hwmgr); + + /* Wait done bit to be set */ + /* Check pass/failed indicator */ + + PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, SMU_STATUS, SMU_DONE, 0); + + if (1 != PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, 
+ SMU_STATUS, SMU_PASS)) + PP_ASSERT_WITH_CODE(false, "SMU Firmware start failed!", return -1); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixFIRMWARE_FLAGS, 0); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 1); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 0); + + /* Wait for firmware to initialize */ + PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, FIRMWARE_FLAGS, INTERRUPTS_ENABLED, 1); + + return result; +} + +static int vegam_start_smu_in_non_protection_mode(struct pp_hwmgr *hwmgr) +{ + int result = 0; + + /* wait for smc boot up */ + PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0); + + /* Clear firmware interrupt enable flag */ + /* PHM_WRITE_VFPF_INDIRECT_FIELD(pSmuMgr, SMC_IND, SMC_SYSCON_MISC_CNTL, pre_fetcher_en, 1); */ + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixFIRMWARE_FLAGS, 0); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, + rst_reg, 1); + + result = smu7_upload_smu_firmware_image(hwmgr); + if (result != 0) + return result; + + /* Set smc instruct start point at 0x0 */ + smu7_program_jump_on_start(hwmgr); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, ck_disable, 0); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 0); + + /* Wait for firmware to initialize */ + + PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, + FIRMWARE_FLAGS, INTERRUPTS_ENABLED, 1); + + return result; +} + +static int vegam_start_smu(struct pp_hwmgr *hwmgr) +{ + int result = 0; + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + /* Only start SMC if SMC RAM is not running */ + if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) { + smu_data->protected_mode = (uint8_t)(PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE)); + smu_data->smu7_data.security_hard_key = (uint8_t)(PHM_READ_VFPF_INDIRECT_FIELD( + hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_SEL)); + + /* Check if SMU is running in protected mode */ + if (smu_data->protected_mode == 0) + result = vegam_start_smu_in_non_protection_mode(hwmgr); + else + result = vegam_start_smu_in_protection_mode(hwmgr); + + if (result != 0) + PP_ASSERT_WITH_CODE(0, "Failed to load SMU ucode.", return result); + } + + /* Setup SoftRegsStart here for register lookup in case DummyBackEnd is used and ProcessFirmwareHeader is not executed */ + smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU75_Firmware_Header, SoftRegisters), + &(smu_data->smu7_data.soft_regs_start), + 0x40000); + + result = smu7_request_smu_load_fw(hwmgr); + + return result; +} + +static int vegam_process_firmware_header(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t tmp; + int result; + bool error = false; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, DpmTable), + &tmp, SMC_RAM_END); + + if (0 == result) + smu_data->smu7_data.dpm_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, SoftRegisters), + &tmp, SMC_RAM_END); + + if (!result) { + data->soft_regs_start = tmp; + 
smu_data->smu7_data.soft_regs_start = tmp; + } + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, mcRegisterTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.mc_reg_table_start = tmp; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, FanTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.fan_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, mcArbDramTimingTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.arb_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, Version), + &tmp, SMC_RAM_END); + + if (!result) + hwmgr->microcode_version_info.SMC = tmp; + + error |= (0 != result); + + return error ? -1 : 0; +} + +static bool vegam_is_dpm_running(struct pp_hwmgr *hwmgr) +{ + return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) + ? true : false; +} + +static uint32_t vegam_get_mac_definition(uint32_t value) +{ + switch (value) { + case SMU_MAX_LEVELS_GRAPHICS: + return SMU75_MAX_LEVELS_GRAPHICS; + case SMU_MAX_LEVELS_MEMORY: + return SMU75_MAX_LEVELS_MEMORY; + case SMU_MAX_LEVELS_LINK: + return SMU75_MAX_LEVELS_LINK; + case SMU_MAX_ENTRIES_SMIO: + return SMU75_MAX_ENTRIES_SMIO; + case SMU_MAX_LEVELS_VDDC: + return SMU75_MAX_LEVELS_VDDC; + case SMU_MAX_LEVELS_VDDGFX: + return SMU75_MAX_LEVELS_VDDGFX; + case SMU_MAX_LEVELS_VDDCI: + return SMU75_MAX_LEVELS_VDDCI; + case SMU_MAX_LEVELS_MVDD: + return SMU75_MAX_LEVELS_MVDD; + case SMU_UVD_MCLK_HANDSHAKE_DISABLE: + return SMU7_UVD_MCLK_HANDSHAKE_DISABLE | + SMU7_VCE_MCLK_HANDSHAKE_DISABLE; + } + + pr_warn("can't get the mac of %x\n", value); + return 0; +} + +static int vegam_update_uvd_smc_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + smu_data->smc_state_table.UvdBootLevel = 0; + if (table_info->mm_dep_table->count > 0) + smu_data->smc_state_table.UvdBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + offsetof(SMU75_Discrete_DpmTable, + UvdBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0x00FFFFFF; + mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_UVDDPM) || + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_UVDDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel)); + return 0; +} + +static int vegam_update_vce_smc_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information 
*table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smu_data->smc_state_table.VceBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + else + smu_data->smc_state_table.VceBootLevel = 0; + + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, VceBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFF00FFFF; + mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_VCEDPM_SetEnabledMask, + (uint32_t)1 << smu_data->smc_state_table.VceBootLevel); + return 0; +} + +static int vegam_update_samu_smc_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + + + smu_data->smc_state_table.SamuBootLevel = 0; + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, SamuBootLevel); + + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFFFFFF00; + mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); + return 0; +} + + +static int vegam_update_bif_smc_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; + int max_entry, i; + + max_entry = (SMU75_MAX_LEVELS_LINK < pcie_table->count) ? 
+ SMU75_MAX_LEVELS_LINK : + pcie_table->count; + /* Setup BIF_SCLK levels */ + for (i = 0; i < max_entry; i++) + smu_data->bif_sclk_table[i] = pcie_table->entries[i].pcie_sclk; + return 0; +} + +static int vegam_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) +{ + switch (type) { + case SMU_UVD_TABLE: + vegam_update_uvd_smc_table(hwmgr); + break; + case SMU_VCE_TABLE: + vegam_update_vce_smc_table(hwmgr); + break; + case SMU_SAMU_TABLE: + vegam_update_samu_smc_table(hwmgr); + break; + case SMU_BIF_TABLE: + vegam_update_bif_smc_table(hwmgr); + break; + default: + break; + } + return 0; +} + +static void vegam_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + if (table_info && + table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX && + table_info->cac_dtp_table->usPowerTuneDataSetID) + smu_data->power_tune_defaults = + &vegam_power_tune_data_set_array + [table_info->cac_dtp_table->usPowerTuneDataSetID - 1]; + else + smu_data->power_tune_defaults = &vegam_power_tune_data_set_array[0]; + +} + +static int vegam_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t count, level; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + count = data->mvdd_voltage_table.count; + if (count > SMU_MAX_SMIO_LEVELS) + count = SMU_MAX_SMIO_LEVELS; + for (level = 0; level < count; level++) { + table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US( + data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); + /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/ + table->SmioTable2.Pattern[level].Smio = + (uint8_t) level; + table->Smio[level] |= + data->mvdd_voltage_table.entries[level].smio_low; + } + table->SmioMask2 = data->mvdd_voltage_table.mask_low; + + table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count); + } + + return 0; +} + +static int vegam_populate_smc_vddci_table(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + uint32_t count, level; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + count = data->vddci_voltage_table.count; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + if (count > SMU_MAX_SMIO_LEVELS) + count = SMU_MAX_SMIO_LEVELS; + for (level = 0; level < count; ++level) { + table->SmioTable1.Pattern[level].Voltage = PP_HOST_TO_SMC_US( + data->vddci_voltage_table.entries[level].value * VOLTAGE_SCALE); + table->SmioTable1.Pattern[level].Smio = (uint8_t) level; + + table->Smio[level] |= data->vddci_voltage_table.entries[level].smio_low; + } + } + + table->SmioMask1 = data->vddci_voltage_table.mask_low; + + return 0; +} + +static int vegam_populate_cac_table(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + uint32_t count; + uint8_t index; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_voltage_lookup_table *lookup_table = + table_info->vddc_lookup_table; + /* tables is already swapped, so in order to use the value from it, + * we need to swap it back. 
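 * (Hedged aside: convert_to_vid() used below is, in this driver family,
 * commonly defined as (6200 - vddc * VOLTAGE_SCALE) / 25, i.e. SVI2 VID
 * steps of 6.25 mV counted down from 1.55 V. Treat that exact formula as
 * an assumption; the helper is defined outside this diff.)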
+ * We are populating vddc CAC data to BapmVddc table + * in split and merged mode + */ + for (count = 0; count < lookup_table->count; count++) { + index = phm_get_voltage_index(lookup_table, + data->vddc_voltage_table.entries[count].value); + table->BapmVddcVidLoSidd[count] = + convert_to_vid(lookup_table->entries[index].us_cac_low); + table->BapmVddcVidHiSidd[count] = + convert_to_vid(lookup_table->entries[index].us_cac_mid); + table->BapmVddcVidHiSidd2[count] = + convert_to_vid(lookup_table->entries[index].us_cac_high); + } + + return 0; +} + +static int vegam_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + vegam_populate_smc_vddci_table(hwmgr, table); + vegam_populate_smc_mvdd_table(hwmgr, table); + vegam_populate_cac_table(hwmgr, table); + + return 0; +} + +static int vegam_populate_ulv_level(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_Ulv *state) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + state->CcPwrDynRm = 0; + state->CcPwrDynRm1 = 0; + + state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset; + state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset * + VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); + + state->VddcPhase = data->vddc_phase_shed_control ^ 0x3; + + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); + + return 0; +} + +static int vegam_populate_ulv_state(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + return vegam_populate_ulv_level(hwmgr, &table->Ulv); +} + +static int vegam_populate_smc_link_level(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = + (struct vegam_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + int i; + + /* Index (dpm_table->pcie_speed_table.count) + * is reserved for PCIE boot level. 
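 * (Hence the loop below runs with i <= pcie_speed_table.count rather
 * than the usual i < count: the single extra trailing entry carries the
 * PCIE boot-level settings.)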
*/ + for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = (uint8_t)encode_pcie_lane_width( + dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = 1; + table->LinkLevel[i].SPC = (uint8_t)(data->pcie_spc_cap & 0xff); + table->LinkLevel[i].DownThreshold = PP_HOST_TO_SMC_UL(5); + table->LinkLevel[i].UpThreshold = PP_HOST_TO_SMC_UL(30); + } + + smu_data->smc_state_table.LinkLevelCount = + (uint8_t)dpm_table->pcie_speed_table.count; + +/* To Do move to hwmgr */ + data->dpm_level_enable_mask.pcie_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); + + return 0; +} + +static int vegam_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table, + uint32_t clock, SMU_VoltageLevel *voltage, uint32_t *mvdd) +{ + uint32_t i; + uint16_t vddci; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + *voltage = *mvdd = 0; + + /* clock - voltage dependency table is empty table */ + if (dep_table->count == 0) + return -EINVAL; + + for (i = 0; i < dep_table->count; i++) { + /* find first sclk bigger than request */ + if (dep_table->entries[i].clk >= clock) { + *voltage |= (dep_table->entries[i].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + *voltage |= (data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else if (dep_table->entries[i].vddci) + *voltage |= (dep_table->entries[i].vddci * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else { + vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), + (dep_table->entries[i].vddc - + (uint16_t)VDDC_VDDCI_DELTA)); + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + } + + if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) + *mvdd = data->vbios_boot_state.mvdd_bootup_value * + VOLTAGE_SCALE; + else if (dep_table->entries[i].mvdd) + *mvdd = (uint32_t) dep_table->entries[i].mvdd * + VOLTAGE_SCALE; + + *voltage |= 1 << PHASES_SHIFT; + return 0; + } + } + + /* sclk is bigger than max sclk in the dependence table */ + *voltage |= (dep_table->entries[i - 1].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), + (dep_table->entries[i - 1].vddc - + (uint16_t)VDDC_VDDCI_DELTA)); + + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + *voltage |= (data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else if (dep_table->entries[i - 1].vddci) + *voltage |= (dep_table->entries[i - 1].vddci * + VOLTAGE_SCALE) << VDDC_SHIFT; + else + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) + *mvdd = data->vbios_boot_state.mvdd_bootup_value * VOLTAGE_SCALE; + else if (dep_table->entries[i].mvdd) + *mvdd = (uint32_t) dep_table->entries[i - 1].mvdd * VOLTAGE_SCALE; + + return 0; +} + +static void vegam_get_sclk_range_table(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t i, ref_clk; + + struct pp_atom_ctrl_sclk_range_table range_table_from_vbios = { { {0} } }; + + ref_clk = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev); + + if (0 == atomctrl_get_smc_sclk_range_table(hwmgr, &range_table_from_vbios)) { + for (i = 0; i < NUM_SCLK_RANGE; i++) { + table->SclkFcwRangeTable[i].vco_setting = + 
range_table_from_vbios.entry[i].ucVco_setting; + table->SclkFcwRangeTable[i].postdiv = + range_table_from_vbios.entry[i].ucPostdiv; + table->SclkFcwRangeTable[i].fcw_pcc = + range_table_from_vbios.entry[i].usFcw_pcc; + + table->SclkFcwRangeTable[i].fcw_trans_upper = + range_table_from_vbios.entry[i].usFcw_trans_upper; + table->SclkFcwRangeTable[i].fcw_trans_lower = + range_table_from_vbios.entry[i].usRcw_trans_lower; + + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower); + } + return; + } + + for (i = 0; i < NUM_SCLK_RANGE; i++) { + smu_data->range_table[i].trans_lower_frequency = + (ref_clk * Range_Table[i].fcw_trans_lower) >> Range_Table[i].postdiv; + smu_data->range_table[i].trans_upper_frequency = + (ref_clk * Range_Table[i].fcw_trans_upper) >> Range_Table[i].postdiv; + + table->SclkFcwRangeTable[i].vco_setting = Range_Table[i].vco_setting; + table->SclkFcwRangeTable[i].postdiv = Range_Table[i].postdiv; + table->SclkFcwRangeTable[i].fcw_pcc = Range_Table[i].fcw_pcc; + + table->SclkFcwRangeTable[i].fcw_trans_upper = Range_Table[i].fcw_trans_upper; + table->SclkFcwRangeTable[i].fcw_trans_lower = Range_Table[i].fcw_trans_lower; + + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower); + } +} + +static int vegam_calculate_sclk_params(struct pp_hwmgr *hwmgr, + uint32_t clock, SMU_SclkSetting *sclk_setting) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + const SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table); + struct pp_atomctrl_clock_dividers_ai dividers; + uint32_t ref_clock; + uint32_t pcc_target_percent, pcc_target_freq, ss_target_percent, ss_target_freq; + uint8_t i; + int result; + uint64_t temp; + + sclk_setting->SclkFrequency = clock; + /* get the engine clock dividers for this clock value */ + result = atomctrl_get_engine_pll_dividers_ai(hwmgr, clock, &dividers); + if (result == 0) { + sclk_setting->Fcw_int = dividers.usSclk_fcw_int; + sclk_setting->Fcw_frac = dividers.usSclk_fcw_frac; + sclk_setting->Pcc_fcw_int = dividers.usPcc_fcw_int; + sclk_setting->PllRange = dividers.ucSclkPllRange; + sclk_setting->Sclk_slew_rate = 0x400; + sclk_setting->Pcc_up_slew_rate = dividers.usPcc_fcw_slew_frac; + sclk_setting->Pcc_down_slew_rate = 0xffff; + sclk_setting->SSc_En = dividers.ucSscEnable; + sclk_setting->Fcw1_int = dividers.usSsc_fcw1_int; + sclk_setting->Fcw1_frac = dividers.usSsc_fcw1_frac; + sclk_setting->Sclk_ss_slew_rate = dividers.usSsc_fcw_slew_frac; + return result; + } + + ref_clock = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev); + + for (i = 0; i < NUM_SCLK_RANGE; i++) { + if (clock > smu_data->range_table[i].trans_lower_frequency + && clock <= smu_data->range_table[i].trans_upper_frequency) { + sclk_setting->PllRange = i; + break; + } + } + + sclk_setting->Fcw_int = (uint16_t) + ((clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / + ref_clock); + temp = clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv; + temp <<= 0x10; + do_div(temp, ref_clock); + sclk_setting->Fcw_frac = temp & 0xffff; + + pcc_target_percent = 10; /* Hardcode 10% for now. 
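The Fcw computation above is plain binary fixed point: Fcw_int is (clock << postdiv) / ref_clock truncated, and Fcw_frac holds the next 16 fractional bits, obtained by shifting the dividend left by 16 before the 64-bit do_div(). Illustrative numbers only: clock = 60000, postdiv = 1, ref_clock = 100000 yields 1.2, so Fcw_int = 1 and Fcw_frac = 0x3333.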
*/ + pcc_target_freq = clock - (clock * pcc_target_percent / 100); + sclk_setting->Pcc_fcw_int = (uint16_t) + ((pcc_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / + ref_clock); + + ss_target_percent = 2; /* Hardcode 2% for now. */ + sclk_setting->SSc_En = 0; + if (ss_target_percent) { + sclk_setting->SSc_En = 1; + ss_target_freq = clock - (clock * ss_target_percent / 100); + sclk_setting->Fcw1_int = (uint16_t) + ((ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / + ref_clock); + temp = ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv; + temp <<= 0x10; + do_div(temp, ref_clock); + sclk_setting->Fcw1_frac = temp & 0xffff; + } + + return 0; +} + +static uint8_t vegam_get_sleep_divider_id_from_clock(uint32_t clock, + uint32_t clock_insr) +{ + uint8_t i; + uint32_t temp; + uint32_t min = max(clock_insr, (uint32_t)SMU7_MINIMUM_ENGINE_CLOCK); + + PP_ASSERT_WITH_CODE((clock >= min), + "Engine clock can't satisfy stutter requirement!", + return 0); + for (i = 31; ; i--) { + temp = clock / (i + 1); + + if (temp >= min || i == 0) + break; + } + return i; +} + +static int vegam_populate_single_graphic_level(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU75_Discrete_GraphicsLevel *level) +{ + int result; + /* PP_Clocks minClocks; */ + uint32_t mvdd; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + SMU_SclkSetting curr_sclk_setting = { 0 }; + + result = vegam_calculate_sclk_params(hwmgr, clock, &curr_sclk_setting); + + /* populate graphics levels */ + result = vegam_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, clock, + &level->MinVoltage, &mvdd); + + PP_ASSERT_WITH_CODE((0 == result), + "can not find VDDC voltage value for " + "VDDC engine clock dependency table", + return result); + level->ActivityLevel = (uint16_t)(SclkDPMTuning_VEGAM >> DPMTuning_Activity_Shift); + + level->CcPwrDynRm = 0; + level->CcPwrDynRm1 = 0; + level->EnabledForActivity = 0; + level->EnabledForThrottle = 1; + level->VoltageDownHyst = 0; + level->PowerThrottle = 0; + data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) + level->DeepSleepDivId = vegam_get_sleep_divider_id_from_clock(clock, + hwmgr->display_config->min_core_set_clock_in_sr); + + level->SclkSetting = curr_sclk_setting; + + CONVERT_FROM_HOST_TO_SMC_UL(level->MinVoltage); + CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(level->SclkSetting.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_frac); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_up_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_down_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_frac); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_ss_slew_rate); + return 0; +} + +static int vegam_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + 
struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &hw_data->dpm_table; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; + uint8_t pcie_entry_cnt = (uint8_t) hw_data->dpm_table.pcie_speed_table.count; + int result = 0; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, GraphicsLevel); + uint32_t array_size = sizeof(struct SMU75_Discrete_GraphicsLevel) * + SMU75_MAX_LEVELS_GRAPHICS; + struct SMU75_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t i, max_entry; + uint8_t hightest_pcie_level_enabled = 0, + lowest_pcie_level_enabled = 0, + mid_pcie_level_enabled = 0, + count = 0; + + vegam_get_sclk_range_table(hwmgr, &(smu_data->smc_state_table)); + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + + result = vegam_populate_single_graphic_level(hwmgr, + dpm_table->sclk_table.dpm_levels[i].value, + &(smu_data->smc_state_table.GraphicsLevel[i])); + if (result) + return result; + + levels[i].UpHyst = (uint8_t) + (SclkDPMTuning_VEGAM >> DPMTuning_Uphyst_Shift); + levels[i].DownHyst = (uint8_t) + (SclkDPMTuning_VEGAM >> DPMTuning_Downhyst_Shift); + /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ + if (i > 1) + levels[i].DeepSleepDivId = 0; + } + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SPLLShutdownSupport)) + smu_data->smc_state_table.GraphicsLevel[0].SclkSetting.SSc_En = 0; + + smu_data->smc_state_table.GraphicsDpmLevelCount = + (uint8_t)dpm_table->sclk_table.count; + hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + for (i = 0; i < dpm_table->sclk_table.count; i++) + levels[i].EnabledForActivity = + (hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask >> i) & 0x1; + + if (pcie_table != NULL) { + PP_ASSERT_WITH_CODE((1 <= pcie_entry_cnt), + "There must be 1 or more PCIE levels defined in PPTable.", + return -EINVAL); + max_entry = pcie_entry_cnt - 1; + for (i = 0; i < dpm_table->sclk_table.count; i++) + levels[i].pcieDpmLevel = + (uint8_t) ((i < max_entry) ? i : max_entry); + } else { + while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (hightest_pcie_level_enabled + 1))) != 0)) + hightest_pcie_level_enabled++; + + while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << lowest_pcie_level_enabled)) == 0)) + lowest_pcie_level_enabled++; + + while ((count < hightest_pcie_level_enabled) && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) + count++; + + mid_pcie_level_enabled = (lowest_pcie_level_enabled + 1 + count) < + hightest_pcie_level_enabled ? 
+ (lowest_pcie_level_enabled + 1 + count) : + hightest_pcie_level_enabled; + + /* set pcieDpmLevel to hightest_pcie_level_enabled */ + for (i = 2; i < dpm_table->sclk_table.count; i++) + levels[i].pcieDpmLevel = hightest_pcie_level_enabled; + + /* set pcieDpmLevel to lowest_pcie_level_enabled */ + levels[0].pcieDpmLevel = lowest_pcie_level_enabled; + + /* set pcieDpmLevel to mid_pcie_level_enabled */ + levels[1].pcieDpmLevel = mid_pcie_level_enabled; + } + /* level count will send to smc once at init smc table and never change */ + result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + (uint32_t)array_size, SMC_RAM_END); + + return result; +} + +static int vegam_calculate_mclk_params(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU75_Discrete_MemoryLevel *mem_level) +{ + struct pp_atomctrl_memory_clock_param_ai mpll_param; + + PP_ASSERT_WITH_CODE(!atomctrl_get_memory_pll_dividers_ai(hwmgr, + clock, &mpll_param), + "Failed to retrieve memory pll parameter.", + return -EINVAL); + + mem_level->MclkFrequency = (uint32_t)mpll_param.ulClock; + mem_level->Fcw_int = (uint16_t)mpll_param.ulMclk_fcw_int; + mem_level->Fcw_frac = (uint16_t)mpll_param.ulMclk_fcw_frac; + mem_level->Postdiv = (uint8_t)mpll_param.ulPostDiv; + + return 0; +} + +static int vegam_populate_single_memory_level(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU75_Discrete_MemoryLevel *mem_level) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + int result = 0; + uint32_t mclk_stutter_mode_threshold = 60000; + + + if (table_info->vdd_dep_on_mclk) { + result = vegam_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, clock, + &mem_level->MinVoltage, &mem_level->MinMvdd); + PP_ASSERT_WITH_CODE(!result, + "can not find MinVddc voltage value from memory " + "VDDC voltage dependency table", return result); + } + + result = vegam_calculate_mclk_params(hwmgr, clock, mem_level); + PP_ASSERT_WITH_CODE(!result, + "Failed to calculate mclk params.", + return -EINVAL); + + mem_level->EnabledForThrottle = 1; + mem_level->EnabledForActivity = 0; + mem_level->VoltageDownHyst = 0; + mem_level->ActivityLevel = (uint16_t) + (MemoryDPMTuning_VEGAM >> DPMTuning_Activity_Shift); + mem_level->StutterEnable = false; + mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + data->display_timing.num_existing_displays = hwmgr->display_config->num_display; + + if (mclk_stutter_mode_threshold && + (clock <= mclk_stutter_mode_threshold) && + (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, + STUTTER_ENABLE) & 0x1)) + mem_level->StutterEnable = true; + + if (!result) { + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinMvdd); + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(mem_level->Fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(mem_level->Fcw_frac); + CONVERT_FROM_HOST_TO_SMC_US(mem_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinVoltage); + } + + return result; +} + +static int vegam_populate_all_memory_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &hw_data->dpm_table; + int result; + /* populate MCLK dpm table to SMU7 */ + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, MemoryLevel); + uint32_t array_size = 
sizeof(SMU75_Discrete_MemoryLevel) * + SMU75_MAX_LEVELS_MEMORY; + struct SMU75_Discrete_MemoryLevel *levels = + smu_data->smc_state_table.MemoryLevel; + uint32_t i; + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), + "can not populate memory level as memory clock is zero", + return -EINVAL); + result = vegam_populate_single_memory_level(hwmgr, + dpm_table->mclk_table.dpm_levels[i].value, + &levels[i]); + + if (result) + return result; + + levels[i].UpHyst = (uint8_t) + (MemoryDPMTuning_VEGAM >> DPMTuning_Uphyst_Shift); + levels[i].DownHyst = (uint8_t) + (MemoryDPMTuning_VEGAM >> DPMTuning_Downhyst_Shift); + } + + smu_data->smc_state_table.MemoryDpmLevelCount = + (uint8_t)dpm_table->mclk_table.count; + hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + + for (i = 0; i < dpm_table->mclk_table.count; i++) + levels[i].EnabledForActivity = + (hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask >> i) & 0x1; + + levels[dpm_table->mclk_table.count - 1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + /* level count will send to smc once at init smc table and never change */ + result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + (uint32_t)array_size, SMC_RAM_END); + + return result; +} + +static int vegam_populate_mvdd_value(struct pp_hwmgr *hwmgr, + uint32_t mclk, SMIO_Pattern *smio_pat) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t i = 0; + + if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { + /* find mvdd value which clock is more than request */ + for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) { + if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) { + smio_pat->Voltage = data->mvdd_voltage_table.entries[i].value; + break; + } + } + PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count, + "MVDD Voltage is outside the supported range.", + return -EINVAL); + } else + return -EINVAL; + + return 0; +} + +static int vegam_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + int result = 0; + uint32_t sclk_frequency; + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + SMIO_Pattern vol_level; + uint32_t mvdd; + uint16_t us_mvdd; + + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + /* Get MinVoltage and Frequency from DPM0, + * already converted to SMC_UL */ + sclk_frequency = data->vbios_boot_state.sclk_bootup_value; + result = vegam_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, + sclk_frequency, + &table->ACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE(!result, + "Cannot find ACPI VDDC voltage value " + "in Clock Dependency Table", + ); + + result = vegam_calculate_sclk_params(hwmgr, sclk_frequency, + &(table->ACPILevel.SclkSetting)); + PP_ASSERT_WITH_CODE(!result, + "Error retrieving Engine Clock dividers from VBIOS.", + return result); + + table->ACPILevel.DeepSleepDivId = 0; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.MinVoltage); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); + + 
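/*
 * All of these CONVERT_FROM_HOST_TO_SMC_* and PP_HOST_TO_SMC_* calls do
 * one job: the SMC firmware consumes its tables in big-endian byte
 * order, so every multi-byte field is byte-swapped before the table is
 * copied into SMC RAM.
 */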
CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkSetting.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_frac); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_up_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_down_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate); + + + /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ + table->MemoryACPILevel.MclkFrequency = data->vbios_boot_state.mclk_bootup_value; + result = vegam_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, + table->MemoryACPILevel.MclkFrequency, + &table->MemoryACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDCI voltage value " + "in Clock Dependency Table", + ); + + us_mvdd = 0; + if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) || + (data->mclk_dpm_key_disabled)) + us_mvdd = data->vbios_boot_state.mvdd_bootup_value; + else { + if (!vegam_populate_mvdd_value(hwmgr, + data->dpm_table.mclk_table.dpm_levels[0].value, + &vol_level)) + us_mvdd = vol_level.Voltage; + } + + if (!vegam_populate_mvdd_value(hwmgr, 0, &vol_level)) + table->MemoryACPILevel.MinMvdd = PP_HOST_TO_SMC_UL(vol_level.Voltage); + else + table->MemoryACPILevel.MinMvdd = 0; + + table->MemoryACPILevel.StutterEnable = false; + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpHyst = 0; + table->MemoryACPILevel.DownHyst = 100; + table->MemoryACPILevel.VoltageDownHyst = 0; + table->MemoryACPILevel.ActivityLevel = + PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity); + + CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage); + + return result; +} + +static int vegam_populate_smc_vce_level(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t vddci; + + table->VceLevelCount = (uint8_t)(mm_table->count); + table->VceBootLevel = 0; + + for (count = 0; count < table->VceLevelCount; count++) { + table->VceLevel[count].Frequency = mm_table->entries[count].eclk; + table->VceLevel[count].MinVoltage = 0; + table->VceLevel[count].MinVoltage |= + (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + + table->VceLevel[count].MinVoltage |= + (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + 
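+		/*
+		 * Editor's note, not part of the original patch: MinVoltage is
+		 * a packed word. The VDDC and VDDCI values are scaled to the
+		 * SMC's voltage unit (millivolts times VOLTAGE_SCALE) and
+		 * placed at VDDC_SHIFT and VDDCI_SHIFT; the line below ORs the
+		 * phase count in at PHASES_SHIFT. Assuming VOLTAGE_SCALE == 4,
+		 * a 900 mV entry is stored as 3600 in its bit-field.
+		 */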
		table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+		/*retrieve divider value for VBIOS */
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				table->VceLevel[count].Frequency, &dividers);
+		PP_ASSERT_WITH_CODE((0 == result),
+				"can not find divide id for VCE engine clock",
+				return result);
+
+		table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider;
+
+		CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency);
+		CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].MinVoltage);
+	}
+	return result;
+}
+
+static int vegam_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
+		SMU75_Discrete_DpmTable *table)
+{
+	int result = -EINVAL;
+	uint8_t count;
+	struct pp_atomctrl_clock_dividers_vi dividers;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
+			table_info->mm_dep_table;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
+
+	table->SamuBootLevel = 0;
+	table->SamuLevelCount = (uint8_t)(mm_table->count);
+
+	for (count = 0; count < table->SamuLevelCount; count++) {
+		/* not sure whether we need evclk or not */
+		table->SamuLevel[count].MinVoltage = 0;
+		table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
+		table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
+				VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+					mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+		table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+		/* retrieve divider value for VBIOS */
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				table->SamuLevel[count].Frequency, &dividers);
+		PP_ASSERT_WITH_CODE((0 == result),
+				"can not find divide id for samu clock", return result);
+
+		table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider;
+
+		CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency);
+		CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage);
+	}
+	return result;
+}
+
+static int vegam_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr,
+		int32_t eng_clock, int32_t mem_clock,
+		SMU75_Discrete_MCArbDramTimingTableEntry *arb_regs)
+{
+	uint32_t dram_timing;
+	uint32_t dram_timing2;
+	uint32_t burst_time;
+	uint32_t rfsh_rate;
+	uint32_t misc3;
+
+	int result;
+
+	result = atomctrl_set_engine_dram_timings_rv770(hwmgr,
+			eng_clock, mem_clock);
+	PP_ASSERT_WITH_CODE(result == 0,
+			"Error calling VBIOS to set DRAM_TIMING.",
+			return result);
+
+	dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING);
+	dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2);
+	burst_time = cgs_read_register(hwmgr->device, mmMC_ARB_BURST_TIME);
+	rfsh_rate = cgs_read_register(hwmgr->device, mmMC_ARB_RFSH_RATE);
+	misc3 = cgs_read_register(hwmgr->device, mmMC_ARB_MISC3);
+
+	arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dram_timing);
+	arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dram_timing2);
+	arb_regs->McArbBurstTime = PP_HOST_TO_SMC_UL(burst_time);
+	arb_regs->McArbRfshRate = PP_HOST_TO_SMC_UL(rfsh_rate);
+	arb_regs->McArbMisc3 = PP_HOST_TO_SMC_UL(misc3);
+
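+	/*
+	 * Editor's note, not part of the original patch: the VBIOS call above
+	 * reprograms the memory-controller arbiter for the requested
+	 * engine/memory clock pair, and the five MC_ARB_* register reads
+	 * snapshot the result, already byte-swapped for the SMC, into one
+	 * SMU75_Discrete_MCArbDramTimingTableEntry. The caller below fills
+	 * one entry per (sclk level, mclk level) combination.
+	 */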
+	return 0;
+}
+
+static int vegam_program_memory_timing_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct SMU75_Discrete_MCArbDramTimingTable arb_regs = {0};
+	uint32_t i, j;
+	int result = 0;
+
+	for (i = 0; i < hw_data->dpm_table.sclk_table.count; i++) {
+		for (j = 0; j < hw_data->dpm_table.mclk_table.count; j++) {
+			result = vegam_populate_memory_timing_parameters(hwmgr,
+					hw_data->dpm_table.sclk_table.dpm_levels[i].value,
+					hw_data->dpm_table.mclk_table.dpm_levels[j].value,
+					&arb_regs.entries[i][j]);
+			if (result)
+				return result;
+		}
+	}
+
+	result = smu7_copy_bytes_to_smc(
+			hwmgr,
+			smu_data->smu7_data.arb_table_start,
+			(uint8_t *)&arb_regs,
+			sizeof(SMU75_Discrete_MCArbDramTimingTable),
+			SMC_RAM_END);
+	return result;
+}
+
+static int vegam_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	int result = -EINVAL;
+	uint8_t count;
+	struct pp_atomctrl_clock_dividers_vi dividers;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
+			table_info->mm_dep_table;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
+
+	table->UvdLevelCount = (uint8_t)(mm_table->count);
+	table->UvdBootLevel = 0;
+
+	for (count = 0; count < table->UvdLevelCount; count++) {
+		table->UvdLevel[count].MinVoltage = 0;
+		table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk;
+		table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
+		table->UvdLevel[count].MinVoltage |=
+				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+					mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+		table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+		/* retrieve divider value for VBIOS */
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				table->UvdLevel[count].VclkFrequency, &dividers);
+		PP_ASSERT_WITH_CODE((0 == result),
+				"can not find divide id for Vclk clock", return result);
+
+		table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider;
+
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				table->UvdLevel[count].DclkFrequency, &dividers);
+		PP_ASSERT_WITH_CODE((0 == result),
+				"can not find divide id for Dclk clock", return result);
+
+		table->UvdLevel[count].DclkDivider = (uint8_t)dividers.pll_post_divider;
+
+		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency);
+		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency);
+		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage);
+	}
+
+	return result;
+}
+
+static int vegam_populate_smc_boot_level(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	int result = 0;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+	table->GraphicsBootLevel = 0;
+	table->MemoryBootLevel = 0;
+
+	/* find boot level from dpm table */
+	result = phm_find_boot_level(&(data->dpm_table.sclk_table),
+
data->vbios_boot_state.sclk_bootup_value, + (uint32_t *)&(table->GraphicsBootLevel)); + + result = phm_find_boot_level(&(data->dpm_table.mclk_table), + data->vbios_boot_state.mclk_bootup_value, + (uint32_t *)&(table->MemoryBootLevel)); + + table->BootVddc = data->vbios_boot_state.vddc_bootup_value * + VOLTAGE_SCALE; + table->BootVddci = data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE; + table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value * + VOLTAGE_SCALE; + + CONVERT_FROM_HOST_TO_SMC_US(table->BootVddc); + CONVERT_FROM_HOST_TO_SMC_US(table->BootVddci); + CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd); + + return 0; +} + +static int vegam_populate_smc_initial_state(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint8_t count, level; + + count = (uint8_t)(table_info->vdd_dep_on_sclk->count); + + for (level = 0; level < count; level++) { + if (table_info->vdd_dep_on_sclk->entries[level].clk >= + hw_data->vbios_boot_state.sclk_bootup_value) { + smu_data->smc_state_table.GraphicsBootLevel = level; + break; + } + } + + count = (uint8_t)(table_info->vdd_dep_on_mclk->count); + for (level = 0; level < count; level++) { + if (table_info->vdd_dep_on_mclk->entries[level].clk >= + hw_data->vbios_boot_state.mclk_bootup_value) { + smu_data->smc_state_table.MemoryBootLevel = level; + break; + } + } + + return 0; +} + +static uint16_t scale_fan_gain_settings(uint16_t raw_setting) +{ + uint32_t tmp; + tmp = raw_setting * 4096 / 100; + return (uint16_t)tmp; +} + +static int vegam_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults; + SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table; + struct pp_advance_fan_control_parameters *fan_table = + &hwmgr->thermal_controller.advanceFanControlParameters; + int i, j, k; + const uint16_t *pdef1; + const uint16_t *pdef2; + + table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128)); + table->TargetTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128)); + + PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255, + "Target Operating Temp is out of Range!", + ); + + table->TemperatureLimitEdge = PP_HOST_TO_SMC_US( + cac_dtp_table->usTargetOperatingTemp * 256); + table->TemperatureLimitHotspot = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitHotspot * 256); + table->FanGainEdge = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainEdge)); + table->FanGainHotspot = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainHotspot)); + + pdef1 = defaults->BAPMTI_R; + pdef2 = defaults->BAPMTI_RC; + + for (i = 0; i < SMU75_DTE_ITERATIONS; i++) { + for (j = 0; j < SMU75_DTE_SOURCES; j++) { + for (k = 0; k < SMU75_DTE_SINKS; k++) { + table->BAPMTI_R[i][j][k] = PP_HOST_TO_SMC_US(*pdef1); + table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*pdef2); + pdef1++; + pdef2++; + } + } + } + + return 0; +} + +static int vegam_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) +{ + uint32_t ro, efuse, volt_without_cks, volt_with_cks, 
value, max, min; + struct vegam_smumgr *smu_data = + (struct vegam_smumgr *)(hwmgr->smu_backend); + + uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + uint32_t mask = (1 << ((STRAP_ASIC_RO_MSB - STRAP_ASIC_RO_LSB) + 1)) - 1; + + stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount; + + atomctrl_read_efuse(hwmgr, STRAP_ASIC_RO_LSB, STRAP_ASIC_RO_MSB, + mask, &efuse); + + min = 1200; + max = 2500; + + ro = efuse * (max - min) / 255 + min; + + /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ + for (i = 0; i < sclk_table->count; i++) { + smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |= + sclk_table->entries[i].cks_enable << i; + volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * + 136418 - (ro - 70) * 1000000) / + (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000)); + volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * + 3232 - (ro - 65) * 1000000) / + (2522480 - sclk_table->entries[i].clk/100 * 115764/100)); + + if (volt_without_cks >= volt_with_cks) + volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + + sclk_table->entries[i].cks_voffset) * 100 + 624) / 625); + + smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; + } + + smu_data->smc_state_table.LdoRefSel = + (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? + table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 5; + /* Populate CKS Lookup Table */ + if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) + stretch_amount2 = 0; + else if (stretch_amount == 3 || stretch_amount == 4) + stretch_amount2 = 1; + else { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher); + PP_ASSERT_WITH_CODE(false, + "Stretch Amount in PPTable not supported\n", + return -EINVAL); + } + + value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL); + value &= 0xFFFFFFFE; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value); + + return 0; +} + +static bool vegam_is_hw_avfs_present(struct pp_hwmgr *hwmgr) +{ + uint32_t efuse; + + efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixSMU_EFUSE_0 + (49 * 4)); + efuse &= 0x00000001; + + if (efuse) + return true; + + return false; +} + +static int vegam_populate_avfs_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table); + int result = 0; + struct pp_atom_ctrl__avfs_parameters avfs_params = {0}; + AVFS_meanNsigma_t AVFS_meanNsigma = { {0} }; + AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} }; + uint32_t tmp, i; + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + + if (!hwmgr->avfs_supported) + return 0; + + result = atomctrl_get_avfs_information(hwmgr, &avfs_params); + + if (0 == result) { + table->BTCGB_VDROOP_TABLE[0].a0 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0); + table->BTCGB_VDROOP_TABLE[0].a1 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1); + table->BTCGB_VDROOP_TABLE[0].a2 = + 
PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2); + table->BTCGB_VDROOP_TABLE[1].a0 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0); + table->BTCGB_VDROOP_TABLE[1].a1 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1); + table->BTCGB_VDROOP_TABLE[1].a2 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2); + table->AVFSGB_FUSE_TABLE[0].m1 = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1); + table->AVFSGB_FUSE_TABLE[0].m2 = + PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2); + table->AVFSGB_FUSE_TABLE[0].b = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b); + table->AVFSGB_FUSE_TABLE[0].m1_shift = 24; + table->AVFSGB_FUSE_TABLE[0].m2_shift = 12; + table->AVFSGB_FUSE_TABLE[1].m1 = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1); + table->AVFSGB_FUSE_TABLE[1].m2 = + PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2); + table->AVFSGB_FUSE_TABLE[1].b = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b); + table->AVFSGB_FUSE_TABLE[1].m1_shift = 24; + table->AVFSGB_FUSE_TABLE[1].m2_shift = 12; + table->MaxVoltage = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv); + AVFS_meanNsigma.Aconstant[0] = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0); + AVFS_meanNsigma.Aconstant[1] = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1); + AVFS_meanNsigma.Aconstant[2] = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2); + AVFS_meanNsigma.DC_tol_sigma = + PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma); + AVFS_meanNsigma.Platform_mean = + PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean); + AVFS_meanNsigma.PSM_Age_CompFactor = + PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor); + AVFS_meanNsigma.Platform_sigma = + PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma); + + for (i = 0; i < sclk_table->count; i++) { + AVFS_meanNsigma.Static_Voltage_Offset[i] = + (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625); + AVFS_SclkOffset.Sclk_Offset[i] = + PP_HOST_TO_SMC_US((uint16_t) + (sclk_table->entries[i].sclk_offset) / 100); + } + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, AvfsMeanNSigma), + &tmp, SMC_RAM_END); + smu7_copy_bytes_to_smc(hwmgr, + tmp, + (uint8_t *)&AVFS_meanNsigma, + sizeof(AVFS_meanNsigma_t), + SMC_RAM_END); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, AvfsSclkOffsetTable), + &tmp, SMC_RAM_END); + smu7_copy_bytes_to_smc(hwmgr, + tmp, + (uint8_t *)&AVFS_SclkOffset, + sizeof(AVFS_Sclk_Offset_t), + SMC_RAM_END); + + data->avfs_vdroop_override_setting = + (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT); + data->apply_avfs_cks_off_voltage = + (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? 
true : false; + } + return result; +} + +static int vegam_populate_vr_config(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = + (struct vegam_smumgr *)(hwmgr->smu_backend); + uint16_t config; + + config = VR_MERGED_WITH_VDDC; + table->VRConfig |= (config << VRCONF_VDDGFX_SHIFT); + + /* Set Vddc Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + config = VR_SVI2_PLANE_1; + table->VRConfig |= config; + } else { + PP_ASSERT_WITH_CODE(false, + "VDDC should be on SVI2 control in merged mode!", + ); + } + /* Set Vddci Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { + config = VR_SVI2_PLANE_2; /* only in merged mode */ + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + config = VR_SMIO_PATTERN_1; + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } else { + config = VR_STATIC_VOLTAGE; + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } + /* Set Mvdd Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) { + if (config != VR_SVI2_PLANE_2) { + config = VR_SVI2_PLANE_2; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + smu_data->smu7_data.soft_regs_start + + offsetof(SMU75_SoftRegisters, AllowMvddSwitch), + 0x1); + } else { + PP_ASSERT_WITH_CODE(false, + "SVI2 Plane 2 is already taken, set MVDD as Static",); + config = VR_STATIC_VOLTAGE; + table->VRConfig = (config << VRCONF_MVDD_SHIFT); + } + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + config = VR_SMIO_PATTERN_2; + table->VRConfig = (config << VRCONF_MVDD_SHIFT); + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + smu_data->smu7_data.soft_regs_start + + offsetof(SMU75_SoftRegisters, AllowMvddSwitch), + 0x1); + } else { + config = VR_STATIC_VOLTAGE; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + } + + return 0; +} + +static int vegam_populate_svi_load_line(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults; + + smu_data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn; + smu_data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC; + smu_data->power_tune_table.SviLoadLineTrimVddC = 3; + smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + +static int vegam_populate_tdc_limit(struct pp_hwmgr *hwmgr) +{ + uint16_t tdc_limit; + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults; + + tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 128); + smu_data->power_tune_table.TDC_VDDC_PkgLimit = + CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); + smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + defaults->TDC_VDDC_ThrottleReleaseLimitPerc; + smu_data->power_tune_table.TDC_MAWt = defaults->TDC_MAWt; + + return 0; +} + +static int vegam_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults; + uint32_t temp; + + if 
(smu7_read_smc_sram_dword(hwmgr, + fuse_table_offset + + offsetof(SMU75_Discrete_PmFuses, TdcWaterfallCtl), + (uint32_t *)&temp, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", + return -EINVAL); + else { + smu_data->power_tune_table.TdcWaterfallCtl = defaults->TdcWaterfallCtl; + smu_data->power_tune_table.LPMLTemperatureMin = + (uint8_t)((temp >> 16) & 0xff); + smu_data->power_tune_table.LPMLTemperatureMax = + (uint8_t)((temp >> 8) & 0xff); + smu_data->power_tune_table.Reserved = (uint8_t)(temp & 0xff); + } + return 0; +} + +static int vegam_populate_temperature_scaler(struct pp_hwmgr *hwmgr) +{ + int i; + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0; + + return 0; +} + +static int vegam_populate_fuzzy_fan(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + +/* TO DO move to hwmgr */ + if ((hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity & (1 << 15)) + || 0 == hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity) + hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity = + hwmgr->thermal_controller.advanceFanControlParameters.usDefaultFanOutputSensitivity; + + smu_data->power_tune_table.FuzzyFan_PwmSetDelta = PP_HOST_TO_SMC_US( + hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity); + return 0; +} + +static int vegam_populate_gnb_lpml(struct pp_hwmgr *hwmgr) +{ + int i; + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. 
*/ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.GnbLPML[i] = 0; + + return 0; +} + +static int vegam_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint16_t hi_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; + uint16_t lo_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; + struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table; + + hi_sidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); + lo_sidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); + + smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = + CONVERT_FROM_HOST_TO_SMC_US(hi_sidd); + smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = + CONVERT_FROM_HOST_TO_SMC_US(lo_sidd); + + return 0; +} + +static int vegam_populate_pm_fuses(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t pm_fuse_table_offset; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + if (smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to get pm_fuse_table_offset Failed!", + return -EINVAL); + + if (vegam_populate_svi_load_line(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate SviLoadLine Failed!", + return -EINVAL); + + if (vegam_populate_tdc_limit(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TDCLimit Failed!", return -EINVAL); + + if (vegam_populate_dw8(hwmgr, pm_fuse_table_offset)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TdcWaterfallCtl, " + "LPMLTemperature Min and Max Failed!", + return -EINVAL); + + if (0 != vegam_populate_temperature_scaler(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate LPMLTemperatureScaler Failed!", + return -EINVAL); + + if (vegam_populate_fuzzy_fan(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate Fuzzy Fan Control parameters Failed!", + return -EINVAL); + + if (vegam_populate_gnb_lpml(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate GnbLPML Failed!", + return -EINVAL); + + if (vegam_populate_bapm_vddc_base_leakage_sidd(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate BapmVddCBaseLeakage Hi and Lo " + "Sidd Failed!", return -EINVAL); + + if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, + (uint8_t *)&smu_data->power_tune_table, + (sizeof(struct SMU75_Discrete_PmFuses) - PMFUSES_AVFSSIZE), + SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to download PmFuseTable Failed!", + return -EINVAL); + } + return 0; +} + +static int vegam_enable_reconfig_cus(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_EnableModeSwitchRLCNotification, + adev->gfx.cu_info.number); + + return 0; +} + +static int vegam_init_smc_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table); + uint8_t i; + struct pp_atomctrl_gpio_pin_assignment gpio_pin; + struct 
phm_ppt_v1_gpio_table *gpio_table = + (struct phm_ppt_v1_gpio_table *)table_info->gpio_table; + pp_atomctrl_clock_dividers_vi dividers; + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + + vegam_initialize_power_tune_defaults(hwmgr); + + if (SMU7_VOLTAGE_CONTROL_NONE != hw_data->voltage_control) + vegam_populate_smc_voltage_tables(hwmgr, table); + + table->SystemFlags = 0; + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StepVddc)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + + if (hw_data->is_memory_gddr5) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + + if (hw_data->ulv_supported && table_info->us_ulv_voltage_offset) { + result = vegam_populate_ulv_state(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize ULV state!", return result); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_ULV_PARAMETER, SMU7_CGULVPARAMETER_DFLT); + } + + result = vegam_populate_smc_link_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Link Level!", return result); + + result = vegam_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Graphics Level!", return result); + + result = vegam_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Memory Level!", return result); + + result = vegam_populate_smc_acpi_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize ACPI Level!", return result); + + result = vegam_populate_smc_vce_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize VCE Level!", return result); + + result = vegam_populate_smc_samu_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize SAMU Level!", return result); + + /* Since only the initial state is completely set up at this point + * (the other states are just copies of the boot state) we only + * need to populate the ARB settings for the initial state. 
+ */ + result = vegam_program_memory_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to Write ARB settings for the initial state.", return result); + + result = vegam_populate_smc_uvd_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize UVD Level!", return result); + + result = vegam_populate_smc_boot_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Boot Level!", return result); + + result = vegam_populate_smc_initial_state(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Boot State!", return result); + + result = vegam_populate_bapm_parameters_in_dpm_table(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate BAPM Parameters!", return result); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher)) { + result = vegam_populate_clock_stretcher_data_table(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate Clock Stretcher Data Table!", + return result); + } + + result = vegam_populate_avfs_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate AVFS Parameters!", return result;); + + table->CurrSclkPllRange = 0xff; + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + table->TemperatureLimitHigh = + table_info->cac_dtp_table->usTargetOperatingTemp * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->TemperatureLimitLow = + (table_info->cac_dtp_table->usTargetOperatingTemp - 1) * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + + PP_ASSERT_WITH_CODE(hw_data->dpm_table.pcie_speed_table.count >= 1, + "There must be 1 or more PCIE levels defined in PPTable.", + return -EINVAL); + table->PCIeBootLinkLevel = + hw_data->dpm_table.pcie_speed_table.count; + table->PCIeGenInterval = 1; + table->VRConfig = 0; + + result = vegam_populate_vr_config(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate VRConfig setting!", return result); + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + if (atomctrl_get_pp_assign_pin(hwmgr, + VDDC_VRHOT_GPIO_PINID, &gpio_pin)) { + table->VRHotGpio = gpio_pin.uc_gpio_pin_bit_shift; + if (gpio_table) + table->VRHotLevel = + table_info->gpio_table->vrhot_triggered_sclk_dpm_index; + } else { + table->VRHotGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } + + if (atomctrl_get_pp_assign_pin(hwmgr, + PP_AC_DC_SWITCH_GPIO_PINID, &gpio_pin)) { + table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift; + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition) && + !smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UseNewGPIOScheme)) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SMCtoPPLIBAcdcGpioScheme); + } else { + table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } + + /* Thermal Output GPIO */ + if (atomctrl_get_pp_assign_pin(hwmgr, + THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin)) { + table->ThermOutGpio = gpio_pin.uc_gpio_pin_bit_shift; + + /* For porlarity read GPIOPAD_A with assigned Gpio pin + * since VBIOS will program this register to set 'inactive state', + * driver can then determine 'active state' 
from this and
+	 * program SMU with correct polarity
+	 */
+		table->ThermOutPolarity =
+			(0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) &
+			(1 << gpio_pin.uc_gpio_pin_bit_shift))) ? 1:0;
+		table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY;
+
+		/* if required, combine VRHot/PCC with thermal out GPIO */
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_RegulatorHot) &&
+		    phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_CombinePCCWithThermalSignal))
+			table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT;
+	} else {
+		table->ThermOutGpio = 17;
+		table->ThermOutPolarity = 1;
+		table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE;
+	}
+
+	/* Populate BIF_SCLK levels into SMC DPM table */
+	for (i = 0; i <= hw_data->dpm_table.pcie_speed_table.count; i++) {
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				smu_data->bif_sclk_table[i], &dividers);
+		PP_ASSERT_WITH_CODE(!result,
+				"Can not find DFS divide id for Sclk",
+				return result);
+
+		if (i == 0)
+			table->Ulv.BifSclkDfs =
+					PP_HOST_TO_SMC_US((uint16_t)(dividers.pll_post_divider));
+		else
+			table->LinkLevel[i - 1].BifSclkDfs =
+					PP_HOST_TO_SMC_US((uint16_t)(dividers.pll_post_divider));
+	}
+
+	for (i = 0; i < SMU75_MAX_ENTRIES_SMIO; i++)
+		table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]);
+
+	CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->CurrSclkPllRange);
+	CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh);
+	CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow);
+	CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime);
+	CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime);
+
+	/* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */
+	result = smu7_copy_bytes_to_smc(hwmgr,
+			smu_data->smu7_data.dpm_table_start +
+			offsetof(SMU75_Discrete_DpmTable, SystemFlags),
+			(uint8_t *)&(table->SystemFlags),
+			sizeof(SMU75_Discrete_DpmTable) - 3 * sizeof(SMU75_PIDController),
+			SMC_RAM_END);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to upload dpm data to SMC memory!", return result);
+
+	result = vegam_populate_pm_fuses(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to populate PM fuses to SMC memory!", return result);
+
+	result = vegam_enable_reconfig_cus(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to enable reconfigurable CUs!", return result);
+
+	return 0;
+}
+
+static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member)
+{
+	switch (type) {
+	case SMU_SoftRegisters:
+		switch (member) {
+		case HandshakeDisables:
+			return offsetof(SMU75_SoftRegisters, HandshakeDisables);
+		case VoltageChangeTimeout:
+			return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout);
+		case AverageGraphicsActivity:
+			return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity);
+		case PreVBlankGap:
+			return offsetof(SMU75_SoftRegisters, PreVBlankGap);
+		case VBlankTimeout:
+			return offsetof(SMU75_SoftRegisters, VBlankTimeout);
+		case UcodeLoadStatus:
+			return offsetof(SMU75_SoftRegisters, UcodeLoadStatus);
+		case DRAM_LOG_ADDR_H:
+			return offsetof(SMU75_SoftRegisters, DRAM_LOG_ADDR_H);
+		case DRAM_LOG_ADDR_L:
+			return offsetof(SMU75_SoftRegisters, DRAM_LOG_ADDR_L);
+		case DRAM_LOG_PHY_ADDR_H:
+			return offsetof(SMU75_SoftRegisters, DRAM_LOG_PHY_ADDR_H);
+		case DRAM_LOG_PHY_ADDR_L:
+			return offsetof(SMU75_SoftRegisters,
DRAM_LOG_PHY_ADDR_L); + case DRAM_LOG_BUFF_SIZE: + return offsetof(SMU75_SoftRegisters, DRAM_LOG_BUFF_SIZE); + } + case SMU_Discrete_DpmTable: + switch (member) { + case UvdBootLevel: + return offsetof(SMU75_Discrete_DpmTable, UvdBootLevel); + case VceBootLevel: + return offsetof(SMU75_Discrete_DpmTable, VceBootLevel); + case SamuBootLevel: + return offsetof(SMU75_Discrete_DpmTable, SamuBootLevel); + case LowSclkInterruptThreshold: + return offsetof(SMU75_Discrete_DpmTable, LowSclkInterruptThreshold); + } + } + pr_warn("can't get the offset of type %x member %x\n", type, member); + return 0; +} + +static int vegam_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + + DPMTABLE_UPDATE_SCLK + + DPMTABLE_UPDATE_MCLK)) + return vegam_program_memory_timing_parameters(hwmgr); + + return 0; +} + +static int vegam_update_sclk_threshold(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = + (struct vegam_smumgr *)(hwmgr->smu_backend); + int result = 0; + uint32_t low_sclk_interrupt_threshold = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkThrottleLowNotification) + && (data->low_sclk_interrupt_threshold != 0)) { + low_sclk_interrupt_threshold = + data->low_sclk_interrupt_threshold; + + CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); + + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, + LowSclkInterruptThreshold), + (uint8_t *)&low_sclk_interrupt_threshold, + sizeof(uint32_t), + SMC_RAM_END); + } + PP_ASSERT_WITH_CODE((result == 0), + "Failed to update SCLK threshold!", return result); + + result = vegam_program_mem_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE((result == 0), + "Failed to program memory timing parameters!", + ); + + return result; +} + +int vegam_thermal_avfs_enable(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + int ret; + + if (!hwmgr->avfs_supported) + return 0; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs); + if (!ret) { + if (data->apply_avfs_cks_off_voltage) + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage); + } + + return ret; +} + +static int vegam_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) +{ + PP_ASSERT_WITH_CODE(hwmgr->thermal_controller.fanInfo.bNoFan, + "VBIOS fan info is not correct!", + ); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; +} + +const struct pp_smumgr_func vegam_smu_funcs = { + .smu_init = vegam_smu_init, + .smu_fini = smu7_smu_fini, + .start_smu = vegam_start_smu, + .check_fw_load_finish = smu7_check_fw_load_finish, + .request_smu_load_fw = smu7_reload_firmware, + .request_smu_load_specific_fw = NULL, + .send_msg_to_smc = smu7_send_msg_to_smc, + .send_msg_to_smc_with_parameter = smu7_send_msg_to_smc_with_parameter, + .process_firmware_header = vegam_process_firmware_header, + .is_dpm_running = vegam_is_dpm_running, + .get_mac_definition = vegam_get_mac_definition, + .update_smc_table = vegam_update_smc_table, + .init_smc_table = vegam_init_smc_table, + .get_offsetof = vegam_get_offsetof, + .populate_all_graphic_levels = vegam_populate_all_graphic_levels, + .populate_all_memory_levels = vegam_populate_all_memory_levels, + .update_sclk_threshold = 
vegam_update_sclk_threshold, + .is_hw_avfs_present = vegam_is_hw_avfs_present, + .thermal_avfs_enable = vegam_thermal_avfs_enable, + .is_dpm_running = vegam_is_dpm_running, + .thermal_setup_fan_table = vegam_thermal_setup_fan_table, +}; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h new file mode 100644 index 000000000000..2b6558238500 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h @@ -0,0 +1,75 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _VEGAM_SMUMANAGER_H +#define _VEGAM_SMUMANAGER_H + + +#include <pp_endian.h> +#include "smu75_discrete.h" +#include "smu7_smumgr.h" + +#define SMC_RAM_END 0x40000 + +#define DPMTuning_Uphyst_Shift 0 +#define DPMTuning_Downhyst_Shift 8 +#define DPMTuning_Activity_Shift 16 + +#define GraphicsDPMTuning_VEGAM 0x001e6400 +#define MemoryDPMTuning_VEGAM 0x000f3c0a +#define SclkDPMTuning_VEGAM 0x002d000a +#define MclkDPMTuning_VEGAM 0x001f100a + + +struct vegam_pt_defaults { + uint8_t SviLoadLineEn; + uint8_t SviLoadLineVddC; + uint8_t TDC_VDDC_ThrottleReleaseLimitPerc; + uint8_t TDC_MAWt; + uint8_t TdcWaterfallCtl; + uint8_t DTEAmbientTempBase; + + uint32_t DisplayCac; + uint32_t BAPM_TEMP_GRADIENT; + uint16_t BAPMTI_R[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS]; + uint16_t BAPMTI_RC[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS]; +}; + +struct vegam_range_table { + uint32_t trans_lower_frequency; /* in 10khz */ + uint32_t trans_upper_frequency; +}; + +struct vegam_smumgr { + struct smu7_smumgr smu7_data; + uint8_t protected_mode; + SMU75_Discrete_DpmTable smc_state_table; + struct SMU75_Discrete_Ulv ulv_setting; + struct SMU75_Discrete_PmFuses power_tune_table; + struct vegam_range_table range_table[NUM_SCLK_RANGE]; + const struct vegam_pt_defaults *power_tune_defaults; + uint32_t bif_sclk_table[SMU75_MAX_LEVELS_LINK]; +}; + + +#endif diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 831b73392d82..036dff8a1f33 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -799,7 +799,7 @@ static int ast_get_modes(struct drm_connector *connector) return 0; } -static int ast_mode_valid(struct drm_connector *connector, +static enum drm_mode_status ast_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct ast_private *ast = connector->dev->dev_private; 
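
[Editor's note, not part of the merged patches: the ast_mode_valid() hunk above, like the bochs and adv7511 hunks further below, belongs to a tree-wide series changing .mode_valid connector callbacks from a bare int to enum drm_mode_status, so the callback signatures match the prototype in the DRM helper structs and static checkers can flag mismatches. A minimal stand-alone C sketch of the resulting pattern; the two-value enum and the 165000 kHz limit (borrowed from the adv7511 hunk below) are illustrative only:]

#include <stdio.h>

/* Reduced stand-in for the kernel's enum drm_mode_status. */
enum drm_mode_status {
	MODE_OK = 0,
	MODE_CLOCK_HIGH,
};

struct display_mode {
	int clock;	/* pixel clock in kHz */
};

/*
 * Same shape as the converted callbacks: reject pixel clocks the
 * hardware cannot drive, accept everything else. Returning the enum
 * instead of int documents the contract in the type.
 */
static enum drm_mode_status mode_valid(const struct display_mode *mode)
{
	if (mode->clock > 165000)
		return MODE_CLOCK_HIGH;
	return MODE_OK;
}

int main(void)
{
	struct display_mode m = { .clock = 148500 };

	printf("148.5 MHz mode %s\n",
	       mode_valid(&m) == MODE_OK ? "accepted" : "rejected");
	return 0;
}
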
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h index ab32d5b268d2..60c937f42114 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h @@ -299,7 +299,6 @@ struct atmel_hlcdc_layer { struct atmel_hlcdc_plane { struct drm_plane base; struct atmel_hlcdc_layer layer; - struct atmel_hlcdc_plane_properties *properties; }; static inline struct atmel_hlcdc_plane * @@ -346,18 +345,6 @@ struct atmel_hlcdc_dc_desc { }; /** - * Atmel HLCDC Plane properties. - * - * This structure stores plane property definitions. - * - * @alpha: alpha blending (or transparency) property - * @rotation: rotation property - */ -struct atmel_hlcdc_plane_properties { - struct drm_property *alpha; -}; - -/** * Atmel HLCDC Display Controller. * * @desc: HLCDC Display Controller description diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index e18800ed7cd1..73c875db45f4 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -31,7 +31,6 @@ * @src_y: y buffer position * @src_w: buffer width * @src_h: buffer height - * @alpha: alpha blending of the plane * @disc_x: x discard position * @disc_y: y discard position * @disc_w: discard width @@ -54,8 +53,6 @@ struct atmel_hlcdc_plane_state { uint32_t src_w; uint32_t src_h; - u8 alpha; - int disc_x; int disc_y; int disc_w; @@ -385,7 +382,7 @@ atmel_hlcdc_plane_update_general_settings(struct atmel_hlcdc_plane *plane, cfg |= ATMEL_HLCDC_LAYER_LAEN; else cfg |= ATMEL_HLCDC_LAYER_GAEN | - ATMEL_HLCDC_LAYER_GA(state->alpha); + ATMEL_HLCDC_LAYER_GA(state->base.alpha >> 8); } if (state->disc_h && state->disc_w) @@ -553,7 +550,7 @@ atmel_hlcdc_plane_prepare_disc_area(struct drm_crtc_state *c_state) if (!ovl_s->fb || ovl_s->fb->format->has_alpha || - ovl_state->alpha != 255) + ovl_s->alpha != DRM_BLEND_ALPHA_OPAQUE) continue; /* TODO: implement a smarter hidden area detection */ @@ -829,51 +826,18 @@ static void atmel_hlcdc_plane_destroy(struct drm_plane *p) drm_plane_cleanup(p); } -static int atmel_hlcdc_plane_atomic_set_property(struct drm_plane *p, - struct drm_plane_state *s, - struct drm_property *property, - uint64_t val) -{ - struct atmel_hlcdc_plane *plane = drm_plane_to_atmel_hlcdc_plane(p); - struct atmel_hlcdc_plane_properties *props = plane->properties; - struct atmel_hlcdc_plane_state *state = - drm_plane_state_to_atmel_hlcdc_plane_state(s); - - if (property == props->alpha) - state->alpha = val; - else - return -EINVAL; - - return 0; -} - -static int atmel_hlcdc_plane_atomic_get_property(struct drm_plane *p, - const struct drm_plane_state *s, - struct drm_property *property, - uint64_t *val) -{ - struct atmel_hlcdc_plane *plane = drm_plane_to_atmel_hlcdc_plane(p); - struct atmel_hlcdc_plane_properties *props = plane->properties; - const struct atmel_hlcdc_plane_state *state = - container_of(s, const struct atmel_hlcdc_plane_state, base); - - if (property == props->alpha) - *val = state->alpha; - else - return -EINVAL; - - return 0; -} - -static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane, - struct atmel_hlcdc_plane_properties *props) +static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane) { const struct atmel_hlcdc_layer_desc *desc = plane->layer.desc; if (desc->type == ATMEL_HLCDC_OVERLAY_LAYER || - desc->type == ATMEL_HLCDC_CURSOR_LAYER) - drm_object_attach_property(&plane->base.base, - 
props->alpha, 255); + desc->type == ATMEL_HLCDC_CURSOR_LAYER) { + int ret; + + ret = drm_plane_create_alpha_property(&plane->base); + if (ret) + return ret; + } if (desc->layout.xstride && desc->layout.pstride) { int ret; @@ -988,8 +952,8 @@ static void atmel_hlcdc_plane_reset(struct drm_plane *p) return; } - state->alpha = 255; p->state = &state->base; + p->state->alpha = DRM_BLEND_ALPHA_OPAQUE; p->state->plane = p; } } @@ -1042,13 +1006,10 @@ static const struct drm_plane_funcs layer_plane_funcs = { .reset = atmel_hlcdc_plane_reset, .atomic_duplicate_state = atmel_hlcdc_plane_atomic_duplicate_state, .atomic_destroy_state = atmel_hlcdc_plane_atomic_destroy_state, - .atomic_set_property = atmel_hlcdc_plane_atomic_set_property, - .atomic_get_property = atmel_hlcdc_plane_atomic_get_property, }; static int atmel_hlcdc_plane_create(struct drm_device *dev, - const struct atmel_hlcdc_layer_desc *desc, - struct atmel_hlcdc_plane_properties *props) + const struct atmel_hlcdc_layer_desc *desc) { struct atmel_hlcdc_dc *dc = dev->dev_private; struct atmel_hlcdc_plane *plane; @@ -1060,7 +1021,6 @@ static int atmel_hlcdc_plane_create(struct drm_device *dev, return -ENOMEM; atmel_hlcdc_layer_init(&plane->layer, desc, dc->hlcdc->regmap); - plane->properties = props; if (desc->type == ATMEL_HLCDC_BASE_LAYER) type = DRM_PLANE_TYPE_PRIMARY; @@ -1081,7 +1041,7 @@ static int atmel_hlcdc_plane_create(struct drm_device *dev, &atmel_hlcdc_layer_plane_helper_funcs); /* Set default property values*/ - ret = atmel_hlcdc_plane_init_properties(plane, props); + ret = atmel_hlcdc_plane_init_properties(plane); if (ret) return ret; @@ -1090,34 +1050,13 @@ static int atmel_hlcdc_plane_create(struct drm_device *dev, return 0; } -static struct atmel_hlcdc_plane_properties * -atmel_hlcdc_plane_create_properties(struct drm_device *dev) -{ - struct atmel_hlcdc_plane_properties *props; - - props = devm_kzalloc(dev->dev, sizeof(*props), GFP_KERNEL); - if (!props) - return ERR_PTR(-ENOMEM); - - props->alpha = drm_property_create_range(dev, 0, "alpha", 0, 255); - if (!props->alpha) - return ERR_PTR(-ENOMEM); - - return props; -} - int atmel_hlcdc_create_planes(struct drm_device *dev) { struct atmel_hlcdc_dc *dc = dev->dev_private; - struct atmel_hlcdc_plane_properties *props; const struct atmel_hlcdc_layer_desc *descs = dc->desc->layers; int nlayers = dc->desc->nlayers; int i, ret; - props = atmel_hlcdc_plane_create_properties(dev); - if (IS_ERR(props)) - return PTR_ERR(props); - dc->dscrpool = dmam_pool_create("atmel-hlcdc-dscr", dev->dev, sizeof(struct atmel_hlcdc_dma_channel_dscr), sizeof(u64), 0); @@ -1130,7 +1069,7 @@ int atmel_hlcdc_create_planes(struct drm_device *dev) descs[i].type != ATMEL_HLCDC_CURSOR_LAYER) continue; - ret = atmel_hlcdc_plane_create(dev, &descs[i], props); + ret = atmel_hlcdc_plane_create(dev, &descs[i]); if (ret) return ret; } diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c index a24a18fbd65a..233980a78591 100644 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ b/drivers/gpu/drm/bochs/bochs_kms.c @@ -188,7 +188,7 @@ static int bochs_connector_get_modes(struct drm_connector *connector) return count; } -static int bochs_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status bochs_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct bochs_device *bochs = diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 684ac626ac53..fa2c7997e2fd 100644 --- a/drivers/gpu/drm/bridge/Kconfig 
+++ b/drivers/gpu/drm/bridge/Kconfig @@ -25,6 +25,16 @@ config DRM_ANALOGIX_ANX78XX the HDMI output of an application processor to MyDP or DisplayPort. +config DRM_CDNS_DSI + tristate "Cadence DPI/DSI bridge" + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select DRM_PANEL_BRIDGE + depends on OF + help + Support Cadence DPI to DSI bridge. This is an internal + bridge and is meant to be directly embedded in a SoC. + config DRM_DUMB_VGA_DAC tristate "Dumb VGA DAC Bridge support" depends on OF @@ -94,6 +104,12 @@ config DRM_SII9234 It is an I2C driver, that detects connection of MHL bridge and starts encapsulation of HDMI signal. +config DRM_THINE_THC63LVD1024 + tristate "Thine THC63LVD1024 LVDS decoder bridge" + depends on OF + ---help--- + Thine THC63LVD1024 LVDS/parallel converter driver. + config DRM_TOSHIBA_TC358767 tristate "Toshiba TC358767 eDP bridge" depends on OF diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 373eb28f31ed..35f88d48ec20 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DRM_ANALOGIX_ANX78XX) += analogix-anx78xx.o +obj-$(CONFIG_DRM_CDNS_DSI) += cdns-dsi.o obj-$(CONFIG_DRM_DUMB_VGA_DAC) += dumb-vga-dac.o obj-$(CONFIG_DRM_LVDS_ENCODER) += lvds-encoder.o obj-$(CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW) += megachips-stdpxxxx-ge-b850v3-fw.o @@ -8,6 +9,7 @@ obj-$(CONFIG_DRM_PARADE_PS8622) += parade-ps8622.o obj-$(CONFIG_DRM_SIL_SII8620) += sil-sii8620.o obj-$(CONFIG_DRM_SII902X) += sii902x.o obj-$(CONFIG_DRM_SII9234) += sii9234.o +obj-$(CONFIG_DRM_THINE_THC63LVD1024) += thc63lvd1024.o obj-$(CONFIG_DRM_TOSHIBA_TC358767) += tc358767.o obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix/ obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511/ diff --git a/drivers/gpu/drm/bridge/adv7511/Kconfig b/drivers/gpu/drm/bridge/adv7511/Kconfig index 592b9d2ec034..944e440c4fde 100644 --- a/drivers/gpu/drm/bridge/adv7511/Kconfig +++ b/drivers/gpu/drm/bridge/adv7511/Kconfig @@ -1,5 +1,5 @@ config DRM_I2C_ADV7511 - tristate "AV7511 encoder" + tristate "ADV7511 encoder" depends on OF select DRM_KMS_HELPER select REGMAP_I2C diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index d034b2cb5eee..73d8ccb97742 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -93,6 +93,11 @@ #define ADV7511_REG_CHIP_ID_HIGH 0xf5 #define ADV7511_REG_CHIP_ID_LOW 0xf6 +/* Hardware defined default addresses for I2C register maps */ +#define ADV7511_CEC_I2C_ADDR_DEFAULT 0x3c +#define ADV7511_EDID_I2C_ADDR_DEFAULT 0x3f +#define ADV7511_PACKET_I2C_ADDR_DEFAULT 0x38 + #define ADV7511_CSC_ENABLE BIT(7) #define ADV7511_CSC_UPDATE_MODE BIT(5) @@ -321,6 +326,7 @@ enum adv7511_type { struct adv7511 { struct i2c_client *i2c_main; struct i2c_client *i2c_edid; + struct i2c_client *i2c_packet; struct i2c_client *i2c_cec; struct regmap *regmap; diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index efa29db5fc2b..73021b388e12 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -586,7 +586,7 @@ static int adv7511_get_modes(struct adv7511 *adv7511, /* Reading the EDID only works if the device is powered */ if (!adv7511->powered) { unsigned int edid_i2c_addr = - (adv7511->i2c_main->addr << 1) + 4; + (adv7511->i2c_edid->addr << 1); __adv7511_power_on(adv7511); @@ -654,7 +654,7 @@ adv7511_detect(struct adv7511 
*adv7511, struct drm_connector *connector) return status; } -static int adv7511_mode_valid(struct adv7511 *adv7511, +static enum drm_mode_status adv7511_mode_valid(struct adv7511 *adv7511, struct drm_display_mode *mode) { if (mode->clock > 165000) @@ -969,10 +969,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv) { int ret; - adv->i2c_cec = i2c_new_dummy(adv->i2c_main->adapter, - adv->i2c_main->addr - 1); + adv->i2c_cec = i2c_new_secondary_device(adv->i2c_main, "cec", + ADV7511_CEC_I2C_ADDR_DEFAULT); if (!adv->i2c_cec) - return -ENOMEM; + return -EINVAL; i2c_set_clientdata(adv->i2c_cec, adv); adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec, @@ -1082,8 +1082,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) struct adv7511_link_config link_config; struct adv7511 *adv7511; struct device *dev = &i2c->dev; - unsigned int main_i2c_addr = i2c->addr << 1; - unsigned int edid_i2c_addr = main_i2c_addr + 4; unsigned int val; int ret; @@ -1129,7 +1127,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) } if (adv7511->gpio_pd) { - mdelay(5); + usleep_range(5000, 6000); gpiod_set_value_cansleep(adv7511->gpio_pd, 0); } @@ -1153,23 +1151,34 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) if (ret) goto uninit_regulators; - regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR, edid_i2c_addr); - regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR, - main_i2c_addr - 0xa); - regmap_write(adv7511->regmap, ADV7511_REG_CEC_I2C_ADDR, - main_i2c_addr - 2); - adv7511_packet_disable(adv7511, 0xffff); - adv7511->i2c_edid = i2c_new_dummy(i2c->adapter, edid_i2c_addr >> 1); + adv7511->i2c_edid = i2c_new_secondary_device(i2c, "edid", + ADV7511_EDID_I2C_ADDR_DEFAULT); if (!adv7511->i2c_edid) { - ret = -ENOMEM; + ret = -EINVAL; goto uninit_regulators; } + regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR, + adv7511->i2c_edid->addr << 1); + + adv7511->i2c_packet = i2c_new_secondary_device(i2c, "packet", + ADV7511_PACKET_I2C_ADDR_DEFAULT); + if (!adv7511->i2c_packet) { + ret = -EINVAL; + goto err_i2c_unregister_edid; + } + + regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR, + adv7511->i2c_packet->addr << 1); + ret = adv7511_init_cec_regmap(adv7511); if (ret) - goto err_i2c_unregister_edid; + goto err_i2c_unregister_packet; + + regmap_write(adv7511->regmap, ADV7511_REG_CEC_I2C_ADDR, + adv7511->i2c_cec->addr << 1); INIT_WORK(&adv7511->hpd_work, adv7511_hpd_work); @@ -1207,6 +1216,8 @@ err_unregister_cec: i2c_unregister_device(adv7511->i2c_cec); if (adv7511->cec_clk) clk_disable_unprepare(adv7511->cec_clk); +err_i2c_unregister_packet: + i2c_unregister_device(adv7511->i2c_packet); err_i2c_unregister_edid: i2c_unregister_device(adv7511->i2c_edid); uninit_regulators: @@ -1233,6 +1244,7 @@ static int adv7511_remove(struct i2c_client *i2c) cec_unregister_adapter(adv7511->cec_adap); + i2c_unregister_device(adv7511->i2c_packet); i2c_unregister_device(adv7511->i2c_edid); return 0; diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 5c52307146c7..2bcbfadb6ac5 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -43,8 +43,10 @@ struct bridge_init { struct device_node *node; }; -static void analogix_dp_init_dp(struct analogix_dp_device *dp) +static int analogix_dp_init_dp(struct analogix_dp_device *dp) { + int ret; + analogix_dp_reset(dp); 
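	/*
	 * Editor's note, not part of the original patch: this and the hunks
	 * that follow convert the analogix_dp helpers from void to int
	 * returns so that AUX channel and DPCD access failures propagate to
	 * the caller instead of being silently dropped;
	 * analogix_dp_init_analog_func() below is the first callee checked
	 * this way.
	 */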
analogix_dp_swreset(dp); @@ -56,10 +58,13 @@ static void analogix_dp_init_dp(struct analogix_dp_device *dp) analogix_dp_enable_sw_function(dp); analogix_dp_config_interrupt(dp); - analogix_dp_init_analog_func(dp); + ret = analogix_dp_init_analog_func(dp); + if (ret) + return ret; analogix_dp_init_hpd(dp); analogix_dp_init_aux(dp); + return 0; } static int analogix_dp_detect_hpd(struct analogix_dp_device *dp) @@ -71,7 +76,7 @@ static int analogix_dp_detect_hpd(struct analogix_dp_device *dp) return 0; timeout_loop++; - usleep_range(10, 11); + usleep_range(1000, 1100); } /* @@ -148,87 +153,146 @@ int analogix_dp_disable_psr(struct analogix_dp_device *dp) psr_vsc.DB1 = 0; ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, DP_SET_POWER_D0); - if (ret != 1) + if (ret != 1) { dev_err(dp->dev, "Failed to set DP Power0 %d\n", ret); + return ret; + } return analogix_dp_send_psr_spd(dp, &psr_vsc, false); } EXPORT_SYMBOL_GPL(analogix_dp_disable_psr); -static bool analogix_dp_detect_sink_psr(struct analogix_dp_device *dp) +static int analogix_dp_detect_sink_psr(struct analogix_dp_device *dp) { unsigned char psr_version; + int ret; + + ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_SUPPORT, &psr_version); + if (ret != 1) { + dev_err(dp->dev, "failed to get PSR version, disable it\n"); + return ret; + } - drm_dp_dpcd_readb(&dp->aux, DP_PSR_SUPPORT, &psr_version); dev_dbg(dp->dev, "Panel PSR version : %x\n", psr_version); - return (psr_version & DP_PSR_IS_SUPPORTED) ? true : false; + dp->psr_enable = (psr_version & DP_PSR_IS_SUPPORTED) ? true : false; + + return 0; } -static void analogix_dp_enable_sink_psr(struct analogix_dp_device *dp) +static int analogix_dp_enable_sink_psr(struct analogix_dp_device *dp) { unsigned char psr_en; + int ret; /* Disable psr function */ - drm_dp_dpcd_readb(&dp->aux, DP_PSR_EN_CFG, &psr_en); + ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_EN_CFG, &psr_en); + if (ret != 1) { + dev_err(dp->dev, "failed to get psr config\n"); + goto end; + } + psr_en &= ~DP_PSR_ENABLE; - drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + if (ret != 1) { + dev_err(dp->dev, "failed to disable panel psr\n"); + goto end; + } /* Main-Link transmitter remains active during PSR active states */ psr_en = DP_PSR_MAIN_LINK_ACTIVE | DP_PSR_CRC_VERIFICATION; - drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + if (ret != 1) { + dev_err(dp->dev, "failed to set panel psr\n"); + goto end; + } /* Enable psr function */ psr_en = DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE | DP_PSR_CRC_VERIFICATION; - drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + if (ret != 1) { + dev_err(dp->dev, "failed to set panel psr\n"); + goto end; + } analogix_dp_enable_psr_crc(dp); + + return 0; +end: + dev_err(dp->dev, "enable psr fail, force to disable psr\n"); + dp->psr_enable = false; + + return ret; } -static void +static int analogix_dp_enable_rx_to_enhanced_mode(struct analogix_dp_device *dp, bool enable) { u8 data; + int ret; - drm_dp_dpcd_readb(&dp->aux, DP_LANE_COUNT_SET, &data); + ret = drm_dp_dpcd_readb(&dp->aux, DP_LANE_COUNT_SET, &data); + if (ret != 1) + return ret; if (enable) - drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET, - DP_LANE_COUNT_ENHANCED_FRAME_EN | - DPCD_LANE_COUNT_SET(data)); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET, + DP_LANE_COUNT_ENHANCED_FRAME_EN | + DPCD_LANE_COUNT_SET(data)); else - 
drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET, - DPCD_LANE_COUNT_SET(data)); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET, + DPCD_LANE_COUNT_SET(data)); + + return ret < 0 ? ret : 0; } -static int analogix_dp_is_enhanced_mode_available(struct analogix_dp_device *dp) +static int analogix_dp_is_enhanced_mode_available(struct analogix_dp_device *dp, + u8 *enhanced_mode_support) { u8 data; - int retval; + int ret; - drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data); - retval = DPCD_ENHANCED_FRAME_CAP(data); + ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data); + if (ret != 1) { + *enhanced_mode_support = 0; + return ret; + } - return retval; + *enhanced_mode_support = DPCD_ENHANCED_FRAME_CAP(data); + + return 0; } -static void analogix_dp_set_enhanced_mode(struct analogix_dp_device *dp) +static int analogix_dp_set_enhanced_mode(struct analogix_dp_device *dp) { u8 data; + int ret; + + ret = analogix_dp_is_enhanced_mode_available(dp, &data); + if (ret < 0) + return ret; + + ret = analogix_dp_enable_rx_to_enhanced_mode(dp, data); + if (ret < 0) + return ret; - data = analogix_dp_is_enhanced_mode_available(dp); - analogix_dp_enable_rx_to_enhanced_mode(dp, data); analogix_dp_enable_enhanced_mode(dp, data); + + return 0; } -static void analogix_dp_training_pattern_dis(struct analogix_dp_device *dp) +static int analogix_dp_training_pattern_dis(struct analogix_dp_device *dp) { + int ret; + analogix_dp_set_training_pattern(dp, DP_NONE); - drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, - DP_TRAINING_PATTERN_DISABLE); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + DP_TRAINING_PATTERN_DISABLE); + + return ret < 0 ? ret : 0; } static void @@ -276,6 +340,12 @@ static int analogix_dp_link_start(struct analogix_dp_device *dp) retval = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, 2); if (retval < 0) return retval; + /* set enhanced mode if available */ + retval = analogix_dp_set_enhanced_mode(dp); + if (retval < 0) { + dev_err(dp->dev, "failed to set enhanced mode\n"); + return retval; + } /* Set TX pre-emphasis to minimum */ for (lane = 0; lane < lane_count; lane++) @@ -531,7 +601,7 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) { int lane, lane_count, retval; u32 reg; - u8 link_align, link_status[2], adjust_request[2], spread; + u8 link_align, link_status[2], adjust_request[2]; usleep_range(400, 401); @@ -560,10 +630,11 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) if (!analogix_dp_channel_eq_ok(link_status, link_align, lane_count)) { /* training pattern set to normal */ - analogix_dp_training_pattern_dis(dp); + retval = analogix_dp_training_pattern_dis(dp); + if (retval < 0) + return retval; dev_info(dp->dev, "Link Training success!\n"); - analogix_dp_get_link_bandwidth(dp, &reg); dp->link_train.link_rate = reg; dev_dbg(dp->dev, "final bandwidth = %.2x\n", @@ -574,22 +645,6 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) dev_dbg(dp->dev, "final lane count = %.2x\n", dp->link_train.lane_count); - retval = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, - &spread); - if (retval != 1) { - dev_err(dp->dev, "failed to read downspread %d\n", - retval); - dp->fast_train_support = false; - } else { - dp->fast_train_support = - (spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING) ? - true : false; - } - dev_dbg(dp->dev, "fast link training %s\n", - dp->fast_train_support ? 
"supported" : "unsupported"); - - /* set enhanced mode if available */ - analogix_dp_set_enhanced_mode(dp); dp->link_train.lt_state = FINISHED; return 0; @@ -793,7 +848,7 @@ static int analogix_dp_fast_link_train(struct analogix_dp_device *dp) static int analogix_dp_train_link(struct analogix_dp_device *dp) { - if (dp->fast_train_support) + if (dp->fast_train_enable) return analogix_dp_fast_link_train(dp); return analogix_dp_full_link_train(dp, dp->video_info.max_lane_count, @@ -819,11 +874,10 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) if (analogix_dp_is_slave_video_stream_clock_on(dp) == 0) break; if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) { - dev_err(dp->dev, "Timeout of video streamclk ok\n"); + dev_err(dp->dev, "Timeout of slave video streamclk ok\n"); return -ETIMEDOUT; } - - usleep_range(1, 2); + usleep_range(1000, 1001); } /* Set to use the register calculated M/N video */ @@ -838,6 +892,9 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) /* Configure video slave mode */ analogix_dp_enable_video_master(dp, 0); + /* Enable video */ + analogix_dp_start_video(dp); + timeout_loop = 0; for (;;) { @@ -850,8 +907,9 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) done_count = 0; } if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) { - dev_err(dp->dev, "Timeout of video streamclk ok\n"); - return -ETIMEDOUT; + dev_warn(dp->dev, + "Ignoring timeout of video streamclk ok\n"); + break; } usleep_range(1000, 1001); @@ -860,24 +918,32 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) return 0; } -static void analogix_dp_enable_scramble(struct analogix_dp_device *dp, - bool enable) +static int analogix_dp_enable_scramble(struct analogix_dp_device *dp, + bool enable) { u8 data; + int ret; if (enable) { analogix_dp_enable_scrambling(dp); - drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, &data); - drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + ret = drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, + &data); + if (ret != 1) + return ret; + ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, (u8)(data & ~DP_LINK_SCRAMBLING_DISABLE)); } else { analogix_dp_disable_scrambling(dp); - drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, &data); - drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + ret = drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, + &data); + if (ret != 1) + return ret; + ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, (u8)(data | DP_LINK_SCRAMBLING_DISABLE)); } + return ret < 0 ? ret : 0; } static irqreturn_t analogix_dp_hardirq(int irq, void *arg) @@ -916,7 +982,23 @@ static irqreturn_t analogix_dp_irq_thread(int irq, void *arg) return IRQ_HANDLED; } -static void analogix_dp_commit(struct analogix_dp_device *dp) +static int analogix_dp_fast_link_train_detection(struct analogix_dp_device *dp) +{ + int ret; + u8 spread; + + ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); + if (ret != 1) { + dev_err(dp->dev, "failed to read downspread %d\n", ret); + return ret; + } + dp->fast_train_enable = !!(spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING); + dev_dbg(dp->dev, "fast link training %s\n", + dp->fast_train_enable ? 
"supported" : "unsupported"); + return 0; +} + +static int analogix_dp_commit(struct analogix_dp_device *dp) { int ret; @@ -926,34 +1008,50 @@ static void analogix_dp_commit(struct analogix_dp_device *dp) DRM_ERROR("failed to disable the panel\n"); } - ret = readx_poll_timeout(analogix_dp_train_link, dp, ret, !ret, 100, - DP_TIMEOUT_TRAINING_US * 5); + ret = analogix_dp_train_link(dp); if (ret) { dev_err(dp->dev, "unable to do link train, ret=%d\n", ret); - return; + return ret; } - analogix_dp_enable_scramble(dp, 1); - analogix_dp_enable_rx_to_enhanced_mode(dp, 1); - analogix_dp_enable_enhanced_mode(dp, 1); + ret = analogix_dp_enable_scramble(dp, 1); + if (ret < 0) { + dev_err(dp->dev, "can not enable scramble\n"); + return ret; + } analogix_dp_init_video(dp); ret = analogix_dp_config_video(dp); - if (ret) + if (ret) { dev_err(dp->dev, "unable to config video\n"); + return ret; + } /* Safe to enable the panel now */ if (dp->plat_data->panel) { - if (drm_panel_enable(dp->plat_data->panel)) + ret = drm_panel_enable(dp->plat_data->panel); + if (ret) { DRM_ERROR("failed to enable the panel\n"); + return ret; + } } - /* Enable video */ - analogix_dp_start_video(dp); + ret = analogix_dp_detect_sink_psr(dp); + if (ret) + return ret; - dp->psr_enable = analogix_dp_detect_sink_psr(dp); - if (dp->psr_enable) - analogix_dp_enable_sink_psr(dp); + if (dp->psr_enable) { + ret = analogix_dp_enable_sink_psr(dp); + if (ret) + return ret; + } + + /* Check whether panel supports fast training */ + ret = analogix_dp_fast_link_train_detection(dp); + if (ret) + dp->psr_enable = false; + + return ret; } /* @@ -1150,24 +1248,80 @@ static void analogix_dp_bridge_pre_enable(struct drm_bridge *bridge) DRM_ERROR("failed to setup the panel ret = %d\n", ret); } -static void analogix_dp_bridge_enable(struct drm_bridge *bridge) +static int analogix_dp_set_bridge(struct analogix_dp_device *dp) { - struct analogix_dp_device *dp = bridge->driver_private; - - if (dp->dpms_mode == DRM_MODE_DPMS_ON) - return; + int ret; pm_runtime_get_sync(dp->dev); - if (dp->plat_data->power_on) - dp->plat_data->power_on(dp->plat_data); + ret = clk_prepare_enable(dp->clock); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret); + goto out_dp_clk_pre; + } + + if (dp->plat_data->power_on_start) + dp->plat_data->power_on_start(dp->plat_data); phy_power_on(dp->phy); - analogix_dp_init_dp(dp); + + ret = analogix_dp_init_dp(dp); + if (ret) + goto out_dp_init; + + /* + * According to DP spec v1.3 chap 3.5.1.2 Link Training, + * We should first make sure the HPD signal is asserted high by device + * when we want to establish a link with it. 
+ */ + ret = analogix_dp_detect_hpd(dp); + if (ret) { + DRM_ERROR("failed to get hpd signal, ret = %d\n", ret); + goto out_dp_init; + } + + ret = analogix_dp_commit(dp); + if (ret) { + DRM_ERROR("dp commit error, ret = %d\n", ret); + goto out_dp_init; + } + + if (dp->plat_data->power_on_end) + dp->plat_data->power_on_end(dp->plat_data); + enable_irq(dp->irq); - analogix_dp_commit(dp); + return 0; - dp->dpms_mode = DRM_MODE_DPMS_ON; +out_dp_init: + phy_power_off(dp->phy); + if (dp->plat_data->power_off) + dp->plat_data->power_off(dp->plat_data); + clk_disable_unprepare(dp->clock); +out_dp_clk_pre: + pm_runtime_put_sync(dp->dev); + + return ret; +} + +static void analogix_dp_bridge_enable(struct drm_bridge *bridge) +{ + struct analogix_dp_device *dp = bridge->driver_private; + int timeout_loop = 0; + + if (dp->dpms_mode == DRM_MODE_DPMS_ON) + return; + + while (timeout_loop < MAX_PLL_LOCK_LOOP) { + if (analogix_dp_set_bridge(dp) == 0) { + dp->dpms_mode = DRM_MODE_DPMS_ON; + return; + } + dev_err(dp->dev, "failed to set bridge, retry: %d\n", + timeout_loop); + timeout_loop++; + usleep_range(10, 11); + } + dev_err(dp->dev, "too many retries trying to set the bridge, giving up\n"); } static void analogix_dp_bridge_disable(struct drm_bridge *bridge) @@ -1186,11 +1340,15 @@ static void analogix_dp_bridge_disable(struct drm_bridge *bridge) } disable_irq(dp->irq); - phy_power_off(dp->phy); if (dp->plat_data->power_off) dp->plat_data->power_off(dp->plat_data); + analogix_dp_set_analog_power_down(dp, POWER_ALL, 1); + phy_power_off(dp->phy); + + clk_disable_unprepare(dp->clock); + pm_runtime_put_sync(dp->dev); ret = analogix_dp_prepare_panel(dp, false, true); @@ -1198,6 +1356,7 @@ static void analogix_dp_bridge_disable(struct drm_bridge *bridge) DRM_ERROR("failed to setup the panel ret = %d\n", ret); dp->psr_enable = false; + dp->fast_train_enable = false; dp->dpms_mode = DRM_MODE_DPMS_OFF; } diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h index 6a96ef7e6934..769255dc6e99 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h @@ -19,6 +19,7 @@ #define DP_TIMEOUT_LOOP_COUNT 100 #define MAX_CR_LOOP 5 #define MAX_EQ_LOOP 5 +#define MAX_PLL_LOCK_LOOP 5 /* Training takes 22ms if AUX channel comm fails. 
Use this as retry interval */ #define DP_TIMEOUT_TRAINING_US 22000 @@ -173,7 +174,7 @@ struct analogix_dp_device { int hpd_gpio; bool force_hpd; bool psr_enable; - bool fast_train_support; + bool fast_train_enable; struct mutex panel_lock; bool panel_is_modeset; @@ -197,7 +198,7 @@ void analogix_dp_set_pll_power_down(struct analogix_dp_device *dp, bool enable); void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, enum analog_power_block block, bool enable); -void analogix_dp_init_analog_func(struct analogix_dp_device *dp); +int analogix_dp_init_analog_func(struct analogix_dp_device *dp); void analogix_dp_init_hpd(struct analogix_dp_device *dp); void analogix_dp_force_hpd(struct analogix_dp_device *dp); enum dp_irq_type analogix_dp_get_irq_type(struct analogix_dp_device *dp); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c index 9df2f3ef000c..a5f2763d72e4 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c @@ -126,9 +126,14 @@ void analogix_dp_reset(struct analogix_dp_device *dp) analogix_dp_stop_video(dp); analogix_dp_enable_video_mute(dp, 0); - reg = MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N | - AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N | - HDCP_FUNC_EN_N | SW_FUNC_EN_N; + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + reg = RK_VID_CAP_FUNC_EN_N | RK_VID_FIFO_FUNC_EN_N | + SW_FUNC_EN_N; + else + reg = MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N | + AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N | + HDCP_FUNC_EN_N | SW_FUNC_EN_N; + writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); reg = SSC_FUNC_EN_N | AUX_FUNC_EN_N | @@ -230,16 +235,20 @@ enum pll_status analogix_dp_get_pll_lock_status(struct analogix_dp_device *dp) void analogix_dp_set_pll_power_down(struct analogix_dp_device *dp, bool enable) { u32 reg; + u32 mask = DP_PLL_PD; + u32 pd_addr = ANALOGIX_DP_PLL_CTL; - if (enable) { - reg = readl(dp->reg_base + ANALOGIX_DP_PLL_CTL); - reg |= DP_PLL_PD; - writel(reg, dp->reg_base + ANALOGIX_DP_PLL_CTL); - } else { - reg = readl(dp->reg_base + ANALOGIX_DP_PLL_CTL); - reg &= ~DP_PLL_PD; - writel(reg, dp->reg_base + ANALOGIX_DP_PLL_CTL); + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { + pd_addr = ANALOGIX_DP_PD; + mask = RK_PLL_PD; } + + reg = readl(dp->reg_base + pd_addr); + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + pd_addr); } void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, @@ -248,83 +257,98 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, { u32 reg; u32 phy_pd_addr = ANALOGIX_DP_PHY_PD; + u32 mask; if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) phy_pd_addr = ANALOGIX_DP_PD; switch (block) { case AUX_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= AUX_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~AUX_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + mask = RK_AUX_PD; + else + mask = AUX_PD; + + reg = readl(dp->reg_base + phy_pd_addr); + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case CH0_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= CH0_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~CH0_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + 
mask = CH0_PD; + reg = readl(dp->reg_base + phy_pd_addr); + + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case CH1_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= CH1_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~CH1_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + mask = CH1_PD; + reg = readl(dp->reg_base + phy_pd_addr); + + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case CH2_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= CH2_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~CH2_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + mask = CH2_PD; + reg = readl(dp->reg_base + phy_pd_addr); + + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case CH3_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= CH3_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~CH3_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + mask = CH3_PD; + reg = readl(dp->reg_base + phy_pd_addr); + + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case ANALOG_TOTAL: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= DP_PHY_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~DP_PHY_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + /* + * There is no bit named DP_PHY_PD, so We used DP_INC_BG + * to power off everything instead of DP_PHY_PD in + * Rockchip + */ + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + mask = DP_INC_BG; + else + mask = DP_PHY_PD; + + reg = readl(dp->reg_base + phy_pd_addr); + if (enable) + reg |= mask; + else + reg &= ~mask; + + writel(reg, dp->reg_base + phy_pd_addr); + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + usleep_range(10, 15); break; case POWER_ALL: if (enable) { - reg = DP_PHY_PD | AUX_PD | CH3_PD | CH2_PD | - CH1_PD | CH0_PD; + reg = DP_ALL_PD; writel(reg, dp->reg_base + phy_pd_addr); } else { + reg = DP_ALL_PD; + writel(reg, dp->reg_base + phy_pd_addr); + usleep_range(10, 15); + reg &= ~DP_INC_BG; + writel(reg, dp->reg_base + phy_pd_addr); + usleep_range(10, 15); + writel(0x00, dp->reg_base + phy_pd_addr); } break; @@ -333,7 +357,7 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, } } -void analogix_dp_init_analog_func(struct analogix_dp_device *dp) +int analogix_dp_init_analog_func(struct analogix_dp_device *dp) { u32 reg; int timeout_loop = 0; @@ -355,7 +379,7 @@ void analogix_dp_init_analog_func(struct analogix_dp_device *dp) timeout_loop++; if (DP_TIMEOUT_LOOP_COUNT < timeout_loop) { dev_err(dp->dev, "failed to get pll lock status\n"); - return; + return -ETIMEDOUT; } usleep_range(10, 20); } @@ -366,6 +390,7 @@ void analogix_dp_init_analog_func(struct analogix_dp_device *dp) reg &= ~(SERDES_FIFO_FUNC_EN_N | LS_CLK_DOMAIN_FUNC_EN_N | AUX_FUNC_EN_N); writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); + return 0; } void analogix_dp_clear_hotplug_interrupts(struct analogix_dp_device *dp) @@ -450,17 +475,22 @@ void analogix_dp_init_aux(struct analogix_dp_device *dp) reg = RPLY_RECEIV | AUX_ERR; writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA); + analogix_dp_set_analog_power_down(dp, 
AUX_BLOCK, true); + usleep_range(10, 11); + analogix_dp_set_analog_power_down(dp, AUX_BLOCK, false); + analogix_dp_reset_aux(dp); - /* Disable AUX transaction H/W retry */ + /* AUX_BIT_PERIOD_EXPECTED_DELAY doesn't apply to Rockchip IP */ if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) - reg = AUX_BIT_PERIOD_EXPECTED_DELAY(0) | - AUX_HW_RETRY_COUNT_SEL(3) | - AUX_HW_RETRY_INTERVAL_600_MICROSECONDS; + reg = 0; else - reg = AUX_BIT_PERIOD_EXPECTED_DELAY(3) | - AUX_HW_RETRY_COUNT_SEL(0) | - AUX_HW_RETRY_INTERVAL_600_MICROSECONDS; + reg = AUX_BIT_PERIOD_EXPECTED_DELAY(3); + + /* Disable AUX transaction H/W retry */ + reg |= AUX_HW_RETRY_COUNT_SEL(0) | + AUX_HW_RETRY_INTERVAL_600_MICROSECONDS; + writel(reg, dp->reg_base + ANALOGIX_DP_AUX_HW_RETRY_CTL); /* Receive AUX Channel DEFER commands equal to DEFFER_COUNT*64 */ @@ -947,8 +977,12 @@ void analogix_dp_config_video_slave_mode(struct analogix_dp_device *dp) u32 reg; reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_1); - reg &= ~(MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N); - reg |= MASTER_VID_FUNC_EN_N; + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { + reg &= ~(RK_VID_CAP_FUNC_EN_N | RK_VID_FIFO_FUNC_EN_N); + } else { + reg &= ~(MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N); + reg |= MASTER_VID_FUNC_EN_N; + } writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); @@ -1072,10 +1106,11 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, struct drm_dp_aux_msg *msg) { u32 reg; + u32 status_reg; u8 *buffer = msg->buffer; - int timeout_loop = 0; unsigned int i; int num_transferred = 0; + int ret; /* Buffer size of AUX CH is 16 bytes */ if (WARN_ON(msg->size > 16)) @@ -1139,17 +1174,20 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2); - /* Is AUX CH command reply received? */ + ret = readx_poll_timeout(readl, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2, + reg, !(reg & AUX_EN), 25, 500 * 1000); + if (ret) { + dev_err(dp->dev, "AUX CH enable timeout!\n"); + goto aux_error; + } + /* TODO: Wait for an interrupt instead of looping? */ - reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA); - while (!(reg & RPLY_RECEIV)) { - timeout_loop++; - if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) { - dev_err(dp->dev, "AUX CH command reply failed!\n"); - return -ETIMEDOUT; - } - reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA); - usleep_range(10, 11); + /* Is AUX CH command reply received? 
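RPLY_RECEIV is polled every 10 us for at most 20 ms below; on timeout we fall through to aux_error, which resets the AUX channel and returns -EREMOTEIO.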
*/ + ret = readx_poll_timeout(readl, dp->reg_base + ANALOGIX_DP_INT_STA, + reg, reg & RPLY_RECEIV, 10, 20 * 1000); + if (ret) { + dev_err(dp->dev, "AUX CH cmd reply timeout!\n"); + goto aux_error; } /* Clear interrupt source for AUX CH command reply */ @@ -1157,17 +1195,13 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, /* Clear interrupt source for AUX CH access error */ reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA); - if (reg & AUX_ERR) { + status_reg = readl(dp->reg_base + ANALOGIX_DP_AUX_CH_STA); + if ((reg & AUX_ERR) || (status_reg & AUX_STATUS_MASK)) { writel(AUX_ERR, dp->reg_base + ANALOGIX_DP_INT_STA); - return -EREMOTEIO; - } - /* Check AUX CH error access status */ - reg = readl(dp->reg_base + ANALOGIX_DP_AUX_CH_STA); - if ((reg & AUX_STATUS_MASK)) { - dev_err(dp->dev, "AUX CH error happened: %d\n\n", - reg & AUX_STATUS_MASK); - return -EREMOTEIO; + dev_warn(dp->dev, "AUX CH error happened: %#x (%d)\n", + status_reg & AUX_STATUS_MASK, !!(reg & AUX_ERR)); + goto aux_error; } if (msg->request & DP_AUX_I2C_READ) { @@ -1193,4 +1227,10 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, msg->reply = DP_AUX_NATIVE_REPLY_ACK; return num_transferred > 0 ? num_transferred : -EBUSY; + +aux_error: + /* if aux err happen, reset aux */ + analogix_dp_init_aux(dp); + + return -EREMOTEIO; } diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h index 40200c652533..0cf27c731727 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h @@ -127,7 +127,9 @@ /* ANALOGIX_DP_FUNC_EN_1 */ #define MASTER_VID_FUNC_EN_N (0x1 << 7) +#define RK_VID_CAP_FUNC_EN_N (0x1 << 6) #define SLAVE_VID_FUNC_EN_N (0x1 << 5) +#define RK_VID_FIFO_FUNC_EN_N (0x1 << 5) #define AUD_FIFO_FUNC_EN_N (0x1 << 4) #define AUD_FUNC_EN_N (0x1 << 3) #define HDCP_FUNC_EN_N (0x1 << 2) @@ -342,12 +344,17 @@ #define DP_PLL_REF_BIT_1_2500V (0x7 << 0) /* ANALOGIX_DP_PHY_PD */ +#define DP_INC_BG (0x1 << 7) +#define DP_EXP_BG (0x1 << 6) #define DP_PHY_PD (0x1 << 5) +#define RK_AUX_PD (0x1 << 5) #define AUX_PD (0x1 << 4) +#define RK_PLL_PD (0x1 << 4) #define CH3_PD (0x1 << 3) #define CH2_PD (0x1 << 2) #define CH1_PD (0x1 << 1) #define CH0_PD (0x1 << 0) +#define DP_ALL_PD (0xff) /* ANALOGIX_DP_PHY_TEST */ #define MACRO_RST (0x1 << 5) diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c new file mode 100644 index 000000000000..c255fc3e1be5 --- /dev/null +++ b/drivers/gpu/drm/bridge/cdns-dsi.c @@ -0,0 +1,1623 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright: 2017 Cadence Design Systems, Inc. 
+ * + * Author: Boris Brezillon <boris.brezillon@bootlin.com> + */ + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> +#include <video/mipi_display.h> + +#include <linux/clk.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_graph.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/reset.h> + +#define IP_CONF 0x0 +#define SP_HS_FIFO_DEPTH(x) (((x) & GENMASK(30, 26)) >> 26) +#define SP_LP_FIFO_DEPTH(x) (((x) & GENMASK(25, 21)) >> 21) +#define VRS_FIFO_DEPTH(x) (((x) & GENMASK(20, 16)) >> 16) +#define DIRCMD_FIFO_DEPTH(x) (((x) & GENMASK(15, 13)) >> 13) +#define SDI_IFACE_32 BIT(12) +#define INTERNAL_DATAPATH_32 (0 << 10) +#define INTERNAL_DATAPATH_16 (1 << 10) +#define INTERNAL_DATAPATH_8 (3 << 10) +#define INTERNAL_DATAPATH_SIZE ((x) & GENMASK(11, 10)) +#define NUM_IFACE(x) ((((x) & GENMASK(9, 8)) >> 8) + 1) +#define MAX_LANE_NB(x) (((x) & GENMASK(7, 6)) >> 6) +#define RX_FIFO_DEPTH(x) ((x) & GENMASK(5, 0)) + +#define MCTL_MAIN_DATA_CTL 0x4 +#define TE_MIPI_POLLING_EN BIT(25) +#define TE_HW_POLLING_EN BIT(24) +#define DISP_EOT_GEN BIT(18) +#define HOST_EOT_GEN BIT(17) +#define DISP_GEN_CHECKSUM BIT(16) +#define DISP_GEN_ECC BIT(15) +#define BTA_EN BIT(14) +#define READ_EN BIT(13) +#define REG_TE_EN BIT(12) +#define IF_TE_EN(x) BIT(8 + (x)) +#define TVG_SEL BIT(6) +#define VID_EN BIT(5) +#define IF_VID_SELECT(x) ((x) << 2) +#define IF_VID_SELECT_MASK GENMASK(3, 2) +#define IF_VID_MODE BIT(1) +#define LINK_EN BIT(0) + +#define MCTL_MAIN_PHY_CTL 0x8 +#define HS_INVERT_DAT(x) BIT(19 + ((x) * 2)) +#define SWAP_PINS_DAT(x) BIT(18 + ((x) * 2)) +#define HS_INVERT_CLK BIT(17) +#define SWAP_PINS_CLK BIT(16) +#define HS_SKEWCAL_EN BIT(15) +#define WAIT_BURST_TIME(x) ((x) << 10) +#define DATA_ULPM_EN(x) BIT(6 + (x)) +#define CLK_ULPM_EN BIT(5) +#define CLK_CONTINUOUS BIT(4) +#define DATA_LANE_EN(x) BIT((x) - 1) + +#define MCTL_MAIN_EN 0xc +#define DATA_FORCE_STOP BIT(17) +#define CLK_FORCE_STOP BIT(16) +#define IF_EN(x) BIT(13 + (x)) +#define DATA_LANE_ULPM_REQ(l) BIT(9 + (l)) +#define CLK_LANE_ULPM_REQ BIT(8) +#define DATA_LANE_START(x) BIT(4 + (x)) +#define CLK_LANE_EN BIT(3) +#define PLL_START BIT(0) + +#define MCTL_DPHY_CFG0 0x10 +#define DPHY_C_RSTB BIT(20) +#define DPHY_D_RSTB(x) GENMASK(15 + (x), 16) +#define DPHY_PLL_PDN BIT(10) +#define DPHY_CMN_PDN BIT(9) +#define DPHY_C_PDN BIT(8) +#define DPHY_D_PDN(x) GENMASK(3 + (x), 4) +#define DPHY_ALL_D_PDN GENMASK(7, 4) +#define DPHY_PLL_PSO BIT(1) +#define DPHY_CMN_PSO BIT(0) + +#define MCTL_DPHY_TIMEOUT1 0x14 +#define HSTX_TIMEOUT(x) ((x) << 4) +#define HSTX_TIMEOUT_MAX GENMASK(17, 0) +#define CLK_DIV(x) (x) +#define CLK_DIV_MAX GENMASK(3, 0) + +#define MCTL_DPHY_TIMEOUT2 0x18 +#define LPRX_TIMEOUT(x) (x) + +#define MCTL_ULPOUT_TIME 0x1c +#define DATA_LANE_ULPOUT_TIME(x) ((x) << 9) +#define CLK_LANE_ULPOUT_TIME(x) (x) + +#define MCTL_3DVIDEO_CTL 0x20 +#define VID_VSYNC_3D_EN BIT(7) +#define VID_VSYNC_3D_LR BIT(5) +#define VID_VSYNC_3D_SECOND_EN BIT(4) +#define VID_VSYNC_3DFORMAT_LINE (0 << 2) +#define VID_VSYNC_3DFORMAT_FRAME (1 << 2) +#define VID_VSYNC_3DFORMAT_PIXEL (2 << 2) +#define VID_VSYNC_3DMODE_OFF 0 +#define VID_VSYNC_3DMODE_PORTRAIT 1 +#define VID_VSYNC_3DMODE_LANDSCAPE 2 + +#define MCTL_MAIN_STS 0x24 +#define MCTL_MAIN_STS_CTL 0x130 +#define MCTL_MAIN_STS_CLR 0x150 +#define MCTL_MAIN_STS_FLAG 0x170 +#define HS_SKEWCAL_DONE BIT(11) +#define 
IF_UNTERM_PKT_ERR(x) BIT(8 + (x)) +#define LPRX_TIMEOUT_ERR BIT(7) +#define HSTX_TIMEOUT_ERR BIT(6) +#define DATA_LANE_RDY(l) BIT(2 + (l)) +#define CLK_LANE_RDY BIT(1) +#define PLL_LOCKED BIT(0) + +#define MCTL_DPHY_ERR 0x28 +#define MCTL_DPHY_ERR_CTL1 0x148 +#define MCTL_DPHY_ERR_CLR 0x168 +#define MCTL_DPHY_ERR_FLAG 0x188 +#define ERR_CONT_LP(x, l) BIT(18 + ((x) * 4) + (l)) +#define ERR_CONTROL(l) BIT(14 + (l)) +#define ERR_SYNESC(l) BIT(10 + (l)) +#define ERR_ESC(l) BIT(6 + (l)) + +#define MCTL_DPHY_ERR_CTL2 0x14c +#define ERR_CONT_LP_EDGE(x, l) BIT(12 + ((x) * 4) + (l)) +#define ERR_CONTROL_EDGE(l) BIT(8 + (l)) +#define ERR_SYN_ESC_EDGE(l) BIT(4 + (l)) +#define ERR_ESC_EDGE(l) BIT(0 + (l)) + +#define MCTL_LANE_STS 0x2c +#define PPI_C_TX_READY_HS BIT(18) +#define DPHY_PLL_LOCK BIT(17) +#define PPI_D_RX_ULPS_ESC(x) (((x) & GENMASK(15, 12)) >> 12) +#define LANE_STATE_START 0 +#define LANE_STATE_IDLE 1 +#define LANE_STATE_WRITE 2 +#define LANE_STATE_ULPM 3 +#define LANE_STATE_READ 4 +#define DATA_LANE_STATE(l, val) \ + (((val) >> (2 + 2 * (l) + ((l) ? 1 : 0))) & GENMASK((l) ? 1 : 2, 0)) +#define CLK_LANE_STATE_HS 2 +#define CLK_LANE_STATE(val) ((val) & GENMASK(1, 0)) + +#define DSC_MODE_CTL 0x30 +#define DSC_MODE_EN BIT(0) + +#define DSC_CMD_SEND 0x34 +#define DSC_SEND_PPS BIT(0) +#define DSC_EXECUTE_QUEUE BIT(1) + +#define DSC_PPS_WRDAT 0x38 + +#define DSC_MODE_STS 0x3c +#define DSC_PPS_DONE BIT(1) +#define DSC_EXEC_DONE BIT(2) + +#define CMD_MODE_CTL 0x70 +#define IF_LP_EN(x) BIT(9 + (x)) +#define IF_VCHAN_ID(x, c) ((c) << ((x) * 2)) + +#define CMD_MODE_CTL2 0x74 +#define TE_TIMEOUT(x) ((x) << 11) +#define FILL_VALUE(x) ((x) << 3) +#define ARB_IF_WITH_HIGHEST_PRIORITY(x) ((x) << 1) +#define ARB_ROUND_ROBIN_MODE BIT(0) + +#define CMD_MODE_STS 0x78 +#define CMD_MODE_STS_CTL 0x134 +#define CMD_MODE_STS_CLR 0x154 +#define CMD_MODE_STS_FLAG 0x174 +#define ERR_IF_UNDERRUN(x) BIT(4 + (x)) +#define ERR_UNWANTED_READ BIT(3) +#define ERR_TE_MISS BIT(2) +#define ERR_NO_TE BIT(1) +#define CSM_RUNNING BIT(0) + +#define DIRECT_CMD_SEND 0x80 + +#define DIRECT_CMD_MAIN_SETTINGS 0x84 +#define TRIGGER_VAL(x) ((x) << 25) +#define CMD_LP_EN BIT(24) +#define CMD_SIZE(x) ((x) << 16) +#define CMD_VCHAN_ID(x) ((x) << 14) +#define CMD_DATATYPE(x) ((x) << 8) +#define CMD_LONG BIT(3) +#define WRITE_CMD 0 +#define READ_CMD 1 +#define TE_REQ 4 +#define TRIGGER_REQ 5 +#define BTA_REQ 6 + +#define DIRECT_CMD_STS 0x88 +#define DIRECT_CMD_STS_CTL 0x138 +#define DIRECT_CMD_STS_CLR 0x158 +#define DIRECT_CMD_STS_FLAG 0x178 +#define RCVD_ACK_VAL(val) ((val) >> 16) +#define RCVD_TRIGGER_VAL(val) (((val) & GENMASK(14, 11)) >> 11) +#define READ_COMPLETED_WITH_ERR BIT(10) +#define BTA_FINISHED BIT(9) +#define BTA_COMPLETED BIT(8) +#define TE_RCVD BIT(7) +#define TRIGGER_RCVD BIT(6) +#define ACK_WITH_ERR_RCVD BIT(5) +#define ACK_RCVD BIT(4) +#define READ_COMPLETED BIT(3) +#define TRIGGER_COMPLETED BIT(2) +#define WRITE_COMPLETED BIT(1) +#define SENDING_CMD BIT(0) + +#define DIRECT_CMD_STOP_READ 0x8c + +#define DIRECT_CMD_WRDATA 0x90 + +#define DIRECT_CMD_FIFO_RST 0x94 + +#define DIRECT_CMD_RDDATA 0xa0 + +#define DIRECT_CMD_RD_PROPS 0xa4 +#define RD_DCS BIT(18) +#define RD_VCHAN_ID(val) (((val) >> 16) & GENMASK(1, 0)) +#define RD_SIZE(val) ((val) & GENMASK(15, 0)) + +#define DIRECT_CMD_RD_STS 0xa8 +#define DIRECT_CMD_RD_STS_CTL 0x13c +#define DIRECT_CMD_RD_STS_CLR 0x15c +#define DIRECT_CMD_RD_STS_FLAG 0x17c +#define ERR_EOT_WITH_ERR BIT(8) +#define ERR_MISSING_EOT BIT(7) +#define ERR_WRONG_LENGTH BIT(6) +#define ERR_OVERSIZE 
BIT(5) +#define ERR_RECEIVE BIT(4) +#define ERR_UNDECODABLE BIT(3) +#define ERR_CHECKSUM BIT(2) +#define ERR_UNCORRECTABLE BIT(1) +#define ERR_FIXED BIT(0) + +#define VID_MAIN_CTL 0xb0 +#define VID_IGNORE_MISS_VSYNC BIT(31) +#define VID_FIELD_SW BIT(28) +#define VID_INTERLACED_EN BIT(27) +#define RECOVERY_MODE(x) ((x) << 25) +#define RECOVERY_MODE_NEXT_HSYNC 0 +#define RECOVERY_MODE_NEXT_STOP_POINT 2 +#define RECOVERY_MODE_NEXT_VSYNC 3 +#define REG_BLKEOL_MODE(x) ((x) << 23) +#define REG_BLKLINE_MODE(x) ((x) << 21) +#define REG_BLK_MODE_NULL_PKT 0 +#define REG_BLK_MODE_BLANKING_PKT 1 +#define REG_BLK_MODE_LP 2 +#define SYNC_PULSE_HORIZONTAL BIT(20) +#define SYNC_PULSE_ACTIVE BIT(19) +#define BURST_MODE BIT(18) +#define VID_PIXEL_MODE_MASK GENMASK(17, 14) +#define VID_PIXEL_MODE_RGB565 (0 << 14) +#define VID_PIXEL_MODE_RGB666_PACKED (1 << 14) +#define VID_PIXEL_MODE_RGB666 (2 << 14) +#define VID_PIXEL_MODE_RGB888 (3 << 14) +#define VID_PIXEL_MODE_RGB101010 (4 << 14) +#define VID_PIXEL_MODE_RGB121212 (5 << 14) +#define VID_PIXEL_MODE_YUV420 (8 << 14) +#define VID_PIXEL_MODE_YUV422_PACKED (9 << 14) +#define VID_PIXEL_MODE_YUV422 (10 << 14) +#define VID_PIXEL_MODE_YUV422_24B (11 << 14) +#define VID_PIXEL_MODE_DSC_COMP (12 << 14) +#define VID_DATATYPE(x) ((x) << 8) +#define VID_VIRTCHAN_ID(iface, x) ((x) << (4 + (iface) * 2)) +#define STOP_MODE(x) ((x) << 2) +#define START_MODE(x) (x) + +#define VID_VSIZE1 0xb4 +#define VFP_LEN(x) ((x) << 12) +#define VBP_LEN(x) ((x) << 6) +#define VSA_LEN(x) (x) + +#define VID_VSIZE2 0xb8 +#define VACT_LEN(x) (x) + +#define VID_HSIZE1 0xc0 +#define HBP_LEN(x) ((x) << 16) +#define HSA_LEN(x) (x) + +#define VID_HSIZE2 0xc4 +#define HFP_LEN(x) ((x) << 16) +#define HACT_LEN(x) (x) + +#define VID_BLKSIZE1 0xcc +#define BLK_EOL_PKT_LEN(x) ((x) << 15) +#define BLK_LINE_EVENT_PKT_LEN(x) (x) + +#define VID_BLKSIZE2 0xd0 +#define BLK_LINE_PULSE_PKT_LEN(x) (x) + +#define VID_PKT_TIME 0xd8 +#define BLK_EOL_DURATION(x) (x) + +#define VID_DPHY_TIME 0xdc +#define REG_WAKEUP_TIME(x) ((x) << 17) +#define REG_LINE_DURATION(x) (x) + +#define VID_ERR_COLOR1 0xe0 +#define COL_GREEN(x) ((x) << 12) +#define COL_RED(x) (x) + +#define VID_ERR_COLOR2 0xe4 +#define PAD_VAL(x) ((x) << 12) +#define COL_BLUE(x) (x) + +#define VID_VPOS 0xe8 +#define LINE_VAL(val) (((val) & GENMASK(14, 2)) >> 2) +#define LINE_POS(val) ((val) & GENMASK(1, 0)) + +#define VID_HPOS 0xec +#define HORIZ_VAL(val) (((val) & GENMASK(17, 3)) >> 3) +#define HORIZ_POS(val) ((val) & GENMASK(2, 0)) + +#define VID_MODE_STS 0xf0 +#define VID_MODE_STS_CTL 0x140 +#define VID_MODE_STS_CLR 0x160 +#define VID_MODE_STS_FLAG 0x180 +#define VSG_RECOVERY BIT(10) +#define ERR_VRS_WRONG_LEN BIT(9) +#define ERR_LONG_READ BIT(8) +#define ERR_LINE_WRITE BIT(7) +#define ERR_BURST_WRITE BIT(6) +#define ERR_SMALL_HEIGHT BIT(5) +#define ERR_SMALL_LEN BIT(4) +#define ERR_MISSING_VSYNC BIT(3) +#define ERR_MISSING_HSYNC BIT(2) +#define ERR_MISSING_DATA BIT(1) +#define VSG_RUNNING BIT(0) + +#define VID_VCA_SETTING1 0xf4 +#define BURST_LP BIT(16) +#define MAX_BURST_LIMIT(x) (x) + +#define VID_VCA_SETTING2 0xf8 +#define MAX_LINE_LIMIT(x) ((x) << 16) +#define EXACT_BURST_LIMIT(x) (x) + +#define TVG_CTL 0xfc +#define TVG_STRIPE_SIZE(x) ((x) << 5) +#define TVG_MODE_MASK GENMASK(4, 3) +#define TVG_MODE_SINGLE_COLOR (0 << 3) +#define TVG_MODE_VSTRIPES (2 << 3) +#define TVG_MODE_HSTRIPES (3 << 3) +#define TVG_STOPMODE_MASK GENMASK(2, 1) +#define TVG_STOPMODE_EOF (0 << 1) +#define TVG_STOPMODE_EOL (1 << 1) +#define TVG_STOPMODE_NOW (2 << 1) +#define 
TVG_RUN BIT(0) + +#define TVG_IMG_SIZE 0x100 +#define TVG_NBLINES(x) ((x) << 16) +#define TVG_LINE_SIZE(x) (x) + +#define TVG_COLOR1 0x104 +#define TVG_COL1_GREEN(x) ((x) << 12) +#define TVG_COL1_RED(x) (x) + +#define TVG_COLOR1_BIS 0x108 +#define TVG_COL1_BLUE(x) (x) + +#define TVG_COLOR2 0x10c +#define TVG_COL2_GREEN(x) ((x) << 12) +#define TVG_COL2_RED(x) (x) + +#define TVG_COLOR2_BIS 0x110 +#define TVG_COL2_BLUE(x) (x) + +#define TVG_STS 0x114 +#define TVG_STS_CTL 0x144 +#define TVG_STS_CLR 0x164 +#define TVG_STS_FLAG 0x184 +#define TVG_STS_RUNNING BIT(0) + +#define STS_CTL_EDGE(e) ((e) << 16) + +#define DPHY_LANES_MAP 0x198 +#define DAT_REMAP_CFG(b, l) ((l) << ((b) * 8)) + +#define DPI_IRQ_EN 0x1a0 +#define DPI_IRQ_CLR 0x1a4 +#define DPI_IRQ_STS 0x1a8 +#define PIXEL_BUF_OVERFLOW BIT(0) + +#define DPI_CFG 0x1ac +#define DPI_CFG_FIFO_DEPTH(x) ((x) >> 16) +#define DPI_CFG_FIFO_LEVEL(x) ((x) & GENMASK(15, 0)) + +#define TEST_GENERIC 0x1f0 +#define TEST_STATUS(x) ((x) >> 16) +#define TEST_CTRL(x) (x) + +#define ID_REG 0x1fc +#define REV_VENDOR_ID(x) (((x) & GENMASK(31, 20)) >> 20) +#define REV_PRODUCT_ID(x) (((x) & GENMASK(19, 12)) >> 12) +#define REV_HW(x) (((x) & GENMASK(11, 8)) >> 8) +#define REV_MAJOR(x) (((x) & GENMASK(7, 4)) >> 4) +#define REV_MINOR(x) ((x) & GENMASK(3, 0)) + +#define DSI_OUTPUT_PORT 0 +#define DSI_INPUT_PORT(inputid) (1 + (inputid)) + +#define DSI_HBP_FRAME_OVERHEAD 12 +#define DSI_HSA_FRAME_OVERHEAD 14 +#define DSI_HFP_FRAME_OVERHEAD 6 +#define DSI_HSS_VSS_VSE_FRAME_OVERHEAD 4 +#define DSI_BLANKING_FRAME_OVERHEAD 6 +#define DSI_NULL_FRAME_OVERHEAD 6 +#define DSI_EOT_PKT_SIZE 4 + +#define REG_WAKEUP_TIME_NS 800 +#define DPHY_PLL_RATE_HZ 108000000 + +/* DPHY registers */ +#define DPHY_PMA_CMN(reg) (reg) +#define DPHY_PMA_LCLK(reg) (0x100 + (reg)) +#define DPHY_PMA_LDATA(lane, reg) (0x200 + ((lane) * 0x100) + (reg)) +#define DPHY_PMA_RCLK(reg) (0x600 + (reg)) +#define DPHY_PMA_RDATA(lane, reg) (0x700 + ((lane) * 0x100) + (reg)) +#define DPHY_PCS(reg) (0xb00 + (reg)) + +#define DPHY_CMN_SSM DPHY_PMA_CMN(0x20) +#define DPHY_CMN_SSM_EN BIT(0) +#define DPHY_CMN_TX_MODE_EN BIT(9) + +#define DPHY_CMN_PWM DPHY_PMA_CMN(0x40) +#define DPHY_CMN_PWM_DIV(x) ((x) << 20) +#define DPHY_CMN_PWM_LOW(x) ((x) << 10) +#define DPHY_CMN_PWM_HIGH(x) (x) + +#define DPHY_CMN_FBDIV DPHY_PMA_CMN(0x4c) +#define DPHY_CMN_FBDIV_VAL(low, high) (((high) << 11) | ((low) << 22)) +#define DPHY_CMN_FBDIV_FROM_REG (BIT(10) | BIT(21)) + +#define DPHY_CMN_OPIPDIV DPHY_PMA_CMN(0x50) +#define DPHY_CMN_IPDIV_FROM_REG BIT(0) +#define DPHY_CMN_IPDIV(x) ((x) << 1) +#define DPHY_CMN_OPDIV_FROM_REG BIT(6) +#define DPHY_CMN_OPDIV(x) ((x) << 7) + +#define DPHY_PSM_CFG DPHY_PCS(0x4) +#define DPHY_PSM_CFG_FROM_REG BIT(0) +#define DPHY_PSM_CLK_DIV(x) ((x) << 1) + +struct cdns_dsi_output { + struct mipi_dsi_device *dev; + struct drm_panel *panel; + struct drm_bridge *bridge; +}; + +enum cdns_dsi_input_id { + CDNS_SDI_INPUT, + CDNS_DPI_INPUT, + CDNS_DSC_INPUT, +}; + +struct cdns_dphy_cfg { + u8 pll_ipdiv; + u8 pll_opdiv; + u16 pll_fbdiv; + unsigned long lane_bps; + unsigned int nlanes; +}; + +struct cdns_dsi_cfg { + unsigned int hfp; + unsigned int hsa; + unsigned int hbp; + unsigned int hact; + unsigned int htotal; +}; + +struct cdns_dphy; + +enum cdns_dphy_clk_lane_cfg { + DPHY_CLK_CFG_LEFT_DRIVES_ALL = 0, + DPHY_CLK_CFG_LEFT_DRIVES_RIGHT = 1, + DPHY_CLK_CFG_LEFT_DRIVES_LEFT = 2, + DPHY_CLK_CFG_RIGHT_DRIVES_ALL = 3, +}; + +struct cdns_dphy_ops { + int (*probe)(struct cdns_dphy *dphy); + void (*remove)(struct cdns_dphy 
*dphy); + void (*set_psm_div)(struct cdns_dphy *dphy, u8 div); + void (*set_clk_lane_cfg)(struct cdns_dphy *dphy, + enum cdns_dphy_clk_lane_cfg cfg); + void (*set_pll_cfg)(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg); + unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy); +}; + +struct cdns_dphy { + struct cdns_dphy_cfg cfg; + void __iomem *regs; + struct clk *psm_clk; + struct clk *pll_ref_clk; + const struct cdns_dphy_ops *ops; +}; + +struct cdns_dsi_input { + enum cdns_dsi_input_id id; + struct drm_bridge bridge; +}; + +struct cdns_dsi { + struct mipi_dsi_host base; + void __iomem *regs; + struct cdns_dsi_input input; + struct cdns_dsi_output output; + unsigned int direct_cmd_fifo_depth; + unsigned int rx_fifo_depth; + struct completion direct_cmd_comp; + struct clk *dsi_p_clk; + struct reset_control *dsi_p_rst; + struct clk *dsi_sys_clk; + bool link_initialized; + struct cdns_dphy *dphy; +}; + +static inline struct cdns_dsi *input_to_dsi(struct cdns_dsi_input *input) +{ + return container_of(input, struct cdns_dsi, input); +} + +static inline struct cdns_dsi *to_cdns_dsi(struct mipi_dsi_host *host) +{ + return container_of(host, struct cdns_dsi, base); +} + +static inline struct cdns_dsi_input * +bridge_to_cdns_dsi_input(struct drm_bridge *bridge) +{ + return container_of(bridge, struct cdns_dsi_input, bridge); +} + +static int cdns_dsi_get_dphy_pll_cfg(struct cdns_dphy *dphy, + struct cdns_dphy_cfg *cfg, + unsigned int dpi_htotal, + unsigned int dpi_bpp, + unsigned int dpi_hz, + unsigned int dsi_htotal, + unsigned int dsi_nlanes, + unsigned int *dsi_hfp_ext) +{ + u64 dlane_bps, dlane_bps_max, fbdiv, fbdiv_max, adj_dsi_htotal; + unsigned long pll_ref_hz = clk_get_rate(dphy->pll_ref_clk); + + memset(cfg, 0, sizeof(*cfg)); + + cfg->nlanes = dsi_nlanes; + + if (pll_ref_hz < 9600000 || pll_ref_hz >= 150000000) + return -EINVAL; + else if (pll_ref_hz < 19200000) + cfg->pll_ipdiv = 1; + else if (pll_ref_hz < 38400000) + cfg->pll_ipdiv = 2; + else if (pll_ref_hz < 76800000) + cfg->pll_ipdiv = 4; + else + cfg->pll_ipdiv = 8; + + /* + * Make sure DSI htotal is aligned on a lane boundary when calculating + * the expected data rate. This is done by extending HFP in case of + * misalignment. + */ + adj_dsi_htotal = dsi_htotal; + if (dsi_htotal % dsi_nlanes) + adj_dsi_htotal += dsi_nlanes - (dsi_htotal % dsi_nlanes); + + dlane_bps = (u64)dpi_hz * adj_dsi_htotal; + + /* data rate in bytes/sec is not an integer, refuse the mode. */ + if (do_div(dlane_bps, dsi_nlanes * dpi_htotal)) + return -EINVAL; + + /* data rate was in bytes/sec, convert to bits/sec. */ + dlane_bps *= 8; + + if (dlane_bps > 2500000000UL || dlane_bps < 160000000UL) + return -EINVAL; + else if (dlane_bps >= 1250000000) + cfg->pll_opdiv = 1; + else if (dlane_bps >= 630000000) + cfg->pll_opdiv = 2; + else if (dlane_bps >= 320000000) + cfg->pll_opdiv = 4; + else if (dlane_bps >= 160000000) + cfg->pll_opdiv = 8; + + /* + * Allow a deviation of 0.2% on the per-lane data rate to try to + * recover a potential mismatch between DPI and PPI clks. + */ + dlane_bps_max = dlane_bps + DIV_ROUND_DOWN_ULL(dlane_bps, 500); + fbdiv_max = DIV_ROUND_DOWN_ULL(dlane_bps_max * 2 * + cfg->pll_opdiv * cfg->pll_ipdiv, + pll_ref_hz); + fbdiv = DIV_ROUND_UP_ULL(dlane_bps * 2 * cfg->pll_opdiv * + cfg->pll_ipdiv, + pll_ref_hz); + + /* + * Iterate over all acceptable fbdiv and try to find an adjusted DSI + * htotal length providing an exact match. 
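+ * An exact match means that adj_dsi_htotal = (fbdiv * pll_ref_hz * nlanes * dpi_htotal) / (dpi_hz * opdiv * ipdiv * 2 * 8) computes with no remainder in either division, which in turn pins the per-lane rate to lane_bps = (fbdiv * pll_ref_hz) / (2 * opdiv * ipdiv).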
+ * + * Note that we could do something even trickier by relying on the fact + * that a new line is not necessarily aligned on a lane boundary, so, + * by making adj_dsi_htotal not aligned on dsi_nlanes we can improve the + * precision a bit. With this, the step would be + * + * pll_ref_hz / (2 * opdiv * ipdiv * nlanes) + * + * instead of + * + * pll_ref_hz / (2 * opdiv * ipdiv) + * + * The drawback of this approach is that we would need to make sure the + * number of lines is a multiple of the realignment periodicity which is + * a function of the number of lanes and the original misalignment. For + * example, for NLANES = 4 and HTOTAL % NLANES = 3, it takes 4 lines + * to realign on a lane: + * LINE 0: expected number of bytes, starts emitting first byte of + * LINE 1 on LANE 3 + * LINE 1: expected number of bytes, starts emitting first 2 bytes of + * LINE 2 on LANES 2 and 3 + * LINE 2: expected number of bytes, starts emitting first 3 bytes + * of LINE 3 on LANES 1, 2 and 3 + * LINE 3: one byte less, now things are realigned on LANE 0 for LINE 4 + * + * I figured this extra complexity was not worth the benefit, but if + * someone really has an unfixable mismatch, that would be something to + * investigate. + */ + for (; fbdiv <= fbdiv_max; fbdiv++) { + u32 rem; + + adj_dsi_htotal = (u64)fbdiv * pll_ref_hz * dsi_nlanes * + dpi_htotal; + + /* + * Do the division in 2 steps to avoid an overflow on the + * divider. + */ + rem = do_div(adj_dsi_htotal, dpi_hz); + if (rem) + continue; + + rem = do_div(adj_dsi_htotal, + cfg->pll_opdiv * cfg->pll_ipdiv * 2 * 8); + if (rem) + continue; + + cfg->pll_fbdiv = fbdiv; + *dsi_hfp_ext = adj_dsi_htotal - dsi_htotal; + break; + } + + /* No match, let's just reject the display mode. */ + if (!cfg->pll_fbdiv) + return -EINVAL; + + dlane_bps = DIV_ROUND_DOWN_ULL((u64)dpi_hz * adj_dsi_htotal * 8, + dsi_nlanes * dpi_htotal); + cfg->lane_bps = dlane_bps; + + return 0; +} + +static int cdns_dphy_setup_psm(struct cdns_dphy *dphy) +{ + unsigned long psm_clk_hz = clk_get_rate(dphy->psm_clk); + unsigned long psm_div; + + if (!psm_clk_hz || psm_clk_hz > 100000000) + return -EINVAL; + + psm_div = DIV_ROUND_CLOSEST(psm_clk_hz, 1000000); + if (dphy->ops->set_psm_div) + dphy->ops->set_psm_div(dphy, psm_div); + + return 0; +} + +static void cdns_dphy_set_clk_lane_cfg(struct cdns_dphy *dphy, + enum cdns_dphy_clk_lane_cfg cfg) +{ + if (dphy->ops->set_clk_lane_cfg) + dphy->ops->set_clk_lane_cfg(dphy, cfg); +} + +static void cdns_dphy_set_pll_cfg(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg) +{ + if (dphy->ops->set_pll_cfg) + dphy->ops->set_pll_cfg(dphy, cfg); +} + +static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy) +{ + return dphy->ops->get_wakeup_time_ns(dphy); +} + +static unsigned int dpi_to_dsi_timing(unsigned int dpi_timing, + unsigned int dpi_bpp, + unsigned int dsi_pkt_overhead) +{ + unsigned int dsi_timing = DIV_ROUND_UP(dpi_timing * dpi_bpp, 8); + + if (dsi_timing < dsi_pkt_overhead) + dsi_timing = 0; + else + dsi_timing -= dsi_pkt_overhead; + + return dsi_timing; +} + +static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi, + const struct drm_display_mode *mode, + struct cdns_dsi_cfg *dsi_cfg, + struct cdns_dphy_cfg *dphy_cfg, + bool mode_valid_check) +{ + unsigned long dsi_htotal = 0, dsi_hss_hsa_hse_hbp = 0; + struct cdns_dsi_output *output = &dsi->output; + unsigned int dsi_hfp_ext = 0, dpi_hfp, tmp; + bool sync_pulse = false; + int bpp, nlanes, ret; + + memset(dsi_cfg, 0, sizeof(*dsi_cfg)); + + if (output->dev->mode_flags & 
MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + sync_pulse = true; + + bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format); + nlanes = output->dev->lanes; + + if (mode_valid_check) + tmp = mode->htotal - + (sync_pulse ? mode->hsync_end : mode->hsync_start); + else + tmp = mode->crtc_htotal - + (sync_pulse ? + mode->crtc_hsync_end : mode->crtc_hsync_start); + + dsi_cfg->hbp = dpi_to_dsi_timing(tmp, bpp, DSI_HBP_FRAME_OVERHEAD); + dsi_htotal += dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; + dsi_hss_hsa_hse_hbp += dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; + + if (sync_pulse) { + if (mode_valid_check) + tmp = mode->hsync_end - mode->hsync_start; + else + tmp = mode->crtc_hsync_end - mode->crtc_hsync_start; + + dsi_cfg->hsa = dpi_to_dsi_timing(tmp, bpp, + DSI_HSA_FRAME_OVERHEAD); + dsi_htotal += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; + dsi_hss_hsa_hse_hbp += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; + } + + dsi_cfg->hact = dpi_to_dsi_timing(mode_valid_check ? + mode->hdisplay : mode->crtc_hdisplay, + bpp, 0); + dsi_htotal += dsi_cfg->hact; + + if (mode_valid_check) + dpi_hfp = mode->hsync_start - mode->hdisplay; + else + dpi_hfp = mode->crtc_hsync_start - mode->crtc_hdisplay; + + dsi_cfg->hfp = dpi_to_dsi_timing(dpi_hfp, bpp, DSI_HFP_FRAME_OVERHEAD); + dsi_htotal += dsi_cfg->hfp + DSI_HFP_FRAME_OVERHEAD; + + if (mode_valid_check) + ret = cdns_dsi_get_dphy_pll_cfg(dsi->dphy, dphy_cfg, + mode->htotal, bpp, + mode->clock * 1000, + dsi_htotal, nlanes, + &dsi_hfp_ext); + else + ret = cdns_dsi_get_dphy_pll_cfg(dsi->dphy, dphy_cfg, + mode->crtc_htotal, bpp, + mode->crtc_clock * 1000, + dsi_htotal, nlanes, + &dsi_hfp_ext); + + if (ret) + return ret; + + dsi_cfg->hfp += dsi_hfp_ext; + dsi_htotal += dsi_hfp_ext; + dsi_cfg->htotal = dsi_htotal; + + /* + * Make sure DPI(HFP) > DSI(HSS+HSA+HSE+HBP) to guarantee that the FIFO + * is empty before we start a receiving a new line on the DPI + * interface. + */ + if ((u64)dphy_cfg->lane_bps * dpi_hfp * nlanes < + (u64)dsi_hss_hsa_hse_hbp * + (mode_valid_check ? mode->clock : mode->crtc_clock) * 1000) + return -EINVAL; + + return 0; +} + +static int cdns_dsi_bridge_attach(struct drm_bridge *bridge) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + struct cdns_dsi_output *output = &dsi->output; + + if (!drm_core_check_feature(bridge->dev, DRIVER_ATOMIC)) { + dev_err(dsi->base.dev, + "cdns-dsi driver is only compatible with DRM devices supporting atomic updates"); + return -ENOTSUPP; + } + + return drm_bridge_attach(bridge->encoder, output->bridge, bridge); +} + +static enum drm_mode_status +cdns_dsi_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_mode *mode) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + struct cdns_dsi_output *output = &dsi->output; + struct cdns_dphy_cfg dphy_cfg; + struct cdns_dsi_cfg dsi_cfg; + int bpp, nlanes, ret; + + /* + * VFP_DSI should be less than VFP_DPI and VFP_DSI should be at + * least 1. + */ + if (mode->vtotal - mode->vsync_end < 2) + return MODE_V_ILLEGAL; + + /* VSA_DSI = VSA_DPI and must be at least 2. */ + if (mode->vsync_end - mode->vsync_start < 2) + return MODE_V_ILLEGAL; + + /* HACT must be 32-bits aligned. 
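In other words, the active line payload (hdisplay * bpp bits) must be a whole number of 32-bit words, which is what the check below enforces.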
*/ + bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format); + if ((mode->hdisplay * bpp) % 32) + return MODE_H_ILLEGAL; + + nlanes = output->dev->lanes; + + ret = cdns_dsi_mode2cfg(dsi, mode, &dsi_cfg, &dphy_cfg, true); + if (ret) + return MODE_CLOCK_RANGE; + + return MODE_OK; +} + +static void cdns_dsi_bridge_disable(struct drm_bridge *bridge) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + u32 val; + + val = readl(dsi->regs + MCTL_MAIN_DATA_CTL); + val &= ~(IF_VID_SELECT_MASK | IF_VID_MODE | VID_EN | HOST_EOT_GEN | + DISP_EOT_GEN); + writel(val, dsi->regs + MCTL_MAIN_DATA_CTL); + + val = readl(dsi->regs + MCTL_MAIN_EN) & ~IF_EN(input->id); + writel(val, dsi->regs + MCTL_MAIN_EN); + pm_runtime_put(dsi->base.dev); +} + +static void cdns_dsi_hs_init(struct cdns_dsi *dsi, + const struct cdns_dphy_cfg *dphy_cfg) +{ + u32 status; + + /* + * Power all internal DPHY blocks down and maintain their reset line + * asserted before changing the DPHY config. + */ + writel(DPHY_CMN_PSO | DPHY_PLL_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | + DPHY_CMN_PDN | DPHY_PLL_PDN, + dsi->regs + MCTL_DPHY_CFG0); + + /* + * Configure the internal PSM clk divider so that the DPHY has a + * 1 MHz clk (or something close). + */ + WARN_ON_ONCE(cdns_dphy_setup_psm(dsi->dphy)); + + /* + * Attach clk lanes to data lanes: the DPHY has 2 clk lanes + * and 8 data lanes, and each clk lane can be attached to a different + * set of data lanes. The 2 groups are named 'left' and 'right', so here we + * just say that we want the 'left' clk lane to drive the 'left' data + * lanes. + */ + cdns_dphy_set_clk_lane_cfg(dsi->dphy, DPHY_CLK_CFG_LEFT_DRIVES_LEFT); + + /* + * Configure the DPHY PLL that will be used to generate the TX byte + * clk. + */ + cdns_dphy_set_pll_cfg(dsi->dphy, dphy_cfg); + + /* Start TX state machine. */ + writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, + dsi->dphy->regs + DPHY_CMN_SSM); + + /* Activate the PLL and wait until it's locked. */ + writel(PLL_LOCKED, dsi->regs + MCTL_MAIN_STS_CLR); + writel(DPHY_CMN_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | DPHY_CMN_PDN, + dsi->regs + MCTL_DPHY_CFG0); + WARN_ON_ONCE(readl_poll_timeout(dsi->regs + MCTL_MAIN_STS, status, + status & PLL_LOCKED, 100, 100)); + /* De-assert data and clock reset lines. */ + writel(DPHY_CMN_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | DPHY_CMN_PDN | + DPHY_D_RSTB(dphy_cfg->nlanes) | DPHY_C_RSTB, + dsi->regs + MCTL_DPHY_CFG0); +} + +static void cdns_dsi_init_link(struct cdns_dsi *dsi) +{ + struct cdns_dsi_output *output = &dsi->output; + unsigned long sysclk_period, ulpout; + u32 val; + int i; + + if (dsi->link_initialized) + return; + + val = 0; + for (i = 1; i < output->dev->lanes; i++) + val |= DATA_LANE_EN(i); + + if (!(output->dev->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)) + val |= CLK_CONTINUOUS; + + writel(val, dsi->regs + MCTL_MAIN_PHY_CTL); + + /* ULPOUT should be set to 1ms and is expressed in sysclk cycles. 
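For example, a (hypothetical) 150 MHz dsi_sys_clk gives a 6 ns sysclk_period and ulpout = DIV_ROUND_UP(1000000, 6) = 166667 cycles.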
*/ + sysclk_period = NSEC_PER_SEC / clk_get_rate(dsi->dsi_sys_clk); + ulpout = DIV_ROUND_UP(NSEC_PER_MSEC, sysclk_period); + writel(CLK_LANE_ULPOUT_TIME(ulpout) | DATA_LANE_ULPOUT_TIME(ulpout), + dsi->regs + MCTL_ULPOUT_TIME); + + writel(LINK_EN, dsi->regs + MCTL_MAIN_DATA_CTL); + + val = CLK_LANE_EN | PLL_START; + for (i = 0; i < output->dev->lanes; i++) + val |= DATA_LANE_START(i); + + writel(val, dsi->regs + MCTL_MAIN_EN); + + dsi->link_initialized = true; +} + +static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + struct cdns_dsi_output *output = &dsi->output; + struct drm_display_mode *mode; + struct cdns_dphy_cfg dphy_cfg; + unsigned long tx_byte_period; + struct cdns_dsi_cfg dsi_cfg; + u32 tmp, reg_wakeup, div; + int bpp, nlanes; + + if (WARN_ON(pm_runtime_get_sync(dsi->base.dev) < 0)) + return; + + mode = &bridge->encoder->crtc->state->adjusted_mode; + bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format); + nlanes = output->dev->lanes; + + WARN_ON_ONCE(cdns_dsi_mode2cfg(dsi, mode, &dsi_cfg, &dphy_cfg, false)); + + cdns_dsi_hs_init(dsi, &dphy_cfg); + cdns_dsi_init_link(dsi); + + writel(HBP_LEN(dsi_cfg.hbp) | HSA_LEN(dsi_cfg.hsa), + dsi->regs + VID_HSIZE1); + writel(HFP_LEN(dsi_cfg.hfp) | HACT_LEN(dsi_cfg.hact), + dsi->regs + VID_HSIZE2); + + writel(VBP_LEN(mode->crtc_vtotal - mode->crtc_vsync_end - 1) | + VFP_LEN(mode->crtc_vsync_start - mode->crtc_vdisplay) | + VSA_LEN(mode->crtc_vsync_end - mode->crtc_vsync_start + 1), + dsi->regs + VID_VSIZE1); + writel(mode->crtc_vdisplay, dsi->regs + VID_VSIZE2); + + tmp = dsi_cfg.htotal - + (dsi_cfg.hsa + DSI_BLANKING_FRAME_OVERHEAD + + DSI_HSA_FRAME_OVERHEAD); + writel(BLK_LINE_PULSE_PKT_LEN(tmp), dsi->regs + VID_BLKSIZE2); + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + writel(MAX_LINE_LIMIT(tmp - DSI_NULL_FRAME_OVERHEAD), + dsi->regs + VID_VCA_SETTING2); + + tmp = dsi_cfg.htotal - + (DSI_HSS_VSS_VSE_FRAME_OVERHEAD + DSI_BLANKING_FRAME_OVERHEAD); + writel(BLK_LINE_EVENT_PKT_LEN(tmp), dsi->regs + VID_BLKSIZE1); + if (!(output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE)) + writel(MAX_LINE_LIMIT(tmp - DSI_NULL_FRAME_OVERHEAD), + dsi->regs + VID_VCA_SETTING2); + + tmp = DIV_ROUND_UP(dsi_cfg.htotal, nlanes) - + DIV_ROUND_UP(dsi_cfg.hsa, nlanes); + + if (!(output->dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + tmp -= DIV_ROUND_UP(DSI_EOT_PKT_SIZE, nlanes); + + tx_byte_period = DIV_ROUND_DOWN_ULL((u64)NSEC_PER_SEC * 8, + dphy_cfg.lane_bps); + reg_wakeup = cdns_dphy_get_wakeup_time_ns(dsi->dphy) / + tx_byte_period; + writel(REG_WAKEUP_TIME(reg_wakeup) | REG_LINE_DURATION(tmp), + dsi->regs + VID_DPHY_TIME); + + /* + * HSTX and LPRX timeouts are both expressed in TX byte clk cycles and + * both should be set to at least the time it takes to transmit a + * frame. 
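+ * For example, at 60 Hz with a (hypothetical) 500 Mbps per-lane rate, the TX byte period is 16 ns and one frame takes ~1041666 byte clocks; HSTX_TIMEOUT_MAX is 262143, so the divider loop below would settle on CLK_DIV(2) with a shifted timeout of 260416.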
+ */ + tmp = NSEC_PER_SEC / drm_mode_vrefresh(mode); + tmp /= tx_byte_period; + + for (div = 0; div <= CLK_DIV_MAX; div++) { + if (tmp <= HSTX_TIMEOUT_MAX) + break; + + tmp >>= 1; + } + + if (tmp > HSTX_TIMEOUT_MAX) + tmp = HSTX_TIMEOUT_MAX; + + writel(CLK_DIV(div) | HSTX_TIMEOUT(tmp), + dsi->regs + MCTL_DPHY_TIMEOUT1); + + writel(LPRX_TIMEOUT(tmp), dsi->regs + MCTL_DPHY_TIMEOUT2); + + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO) { + switch (output->dev->format) { + case MIPI_DSI_FMT_RGB888: + tmp = VID_PIXEL_MODE_RGB888 | + VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_24); + break; + + case MIPI_DSI_FMT_RGB666: + tmp = VID_PIXEL_MODE_RGB666 | + VID_DATATYPE(MIPI_DSI_PIXEL_STREAM_3BYTE_18); + break; + + case MIPI_DSI_FMT_RGB666_PACKED: + tmp = VID_PIXEL_MODE_RGB666_PACKED | + VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_18); + break; + + case MIPI_DSI_FMT_RGB565: + tmp = VID_PIXEL_MODE_RGB565 | + VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_16); + break; + + default: + dev_err(dsi->base.dev, "Unsupported DSI format\n"); + return; + } + + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + tmp |= SYNC_PULSE_ACTIVE | SYNC_PULSE_HORIZONTAL; + + tmp |= REG_BLKLINE_MODE(REG_BLK_MODE_BLANKING_PKT) | + REG_BLKEOL_MODE(REG_BLK_MODE_BLANKING_PKT) | + RECOVERY_MODE(RECOVERY_MODE_NEXT_HSYNC) | + VID_IGNORE_MISS_VSYNC; + + writel(tmp, dsi->regs + VID_MAIN_CTL); + } + + tmp = readl(dsi->regs + MCTL_MAIN_DATA_CTL); + tmp &= ~(IF_VID_SELECT_MASK | HOST_EOT_GEN | IF_VID_MODE); + + if (!(output->dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + tmp |= HOST_EOT_GEN; + + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO) + tmp |= IF_VID_MODE | IF_VID_SELECT(input->id) | VID_EN; + + writel(tmp, dsi->regs + MCTL_MAIN_DATA_CTL); + + tmp = readl(dsi->regs + MCTL_MAIN_EN) | IF_EN(input->id); + writel(tmp, dsi->regs + MCTL_MAIN_EN); +} + +static const struct drm_bridge_funcs cdns_dsi_bridge_funcs = { + .attach = cdns_dsi_bridge_attach, + .mode_valid = cdns_dsi_bridge_mode_valid, + .disable = cdns_dsi_bridge_disable, + .enable = cdns_dsi_bridge_enable, +}; + +static int cdns_dsi_attach(struct mipi_dsi_host *host, + struct mipi_dsi_device *dev) +{ + struct cdns_dsi *dsi = to_cdns_dsi(host); + struct cdns_dsi_output *output = &dsi->output; + struct cdns_dsi_input *input = &dsi->input; + struct drm_bridge *bridge; + struct drm_panel *panel; + struct device_node *np; + int ret; + + /* + * We currently do not support connecting several DSI devices to the + * same host. In order to support that we'd need the DRM bridge + * framework to allow dynamic reconfiguration of the bridge chain. + */ + if (output->dev) + return -EBUSY; + + /* We do not support burst mode yet. */ + if (dev->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) + return -ENOTSUPP; + + /* + * The host <-> device link might be described using an OF-graph + * representation; in this case we extract the device of_node from + * that representation. Otherwise we use dsidev->dev.of_node, which + * should have been filled by the core.
+ */ + np = of_graph_get_remote_node(dsi->base.dev->of_node, DSI_OUTPUT_PORT, + dev->channel); + if (!np) + np = of_node_get(dev->dev.of_node); + + panel = of_drm_find_panel(np); + if (panel) { + bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DSI); + } else { + bridge = of_drm_find_bridge(dev->dev.of_node); + if (!bridge) + bridge = ERR_PTR(-EINVAL); + } + + of_node_put(np); + + if (IS_ERR(bridge)) { + ret = PTR_ERR(bridge); + dev_err(host->dev, "failed to add DSI device %s (err = %d)", + dev->name, ret); + return ret; + } + + output->dev = dev; + output->bridge = bridge; + output->panel = panel; + + /* + * The DSI output has been properly configured, so we can now safely + * register the input with the bridge framework and let it take its + * place in a display pipeline. + */ + drm_bridge_add(&input->bridge); + + return 0; +} + +static int cdns_dsi_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *dev) +{ + struct cdns_dsi *dsi = to_cdns_dsi(host); + struct cdns_dsi_output *output = &dsi->output; + struct cdns_dsi_input *input = &dsi->input; + + drm_bridge_remove(&input->bridge); + if (output->panel) + drm_panel_bridge_remove(output->bridge); + + return 0; +} + +static irqreturn_t cdns_dsi_interrupt(int irq, void *data) +{ + struct cdns_dsi *dsi = data; + irqreturn_t ret = IRQ_NONE; + u32 flag, ctl; + + flag = readl(dsi->regs + DIRECT_CMD_STS_FLAG); + if (flag) { + ctl = readl(dsi->regs + DIRECT_CMD_STS_CTL); + ctl &= ~flag; + writel(ctl, dsi->regs + DIRECT_CMD_STS_CTL); + complete(&dsi->direct_cmd_comp); + ret = IRQ_HANDLED; + } + + return ret; +} + +static ssize_t cdns_dsi_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct cdns_dsi *dsi = to_cdns_dsi(host); + u32 cmd, sts, val, wait = WRITE_COMPLETED, ctl = 0; + struct mipi_dsi_packet packet; + int ret, i, tx_len, rx_len; + + ret = pm_runtime_get_sync(host->dev); + if (ret < 0) + return ret; + + cdns_dsi_init_link(dsi); + + ret = mipi_dsi_create_packet(&packet, msg); + if (ret) + goto out; + + tx_len = msg->tx_buf ? msg->tx_len : 0; + rx_len = msg->rx_buf ? msg->rx_len : 0; + + /* For read operations, the maximum TX len is 2. */ + if (rx_len && tx_len > 2) { + ret = -ENOTSUPP; + goto out; + } + + /* TX len is limited by the CMD FIFO depth. */ + if (tx_len > dsi->direct_cmd_fifo_depth) { + ret = -ENOTSUPP; + goto out; + } + + /* RX len is limited by the RX FIFO depth. */ + if (rx_len > dsi->rx_fifo_depth) { + ret = -ENOTSUPP; + goto out; + } + + cmd = CMD_SIZE(tx_len) | CMD_VCHAN_ID(msg->channel) | + CMD_DATATYPE(msg->type); + + if (msg->flags & MIPI_DSI_MSG_USE_LPM) + cmd |= CMD_LP_EN; + + if (mipi_dsi_packet_format_is_long(msg->type)) + cmd |= CMD_LONG; + + if (rx_len) { + cmd |= READ_CMD; + wait = READ_COMPLETED_WITH_ERR | READ_COMPLETED; + ctl = READ_EN | BTA_EN; + } else if (msg->flags & MIPI_DSI_MSG_REQ_ACK) { + cmd |= BTA_REQ; + wait = ACK_WITH_ERR_RCVD | ACK_RCVD; + ctl = BTA_EN; + } + + writel(readl(dsi->regs + MCTL_MAIN_DATA_CTL) | ctl, + dsi->regs + MCTL_MAIN_DATA_CTL); + + writel(cmd, dsi->regs + DIRECT_CMD_MAIN_SETTINGS); + + for (i = 0; i < tx_len; i += 4) { + const u8 *buf = msg->tx_buf; + int j; + + val = 0; + for (j = 0; j < 4 && j + i < tx_len; j++) + val |= (u32)buf[i + j] << (8 * j); + + writel(val, dsi->regs + DIRECT_CMD_WRDATA); + } + + /* Clear status flags before sending the command.
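+ * The events being waited for are then unmasked in DIRECT_CMD_STS_CTL so
+ * that cdns_dsi_interrupt() completes direct_cmd_comp as soon as one of
+ * them fires, and the transfer itself is kicked off by the write to
+ * DIRECT_CMD_SEND below.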
*/ + writel(wait, dsi->regs + DIRECT_CMD_STS_CLR); + writel(wait, dsi->regs + DIRECT_CMD_STS_CTL); + reinit_completion(&dsi->direct_cmd_comp); + writel(0, dsi->regs + DIRECT_CMD_SEND); + + wait_for_completion_timeout(&dsi->direct_cmd_comp, + msecs_to_jiffies(1000)); + + sts = readl(dsi->regs + DIRECT_CMD_STS); + writel(wait, dsi->regs + DIRECT_CMD_STS_CLR); + writel(0, dsi->regs + DIRECT_CMD_STS_CTL); + + writel(readl(dsi->regs + MCTL_MAIN_DATA_CTL) & ~ctl, + dsi->regs + MCTL_MAIN_DATA_CTL); + + /* We did not receive the events we were waiting for. */ + if (!(sts & wait)) { + ret = -ETIMEDOUT; + goto out; + } + + /* 'READ' or 'WRITE with ACK' failed. */ + if (sts & (READ_COMPLETED_WITH_ERR | ACK_WITH_ERR_RCVD)) { + ret = -EIO; + goto out; + } + + for (i = 0; i < rx_len; i += 4) { + u8 *buf = msg->rx_buf; + int j; + + val = readl(dsi->regs + DIRECT_CMD_RDDATA); + for (j = 0; j < 4 && j + i < rx_len; j++) + buf[i + j] = val >> (8 * j); + } + +out: + pm_runtime_put(host->dev); + return ret; +} + +static const struct mipi_dsi_host_ops cdns_dsi_ops = { + .attach = cdns_dsi_attach, + .detach = cdns_dsi_detach, + .transfer = cdns_dsi_transfer, +}; + +static int cdns_dsi_resume(struct device *dev) +{ + struct cdns_dsi *dsi = dev_get_drvdata(dev); + + reset_control_deassert(dsi->dsi_p_rst); + clk_prepare_enable(dsi->dsi_p_clk); + clk_prepare_enable(dsi->dsi_sys_clk); + clk_prepare_enable(dsi->dphy->psm_clk); + clk_prepare_enable(dsi->dphy->pll_ref_clk); + + return 0; +} + +static int cdns_dsi_suspend(struct device *dev) +{ + struct cdns_dsi *dsi = dev_get_drvdata(dev); + + clk_disable_unprepare(dsi->dphy->pll_ref_clk); + clk_disable_unprepare(dsi->dphy->psm_clk); + clk_disable_unprepare(dsi->dsi_sys_clk); + clk_disable_unprepare(dsi->dsi_p_clk); + reset_control_assert(dsi->dsi_p_rst); + dsi->link_initialized = false; + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(cdns_dsi_pm_ops, cdns_dsi_suspend, cdns_dsi_resume, + NULL); + +static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy) +{ + /* Default wakeup time is 800 ns (in a simulated environment). */ + return 800; +} + +static void cdns_dphy_ref_set_pll_cfg(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg) +{ + u32 fbdiv_low, fbdiv_high; + + fbdiv_low = (cfg->pll_fbdiv / 4) - 2; + fbdiv_high = cfg->pll_fbdiv - fbdiv_low - 2; + + writel(DPHY_CMN_IPDIV_FROM_REG | DPHY_CMN_OPDIV_FROM_REG | + DPHY_CMN_IPDIV(cfg->pll_ipdiv) | + DPHY_CMN_OPDIV(cfg->pll_opdiv), + dphy->regs + DPHY_CMN_OPIPDIV); + writel(DPHY_CMN_FBDIV_FROM_REG | + DPHY_CMN_FBDIV_VAL(fbdiv_low, fbdiv_high), + dphy->regs + DPHY_CMN_FBDIV); + writel(DPHY_CMN_PWM_HIGH(6) | DPHY_CMN_PWM_LOW(0x101) | + DPHY_CMN_PWM_DIV(0x8), + dphy->regs + DPHY_CMN_PWM); +} + +static void cdns_dphy_ref_set_psm_div(struct cdns_dphy *dphy, u8 div) +{ + writel(DPHY_PSM_CFG_FROM_REG | DPHY_PSM_CLK_DIV(div), + dphy->regs + DPHY_PSM_CFG); +} + +/* + * This is the reference implementation of the DPHY hooks. Specific + * integrations of this IP may have to re-implement some of them depending on + * how things are wired in the SoC.
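+ * For example, a hypothetical SoC that routes the PSM clk divider through
+ * a system controller instead of the DPHY register space would provide its
+ * own set_psm_div() hook and keep the other hooks from this reference
+ * implementation.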
+ */ +static const struct cdns_dphy_ops ref_dphy_ops = { + .get_wakeup_time_ns = cdns_dphy_ref_get_wakeup_time_ns, + .set_pll_cfg = cdns_dphy_ref_set_pll_cfg, + .set_psm_div = cdns_dphy_ref_set_psm_div, +}; + +static const struct of_device_id cdns_dphy_of_match[] = { + { .compatible = "cdns,dphy", .data = &ref_dphy_ops }, + { /* sentinel */ }, +}; + +static struct cdns_dphy *cdns_dphy_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + struct cdns_dphy *dphy; + struct of_phandle_args args; + struct resource res; + int ret; + + ret = of_parse_phandle_with_args(pdev->dev.of_node, "phys", + "#phy-cells", 0, &args); + if (ret) + return ERR_PTR(-ENOENT); + + match = of_match_node(cdns_dphy_of_match, args.np); + if (!match || !match->data) + return ERR_PTR(-EINVAL); + + dphy = devm_kzalloc(&pdev->dev, sizeof(*dphy), GFP_KERNEL); + if (!dphy) + return ERR_PTR(-ENOMEM); + + dphy->ops = match->data; + + ret = of_address_to_resource(args.np, 0, &res); + if (ret) + return ERR_PTR(ret); + + dphy->regs = devm_ioremap_resource(&pdev->dev, &res); + if (IS_ERR(dphy->regs)) + return ERR_CAST(dphy->regs); + + dphy->psm_clk = of_clk_get_by_name(args.np, "psm"); + if (IS_ERR(dphy->psm_clk)) + return ERR_CAST(dphy->psm_clk); + + dphy->pll_ref_clk = of_clk_get_by_name(args.np, "pll_ref"); + if (IS_ERR(dphy->pll_ref_clk)) { + ret = PTR_ERR(dphy->pll_ref_clk); + goto err_put_psm_clk; + } + + if (dphy->ops->probe) { + ret = dphy->ops->probe(dphy); + if (ret) + goto err_put_pll_ref_clk; + } + + return dphy; + +err_put_pll_ref_clk: + clk_put(dphy->pll_ref_clk); + +err_put_psm_clk: + clk_put(dphy->psm_clk); + + return ERR_PTR(ret); +} + +static void cdns_dphy_remove(struct cdns_dphy *dphy) +{ + if (dphy->ops->remove) + dphy->ops->remove(dphy); + + clk_put(dphy->pll_ref_clk); + clk_put(dphy->psm_clk); +} + +static int cdns_dsi_drm_probe(struct platform_device *pdev) +{ + struct cdns_dsi *dsi; + struct cdns_dsi_input *input; + struct resource *res; + int ret, irq; + u32 val; + + dsi = devm_kzalloc(&pdev->dev, sizeof(*dsi), GFP_KERNEL); + if (!dsi) + return -ENOMEM; + + platform_set_drvdata(pdev, dsi); + + input = &dsi->input; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dsi->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dsi->regs)) + return PTR_ERR(dsi->regs); + + dsi->dsi_p_clk = devm_clk_get(&pdev->dev, "dsi_p_clk"); + if (IS_ERR(dsi->dsi_p_clk)) + return PTR_ERR(dsi->dsi_p_clk); + + dsi->dsi_p_rst = devm_reset_control_get_optional_exclusive(&pdev->dev, + "dsi_p_rst"); + if (IS_ERR(dsi->dsi_p_rst)) + return PTR_ERR(dsi->dsi_p_rst); + + dsi->dsi_sys_clk = devm_clk_get(&pdev->dev, "dsi_sys_clk"); + if (IS_ERR(dsi->dsi_sys_clk)) + return PTR_ERR(dsi->dsi_sys_clk); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + dsi->dphy = cdns_dphy_probe(pdev); + if (IS_ERR(dsi->dphy)) + return PTR_ERR(dsi->dphy); + + ret = clk_prepare_enable(dsi->dsi_p_clk); + if (ret) + goto err_remove_dphy; + + val = readl(dsi->regs + ID_REG); + if (REV_VENDOR_ID(val) != 0xcad) { + dev_err(&pdev->dev, "invalid vendor id\n"); + ret = -EINVAL; + goto err_disable_pclk; + } + + val = readl(dsi->regs + IP_CONF); + dsi->direct_cmd_fifo_depth = 1 << (DIRCMD_FIFO_DEPTH(val) + 2); + dsi->rx_fifo_depth = RX_FIFO_DEPTH(val); + init_completion(&dsi->direct_cmd_comp); + + writel(0, dsi->regs + MCTL_MAIN_DATA_CTL); + writel(0, dsi->regs + MCTL_MAIN_EN); + writel(0, dsi->regs + MCTL_MAIN_PHY_CTL); + + /* + * We only support the DPI input, so force input->id to + * CDNS_DPI_INPUT. 
+ */ + input->id = CDNS_DPI_INPUT; + input->bridge.funcs = &cdns_dsi_bridge_funcs; + input->bridge.of_node = pdev->dev.of_node; + + /* Mask all interrupts before registering the IRQ handler. */ + writel(0, dsi->regs + MCTL_MAIN_STS_CTL); + writel(0, dsi->regs + MCTL_DPHY_ERR_CTL1); + writel(0, dsi->regs + CMD_MODE_STS_CTL); + writel(0, dsi->regs + DIRECT_CMD_STS_CTL); + writel(0, dsi->regs + DIRECT_CMD_RD_STS_CTL); + writel(0, dsi->regs + VID_MODE_STS_CTL); + writel(0, dsi->regs + TVG_STS_CTL); + writel(0, dsi->regs + DPI_IRQ_EN); + ret = devm_request_irq(&pdev->dev, irq, cdns_dsi_interrupt, 0, + dev_name(&pdev->dev), dsi); + if (ret) + goto err_disable_pclk; + + pm_runtime_enable(&pdev->dev); + dsi->base.dev = &pdev->dev; + dsi->base.ops = &cdns_dsi_ops; + + ret = mipi_dsi_host_register(&dsi->base); + if (ret) + goto err_disable_runtime_pm; + + clk_disable_unprepare(dsi->dsi_p_clk); + + return 0; + +err_disable_runtime_pm: + pm_runtime_disable(&pdev->dev); + +err_disable_pclk: + clk_disable_unprepare(dsi->dsi_p_clk); + +err_remove_dphy: + cdns_dphy_remove(dsi->dphy); + + return ret; +} + +static int cdns_dsi_drm_remove(struct platform_device *pdev) +{ + struct cdns_dsi *dsi = platform_get_drvdata(pdev); + + mipi_dsi_host_unregister(&dsi->base); + pm_runtime_disable(&pdev->dev); + cdns_dphy_remove(dsi->dphy); + + return 0; +} + +static const struct of_device_id cdns_dsi_of_match[] = { + { .compatible = "cdns,dsi" }, + { }, +}; + +static struct platform_driver cdns_dsi_platform_driver = { + .probe = cdns_dsi_drm_probe, + .remove = cdns_dsi_drm_remove, + .driver = { + .name = "cdns-dsi", + .of_match_table = cdns_dsi_of_match, + .pm = &cdns_dsi_pm_ops, + }, +}; +module_platform_driver(cdns_dsi_platform_driver); + +MODULE_AUTHOR("Boris Brezillon <boris.brezillon@bootlin.com>"); +MODULE_DESCRIPTION("Cadence DSI driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:cdns-dsi"); + diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c index 3b7e5c59a5e9..8f9c8a6b46de 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c @@ -152,7 +152,6 @@ static struct platform_driver snd_dw_hdmi_driver = { .remove = snd_dw_hdmi_remove, .driver = { .name = DRIVER_NAME, - .owner = THIS_MODULE, }, }; module_platform_driver(snd_dw_hdmi_driver); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index 226171a3ece1..fd7999642cf8 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd * Copyright (C) STMicroelectronics SA 2017 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * * Modified by Philippe Cornu <philippe.cornu@st.com> * This generic Synopsys DesignWare MIPI DSI host driver is based on the * Rockchip version from rockchip/dw-mipi-dsi.c with phy & bridge APIs. 
@@ -775,20 +771,20 @@ static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge, clk_prepare_enable(dsi->pclk); - ret = phy_ops->get_lane_mbps(priv_data, mode, dsi->mode_flags, + ret = phy_ops->get_lane_mbps(priv_data, adjusted_mode, dsi->mode_flags, dsi->lanes, dsi->format, &dsi->lane_mbps); if (ret) DRM_DEBUG_DRIVER("Phy get_lane_mbps() failed\n"); pm_runtime_get_sync(dsi->dev); dw_mipi_dsi_init(dsi); - dw_mipi_dsi_dpi_config(dsi, mode); + dw_mipi_dsi_dpi_config(dsi, adjusted_mode); dw_mipi_dsi_packet_handler_config(dsi); dw_mipi_dsi_video_mode_config(dsi); - dw_mipi_dsi_video_packet_config(dsi, mode); + dw_mipi_dsi_video_packet_config(dsi, adjusted_mode); dw_mipi_dsi_command_mode_config(dsi); - dw_mipi_dsi_line_timer_config(dsi, mode); - dw_mipi_dsi_vertical_timing_config(dsi, mode); + dw_mipi_dsi_line_timer_config(dsi, adjusted_mode); + dw_mipi_dsi_vertical_timing_config(dsi, adjusted_mode); dw_mipi_dsi_dphy_init(dsi); dw_mipi_dsi_dphy_timing_config(dsi); @@ -802,7 +798,7 @@ static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge, dw_mipi_dsi_dphy_enable(dsi); - dw_mipi_dsi_wait_for_two_frames(mode); + dw_mipi_dsi_wait_for_two_frames(adjusted_mode); /* Switch to cmd mode for panel-bridge pre_enable & panel prepare */ dw_mipi_dsi_set_mode(dsi, 0); diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 08ab7d6aea65..0fd9cf27542c 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1102,7 +1102,7 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, return true; } -static int tc_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { /* DPI interface clock limitation: up to 154 MHz */ diff --git a/drivers/gpu/drm/bridge/thc63lvd1024.c b/drivers/gpu/drm/bridge/thc63lvd1024.c new file mode 100644 index 000000000000..c8b9edd5a7f4 --- /dev/null +++ b/drivers/gpu/drm/bridge/thc63lvd1024.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * THC63LVD1024 LVDS to parallel data DRM bridge driver.
+ * + * Copyright (C) 2018 Jacopo Mondi <jacopo+renesas@jmondi.org> + */ + +#include <drm/drmP.h> +#include <drm/drm_bridge.h> +#include <drm/drm_panel.h> + +#include <linux/gpio/consumer.h> +#include <linux/of_graph.h> +#include <linux/regulator/consumer.h> +#include <linux/slab.h> + +enum thc63_ports { + THC63_LVDS_IN0, + THC63_LVDS_IN1, + THC63_RGB_OUT0, + THC63_RGB_OUT1, +}; + +struct thc63_dev { + struct device *dev; + + struct regulator *vcc; + + struct gpio_desc *pdwn; + struct gpio_desc *oe; + + struct drm_bridge bridge; + struct drm_bridge *next; +}; + +static inline struct thc63_dev *to_thc63(struct drm_bridge *bridge) +{ + return container_of(bridge, struct thc63_dev, bridge); +} + +static int thc63_attach(struct drm_bridge *bridge) +{ + struct thc63_dev *thc63 = to_thc63(bridge); + + return drm_bridge_attach(bridge->encoder, thc63->next, bridge); +} + +static void thc63_enable(struct drm_bridge *bridge) +{ + struct thc63_dev *thc63 = to_thc63(bridge); + int ret; + + ret = regulator_enable(thc63->vcc); + if (ret) { + dev_err(thc63->dev, + "Failed to enable regulator \"vcc\": %d\n", ret); + return; + } + + gpiod_set_value(thc63->pdwn, 0); + gpiod_set_value(thc63->oe, 1); +} + +static void thc63_disable(struct drm_bridge *bridge) +{ + struct thc63_dev *thc63 = to_thc63(bridge); + int ret; + + gpiod_set_value(thc63->oe, 0); + gpiod_set_value(thc63->pdwn, 1); + + ret = regulator_disable(thc63->vcc); + if (ret) + dev_err(thc63->dev, + "Failed to disable regulator \"vcc\": %d\n", ret); +} + +static const struct drm_bridge_funcs thc63_bridge_func = { + .attach = thc63_attach, + .enable = thc63_enable, + .disable = thc63_disable, +}; + +static int thc63_parse_dt(struct thc63_dev *thc63) +{ + struct device_node *thc63_out; + struct device_node *remote; + + thc63_out = of_graph_get_endpoint_by_regs(thc63->dev->of_node, + THC63_RGB_OUT0, -1); + if (!thc63_out) { + dev_err(thc63->dev, "Missing endpoint in port@%u\n", + THC63_RGB_OUT0); + return -ENODEV; + } + + remote = of_graph_get_remote_port_parent(thc63_out); + of_node_put(thc63_out); + if (!remote) { + dev_err(thc63->dev, "Endpoint in port@%u unconnected\n", + THC63_RGB_OUT0); + return -ENODEV; + } + + if (!of_device_is_available(remote)) { + dev_err(thc63->dev, "port@%u remote endpoint is disabled\n", + THC63_RGB_OUT0); + of_node_put(remote); + return -ENODEV; + } + + thc63->next = of_drm_find_bridge(remote); + of_node_put(remote); + if (!thc63->next) + return -EPROBE_DEFER; + + return 0; +} + +static int thc63_gpio_init(struct thc63_dev *thc63) +{ + thc63->oe = devm_gpiod_get_optional(thc63->dev, "oe", GPIOD_OUT_LOW); + if (IS_ERR(thc63->oe)) { + dev_err(thc63->dev, "Unable to get \"oe-gpios\": %ld\n", + PTR_ERR(thc63->oe)); + return PTR_ERR(thc63->oe); + } + + thc63->pdwn = devm_gpiod_get_optional(thc63->dev, "powerdown", + GPIOD_OUT_HIGH); + if (IS_ERR(thc63->pdwn)) { + dev_err(thc63->dev, "Unable to get \"powerdown-gpios\": %ld\n", + PTR_ERR(thc63->pdwn)); + return PTR_ERR(thc63->pdwn); + } + + return 0; +} + +static int thc63_probe(struct platform_device *pdev) +{ + struct thc63_dev *thc63; + int ret; + + thc63 = devm_kzalloc(&pdev->dev, sizeof(*thc63), GFP_KERNEL); + if (!thc63) + return -ENOMEM; + + thc63->dev = &pdev->dev; + platform_set_drvdata(pdev, thc63); + + thc63->vcc = devm_regulator_get_optional(thc63->dev, "vcc"); + if (IS_ERR(thc63->vcc)) { + if (PTR_ERR(thc63->vcc) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + dev_err(thc63->dev, "Unable to get \"vcc\" supply: %ld\n", + PTR_ERR(thc63->vcc)); + return 
PTR_ERR(thc63->vcc); + } + + ret = thc63_gpio_init(thc63); + if (ret) + return ret; + + ret = thc63_parse_dt(thc63); + if (ret) + return ret; + + thc63->bridge.driver_private = thc63; + thc63->bridge.of_node = pdev->dev.of_node; + thc63->bridge.funcs = &thc63_bridge_func; + + drm_bridge_add(&thc63->bridge); + + return 0; +} + +static int thc63_remove(struct platform_device *pdev) +{ + struct thc63_dev *thc63 = platform_get_drvdata(pdev); + + drm_bridge_remove(&thc63->bridge); + + return 0; +} + +static const struct of_device_id thc63_match[] = { + { .compatible = "thine,thc63lvd1024", }, + { }, +}; +MODULE_DEVICE_TABLE(of, thc63_match); + +static struct platform_driver thc63_driver = { + .probe = thc63_probe, + .remove = thc63_remove, + .driver = { + .name = "thc63lvd1024", + .of_match_table = thc63_match, + }, +}; +module_platform_driver(thc63_driver); + +MODULE_AUTHOR("Jacopo Mondi <jacopo@jmondi.org>"); +MODULE_DESCRIPTION("Thine THC63LVD1024 LVDS decoder DRM bridge driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c825c76edc1d..895741e9cd7d 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -791,6 +791,8 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, state->src_w = val; } else if (property == config->prop_src_h) { state->src_h = val; + } else if (property == plane->alpha_property) { + state->alpha = val; } else if (property == plane->rotation_property) { if (!is_power_of_2(val & DRM_MODE_ROTATE_MASK)) return -EINVAL; @@ -856,6 +858,8 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->src_w; } else if (property == config->prop_src_h) { *val = state->src_h; + } else if (property == plane->alpha_property) { + *val = state->alpha; } else if (property == plane->rotation_property) { *val = state->rotation; } else if (property == plane->zpos_property) { @@ -1429,7 +1433,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; - + /* Nothing to do for the same crtc. */ + if (plane_state->crtc == crtc) + return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); @@ -1500,6 +1506,14 @@ EXPORT_SYMBOL(drm_atomic_set_fb_for_plane); * Otherwise, if &drm_plane_state.fence is not set, this function just sets it * to the received implicit fence. In both cases this function consumes a * reference for @fence. + * + * This way explicit fencing can be used to overrule implicit fencing, which is + * important to make explicit fencing use-cases work: One example is using one + * buffer for 2 screens with different refresh rates. Implicit fencing will + * clamp rendering to the refresh rate of the slower screen, whereas an explicit + * fence allows 2 independent render and display loops on a single buffer. If a + * driver obeys both implicit and explicit fences for plane updates, it will + * break all the benefits of explicit fencing.
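+ *
+ * A driver's prepare_fb hook can thus simply hand in the implicit fence
+ * unconditionally, e.g. (sketch; dma_buf stands for the buffer backing
+ * the new framebuffer, as in drm_gem_fb_prepare_fb()):
+ *
+ *	fence = reservation_object_get_excl_rcu(dma_buf->resv);
+ *	drm_atomic_set_fence_for_plane(plane_state, fence);
+ *
+ * and an explicit fence already attached by user-space stays in place.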
*/ void drm_atomic_set_fence_for_plane(struct drm_plane_state *plane_state, @@ -1710,11 +1724,15 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - if (config->funcs->atomic_check) + if (config->funcs->atomic_check) { ret = config->funcs->atomic_check(state->dev, state); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_ATOMIC("atomic driver check for %p failed: %d\n", + state, ret); + return ret; + } + } if (!state->allow_modeset) { for_each_new_crtc_in_state(state, crtc, crtc_state, i) { diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index c35654591c12..130da5195f3b 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -766,7 +766,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, if (crtc_state->enable) drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2); - plane_state->visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale); + plane_state->visible = drm_rect_clip_scaled(src, dst, &clip); drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation); @@ -875,6 +875,11 @@ EXPORT_SYMBOL(drm_atomic_helper_check_planes); * functions depend upon an updated adjusted_mode.clock to e.g. properly compute * watermarks. * + * Note that zpos normalization will add all enabled planes to the state, which + * might not be desired for some drivers. + * For example, enabling/disabling a cursor plane with a fixed zpos value would + * force all other enabled planes into the state change. + * * RETURNS: * Zero for success or -errno */ @@ -887,6 +892,12 @@ int drm_atomic_helper_check(struct drm_device *dev, if (ret) return ret; + if (dev->mode_config.normalize_zpos) { + ret = drm_atomic_normalize_zpos(dev, state); + if (ret) + return ret; + } + ret = drm_atomic_helper_check_planes(dev, state); if (ret) return ret; @@ -1561,6 +1572,17 @@ void drm_atomic_helper_async_commit(struct drm_device *dev, for_each_new_plane_in_state(state, plane, plane_state, i) { funcs = plane->helper_private; funcs->atomic_async_update(plane, plane_state); + + /* + * ->atomic_async_update() is supposed to update the + * plane->state in-place; make sure at least the common + * properties have been properly updated. + */ + WARN_ON_ONCE(plane->state->fb != plane_state->fb); + WARN_ON_ONCE(plane->state->crtc_x != plane_state->crtc_x); + WARN_ON_ONCE(plane->state->crtc_y != plane_state->crtc_y); + WARN_ON_ONCE(plane->state->src_x != plane_state->src_x); + WARN_ON_ONCE(plane->state->src_y != plane_state->src_y); } } EXPORT_SYMBOL(drm_atomic_helper_async_commit); @@ -2659,7 +2681,7 @@ int drm_atomic_helper_disable_plane(struct drm_plane *plane, goto fail; } - if (plane_state->crtc && (plane == plane->crtc->cursor)) + if (plane_state->crtc && plane_state->crtc->cursor == plane) plane_state->state->legacy_cursor_update = true; ret = __drm_atomic_helper_disable_plane(plane, plane_state); @@ -2881,31 +2903,9 @@ commit: return 0; } -/** - * drm_atomic_helper_disable_all - disable all currently active outputs - * @dev: DRM device - * @ctx: lock acquisition context - * - * Loops through all connectors, finding those that aren't turned off and then - * turns them off by setting their DPMS mode to OFF and deactivating the CRTC - * that they are connected to. - * - * This is used for example in suspend/resume to disable all currently active - * functions when suspending. If you just want to shut down everything at e.g. - * driver unload, look at drm_atomic_helper_shutdown().
- * - * Note that if callers haven't already acquired all modeset locks this might - * return -EDEADLK, which must be handled by calling drm_modeset_backoff(). - * - * Returns: - * 0 on success or a negative error code on failure. - * - * See also: - * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and - * drm_atomic_helper_shutdown(). - */ -int drm_atomic_helper_disable_all(struct drm_device *dev, - struct drm_modeset_acquire_ctx *ctx) +static int __drm_atomic_helper_disable_all(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx, + bool clean_old_fbs) { struct drm_atomic_state *state; struct drm_connector_state *conn_state; @@ -2957,8 +2957,11 @@ int drm_atomic_helper_disable_all(struct drm_device *dev, goto free; drm_atomic_set_fb_for_plane(plane_state, NULL); - plane_mask |= BIT(drm_plane_index(plane)); - plane->old_fb = plane->fb; + + if (clean_old_fbs) { + plane->old_fb = plane->fb; + plane_mask |= BIT(drm_plane_index(plane)); + } } ret = drm_atomic_commit(state); @@ -2969,6 +2972,34 @@ free: return ret; } +/** + * drm_atomic_helper_disable_all - disable all currently active outputs + * @dev: DRM device + * @ctx: lock acquisition context + * + * Loops through all connectors, finding those that aren't turned off and then + * turns them off by setting their DPMS mode to OFF and deactivating the CRTC + * that they are connected to. + * + * This is used for example in suspend/resume to disable all currently active + * functions when suspending. If you just want to shut down everything at e.g. + * driver unload, look at drm_atomic_helper_shutdown(). + * + * Note that if callers haven't already acquired all modeset locks this might + * return -EDEADLK, which must be handled by calling drm_modeset_backoff(). + * + * Returns: + * 0 on success or a negative error code on failure. + * + * See also: + * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and + * drm_atomic_helper_shutdown(). 
+ */ +int drm_atomic_helper_disable_all(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) +{ + return __drm_atomic_helper_disable_all(dev, ctx, false); +} EXPORT_SYMBOL(drm_atomic_helper_disable_all); /** @@ -2991,7 +3022,7 @@ void drm_atomic_helper_shutdown(struct drm_device *dev) while (1) { ret = drm_modeset_lock_all_ctx(dev, &ctx); if (!ret) - ret = drm_atomic_helper_disable_all(dev, &ctx); + ret = __drm_atomic_helper_disable_all(dev, &ctx, true); if (ret != -EDEADLK) break; @@ -3095,14 +3126,14 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, struct drm_connector_state *new_conn_state; struct drm_crtc *crtc; struct drm_crtc_state *new_crtc_state; - unsigned plane_mask = 0; - struct drm_device *dev = state->dev; - int ret; state->acquire_ctx = ctx; for (i = 0; i < ...) — for_each_new_plane_in_state(state, plane, new_plane_state, i) { - plane_mask |= BIT(drm_plane_index(plane)); + WARN_ON(plane->crtc != new_plane_state->crtc); + WARN_ON(plane->fb != new_plane_state->fb); + WARN_ON(plane->old_fb); + state->planes[i].old_state = plane->state; } @@ -3112,11 +3143,7 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, for_each_new_connector_in_state(state, connector, new_conn_state, i) state->connectors[i].old_state = connector->state; - ret = drm_atomic_commit(state); - if (plane_mask) - drm_atomic_clean_old_fb(dev, plane_mask, ret); - - return ret; + return drm_atomic_commit(state); } EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state); @@ -3484,6 +3511,10 @@ void drm_atomic_helper_plane_reset(struct drm_plane *plane) if (plane->state) { plane->state->plane = plane; plane->state->rotation = DRM_MODE_ROTATE_0; + + /* Reset the alpha value to fully opaque if it matters */ + if (plane->alpha_property) + plane->state->alpha = plane->alpha_property->values[1]; } } EXPORT_SYMBOL(drm_atomic_helper_plane_reset); diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index 5a81e1b4c076..a16a74d7e15e 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -88,6 +88,13 @@ * On top of this basic transformation additional properties can be exposed by * the driver: * + * alpha: + * Alpha is set up with drm_plane_create_alpha_property(). It controls the + * plane-wide opacity, from transparent (0) to opaque (0xffff). It can be + * combined with pixel alpha. + * The pixel values in the framebuffers are expected to not be + * pre-multiplied by the global alpha associated with the plane. + * * rotation: * Rotation is set up with drm_plane_create_rotation_property(). It adds a * rotation and reflection step between the source and destination rectangles. @@ -106,6 +113,38 @@ */ /** + * drm_plane_create_alpha_property - create a new alpha property + * @plane: drm plane + * + * This function creates a generic, mutable alpha property and enables support + * for it in the DRM core. It is attached to @plane. + * + * The alpha property will be allowed to be within the bounds of 0 + * (transparent) to 0xffff (opaque). + * + * Returns: + * 0 on success, negative error code on failure.
+ */ +int drm_plane_create_alpha_property(struct drm_plane *plane) +{ + struct drm_property *prop; + + prop = drm_property_create_range(plane->dev, 0, "alpha", + 0, DRM_BLEND_ALPHA_OPAQUE); + if (!prop) + return -ENOMEM; + + drm_object_attach_property(&plane->base, prop, DRM_BLEND_ALPHA_OPAQUE); + plane->alpha_property = prop; + + if (plane->state) + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; + + return 0; +} +EXPORT_SYMBOL(drm_plane_create_alpha_property); + +/** * drm_plane_create_rotation_property - create a new rotation property * @plane: drm plane * @rotation: initial value of the rotation property diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index b3cde897cd80..9b9ba5d5ec0c 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1069,7 +1069,7 @@ int drm_mode_create_tv_properties(struct drm_device *dev, goto nomem; for (i = 0; i < num_modes; i++) - drm_property_add_enum(dev->mode_config.tv_mode_property, i, + drm_property_add_enum(dev->mode_config.tv_mode_property, i, modes[i]); dev->mode_config.tv_brightness_property = @@ -1156,7 +1156,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, { struct drm_device *dev = connector->dev; struct drm_property *scaling_mode_property; - int i, j = 0; + int i; const unsigned valid_scaling_mode_mask = (1U << ARRAY_SIZE(drm_scaling_mode_enum_list)) - 1; @@ -1177,7 +1177,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, if (!(BIT(i) & scaling_mode_mask)) continue; - ret = drm_property_add_enum(scaling_mode_property, j++, + ret = drm_property_add_enum(scaling_mode_property, drm_scaling_mode_enum_list[i].type, drm_scaling_mode_enum_list[i].name); @@ -1531,8 +1531,10 @@ static struct drm_encoder *drm_connector_get_encoder(struct drm_connector *conne return connector->encoder; } -static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode, - const struct drm_file *file_priv) +static bool +drm_mode_expose_to_userspace(const struct drm_display_mode *mode, + const struct list_head *export_list, + const struct drm_file *file_priv) { /* * If user-space hasn't configured the driver to expose the stereo 3D @@ -1540,6 +1542,23 @@ static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode, */ if (!file_priv->stereo_allowed && drm_mode_is_stereo(mode)) return false; + /* + * If user-space hasn't configured the driver to expose the modes + * with aspect-ratio, don't expose them. However if such a mode + * is unique, let it be exposed, but reset the aspect-ratio flags + * while preparing the list of user-modes. 
+ */ + if (!file_priv->aspect_ratio_allowed) { + struct drm_display_mode *mode_itr; + + list_for_each_entry(mode_itr, export_list, export_head) + if (drm_mode_match(mode_itr, mode, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) + return false; + } return true; } @@ -1559,6 +1578,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, struct drm_mode_modeinfo u_mode; struct drm_mode_modeinfo __user *mode_ptr; uint32_t __user *encoder_ptr; + LIST_HEAD(export_list); if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -1607,21 +1627,31 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, /* delayed so we get modes regardless of pre-fill_modes state */ list_for_each_entry(mode, &connector->modes, head) - if (drm_mode_expose_to_userspace(mode, file_priv)) + if (drm_mode_expose_to_userspace(mode, &export_list, + file_priv)) { + list_add_tail(&mode->export_head, &export_list); mode_count++; + } /* * This ioctl is called twice, once to determine how much space is * needed, and the 2nd time to fill it. + * The modes that need to be exposed to the user are maintained in the + * 'export_list'. When the ioctl is called the first time to determine + * the space needed, the export_list gets filled so the number of modes + * can be counted. The second time, the user modes are filled in, one by + * one, from the export_list. */ if ((out_resp->count_modes >= mode_count) && mode_count) { copied = 0; mode_ptr = (struct drm_mode_modeinfo __user *)(unsigned long)out_resp->modes_ptr; - list_for_each_entry(mode, &connector->modes, head) { - if (!drm_mode_expose_to_userspace(mode, file_priv)) - continue; - + list_for_each_entry(mode, &export_list, export_head) { drm_mode_convert_to_umode(&u_mode, mode); + /* + * Reset the aspect-ratio flags of the user-mode if modes + * with aspect-ratio are not supported. 
+ */ + if (!file_priv->aspect_ratio_allowed) + u_mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; if (copy_to_user(mode_ptr + copied, &u_mode, sizeof(u_mode))) { ret = -EFAULT; diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 03583887cfec..98a36e6c69ad 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -402,6 +402,7 @@ int drm_mode_getcrtc(struct drm_device *dev, { struct drm_mode_crtc *crtc_resp = data; struct drm_crtc *crtc; + struct drm_plane *plane; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -410,34 +411,36 @@ int drm_mode_getcrtc(struct drm_device *dev, if (!crtc) return -ENOENT; + plane = crtc->primary; + crtc_resp->gamma_size = crtc->gamma_size; - drm_modeset_lock(&crtc->primary->mutex, NULL); - if (crtc->primary->state && crtc->primary->state->fb) - crtc_resp->fb_id = crtc->primary->state->fb->base.id; - else if (!crtc->primary->state && crtc->primary->fb) - crtc_resp->fb_id = crtc->primary->fb->base.id; + drm_modeset_lock(&plane->mutex, NULL); + if (plane->state && plane->state->fb) + crtc_resp->fb_id = plane->state->fb->base.id; + else if (!plane->state && plane->fb) + crtc_resp->fb_id = plane->fb->base.id; else crtc_resp->fb_id = 0; - if (crtc->primary->state) { - crtc_resp->x = crtc->primary->state->src_x >> 16; - crtc_resp->y = crtc->primary->state->src_y >> 16; + if (plane->state) { + crtc_resp->x = plane->state->src_x >> 16; + crtc_resp->y = plane->state->src_y >> 16; } - drm_modeset_unlock(&crtc->primary->mutex); + drm_modeset_unlock(&plane->mutex); drm_modeset_lock(&crtc->mutex, NULL); if (crtc->state) { if (crtc->state->enable) { drm_mode_convert_to_umode(&crtc_resp->mode, &crtc->state->mode); crtc_resp->mode_valid = 1; - } else { crtc_resp->mode_valid = 0; } } else { crtc_resp->x = crtc->x; crtc_resp->y = crtc->y; + if (crtc->enabled) { drm_mode_convert_to_umode(&crtc_resp->mode, &crtc->mode); crtc_resp->mode_valid = 1; @@ -446,6 +449,8 @@ int drm_mode_getcrtc(struct drm_device *dev, crtc_resp->mode_valid = 0; } } + if (!file_priv->aspect_ratio_allowed) + crtc_resp->mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; drm_modeset_unlock(&crtc->mutex); return 0; @@ -471,7 +476,7 @@ static int __drm_mode_set_config_internal(struct drm_mode_set *set, ret = crtc->funcs->set_config(set, ctx); if (ret == 0) { - crtc->primary->crtc = crtc; + crtc->primary->crtc = fb ? crtc : NULL; crtc->primary->fb = fb; } @@ -554,6 +559,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_config *config = &dev->mode_config; struct drm_mode_crtc *crtc_req = data; struct drm_crtc *crtc; + struct drm_plane *plane; struct drm_connector **connector_set = NULL, *connector; struct drm_framebuffer *fb = NULL; struct drm_display_mode *mode = NULL; @@ -580,22 +586,33 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, } DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name); + plane = crtc->primary; + mutex_lock(&crtc->dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); retry: ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx); if (ret) goto out; + if (crtc_req->mode_valid) { /* If we have a mode we need a framebuffer. 
*/ /* If we pass -1, set the mode with the currently bound fb */ if (crtc_req->fb_id == -1) { - if (!crtc->primary->fb) { + struct drm_framebuffer *old_fb; + + if (plane->state) + old_fb = plane->state->fb; + else + old_fb = plane->fb; + + if (!old_fb) { DRM_DEBUG_KMS("CRTC doesn't have current FB\n"); ret = -EINVAL; goto out; } - fb = crtc->primary->fb; + + fb = old_fb; /* Make refcounting symmetric with the lookup path. */ drm_framebuffer_get(fb); } else { @@ -613,6 +630,13 @@ retry: ret = -ENOMEM; goto out; } + if (!file_priv->aspect_ratio_allowed && + (crtc_req->mode.flags & DRM_MODE_FLAG_PIC_AR_MASK) != DRM_MODE_FLAG_PIC_AR_NONE) { + DRM_DEBUG_KMS("Unexpected aspect-ratio flag bits\n"); + ret = -EINVAL; + goto out; + } + ret = drm_mode_convert_umode(dev, mode, &crtc_req->mode); if (ret) { @@ -627,8 +651,8 @@ retry: * match real hardware capabilities. Skip the check in that * case. */ - if (!crtc->primary->format_default) { - ret = drm_plane_check_pixel_format(crtc->primary, + if (!plane->format_default) { + ret = drm_plane_check_pixel_format(plane, fb->format->format, fb->modifier); if (ret) { diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 3c2b82865ad2..5d307b23a4e6 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -220,3 +220,5 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, /* drm_edid.c */ void drm_mode_fixup_1366x768(struct drm_display_mode *mode); +void drm_reset_display_info(struct drm_connector *connector); +u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid); diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index ffe14ec3e7f2..36c7609a4bd5 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -119,18 +119,32 @@ u8 drm_dp_get_adjust_request_pre_emphasis(const u8 link_status[DP_LINK_STATUS_SI EXPORT_SYMBOL(drm_dp_get_adjust_request_pre_emphasis); void drm_dp_link_train_clock_recovery_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { - if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0) + int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] & + DP_TRAINING_AUX_RD_MASK; + + if (rd_interval > 4) + DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n", + rd_interval); + + if (rd_interval == 0 || dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14) udelay(100); else - mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4); + mdelay(rd_interval * 4); } EXPORT_SYMBOL(drm_dp_link_train_clock_recovery_delay); void drm_dp_link_train_channel_eq_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { - if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0) + int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] & + DP_TRAINING_AUX_RD_MASK; + + if (rd_interval > 4) + DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n", + rd_interval); + + if (rd_interval == 0) udelay(400); else - mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4); + mdelay(rd_interval * 4); } EXPORT_SYMBOL(drm_dp_link_train_channel_eq_delay); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 6fac4129e6a2..658830620ca3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2941,12 +2941,14 @@ static void drm_dp_mst_dump_mstb(struct seq_file *m, } } +#define DP_PAYLOAD_TABLE_SIZE 64 + static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr, char *buf) { int i; - for (i = 0; i < 64; i += 16) { + for (i = 0; i < DP_PAYLOAD_TABLE_SIZE; i += 16) { if (drm_dp_dpcd_read(mgr->aux, 
DP_PAYLOAD_TABLE_UPDATE_STATUS + i, &buf[i], 16) != 16) @@ -3015,7 +3017,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m, mutex_lock(&mgr->lock); if (mgr->mst_primary) { - u8 buf[64]; + u8 buf[DP_PAYLOAD_TABLE_SIZE]; int ret; ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, buf, DP_RECEIVER_CAP_SIZE); @@ -3033,8 +3035,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m, seq_printf(m, " revision: hw: %x.%x sw: %x.%x\n", buf[0x9] >> 4, buf[0x9] & 0xf, buf[0xa], buf[0xb]); if (dump_dp_payload_table(mgr, buf)) - seq_printf(m, "payload table: %*ph\n", 63, buf); - + seq_printf(m, "payload table: %*ph\n", DP_PAYLOAD_TABLE_SIZE, buf); } mutex_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index c2c21d839727..b553a6f2ff0e 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -32,6 +32,7 @@ #include <linux/moduleparam.h> #include <linux/mount.h> #include <linux/slab.h> +#include <linux/srcu.h> #include <drm/drm_drv.h> #include <drm/drmP.h> @@ -75,6 +76,8 @@ static bool drm_core_init_complete = false; static struct dentry *drm_debugfs_root; +DEFINE_STATIC_SRCU(drm_unplug_srcu); + /* * DRM Minors * A DRM device can provide several char-dev interfaces on the DRM-Major. Each @@ -96,8 +99,6 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, return &dev->primary; case DRM_MINOR_RENDER: return &dev->render; - case DRM_MINOR_CONTROL: - return &dev->control; default: BUG(); } @@ -318,18 +319,51 @@ void drm_put_dev(struct drm_device *dev) } EXPORT_SYMBOL(drm_put_dev); -static void drm_device_set_unplugged(struct drm_device *dev) +/** + * drm_dev_enter - Enter device critical section + * @dev: DRM device + * @idx: Pointer to index that will be passed to the matching drm_dev_exit() + * + * This function marks and protects the beginning of a section that should not + * be entered after the device has been unplugged. The section end is marked + * with drm_dev_exit(). Calls to this function can be nested. + * + * Returns: + * True if it is OK to enter the section, false otherwise. + */ +bool drm_dev_enter(struct drm_device *dev, int *idx) { - smp_wmb(); - atomic_set(&dev->unplugged, 1); + *idx = srcu_read_lock(&drm_unplug_srcu); + + if (dev->unplugged) { + srcu_read_unlock(&drm_unplug_srcu, *idx); + return false; + } + + return true; } +EXPORT_SYMBOL(drm_dev_enter); + +/** + * drm_dev_exit - Exit device critical section + * @idx: index returned from drm_dev_enter() + * + * This function marks the end of a section that should not be entered after + * the device has been unplugged. + */ +void drm_dev_exit(int idx) +{ + srcu_read_unlock(&drm_unplug_srcu, idx); +} +EXPORT_SYMBOL(drm_dev_exit); /** * drm_dev_unplug - unplug a DRM device * @dev: DRM device * * This unplugs a hotpluggable DRM device, which makes it inaccessible to - * userspace operations. Entry-points can use drm_dev_is_unplugged(). This + * userspace operations. Entry-points can use drm_dev_enter() and + * drm_dev_exit() to protect device resources in a race free manner. This * essentially unregisters the device like drm_dev_unregister(), but can be * called while there are still open users of @dev. 
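 *
 * A device entry-point would then bracket its hardware access like this
 * (sketch):
 *
 *	int idx;
 *
 *	if (!drm_dev_enter(dev, &idx))
 *		return -ENODEV;
 *
 *	... access device resources ...
 *
 *	drm_dev_exit(idx);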
*/ @@ -338,10 +372,18 @@ void drm_dev_unplug(struct drm_device *dev) drm_dev_unregister(dev); mutex_lock(&drm_global_mutex); - drm_device_set_unplugged(dev); if (dev->open_count == 0) drm_dev_put(dev); mutex_unlock(&drm_global_mutex); + + /* + * After synchronizing, any critical read section is guaranteed to see + * the new value of ->unplugged, and any critical section which might + * still have seen the old value of ->unplugged is guaranteed to have + * finished. + */ + dev->unplugged = true; + synchronize_srcu(&drm_unplug_srcu); } EXPORT_SYMBOL(drm_dev_unplug); @@ -523,7 +565,6 @@ err_ctxbitmap: err_minors: drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); - drm_minor_free(dev, DRM_MINOR_CONTROL); drm_fs_inode_free(dev->anon_inode); err_free: mutex_destroy(&dev->master_mutex); @@ -559,7 +600,6 @@ void drm_dev_fini(struct drm_device *dev) drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); - drm_minor_free(dev, DRM_MINOR_CONTROL); mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->ctxlist_mutex); @@ -752,10 +792,6 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) mutex_lock(&drm_global_mutex); - ret = drm_minor_register(dev, DRM_MINOR_CONTROL); - if (ret) - goto err_minors; - ret = drm_minor_register(dev, DRM_MINOR_RENDER); if (ret) goto err_minors; @@ -793,7 +829,6 @@ err_minors: remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); - drm_minor_unregister(dev, DRM_MINOR_CONTROL); out_unlock: mutex_unlock(&drm_global_mutex); return ret; @@ -838,7 +873,6 @@ void drm_dev_unregister(struct drm_device *dev) remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); - drm_minor_unregister(dev, DRM_MINOR_CONTROL); } EXPORT_SYMBOL(drm_dev_unregister); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 39f1db4acda4..40e1e24f2ff0 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2930,11 +2930,15 @@ cea_mode_alternate_timings(u8 vic, struct drm_display_mode *mode) static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_match, unsigned int clock_tolerance) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) return 0; + if (to_match->picture_aspect_ratio) + match_flags |= DRM_MODE_MATCH_ASPECT_RATIO; + for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) { struct drm_display_mode cea_mode = edid_cea_modes[vic]; unsigned int clock1, clock2; @@ -2948,7 +2952,7 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m continue; do { - if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode)) + if (drm_mode_match(to_match, &cea_mode, match_flags)) return vic; } while (cea_mode_alternate_timings(vic, &cea_mode)); } @@ -2965,11 +2969,15 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m */ u8 drm_match_cea_mode(const struct drm_display_mode *to_match) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) return 0; + if (to_match->picture_aspect_ratio) + match_flags |= DRM_MODE_MATCH_ASPECT_RATIO; + for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) { struct drm_display_mode cea_mode = edid_cea_modes[vic]; unsigned int clock1, clock2; @@ -2983,7 +2991,7 @@ u8 drm_match_cea_mode(const struct drm_display_mode *to_match) continue; do { - if
(drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode)) + if (drm_mode_match(to_match, &cea_mode, match_flags)) return vic; } while (cea_mode_alternate_timings(vic, &cea_mode)); } @@ -3030,6 +3038,7 @@ hdmi_mode_alternate_clock(const struct drm_display_mode *hdmi_mode) static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_match, unsigned int clock_tolerance) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) @@ -3047,7 +3056,7 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_ abs(to_match->clock - clock2) > clock_tolerance) continue; - if (drm_mode_equal_no_clocks(to_match, hdmi_mode)) + if (drm_mode_match(to_match, hdmi_mode, match_flags)) return vic; } @@ -3064,6 +3073,7 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_ */ static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) @@ -3079,7 +3089,7 @@ static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match) if ((KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock1) || KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock2)) && - drm_mode_equal_no_clocks_no_stereo(to_match, hdmi_mode)) + drm_mode_match(to_match, hdmi_mode, match_flags)) return vic; } return 0; @@ -4455,7 +4465,6 @@ drm_reset_display_info(struct drm_connector *connector) info->non_desktop = 0; } -EXPORT_SYMBOL_GPL(drm_reset_display_info); u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid) { @@ -4533,7 +4542,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi info->color_formats |= DRM_COLOR_FORMAT_YCRCB422; return quirks; } -EXPORT_SYMBOL_GPL(drm_add_display_info); static int validate_displayid(u8 *displayid, int length, int idx) { @@ -4825,6 +4833,7 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, bool is_hdmi2_sink) { + enum hdmi_picture_aspect picture_aspect; int err; if (!frame || !mode) @@ -4867,13 +4876,23 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame, * Populate picture aspect ratio from either * user input (if specified) or from the CEA mode list. */ - if (mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_4_3 || - mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_16_9) - frame->picture_aspect = mode->picture_aspect_ratio; - else if (frame->video_code > 0) - frame->picture_aspect = drm_get_cea_aspect_ratio( - frame->video_code); + picture_aspect = mode->picture_aspect_ratio; + if (picture_aspect == HDMI_PICTURE_ASPECT_NONE) + picture_aspect = drm_get_cea_aspect_ratio(frame->video_code); + + /* + * The infoframe can't convey anything but none, 4:3 + * and 16:9, so if the user has asked for anything else + * we can only satisfy it by specifying the right VIC. 
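+ * For example, a mode carrying e.g. HDMI_PICTURE_ASPECT_64_27 is only
+ * accepted when frame->video_code already implies that ratio; the
+ * infoframe then carries HDMI_PICTURE_ASPECT_NONE and the VIC conveys
+ * the aspect, otherwise we bail out with -EINVAL below.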
+ */ + if (picture_aspect > HDMI_PICTURE_ASPECT_16_9) { + if (picture_aspect != + drm_get_cea_aspect_ratio(frame->video_code)) + return -EINVAL; + picture_aspect = HDMI_PICTURE_ASPECT_NONE; + } + frame->picture_aspect = picture_aspect; frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE; frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 0646b108030b..2ee1eaa66188 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2183,7 +2183,11 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper, for (j = 0; j < i; j++) { if (!enabled[j]) continue; - if (!drm_mode_equal(modes[j], modes[i])) + if (!drm_mode_match(modes[j], modes[i], + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) can_clone = false; } } @@ -2203,7 +2207,11 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper, fb_helper_conn = fb_helper->connector_info[i]; list_for_each_entry(mode, &fb_helper_conn->connector->modes, head) { - if (drm_mode_equal(mode, dmt_mode)) + if (drm_mode_match(mode, dmt_mode, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) modes[i] = mode; } if (!modes[i]) diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index ad67203de715..bfedceff87bb 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -468,29 +468,30 @@ int drm_mode_getfb(struct drm_device *dev, goto out; } + if (!fb->funcs->create_handle) { + ret = -ENODEV; + goto out; + } + r->height = fb->height; r->width = fb->width; r->depth = fb->format->depth; r->bpp = fb->format->cpp[0] * 8; r->pitch = fb->pitches[0]; - if (fb->funcs->create_handle) { - if (drm_is_current_master(file_priv) || capable(CAP_SYS_ADMIN) || - drm_is_control_client(file_priv)) { - ret = fb->funcs->create_handle(fb, file_priv, - &r->handle); - } else { - /* GET_FB() is an unprivileged ioctl so we must not - * return a buffer-handle to non-master processes! For - * backwards-compatibility reasons, we cannot make - * GET_FB() privileged, so just return an invalid handle - * for non-masters. */ - r->handle = 0; - ret = 0; - } - } else { - ret = -ENODEV; + + /* GET_FB() is an unprivileged ioctl so we must not return a + * buffer-handle to non-master processes! For + * backwards-compatibility reasons, we cannot make GET_FB() privileged, + * so just return an invalid handle for non-masters. + */ + if (!drm_is_current_master(file_priv) && !capable(CAP_SYS_ADMIN)) { + r->handle = 0; + ret = 0; + goto out; } + ret = fb->funcs->create_handle(fb, file_priv, &r->handle); + out: drm_framebuffer_put(fb); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 4975ba9a7bc8..4a16d7b26c89 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -436,9 +436,12 @@ err_unref: * @obj: object to register * @handlep: pointer to return the created handle to the caller * - * Create a handle for this object. This adds a handle reference - * to the object, which includes a regular reference count. Callers - * will likely want to dereference the object afterwards. + * Create a handle for this object. This adds a handle reference to the object, + * which includes a regular reference count. Callers will likely want to + * dereference the object afterwards. 
+ * + * Since this publishes @obj to userspace it must be fully set up by this point, + * drivers must call this last in their buffer object creation callbacks. */ int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 4d682a6e8bcb..acfbc0641a06 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -22,6 +22,7 @@ #include <drm/drm_gem.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_modeset_helper.h> +#include <drm/drm_simple_kms_helper.h> /** * DOC: overview @@ -266,6 +267,24 @@ int drm_gem_fb_prepare_fb(struct drm_plane *plane, EXPORT_SYMBOL_GPL(drm_gem_fb_prepare_fb); /** + * drm_gem_fb_simple_display_pipe_prepare_fb - prepare_fb helper for + * &drm_simple_display_pipe + * @pipe: Simple display pipe + * @plane_state: Plane state + * + * This function uses drm_gem_fb_prepare_fb() to check if the plane FB has a + * &dma_buf attached, extracts the exclusive fence and attaches it to plane + * state for the atomic helper to wait on. Drivers can use this as their + * &drm_simple_display_pipe_funcs.prepare_fb callback. + */ +int drm_gem_fb_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *plane_state) +{ + return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); +} +EXPORT_SYMBOL(drm_gem_fb_simple_display_pipe_prepare_fb); + +/** * drm_gem_fbdev_fb_create - Create a GEM backed &drm_framebuffer for fbdev * emulation * @dev: DRM device diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index f8e96e648acf..67b1fca39aa6 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -105,7 +105,7 @@ static int compat_drm_version(struct file *file, unsigned int cmd, .desc = compat_ptr(v32.desc), }; err = drm_ioctl_kernel(file, drm_version, &v, - DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW); + DRM_UNLOCKED|DRM_RENDER_ALLOW); if (err) return err; @@ -885,7 +885,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, return -EFAULT; err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64, - DRM_CONTROL_ALLOW|DRM_UNLOCKED); + DRM_UNLOCKED); if (err) return err; diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index af782911c505..0d4cfb232576 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -324,6 +324,15 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) return -EINVAL; file_priv->atomic = req->value; file_priv->universal_planes = req->value; + /* + * No atomic user-space blows up on aspect ratio mode bits. 
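The new DRM_CLIENT_CAP_ASPECT_RATIO cap handled just below is opt-in from userspace; a minimal sketch using libdrm, assuming headers new enough to define the cap:

    #include <stdio.h>
    #include <xf86drm.h>

    /* fd is an already-open DRM device file descriptor. */
    if (drmSetClientCap(fd, DRM_CLIENT_CAP_ASPECT_RATIO, 1))
        fprintf(stderr, "aspect-ratio mode bits not supported\n");
    /* Modes reported from now on keep their DRM_MODE_FLAG_PIC_AR_* bits. */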
+ */ + file_priv->aspect_ratio_allowed = req->value; + break; + case DRM_CLIENT_CAP_ASPECT_RATIO: + if (req->value > 1) + return -EINVAL; + file_priv->aspect_ratio_allowed = req->value; break; default: return -EINVAL; @@ -510,13 +519,7 @@ int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) /* MASTER is only for master or control clients */ if (unlikely((flags & DRM_MASTER) && - !drm_is_current_master(file_priv) && - !drm_is_control_client(file_priv))) - return -EACCES; - - /* Control clients must be explicitly allowed */ - if (unlikely(!(flags & DRM_CONTROL_ALLOW) && - drm_is_control_client(file_priv))) + !drm_is_current_master(file_priv))) return -EACCES; /* Render clients must be explicitly allowed */ @@ -539,7 +542,7 @@ EXPORT_SYMBOL(drm_ioctl_permit); /* Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, - DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW), + DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY), @@ -613,41 +616,41 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, 
drm_mode_getproperty_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_UNLOCKED), + 
DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), @@ -665,10 +668,10 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index d345563fdff3..50c73c0a20b9 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -340,7 +340,7 @@ static void _drm_lease_revoke(struct drm_master *top) break; /* Over */ - master = list_entry(master->lessee_list.next, struct drm_master, lessee_list); + master = list_next_entry(master, lessee_list); } } } diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index e82b61e08f8c..c78ca0e84ffd 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -939,17 +939,68 @@ struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev, } EXPORT_SYMBOL(drm_mode_duplicate); +static bool drm_mode_match_timings(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return mode1->hdisplay == mode2->hdisplay && + mode1->hsync_start == mode2->hsync_start && + mode1->hsync_end == mode2->hsync_end && + mode1->htotal == mode2->htotal && + mode1->hskew == mode2->hskew && + mode1->vdisplay == mode2->vdisplay && + mode1->vsync_start == mode2->vsync_start && + mode1->vsync_end == mode2->vsync_end && + mode1->vtotal == mode2->vtotal && + mode1->vscan == mode2->vscan; +} + +static bool drm_mode_match_clock(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + /* + * do clock check convert to PICOS + * so fb modes get matched the same + */ + if (mode1->clock && mode2->clock) + return KHZ2PICOS(mode1->clock) == KHZ2PICOS(mode2->clock); + else + return mode1->clock == mode2->clock; +} + +static bool drm_mode_match_flags(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) == + (mode2->flags & ~DRM_MODE_FLAG_3D_MASK); +} + +static bool drm_mode_match_3d_flags(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return (mode1->flags & DRM_MODE_FLAG_3D_MASK) == + (mode2->flags & DRM_MODE_FLAG_3D_MASK); +} + +static bool drm_mode_match_aspect_ratio(const struct 
drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return mode1->picture_aspect_ratio == mode2->picture_aspect_ratio; +} + /** - * drm_mode_equal - test modes for equality + * drm_mode_match - test modes for (partial) equality * @mode1: first mode * @mode2: second mode + * @match_flags: which parts need to match (DRM_MODE_MATCH_*) * * Check to see if @mode1 and @mode2 are equivalent. * * Returns: - * True if the modes are equal, false otherwise. + * True if the modes are (partially) equal, false otherwise. */ -bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) +bool drm_mode_match(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2, + unsigned int match_flags) { if (!mode1 && !mode2) return true; @@ -957,15 +1008,49 @@ bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_displ if (!mode1 || !mode2) return false; - /* do clock check convert to PICOS so fb modes get matched - * the same */ - if (mode1->clock && mode2->clock) { - if (KHZ2PICOS(mode1->clock) != KHZ2PICOS(mode2->clock)) - return false; - } else if (mode1->clock != mode2->clock) + if (match_flags & DRM_MODE_MATCH_TIMINGS && + !drm_mode_match_timings(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_CLOCK && + !drm_mode_match_clock(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_FLAGS && + !drm_mode_match_flags(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_3D_FLAGS && + !drm_mode_match_3d_flags(mode1, mode2)) return false; - return drm_mode_equal_no_clocks(mode1, mode2); + if (match_flags & DRM_MODE_MATCH_ASPECT_RATIO && + !drm_mode_match_aspect_ratio(mode1, mode2)) + return false; + + return true; +} +EXPORT_SYMBOL(drm_mode_match); + +/** + * drm_mode_equal - test modes for equality + * @mode1: first mode + * @mode2: second mode + * + * Check to see if @mode1 and @mode2 are equivalent. + * + * Returns: + * True if the modes are equal, false otherwise. + */ +bool drm_mode_equal(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS| + DRM_MODE_MATCH_ASPECT_RATIO); } EXPORT_SYMBOL(drm_mode_equal); @@ -980,13 +1065,13 @@ EXPORT_SYMBOL(drm_mode_equal); * Returns: * True if the modes are equal, false otherwise. 
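With drm_mode_match() exported above, callers pick exactly which aspects of two modes to compare; for instance, matching timings and the non-stereo flags while ignoring pixel clock and 3D layout:

    bool same;

    same = drm_mode_match(mode_a, mode_b,
                          DRM_MODE_MATCH_TIMINGS |
                          DRM_MODE_MATCH_FLAGS);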
*/ -bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) +bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) { - if ((mode1->flags & DRM_MODE_FLAG_3D_MASK) != - (mode2->flags & DRM_MODE_FLAG_3D_MASK)) - return false; - - return drm_mode_equal_no_clocks_no_stereo(mode1, mode2); + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS); } EXPORT_SYMBOL(drm_mode_equal_no_clocks); @@ -1004,21 +1089,9 @@ EXPORT_SYMBOL(drm_mode_equal_no_clocks); bool drm_mode_equal_no_clocks_no_stereo(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) { - if (mode1->hdisplay == mode2->hdisplay && - mode1->hsync_start == mode2->hsync_start && - mode1->hsync_end == mode2->hsync_end && - mode1->htotal == mode2->htotal && - mode1->hskew == mode2->hskew && - mode1->vdisplay == mode2->vdisplay && - mode1->vsync_start == mode2->vsync_start && - mode1->vsync_end == mode2->vsync_end && - mode1->vtotal == mode2->vtotal && - mode1->vscan == mode2->vscan && - (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) == - (mode2->flags & ~DRM_MODE_FLAG_3D_MASK)) - return true; - - return false; + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_FLAGS); } EXPORT_SYMBOL(drm_mode_equal_no_clocks_no_stereo); @@ -1575,6 +1648,26 @@ void drm_mode_convert_to_umode(struct drm_mode_modeinfo *out, out->vrefresh = in->vrefresh; out->flags = in->flags; out->type = in->type; + + switch (in->picture_aspect_ratio) { + case HDMI_PICTURE_ASPECT_4_3: + out->flags |= DRM_MODE_FLAG_PIC_AR_4_3; + break; + case HDMI_PICTURE_ASPECT_16_9: + out->flags |= DRM_MODE_FLAG_PIC_AR_16_9; + break; + case HDMI_PICTURE_ASPECT_64_27: + out->flags |= DRM_MODE_FLAG_PIC_AR_64_27; + break; + case HDMI_PICTURE_ASPECT_256_135: + out->flags |= DRM_MODE_FLAG_PIC_AR_256_135; + break; + case HDMI_PICTURE_ASPECT_RESERVED: + default: + out->flags |= DRM_MODE_FLAG_PIC_AR_NONE; + break; + } + strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN); out->name[DRM_DISPLAY_MODE_LEN-1] = 0; } @@ -1621,6 +1714,30 @@ int drm_mode_convert_umode(struct drm_device *dev, strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN); out->name[DRM_DISPLAY_MODE_LEN-1] = 0; + /* Clearing picture aspect ratio bits from out flags, + * as the aspect-ratio information is not stored in + * flags for kernel-mode, but in picture_aspect_ratio. 
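For reference, decoding the aspect-ratio bits on the userspace side is a simple mask-and-switch; a sketch assuming the uapi DRM_MODE_FLAG_PIC_AR_* definitions introduced by this series:

    const char *ar;

    switch (umode->flags & DRM_MODE_FLAG_PIC_AR_MASK) {
    case DRM_MODE_FLAG_PIC_AR_4_3:     ar = "4:3";     break;
    case DRM_MODE_FLAG_PIC_AR_16_9:    ar = "16:9";    break;
    case DRM_MODE_FLAG_PIC_AR_64_27:   ar = "64:27";   break;
    case DRM_MODE_FLAG_PIC_AR_256_135: ar = "256:135"; break;
    default:                           ar = "none";    break;
    }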
+ */ + out->flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; + + switch (in->flags & DRM_MODE_FLAG_PIC_AR_MASK) { + case DRM_MODE_FLAG_PIC_AR_4_3: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_4_3; + break; + case DRM_MODE_FLAG_PIC_AR_16_9: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_16_9; + break; + case DRM_MODE_FLAG_PIC_AR_64_27: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_64_27; + break; + case DRM_MODE_FLAG_PIC_AR_256_135: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_256_135; + break; + default: + out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; + break; + } + out->status = drm_mode_validate_driver(dev, out); if (out->status != MODE_OK) return -EINVAL; diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 902cc1a71e45..fe9c6c731e87 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -60,7 +60,7 @@ static const struct drm_dmi_panel_orientation_data itworks_tw891 = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; -static const struct drm_dmi_panel_orientation_data vios_lth17 = { +static const struct drm_dmi_panel_orientation_data lcd800x1280_rightside_up = { .width = 800, .height = 1280, .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, @@ -102,12 +102,30 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "TW891"), }, .driver_data = (void *)&itworks_tw891, + }, { /* + * Lenovo Ideapad Miix 310 laptop, only some production batches + * have a portrait screen, the resolution checks makes the quirk + * apply only to those batches. + */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80SG"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Lenovo Ideapad Miix 320 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80XF"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* VIOS LTH17 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "LTH17"), }, - .driver_data = (void *)&vios_lth17, + .driver_data = (void *)&lcd800x1280_rightside_up, }, {} }; @@ -154,10 +172,9 @@ int drm_get_panel_orientation_quirk(int width, int height) if (!bios_date) continue; - for (i = 0; data->bios_dates[i]; i++) { - if (!strcmp(data->bios_dates[i], bios_date)) - return data->orientation; - } + i = match_string(data->bios_dates, -1, bios_date); + if (i >= 0) + return data->orientation; } return DRM_MODE_PANEL_ORIENTATION_UNKNOWN; diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 6d2a6e428a3e..035054455301 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -756,6 +756,7 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->dev; + struct drm_plane *plane = crtc->cursor; struct drm_framebuffer *fb = NULL; struct drm_mode_fb_cmd2 fbreq = { .width = req->width, @@ -769,8 +770,8 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, uint32_t src_w = 0, src_h = 0; int ret = 0; - BUG_ON(!crtc->cursor); - WARN_ON(crtc->cursor->crtc != crtc && crtc->cursor->crtc != NULL); + BUG_ON(!plane); + WARN_ON(plane->crtc != crtc && plane->crtc != NULL); /* * Obtain fb we'll be using (either new or existing) and take an 
extra @@ -784,13 +785,18 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, DRM_DEBUG_KMS("failed to wrap cursor buffer in drm framebuffer\n"); return PTR_ERR(fb); } + fb->hot_x = req->hot_x; fb->hot_y = req->hot_y; } else { fb = NULL; } } else { - fb = crtc->cursor->fb; + if (plane->state) + fb = plane->state->fb; + else + fb = plane->fb; + if (fb) drm_framebuffer_get(fb); } @@ -810,7 +816,7 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, src_h = fb->height << 16; } - ret = __setplane_internal(crtc->cursor, crtc, fb, + ret = __setplane_internal(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, 0, 0, src_w, src_h, ctx); @@ -931,7 +937,8 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, { struct drm_mode_crtc_page_flip_target *page_flip = data; struct drm_crtc *crtc; - struct drm_framebuffer *fb = NULL; + struct drm_plane *plane; + struct drm_framebuffer *fb = NULL, *old_fb; struct drm_pending_vblank_event *e = NULL; u32 target_vblank = page_flip->sequence; struct drm_modeset_acquire_ctx ctx; @@ -959,6 +966,8 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, if (!crtc) return -ENOENT; + plane = crtc->primary; + if (crtc->funcs->page_flip_target) { u32 current_vblank; int r; @@ -1003,11 +1012,16 @@ retry: ret = drm_modeset_lock(&crtc->mutex, &ctx); if (ret) goto out; - ret = drm_modeset_lock(&crtc->primary->mutex, &ctx); + ret = drm_modeset_lock(&plane->mutex, &ctx); if (ret) goto out; - if (crtc->primary->fb == NULL) { + if (plane->state) + old_fb = plane->state->fb; + else + old_fb = plane->fb; + + if (old_fb == NULL) { /* The framebuffer is currently unbound, presumably * due to a hotplug event, that userspace has not * yet discovered. @@ -1022,8 +1036,8 @@ retry: goto out; } - if (crtc->state) { - const struct drm_plane_state *state = crtc->primary->state; + if (plane->state) { + const struct drm_plane_state *state = plane->state; ret = drm_framebuffer_check_src_coords(state->src_x, state->src_y, @@ -1031,12 +1045,13 @@ retry: state->src_h, fb); } else { - ret = drm_crtc_check_viewport(crtc, crtc->x, crtc->y, &crtc->mode, fb); + ret = drm_crtc_check_viewport(crtc, crtc->x, crtc->y, + &crtc->mode, fb); } if (ret) goto out; - if (crtc->primary->fb->format != fb->format) { + if (old_fb->format != fb->format) { DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer format.\n"); ret = -EINVAL; goto out; @@ -1048,10 +1063,12 @@ retry: ret = -ENOMEM; goto out; } + e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); e->event.vbl.user_data = page_flip->user_data; e->event.vbl.crtc_id = crtc->base.id; + ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base); if (ret) { kfree(e); @@ -1060,7 +1077,7 @@ retry: } } - crtc->primary->old_fb = crtc->primary->fb; + plane->old_fb = plane->fb; if (crtc->funcs->page_flip_target) ret = crtc->funcs->page_flip_target(crtc, fb, e, page_flip->flags, @@ -1073,19 +1090,18 @@ retry: if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) drm_event_cancel_free(dev, &e->base); /* Keep the old fb, don't unref it. */ - crtc->primary->old_fb = NULL; + plane->old_fb = NULL; } else { - crtc->primary->fb = fb; - /* Unref only the old framebuffer. 
*/ - fb = NULL; + plane->fb = fb; + drm_framebuffer_get(fb); } out: if (fb) drm_framebuffer_put(fb); - if (crtc->primary->old_fb) - drm_framebuffer_put(crtc->primary->old_fb); - crtc->primary->old_fb = NULL; + if (plane->old_fb) + drm_framebuffer_put(plane->old_fb); + plane->old_fb = NULL; if (ret == -EDEADLK) { ret = drm_modeset_backoff(&ctx); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 7856a9b3f8a8..397b46b33739 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -331,6 +331,9 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf); /** * drm_gem_unmap_dma_buf - unmap_dma_buf implementation for GEM + * @attach: attachment to unmap buffer from + * @sgt: scatterlist info of the buffer to unmap + * @dir: direction of DMA transfer * * Not implemented. The unmap is done at drm_gem_map_detach(). This can be * used as the &dma_buf_ops.unmap_dma_buf callback. @@ -406,7 +409,10 @@ void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf) struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; - return dev->driver->gem_prime_vmap(obj); + if (dev->driver->gem_prime_vmap) + return dev->driver->gem_prime_vmap(obj); + else + return NULL; } EXPORT_SYMBOL(drm_gem_dmabuf_vmap); @@ -423,12 +429,15 @@ void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; - dev->driver->gem_prime_vunmap(obj, vaddr); + if (dev->driver->gem_prime_vunmap) + dev->driver->gem_prime_vunmap(obj, vaddr); } EXPORT_SYMBOL(drm_gem_dmabuf_vunmap); /** * drm_gem_dmabuf_kmap_atomic - map_atomic implementation for GEM + * @dma_buf: buffer to be mapped + * @page_num: page number within the buffer * * Not implemented. This can be used as the &dma_buf_ops.map_atomic callback. */ @@ -441,6 +450,9 @@ EXPORT_SYMBOL(drm_gem_dmabuf_kmap_atomic); /** * drm_gem_dmabuf_kunmap_atomic - unmap_atomic implementation for GEM + * @dma_buf: buffer to be unmapped + * @page_num: page number within the buffer + * @addr: virtual address of the buffer * * Not implemented. This can be used as the &dma_buf_ops.unmap_atomic callback. */ @@ -453,6 +465,8 @@ EXPORT_SYMBOL(drm_gem_dmabuf_kunmap_atomic); /** * drm_gem_dmabuf_kmap - map implementation for GEM + * @dma_buf: buffer to be mapped + * @page_num: page number within the buffer * * Not implemented. This can be used as the &dma_buf_ops.map callback. */ @@ -464,6 +478,9 @@ EXPORT_SYMBOL(drm_gem_dmabuf_kmap); /** * drm_gem_dmabuf_kunmap - unmap implementation for GEM + * @dma_buf: buffer to be unmapped + * @page_num: page number within the buffer + * @addr: virtual address of the buffer * * Not implemented. This can be used as the &dma_buf_ops.unmap callback. 
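The page-flip and cursor reworks above repeatedly resolve the current framebuffer from either the atomic or the legacy field; isolated as a helper sketch:

    /* Atomic drivers track the fb in plane->state, legacy drivers in
     * plane->fb, so both paths have to be checked. */
    static struct drm_framebuffer *plane_current_fb(struct drm_plane *plane)
    {
        return plane->state ? plane->state->fb : plane->fb;
    }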
*/ diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 8f4672daac7f..1f8031e30f53 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -169,9 +169,9 @@ struct drm_property *drm_property_create_enum(struct drm_device *dev, return NULL; for (i = 0; i < num_values; i++) { - ret = drm_property_add_enum(property, i, - props[i].type, - props[i].name); + ret = drm_property_add_enum(property, + props[i].type, + props[i].name); if (ret) { drm_property_destroy(dev, property); return NULL; @@ -209,7 +209,7 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, uint64_t supported_bits) { struct drm_property *property; - int i, ret, index = 0; + int i, ret; int num_values = hweight64(supported_bits); flags |= DRM_MODE_PROP_BITMASK; @@ -221,14 +221,9 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, if (!(supported_bits & (1ULL << props[i].type))) continue; - if (WARN_ON(index >= num_values)) { - drm_property_destroy(dev, property); - return NULL; - } - - ret = drm_property_add_enum(property, index++, - props[i].type, - props[i].name); + ret = drm_property_add_enum(property, + props[i].type, + props[i].name); if (ret) { drm_property_destroy(dev, property); return NULL; @@ -376,7 +371,6 @@ EXPORT_SYMBOL(drm_property_create_bool); /** * drm_property_add_enum - add a possible value to an enumeration property * @property: enumeration property to change - * @index: index of the new enumeration * @value: value of the new enumeration * @name: symbolic name of the new enumeration * @@ -388,10 +382,11 @@ EXPORT_SYMBOL(drm_property_create_bool); * Returns: * Zero on success, error code on failure. */ -int drm_property_add_enum(struct drm_property *property, int index, +int drm_property_add_enum(struct drm_property *property, uint64_t value, const char *name) { struct drm_property_enum *prop_enum; + int index = 0; if (WARN_ON(strlen(name) >= DRM_PROP_NAME_LEN)) return -EINVAL; @@ -411,8 +406,12 @@ int drm_property_add_enum(struct drm_property *property, int index, list_for_each_entry(prop_enum, &property->enum_list, head) { if (WARN_ON(prop_enum->value == value)) return -EINVAL; + index++; } + if (WARN_ON(index >= property->num_values)) + return -EINVAL; + prop_enum = kzalloc(sizeof(struct drm_property_enum), GFP_KERNEL); if (!prop_enum) return -ENOMEM; diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index 9817c1445ba9..8c057829b804 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -50,13 +50,25 @@ bool drm_rect_intersect(struct drm_rect *r1, const struct drm_rect *r2) } EXPORT_SYMBOL(drm_rect_intersect); +static u32 clip_scaled(u32 src, u32 dst, u32 clip) +{ + u64 tmp = mul_u32_u32(src, dst - clip); + + /* + * Round toward 1.0 when clipping so that we don't accidentally + * change upscaling to downscaling or vice versa. + */ + if (src < (dst << 16)) + return DIV_ROUND_UP_ULL(tmp, dst); + else + return DIV_ROUND_DOWN_ULL(tmp, dst); +} + /** * drm_rect_clip_scaled - perform a scaled clip operation * @src: source window rectangle * @dst: destination window rectangle * @clip: clip rectangle - * @hscale: horizontal scaling factor - * @vscale: vertical scaling factor * * Clip rectangle @dst by rectangle @clip. Clip rectangle @src by the * same amounts multiplied by @hscale and @vscale. 
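With the index argument dropped from drm_property_add_enum() above, the position is now derived internally from the current enum-list length; a sketch creating a two-entry enum property, where "my-enum" is a hypothetical name and error handling is elided:

    struct drm_property *prop;

    prop = drm_property_create(dev, DRM_MODE_PROP_ENUM, "my-enum", 2);
    if (prop) {
        drm_property_add_enum(prop, 0, "off");
        drm_property_add_enum(prop, 1, "on");
    }

The drm_rect_clip_scaled() rework begun above and continued in the next hunk drops the hscale/vscale arguments and derives the factors from the src/dst rectangles themselves; usage becomes (src, dst and clip are assumed to be the plane's source, destination and CRTC-bounds rectangles):

    bool visible;

    /* src is in 16.16 fixed point; the derived scale is rounded toward
     * 1.0 so that clipping cannot flip upscaling into downscaling. */
    visible = drm_rect_clip_scaled(&src, &dst, &clip);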
@@ -66,33 +78,44 @@ EXPORT_SYMBOL(drm_rect_intersect); * %false otherwise */ bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, - const struct drm_rect *clip, - int hscale, int vscale) + const struct drm_rect *clip) { int diff; diff = clip->x1 - dst->x1; if (diff > 0) { - int64_t tmp = src->x1 + (int64_t) diff * hscale; - src->x1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_w = clip_scaled(drm_rect_width(src), + drm_rect_width(dst), diff); + + src->x1 = clamp_t(int64_t, src->x2 - new_src_w, INT_MIN, INT_MAX); + dst->x1 = clip->x1; } diff = clip->y1 - dst->y1; if (diff > 0) { - int64_t tmp = src->y1 + (int64_t) diff * vscale; - src->y1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_h = clip_scaled(drm_rect_height(src), + drm_rect_height(dst), diff); + + src->y1 = clamp_t(int64_t, src->y2 - new_src_h, INT_MIN, INT_MAX); + dst->y1 = clip->y1; } diff = dst->x2 - clip->x2; if (diff > 0) { - int64_t tmp = src->x2 - (int64_t) diff * hscale; - src->x2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_w = clip_scaled(drm_rect_width(src), + drm_rect_width(dst), diff); + + src->x2 = clamp_t(int64_t, src->x1 + new_src_w, INT_MIN, INT_MAX); + dst->x2 = clip->x2; } diff = dst->y2 - clip->y2; if (diff > 0) { - int64_t tmp = src->y2 - (int64_t) diff * vscale; - src->y2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_h = clip_scaled(drm_rect_height(src), + drm_rect_height(dst), diff); + + src->y2 = clamp_t(int64_t, src->y1 + new_src_h, INT_MIN, INT_MAX); + dst->y2 = clip->y2; } - return drm_rect_intersect(dst, clip); + return drm_rect_visible(dst); } EXPORT_SYMBOL(drm_rect_clip_scaled); @@ -106,7 +129,10 @@ static int drm_calc_scale(int src, int dst) if (dst == 0) return 0; - scale = src / dst; + if (src > (dst << 16)) + return DIV_ROUND_UP(src, dst); + else + scale = src / dst; return scale; } @@ -121,6 +147,10 @@ static int drm_calc_scale(int src, int dst) * Calculate the horizontal scaling factor as * (@src width) / (@dst width). * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The horizontal scaling factor, or errno of out of limits. */ @@ -152,6 +182,10 @@ EXPORT_SYMBOL(drm_rect_calc_hscale); * Calculate the vertical scaling factor as * (@src height) / (@dst height). * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The vertical scaling factor, or errno of out of limits. */ @@ -189,6 +223,10 @@ EXPORT_SYMBOL(drm_rect_calc_vscale); * If the calculated scaling factor is above @max_vscale, * decrease the height of rectangle @src to compensate. * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The horizontal scaling factor. */ @@ -239,6 +277,10 @@ EXPORT_SYMBOL(drm_rect_calc_hscale_relaxed); * If the calculated scaling factor is above @max_vscale, * decrease the height of rectangle @src to compensate. * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The vertical scaling factor. */ @@ -373,8 +415,8 @@ EXPORT_SYMBOL(drm_rect_rotate); * them when doing a rotatation and its inverse. 
* That is, if you do :: * - * DRM_MODE_PROP_ROTATE(&r, width, height, rotation); - * DRM_MODE_ROTATE_inv(&r, width, height, rotation); + * drm_rect_rotate(&r, width, height, rotation); + * drm_rect_rotate_inv(&r, width, height, rotation); * * you will always get back the original rectangle. */ diff --git a/drivers/gpu/drm/drm_scdc_helper.c b/drivers/gpu/drm/drm_scdc_helper.c index 657ea5ab6c3f..870e25f1f788 100644 --- a/drivers/gpu/drm/drm_scdc_helper.c +++ b/drivers/gpu/drm/drm_scdc_helper.c @@ -141,7 +141,7 @@ bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter) ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, &status); if (ret < 0) { - DRM_ERROR("Failed to read scrambling status: %d\n", ret); + DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret); return false; } @@ -168,7 +168,7 @@ bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable) ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); if (ret < 0) { - DRM_ERROR("Failed to read TMDS config: %d\n", ret); + DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret); return false; } @@ -179,7 +179,7 @@ bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable) ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config); if (ret < 0) { - DRM_ERROR("Failed to enable scrambling: %d\n", ret); + DRM_DEBUG_KMS("Failed to enable scrambling: %d\n", ret); return false; } @@ -223,7 +223,7 @@ bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set) ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); if (ret < 0) { - DRM_ERROR("Failed to read TMDS config: %d\n", ret); + DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret); return false; } @@ -234,7 +234,7 @@ bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set) ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config); if (ret < 0) { - DRM_ERROR("Failed to set TMDS clock ratio: %d\n", ret); + DRM_DEBUG_KMS("Failed to set TMDS clock ratio: %d\n", ret); return false; } diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c index 987a353c7f72..7a00455ca568 100644 --- a/drivers/gpu/drm/drm_simple_kms_helper.c +++ b/drivers/gpu/drm/drm_simple_kms_helper.c @@ -64,13 +64,15 @@ static int drm_simple_kms_crtc_check(struct drm_crtc *crtc, static void drm_simple_kms_crtc_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct drm_plane *plane; struct drm_simple_display_pipe *pipe; pipe = container_of(crtc, struct drm_simple_display_pipe, crtc); if (!pipe->funcs || !pipe->funcs->enable) return; - pipe->funcs->enable(pipe, crtc->state); + plane = &pipe->plane; + pipe->funcs->enable(pipe, crtc->state, plane->state); } static void drm_simple_kms_crtc_disable(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 1c5b5ce1fd7f..b3c1daad1169 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -331,9 +331,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) struct device *kdev; int r; - if (minor->type == DRM_MINOR_CONTROL) - minor_str = "controlD%d"; - else if (minor->type == DRM_MINOR_RENDER) + if (minor->type == DRM_MINOR_RENDER) minor_str = "renderD%d"; else minor_str = "card%d"; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index ab50090d066c..23e73c2a19f4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -116,7 +116,7 @@ static int etnaviv_open(struct drm_device *dev, struct 
drm_file *file) drm_sched_entity_init(&gpu->sched, &ctx->sched_entity[i], &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], - 32, NULL); + NULL); } } diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 735ce47688f9..208bc27be3cc 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -1,6 +1,6 @@ config DRM_EXYNOS tristate "DRM Support for Samsung SoC EXYNOS Series" - depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM) + depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM) select DRM_KMS_HELPER select VIDEOMODE_HELPERS select SND_SOC_HDMI_CODEC if SND_SOC @@ -95,21 +95,31 @@ config DRM_EXYNOS_G2D help Choose this option if you want to use Exynos G2D for DRM. +config DRM_EXYNOS_IPP + bool + config DRM_EXYNOS_FIMC bool "FIMC" - depends on BROKEN && MFD_SYSCON + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos FIMC for DRM. config DRM_EXYNOS_ROTATOR bool "Rotator" - depends on BROKEN + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos Rotator for DRM. +config DRM_EXYNOS_SCALER + bool "Scaler" + select DRM_EXYNOS_IPP + help + Choose this option if you want to use Exynos Scaler for DRM. + config DRM_EXYNOS_GSC bool "GScaler" - depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n + depends on VIDEO_SAMSUNG_EXYNOS_GSC=n + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos GSC for DRM. diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index a51c5459bb13..3b323f1e0475 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -18,8 +18,10 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o +exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o +exynosdrm-$(CONFIG_DRM_EXYNOS_SCALER) += exynos_drm_scaler.o exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o exynosdrm-$(CONFIG_DRM_EXYNOS_MIC) += exynos_drm_mic.o diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index 964831dab102..86330f396784 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -162,7 +162,7 @@ static int exynos_dp_bind(struct device *dev, struct device *master, void *data) dp->drm_dev = drm_dev; dp->plat_data.dev_type = EXYNOS_DP; - dp->plat_data.power_on = exynos_dp_poweron; + dp->plat_data.power_on_start = exynos_dp_poweron; dp->plat_data.power_off = exynos_dp_poweroff; dp->plat_data.attach = exynos_dp_bridge_attach; dp->plat_data.get_modes = exynos_dp_get_modes; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index a518e9c6d6cc..a81b4a5e24a7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -27,35 +27,23 @@ #include "exynos_drm_fb.h" #include "exynos_drm_gem.h" #include "exynos_drm_plane.h" +#include "exynos_drm_ipp.h" #include "exynos_drm_vidi.h" #include "exynos_drm_g2d.h" #include "exynos_drm_iommu.h" #define DRIVER_NAME "exynos" #define DRIVER_DESC "Samsung SoC DRM" -#define DRIVER_DATE "20110530" -#define DRIVER_MAJOR 1 -#define DRIVER_MINOR 0 - -int exynos_atomic_check(struct drm_device *dev, - struct 
drm_atomic_state *state) -{ - int ret; - - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; +#define DRIVER_DATE "20180330" - ret = drm_atomic_normalize_zpos(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); - if (ret) - return ret; - - return ret; -} +/* + * Interface history: + * + * 1.0 - Original version + * 1.1 - Upgrade IPP driver to version 2.0 + */ +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 1 static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) { @@ -108,6 +96,16 @@ static const struct drm_ioctl_desc exynos_ioctls[] = { DRM_AUTH | DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_RESOURCES, + exynos_drm_ipp_get_res_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_CAPS, exynos_drm_ipp_get_caps_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_LIMITS, + exynos_drm_ipp_get_limits_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_COMMIT, exynos_drm_ipp_commit_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), }; static const struct file_operations exynos_drm_driver_fops = { @@ -204,6 +202,7 @@ struct exynos_drm_driver_info { #define DRM_COMPONENT_DRIVER BIT(0) /* supports component framework */ #define DRM_VIRTUAL_DEVICE BIT(1) /* create virtual platform device */ #define DRM_DMA_DEVICE BIT(2) /* can be used for dma allocations */ +#define DRM_FIMC_DEVICE BIT(3) /* devices shared with V4L2 subsystem */ #define DRV_PTR(drv, cond) (IS_ENABLED(cond) ? &drv : NULL) @@ -243,10 +242,16 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = { DRV_PTR(g2d_driver, CONFIG_DRM_EXYNOS_G2D), }, { DRV_PTR(fimc_driver, CONFIG_DRM_EXYNOS_FIMC), + DRM_COMPONENT_DRIVER | DRM_FIMC_DEVICE, }, { DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR), + DRM_COMPONENT_DRIVER + }, { + DRV_PTR(scaler_driver, CONFIG_DRM_EXYNOS_SCALER), + DRM_COMPONENT_DRIVER }, { DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC), + DRM_COMPONENT_DRIVER }, { &exynos_drm_platform_driver, DRM_VIRTUAL_DEVICE @@ -274,7 +279,11 @@ static struct component_match *exynos_drm_match_add(struct device *dev) &info->driver->driver, (void *)platform_bus_type.match))) { put_device(p); - component_match_add(dev, &match, compare_dev, d); + + if (!(info->flags & DRM_FIMC_DEVICE) || + exynos_drm_check_fimc_device(d) == 0) + component_match_add(dev, &match, + compare_dev, d); p = d; } put_device(p); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index df2262f70d91..0f6d079a55c9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -273,9 +273,17 @@ static inline int exynos_dpi_bind(struct drm_device *dev, } #endif +#ifdef CONFIG_DRM_EXYNOS_FIMC +int exynos_drm_check_fimc_device(struct device *dev); +#else +static inline int exynos_drm_check_fimc_device(struct device *dev) +{ + return 0; +} +#endif + int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock); -int exynos_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); extern struct platform_driver fimd_driver; @@ -289,6 +297,7 @@ extern struct platform_driver vidi_driver; extern struct platform_driver g2d_driver; extern struct platform_driver fimc_driver; extern struct platform_driver rotator_driver; +extern struct platform_driver scaler_driver; extern struct platform_driver gsc_driver; extern struct 
platform_driver mic_driver; #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 7904ffa9abfb..eae44fd714f0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -270,7 +270,6 @@ struct exynos_dsi { u32 lanes; u32 mode_flags; u32 format; - struct videomode vm; int state; struct drm_property *brightness; @@ -881,30 +880,30 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi) static void exynos_dsi_set_display_mode(struct exynos_dsi *dsi) { - struct videomode *vm = &dsi->vm; + struct drm_display_mode *m = &dsi->encoder.crtc->state->adjusted_mode; unsigned int num_bits_resol = dsi->driver_data->num_bits_resol; u32 reg; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { reg = DSIM_CMD_ALLOW(0xf) - | DSIM_STABLE_VFP(vm->vfront_porch) - | DSIM_MAIN_VBP(vm->vback_porch); + | DSIM_STABLE_VFP(m->vsync_start - m->vdisplay) + | DSIM_MAIN_VBP(m->vtotal - m->vsync_end); exynos_dsi_write(dsi, DSIM_MVPORCH_REG, reg); - reg = DSIM_MAIN_HFP(vm->hfront_porch) - | DSIM_MAIN_HBP(vm->hback_porch); + reg = DSIM_MAIN_HFP(m->hsync_start - m->hdisplay) + | DSIM_MAIN_HBP(m->htotal - m->hsync_end); exynos_dsi_write(dsi, DSIM_MHPORCH_REG, reg); - reg = DSIM_MAIN_VSA(vm->vsync_len) - | DSIM_MAIN_HSA(vm->hsync_len); + reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start) + | DSIM_MAIN_HSA(m->hsync_end - m->hsync_start); exynos_dsi_write(dsi, DSIM_MSYNC_REG, reg); } - reg = DSIM_MAIN_HRESOL(vm->hactive, num_bits_resol) | - DSIM_MAIN_VRESOL(vm->vactive, num_bits_resol); + reg = DSIM_MAIN_HRESOL(m->hdisplay, num_bits_resol) | + DSIM_MAIN_VRESOL(m->vdisplay, num_bits_resol); exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg); - dev_dbg(dsi->dev, "LCD size = %dx%d\n", vm->hactive, vm->vactive); + dev_dbg(dsi->dev, "LCD size = %dx%d\n", m->hdisplay, m->vdisplay); } static void exynos_dsi_set_display_enable(struct exynos_dsi *dsi, bool enable) @@ -1485,26 +1484,7 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder) return 0; } -static void exynos_dsi_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct exynos_dsi *dsi = encoder_to_dsi(encoder); - struct videomode *vm = &dsi->vm; - struct drm_display_mode *m = adjusted_mode; - - vm->hactive = m->hdisplay; - vm->vactive = m->vdisplay; - vm->vfront_porch = m->vsync_start - m->vdisplay; - vm->vback_porch = m->vtotal - m->vsync_end; - vm->vsync_len = m->vsync_end - m->vsync_start; - vm->hfront_porch = m->hsync_start - m->hdisplay; - vm->hback_porch = m->htotal - m->hsync_end; - vm->hsync_len = m->hsync_end - m->hsync_start; -} - static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = { - .mode_set = exynos_dsi_mode_set, .enable = exynos_dsi_enable, .disable = exynos_dsi_disable, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index f0e79178bde6..7fcc1a7ab1a0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -161,7 +161,7 @@ static struct drm_mode_config_helper_funcs exynos_drm_mode_config_helpers = { static const struct drm_mode_config_funcs exynos_drm_mode_config_funcs = { .fb_create = exynos_user_fb_create, .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = exynos_atomic_check, + .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; @@ -182,4 +182,6 @@ void exynos_drm_mode_config_init(struct drm_device *dev) 
dev->mode_config.helper_private = &exynos_drm_mode_config_helpers; dev->mode_config.allow_fb_modifiers = true; + + dev->mode_config.normalize_zpos = true; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 5b18b5c5fdf2..4dfbfc7f3b84 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -12,6 +12,7 @@ * */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> @@ -24,8 +25,8 @@ #include <drm/exynos_drm.h> #include "regs-fimc.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" -#include "exynos_drm_fimc.h" /* * FIMC stands for Fully Interactive Mobile Camera and @@ -33,23 +34,6 @@ * input DMA reads image data from the memory. * output DMA writes image data to memory. * FIMC supports image rotation and image effect functions. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> FIMC H/W ----> Memory. - * Writeback operation : supports cloned screen with FIMD. - * FIMD ----> FIMC H/W ----> Memory. - * Output operation : supports direct display using local path. - * Memory ----> FIMC H/W ----> FIMD. - */ - -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. added check_prepare api for right register. - * 5. need to add supported list in prop_list. - * 6. check prescaler/scaler optimization. */ #define FIMC_MAX_DEVS 4 @@ -59,29 +43,19 @@ #define FIMC_BUF_STOP 1 #define FIMC_BUF_START 2 #define FIMC_WIDTH_ITU_709 1280 -#define FIMC_REFRESH_MAX 60 -#define FIMC_REFRESH_MIN 12 -#define FIMC_CROP_MAX 8192 -#define FIMC_CROP_MIN 32 -#define FIMC_SCALE_MAX 4224 -#define FIMC_SCALE_MIN 32 +#define FIMC_AUTOSUSPEND_DELAY 2000 + +static unsigned int fimc_mask = 0xc; +module_param_named(fimc_devs, fimc_mask, uint, 0644); +MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM"); #define get_fimc_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct fimc_context, ippdrv); -enum fimc_wb { - FIMC_WB_NONE, - FIMC_WB_A, - FIMC_WB_B, -}; enum { FIMC_CLK_LCLK, FIMC_CLK_GATE, FIMC_CLK_WB_A, FIMC_CLK_WB_B, - FIMC_CLK_MUX, - FIMC_CLK_PARENT, FIMC_CLKS_MAX }; @@ -90,12 +64,8 @@ static const char * const fimc_clock_names[] = { [FIMC_CLK_GATE] = "fimc", [FIMC_CLK_WB_A] = "pxl_async0", [FIMC_CLK_WB_B] = "pxl_async1", - [FIMC_CLK_MUX] = "mux", - [FIMC_CLK_PARENT] = "parent", }; -#define FIMC_DEFAULT_LCLK_FREQUENCY 133000000UL - /* * A structure of scaler. * @@ -107,7 +77,7 @@ static const char * const fimc_clock_names[] = { * @vratio: vertical ratio. */ struct fimc_scaler { - bool range; + bool range; bool bypass; bool up_h; bool up_v; @@ -116,56 +86,32 @@ struct fimc_scaler { }; /* - * A structure of scaler capability. - * - * find user manual table 43-1. - * @in_hori: scaler input horizontal size. - * @bypass: scaler bypass mode. - * @dst_h_wo_rot: target horizontal size without output rotation. - * @dst_h_rot: target horizontal size with output rotation. - * @rl_w_wo_rot: real width without input rotation. - * @rl_h_rot: real height without output rotation. 
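The exynos change above, dropping exynos_atomic_check in favour of drm_atomic_helper_check, relies on the new normalize_zpos flag; the generic pattern, as a sketch, for any driver whose custom check existed only to normalize zpos:

    /* .atomic_check in the mode_config funcs becomes the stock helper. */
    dev->mode_config.funcs = &mode_config_funcs;
    /* Ask the core to run drm_atomic_normalize_zpos() during checks. */
    dev->mode_config.normalize_zpos = true;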
- */ -struct fimc_capability { - /* scaler */ - u32 in_hori; - u32 bypass; - /* output rotator */ - u32 dst_h_wo_rot; - u32 dst_h_rot; - /* input rotator */ - u32 rl_w_wo_rot; - u32 rl_h_rot; -}; - -/* * A structure of fimc context. * - * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. * @lock: locking of operations. * @clocks: fimc clocks. - * @clk_frequency: LCLK clock frequency. - * @sysreg: handle to SYSREG block regmap. * @sc: scaler infomations. * @pol: porarity of writeback. * @id: fimc id. * @irq: irq number. - * @suspended: qos operations. */ struct fimc_context { - struct exynos_drm_ippdrv ippdrv; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + struct exynos_drm_ipp_task *task; + struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct resource *regs_res; void __iomem *regs; spinlock_t lock; struct clk *clocks[FIMC_CLKS_MAX]; - u32 clk_frequency; - struct regmap *sysreg; struct fimc_scaler sc; int id; int irq; - bool suspended; }; static u32 fimc_read(struct fimc_context *ctx, u32 reg) @@ -217,19 +163,10 @@ static void fimc_sw_reset(struct fimc_context *ctx) fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ); } -static int fimc_set_camblk_fimd0_wb(struct fimc_context *ctx) -{ - return regmap_update_bits(ctx->sysreg, SYSREG_CAMERA_BLK, - SYSREG_FIMD0WB_DEST_MASK, - ctx->id << SYSREG_FIMD0WB_DEST_SHIFT); -} - -static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb) +static void fimc_set_type_ctrl(struct fimc_context *ctx) { u32 cfg; - DRM_DEBUG_KMS("wb[%d]\n", wb); - cfg = fimc_read(ctx, EXYNOS_CIGCTRL); cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK | EXYNOS_CIGCTRL_SELCAM_ITU_MASK | @@ -238,23 +175,10 @@ static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb) EXYNOS_CIGCTRL_SELWB_CAMIF_MASK | EXYNOS_CIGCTRL_SELWRITEBACK_MASK); - switch (wb) { - case FIMC_WB_A: - cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_A | - EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK); - break; - case FIMC_WB_B: - cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_B | - EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK); - break; - case FIMC_WB_NONE: - default: - cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A | - EXYNOS_CIGCTRL_SELWRITEBACK_A | - EXYNOS_CIGCTRL_SELCAM_MIPI_A | - EXYNOS_CIGCTRL_SELCAM_FIMC_ITU); - break; - } + cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A | + EXYNOS_CIGCTRL_SELWRITEBACK_A | + EXYNOS_CIGCTRL_SELCAM_MIPI_A | + EXYNOS_CIGCTRL_SELCAM_FIMC_ITU); fimc_write(ctx, cfg, EXYNOS_CIGCTRL); } @@ -296,7 +220,6 @@ static void fimc_clear_irq(struct fimc_context *ctx) static bool fimc_check_ovf(struct fimc_context *ctx) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 status, flag; status = fimc_read(ctx, EXYNOS_CISTATUS); @@ -310,7 +233,7 @@ static bool fimc_check_ovf(struct fimc_context *ctx) EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB | EXYNOS_CIWDOFST_CLROVFICR); - dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", + dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); return true; } @@ -376,10 +299,8 @@ static void fimc_handle_lastend(struct fimc_context *ctx, bool enable) fimc_write(ctx, cfg, EXYNOS_CIOCTRL); } - -static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) +static void fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -392,12 +313,12 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) case 
DRM_FORMAT_RGB565: cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; default: /* bypass */ break; @@ -438,20 +359,13 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CBCR | EXYNOS_MSCTRL_C_INT_IN_2PLANE); break; - default: - dev_err(ippdrv->dev, "invalid source yuv order 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_MSCTRL); - - return 0; } -static int fimc_src_set_fmt(struct device *dev, u32 fmt) +static void fimc_src_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -485,9 +399,6 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_NV21: cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420; break; - default: - dev_err(ippdrv->dev, "invalid source format 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_MSCTRL); @@ -495,22 +406,22 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt) cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM); cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK; - cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR; + if (tiled) + cfg |= EXYNOS_CIDMAPARAM_R_MODE_64X32; + else + cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR; fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM); - return fimc_src_set_fmt_order(ctx, fmt); + fimc_src_set_fmt_order(ctx, fmt); } -static int fimc_src_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg1, cfg2; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); + DRM_DEBUG_KMS("rotation[%x]\n", rotation); cfg1 = fimc_read(ctx, EXYNOS_MSCTRL); cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR | @@ -520,61 +431,56 @@ static int fimc_src_set_transf(struct device *dev, cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_X) cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_90: + case DRM_MODE_ROTATE_90: cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR | EXYNOS_MSCTRL_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR | EXYNOS_MSCTRL_FLIP_Y_MIRROR); cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 &= 
~EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } fimc_write(ctx, cfg1, EXYNOS_MSCTRL); fimc_write(ctx, cfg2, EXYNOS_CITRGFMT); - *swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0; - - return 0; } -static int fimc_set_window(struct fimc_context *ctx, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_set_window(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { u32 cfg, h1, h2, v1, v2; /* cropped image */ - h1 = pos->x; - h2 = sz->hsize - pos->w - pos->x; - v1 = pos->y; - v2 = sz->vsize - pos->h - pos->y; + h1 = buf->rect.x; + h2 = buf->buf.width - buf->rect.w - buf->rect.x; + v1 = buf->rect.y; + v2 = buf->buf.height - buf->rect.h - buf->rect.y; DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n", - pos->x, pos->y, pos->w, pos->h, sz->hsize, sz->vsize); + buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h, + buf->buf.width, buf->buf.height); DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2); /* @@ -592,42 +498,30 @@ static int fimc_set_window(struct fimc_context *ctx, cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) | EXYNOS_CIWDOFST2_WINVEROFST2(v2)); fimc_write(ctx, cfg, EXYNOS_CIWDOFST2); - - return 0; } -static int fimc_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_src_set_size(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_pos img_pos = *pos; - struct drm_exynos_sz img_sz = *sz; u32 cfg; - DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n", - swap, sz->hsize, sz->vsize); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) | - EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize)); + cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) | + EXYNOS_ORGISIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGISIZE); - DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - img_sz.hsize = sz->vsize; - img_sz.vsize = sz->hsize; - } + DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y, + buf->rect.w, buf->rect.h); /* set input DMA image size */ cfg = fimc_read(ctx, EXYNOS_CIREAL_ISIZE); cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK | EXYNOS_CIREAL_ISIZE_WIDTH_MASK); - cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) | - EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h)); + cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(buf->rect.w) | + EXYNOS_CIREAL_ISIZE_HEIGHT(buf->rect.h)); fimc_write(ctx, cfg, EXYNOS_CIREAL_ISIZE); /* @@ -635,91 +529,34 @@ static int fimc_src_set_size(struct device *dev, int swap, * for now, we support only ITU601 8 bit mode */ cfg = (EXYNOS_CISRCFMT_ITU601_8BIT | - EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) | - EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize)); + EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) | + EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_CISRCFMT); /* offset Y(RGB), Cb, Cr */ - cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIIYOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIIYOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIIYOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIIYOFF); - cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIICBOFF_VERTICAL(img_pos.y)); + cfg = 
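/*
 * fimc_set_window() above programs the crop window as offsets from each
 * edge of the full buffer rather than as x/y/w/h. The arithmetic, as a
 * standalone sketch (assumes the rectangle fits inside the buffer, which
 * the IPP core guarantees before commit):
 */
#include <stdio.h>

struct rect { unsigned int x, y, w, h; };

static void window_offsets(const struct rect *r,
			   unsigned int width, unsigned int height)
{
	unsigned int h1 = r->x;			/* pixels left of the window */
	unsigned int h2 = width - r->w - r->x;	/* pixels right of it */
	unsigned int v1 = r->y;			/* lines above */
	unsigned int v2 = height - r->h - r->y;	/* lines below */

	printf("h1=%u h2=%u v1=%u v2=%u\n", h1, h2, v1, v2);
}

int main(void)
{
	/* a 640x480 crop at (16, 8) inside a 1920x1080 frame */
	struct rect r = { 16, 8, 640, 480 };

	window_offsets(&r, 1920, 1080);	/* h1=16 h2=1264 v1=8 v2=592 */
	return 0;
}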
(EXYNOS_CIICBOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIICBOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIICBOFF); - cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIICROFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIICROFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIICROFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIICROFF); - return fimc_set_window(ctx, &img_pos, &img_sz); + fimc_set_window(ctx, buf); } -static int fimc_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void fimc_src_set_addr(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > FIMC_MAX_SRC) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -ENOMEM; - } - - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - config = &property->config[EXYNOS_DRM_OPS_SRC]; - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y], - EXYNOS_CIIYSA0); - - if (config->fmt == DRM_FORMAT_YVU420) { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIICBSA0); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIICRSA0); - } else { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIICBSA0); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIICRSA0); - } - break; - case IPP_BUF_DEQUEUE: - fimc_write(ctx, 0x0, EXYNOS_CIIYSA0); - fimc_write(ctx, 0x0, EXYNOS_CIICBSA0); - fimc_write(ctx, 0x0, EXYNOS_CIICRSA0); - break; - default: - /* bypass */ - break; - } - - return 0; + fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIIYSA(0)); + fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIICBSA(0)); + fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIICRSA(0)); } -static struct exynos_drm_ipp_ops fimc_src_ops = { - .set_fmt = fimc_src_set_fmt, - .set_transf = fimc_src_set_transf, - .set_size = fimc_src_set_size, - .set_addr = fimc_src_set_addr, -}; - -static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) +static void fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -732,11 +569,11 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) case DRM_FORMAT_RGB565: cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_RGB888: cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_XRGB8888: cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 | EXYNOS_CISCCTRL_EXTRGB_EXTENSION); @@ -784,20 +621,13 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR; cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE; break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CIOCTRL); - - return 0; } -static int fimc_dst_set_fmt(struct device *dev, u32 fmt) +static void fimc_dst_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled) { - struct fimc_context 
*ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -837,10 +667,6 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_NV21: cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420; break; - default: - dev_err(ippdrv->dev, "invalid target format 0x%x.\n", - fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CITRGFMT); @@ -849,73 +675,67 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt) cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM); cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK; - cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR; + if (tiled) + cfg |= EXYNOS_CIDMAPARAM_W_MODE_64X32; + else + cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR; fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM); - return fimc_dst_set_fmt_order(ctx, fmt); + fimc_dst_set_fmt_order(ctx, fmt); } -static int fimc_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void fimc_dst_set_transf(struct fimc_context *ctx, unsigned int rotation) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); + DRM_DEBUG_KMS("rotation[0x%x]\n", rotation); cfg = fimc_read(ctx, EXYNOS_CITRGFMT); cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK; cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_X) cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_90: + case DRM_MODE_ROTATE_90: cfg |= EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg |= (EXYNOS_CITRGFMT_FLIP_X_MIRROR | EXYNOS_CITRGFMT_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg |= (EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE | EXYNOS_CITRGFMT_FLIP_X_MIRROR | EXYNOS_CITRGFMT_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CITRGFMT); - *swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 
1 : 0; - - return 0; } static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, - struct drm_exynos_pos *src, struct drm_exynos_pos *dst) + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg, cfg_ext, shfactor; u32 pre_dst_width, pre_dst_height; u32 hfactor, vfactor; @@ -942,13 +762,13 @@ static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, /* fimc_ippdrv_check_property assures that dividers are not null */ hfactor = fls(src_w / dst_w / 2); if (hfactor > FIMC_SHFACTOR / 2) { - dev_err(ippdrv->dev, "failed to get ratio horizontal.\n"); + dev_err(ctx->dev, "failed to get ratio horizontal.\n"); return -EINVAL; } vfactor = fls(src_h / dst_h / 2); if (vfactor > FIMC_SHFACTOR / 2) { - dev_err(ippdrv->dev, "failed to get ratio vertical.\n"); + dev_err(ctx->dev, "failed to get ratio vertical.\n"); return -EINVAL; } @@ -1019,83 +839,77 @@ static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc) fimc_write(ctx, cfg_ext, EXYNOS_CIEXTEN); } -static int fimc_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_dst_set_size(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_pos img_pos = *pos; - struct drm_exynos_sz img_sz = *sz; - u32 cfg; + u32 cfg, cfg_ext; - DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n", - swap, sz->hsize, sz->vsize); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) | - EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize)); + cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) | + EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGOSIZE); - DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h); + DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y, + buf->rect.w, buf->rect.h); /* CSC ITU */ cfg = fimc_read(ctx, EXYNOS_CIGCTRL); cfg &= ~EXYNOS_CIGCTRL_CSC_MASK; - if (sz->hsize >= FIMC_WIDTH_ITU_709) + if (buf->buf.width >= FIMC_WIDTH_ITU_709) cfg |= EXYNOS_CIGCTRL_CSC_ITU709; else cfg |= EXYNOS_CIGCTRL_CSC_ITU601; fimc_write(ctx, cfg, EXYNOS_CIGCTRL); - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - img_sz.hsize = sz->vsize; - img_sz.vsize = sz->hsize; - } + cfg_ext = fimc_read(ctx, EXYNOS_CITRGFMT); /* target image size */ cfg = fimc_read(ctx, EXYNOS_CITRGFMT); cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK | EXYNOS_CITRGFMT_TARGETV_MASK); - cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) | - EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h)); + if (cfg_ext & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) + cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.h) | + EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.w)); + else + cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.w) | + EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.h)); fimc_write(ctx, cfg, EXYNOS_CITRGFMT); /* target area */ - cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h); + cfg = EXYNOS_CITAREA_TARGET_AREA(buf->rect.w * buf->rect.h); fimc_write(ctx, cfg, EXYNOS_CITAREA); /* offset Y(RGB), Cb, Cr */ - cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOYOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOYOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOYOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOYOFF); - cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOCBOFF_VERTICAL(img_pos.y)); + cfg = 
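/*
 * The prescaler ratio below is derived with fls(): for a downscale of
 * src/dst, fls(src / dst / 2) gives the power-of-two pre-shift. A sketch
 * with fls() open-coded the way the kernel defines it (1-based index of
 * the most significant set bit, fls(0) == 0); the SHFACTOR bound here is
 * an assumed stand-in for the driver's FIMC_SHFACTOR:
 */
#include <stdio.h>

static int fls(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

#define SHFACTOR 10	/* assumption standing in for FIMC_SHFACTOR */

static int prescale_shift(unsigned int src, unsigned int dst)
{
	int factor = fls(src / dst / 2);

	if (factor > SHFACTOR / 2)
		return -1;	/* ratio too large for the prescaler */
	return factor;
}

int main(void)
{
	printf("4096->1024: shift %d\n", prescale_shift(4096, 1024)); /* fls(2) = 2 */
	printf("1920->1080: shift %d\n", prescale_shift(1920, 1080)); /* fls(0) = 0 */
	return 0;
}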
(EXYNOS_CIOCBOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOCBOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOCBOFF); - cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOCROFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOCROFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOCROFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOCROFF); - - return 0; } static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) + bool enqueue) { unsigned long flags; u32 buf_num; u32 cfg; - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); + DRM_DEBUG_KMS("buf_id[%d]enqueu[%d]\n", buf_id, enqueue); spin_lock_irqsave(&ctx->lock, flags); cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ); - if (buf_type == IPP_BUF_ENQUEUE) + if (enqueue) cfg |= (1 << buf_id); else cfg &= ~(1 << buf_id); @@ -1104,88 +918,29 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, buf_num = hweight32(cfg); - if (buf_type == IPP_BUF_ENQUEUE && buf_num >= FIMC_BUF_START) + if (enqueue && buf_num >= FIMC_BUF_START) fimc_mask_irq(ctx, true); - else if (buf_type == IPP_BUF_DEQUEUE && buf_num <= FIMC_BUF_STOP) + else if (!enqueue && buf_num <= FIMC_BUF_STOP) fimc_mask_irq(ctx, false); spin_unlock_irqrestore(&ctx->lock, flags); } -static int fimc_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void fimc_dst_set_addr(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); + fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIOYSA(0)); + fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIOCBSA(0)); + fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIOCRSA(0)); - if (buf_id > FIMC_MAX_DST) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -ENOMEM; - } - - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - config = &property->config[EXYNOS_DRM_OPS_DST]; - - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y], - EXYNOS_CIOYSA(buf_id)); - - if (config->fmt == DRM_FORMAT_YVU420) { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIOCRSA(buf_id)); - } else { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIOCRSA(buf_id)); - } - break; - case IPP_BUF_DEQUEUE: - fimc_write(ctx, 0x0, EXYNOS_CIOYSA(buf_id)); - fimc_write(ctx, 0x0, EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, 0x0, EXYNOS_CIOCRSA(buf_id)); - break; - default: - /* bypass */ - break; - } - - fimc_dst_set_buf_seq(ctx, buf_id, buf_type); - - return 0; + fimc_dst_set_buf_seq(ctx, 0, true); } -static struct exynos_drm_ipp_ops fimc_dst_ops = { - .set_fmt = fimc_dst_set_fmt, - .set_transf = fimc_dst_set_transf, - .set_size = fimc_dst_set_size, - .set_addr = fimc_dst_set_addr, -}; +static void fimc_stop(struct fimc_context *ctx); static irqreturn_t fimc_irq_handler(int irq, void *dev_id) { struct fimc_context *ctx = dev_id; - struct exynos_drm_ippdrv *ippdrv 
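/*
 * fimc_dst_set_buf_seq() below keeps one bit per destination buffer in
 * CIFCNTSEQ and uses the population count of that mask to decide when to
 * unmask or mask the frame-done interrupt. A standalone sketch of that
 * hysteresis; the START/STOP thresholds are assumed values standing in
 * for FIMC_BUF_START/FIMC_BUF_STOP:
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define BUF_START 6	/* assumed thresholds */
#define BUF_STOP  2

static uint32_t seq;		/* mirrors the CIFCNTSEQ register */
static bool irq_enabled;

static void set_buf_seq(unsigned int buf_id, bool enqueue)
{
	unsigned int count;

	if (enqueue)
		seq |= 1u << buf_id;
	else
		seq &= ~(1u << buf_id);

	count = __builtin_popcount(seq);	/* hweight32() equivalent */

	if (enqueue && count >= BUF_START)
		irq_enabled = true;
	else if (!enqueue && count <= BUF_STOP)
		irq_enabled = false;
}

int main(void)
{
	for (unsigned int i = 0; i < 8; i++)
		set_buf_seq(i, true);
	printf("after 8 enqueues: irq=%d\n", irq_enabled);	/* 1 */
	for (unsigned int i = 0; i < 6; i++)
		set_buf_seq(i, false);
	printf("after 6 dequeues: irq=%d\n", irq_enabled);	/* 0 */
	return 0;
}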
= &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = - c_node->event_work; int buf_id; DRM_DEBUG_KMS("fimc id[%d]\n", ctx->id); @@ -1203,170 +958,19 @@ static irqreturn_t fimc_irq_handler(int irq, void *dev_id) DRM_DEBUG_KMS("buf_id[%d]\n", buf_id); - fimc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = buf_id; - queue_work(ippdrv->event_workq, &event_work->work); - - return IRQ_HANDLED; -} - -static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->writeback = 1; - prop_list->refresh_min = FIMC_REFRESH_MIN; - prop_list->refresh_max = FIMC_REFRESH_MAX; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_NONE) | - (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 1; - prop_list->crop = 1; - prop_list->crop_max.hsize = FIMC_CROP_MAX; - prop_list->crop_max.vsize = FIMC_CROP_MAX; - prop_list->crop_min.hsize = FIMC_CROP_MIN; - prop_list->crop_min.vsize = FIMC_CROP_MIN; - prop_list->scale = 1; - prop_list->scale_max.hsize = FIMC_SCALE_MAX; - prop_list->scale_max.vsize = FIMC_SCALE_MAX; - prop_list->scale_min.hsize = FIMC_SCALE_MIN; - prop_list->scale_min.vsize = FIMC_SCALE_MIN; - - return 0; -} - -static inline bool fimc_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} - -static int fimc_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) -{ - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos *pos; - struct drm_exynos_sz *sz; - bool swap; - int i; - - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; - - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - /* check for flip */ - if (!fimc_check_drm_flip(config->flip)) { - DRM_ERROR("invalid flip.\n"); - goto err_property; - } - - /* check for degree */ - switch (config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - break; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - swap = false; - break; - default: - DRM_ERROR("invalid degree.\n"); - goto err_property; - } - - /* check for buffer bound */ - if ((pos->x + pos->w > sz->hsize) || - (pos->y + pos->h > sz->vsize)) { - DRM_ERROR("out of buf bound.\n"); - goto err_property; - } + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - /* check for crop */ - if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) { - if (swap) { - if ((pos->h < pp->crop_min.hsize) || - (sz->vsize > pp->crop_max.hsize) || - (pos->w < pp->crop_min.vsize) || - (sz->hsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->crop_min.hsize) || - (sz->hsize > pp->crop_max.hsize) || - (pos->h < pp->crop_min.vsize) || - (sz->vsize > pp->crop_max.vsize)) { - DRM_ERROR("out of 
crop size.\n"); - goto err_property; - } - } - } - - /* check for scale */ - if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) { - if (swap) { - if ((pos->h < pp->scale_min.hsize) || - (sz->vsize > pp->scale_max.hsize) || - (pos->w < pp->scale_min.vsize) || - (sz->hsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->scale_min.hsize) || - (sz->hsize > pp->scale_max.hsize) || - (pos->h < pp->scale_min.vsize) || - (sz->vsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } - } + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, 0); } - return 0; + fimc_dst_set_buf_seq(ctx, buf_id, false); + fimc_stop(ctx); -err_property: - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; - - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n", - i ? "dst" : "src", config->flip, config->degree, - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize); - } - - return -EINVAL; + return IRQ_HANDLED; } static void fimc_clear_addr(struct fimc_context *ctx) @@ -1386,10 +990,8 @@ static void fimc_clear_addr(struct fimc_context *ctx) } } -static int fimc_ippdrv_reset(struct device *dev) +static void fimc_reset(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - /* reset h/w block */ fimc_sw_reset(ctx); @@ -1397,82 +999,26 @@ static int fimc_ippdrv_reset(struct device *dev) memset(&ctx->sc, 0x0, sizeof(ctx->sc)); fimc_clear_addr(ctx); - - return 0; } -static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void fimc_start(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX]; - struct drm_exynos_ipp_set_wb set_wb; - int ret, i; u32 cfg0, cfg1; - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - fimc_mask_irq(ctx, true); - for_each_ipp_ops(i) { - config = &property->config[i]; - img_pos[i] = config->pos; - } - - ret = fimc_set_prescaler(ctx, &ctx->sc, - &img_pos[EXYNOS_DRM_OPS_SRC], - &img_pos[EXYNOS_DRM_OPS_DST]); - if (ret) { - dev_err(dev, "failed to set prescaler.\n"); - return ret; - } - - /* If set ture, we can save jpeg about screen */ + /* If set true, we can save jpeg about screen */ fimc_handle_jpeg(ctx, false); fimc_set_scaler(ctx, &ctx->sc); - switch (cmd) { - case IPP_CMD_M2M: - fimc_set_type_ctrl(ctx, FIMC_WB_NONE); - fimc_handle_lastend(ctx, false); - - /* setup dma */ - cfg0 = fimc_read(ctx, EXYNOS_MSCTRL); - cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK; - cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY; - fimc_write(ctx, cfg0, EXYNOS_MSCTRL); - break; - case IPP_CMD_WB: - fimc_set_type_ctrl(ctx, FIMC_WB_A); - fimc_handle_lastend(ctx, true); - - /* setup FIMD */ - ret = fimc_set_camblk_fimd0_wb(ctx); - if (ret < 0) { - dev_err(dev, "camblk setup failed.\n"); - return ret; - } + fimc_set_type_ctrl(ctx); + fimc_handle_lastend(ctx, false); - set_wb.enable = 1; - set_wb.refresh = property->refresh_rate; - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case 
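/*
 * The removed fimc_ippdrv_check_property() validated every job inside
 * the driver; after this patch the IPP core does the equivalent via the
 * per-format limits tables. The essence of the old check, as a
 * standalone validator (the min/max limit values here are placeholders,
 * not the driver's):
 */
#include <stdio.h>
#include <stdbool.h>

struct rect { unsigned int x, y, w, h; };

static bool rect_fits(const struct rect *r,
		      unsigned int width, unsigned int height)
{
	return r->x + r->w <= width && r->y + r->h <= height;
}

static bool crop_in_range(const struct rect *r, bool rot90,
			  unsigned int min, unsigned int max)
{
	/* a 90/270 degree rotation swaps which axis each limit applies to */
	unsigned int w = rot90 ? r->h : r->w;
	unsigned int h = rot90 ? r->w : r->h;

	return w >= min && w <= max && h >= min && h <= max;
}

int main(void)
{
	struct rect r = { 0, 0, 720, 1280 };

	printf("fits:  %d\n", rect_fits(&r, 1280, 720));	  /* 0 */
	printf("range: %d\n", crop_in_range(&r, true, 32, 1920)); /* 1 */
	return 0;
}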
IPP_CMD_OUTPUT: - default: - ret = -EINVAL; - dev_err(dev, "invalid operations.\n"); - return ret; - } + /* setup dma */ + cfg0 = fimc_read(ctx, EXYNOS_MSCTRL); + cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK; + cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY; + fimc_write(ctx, cfg0, EXYNOS_MSCTRL); /* Reset status */ fimc_write(ctx, 0x0, EXYNOS_CISTATUS); @@ -1498,36 +1044,18 @@ static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) fimc_clear_bits(ctx, EXYNOS_CIOCTRL, EXYNOS_CIOCTRL_WEAVE_MASK); - if (cmd == IPP_CMD_M2M) - fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID); - - return 0; + fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID); } -static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void fimc_stop(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_ipp_set_wb set_wb = {0, 0}; u32 cfg; - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - switch (cmd) { - case IPP_CMD_M2M: - /* Source clear */ - cfg = fimc_read(ctx, EXYNOS_MSCTRL); - cfg &= ~EXYNOS_MSCTRL_INPUT_MASK; - cfg &= ~EXYNOS_MSCTRL_ENVID; - fimc_write(ctx, cfg, EXYNOS_MSCTRL); - break; - case IPP_CMD_WB: - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - dev_err(dev, "invalid operations.\n"); - break; - } + /* Source clear */ + cfg = fimc_read(ctx, EXYNOS_MSCTRL); + cfg &= ~EXYNOS_MSCTRL_INPUT_MASK; + cfg &= ~EXYNOS_MSCTRL_ENVID; + fimc_write(ctx, cfg, EXYNOS_MSCTRL); fimc_mask_irq(ctx, false); @@ -1545,6 +1073,87 @@ static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE); } +static int fimc_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct fimc_context *ctx = + container_of(ipp, struct fimc_context, ipp); + + pm_runtime_get_sync(ctx->dev); + ctx->task = task; + + fimc_src_set_fmt(ctx, task->src.buf.fourcc, task->src.buf.modifier); + fimc_src_set_size(ctx, &task->src); + fimc_src_set_transf(ctx, DRM_MODE_ROTATE_0); + fimc_src_set_addr(ctx, &task->src); + fimc_dst_set_fmt(ctx, task->dst.buf.fourcc, task->dst.buf.modifier); + fimc_dst_set_transf(ctx, task->transform.rotation); + fimc_dst_set_size(ctx, &task->dst); + fimc_dst_set_addr(ctx, &task->dst); + fimc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect); + fimc_start(ctx); + + return 0; +} + +static void fimc_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct fimc_context *ctx = + container_of(ipp, struct fimc_context, ipp); + + fimc_reset(ctx); + + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; + + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, -EIO); + } +} + +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = fimc_commit, + .abort = fimc_abort, +}; + +static int fimc_bind(struct device *dev, struct device *master, void *data) +{ + struct fimc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + ctx->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + ctx->formats, ctx->num_formats, "fimc"); + + dev_info(dev, "The exynos fimc has been probed successfully\n"); + + return 0; +} + +static void fimc_unbind(struct device *dev, 
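/*
 * fimc_commit() above recovers its context from the embedded
 * exynos_drm_ipp with container_of(). A userspace rendering of that
 * idiom, with toy struct names; offsetof() makes the pointer arithmetic
 * portable:
 */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ipp { int caps; };
struct fimc_ctx { int id; struct ipp ipp; };

int main(void)
{
	struct fimc_ctx ctx = { .id = 2 };
	struct ipp *ipp = &ctx.ipp;		/* what the core hands back */
	struct fimc_ctx *back = container_of(ipp, struct fimc_ctx, ipp);

	printf("id=%d\n", back->id);	/* 2 */
	return 0;
}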
struct device *master, + void *data) +{ + struct fimc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(drm_dev, dev); +} + +static const struct component_ops fimc_component_ops = { + .bind = fimc_bind, + .unbind = fimc_unbind, +}; + static void fimc_put_clocks(struct fimc_context *ctx) { int i; @@ -1559,7 +1168,7 @@ static void fimc_put_clocks(struct fimc_context *ctx) static int fimc_setup_clocks(struct fimc_context *ctx) { - struct device *fimc_dev = ctx->ippdrv.dev; + struct device *fimc_dev = ctx->dev; struct device *dev; int ret, i; @@ -1574,8 +1183,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx) ctx->clocks[i] = clk_get(dev, fimc_clock_names[i]); if (IS_ERR(ctx->clocks[i])) { - if (i >= FIMC_CLK_MUX) - break; ret = PTR_ERR(ctx->clocks[i]); dev_err(fimc_dev, "failed to get clock: %s\n", fimc_clock_names[i]); @@ -1583,20 +1190,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx) } } - /* Optional FIMC LCLK parent clock setting */ - if (!IS_ERR(ctx->clocks[FIMC_CLK_PARENT])) { - ret = clk_set_parent(ctx->clocks[FIMC_CLK_MUX], - ctx->clocks[FIMC_CLK_PARENT]); - if (ret < 0) { - dev_err(fimc_dev, "failed to set parent.\n"); - goto e_clk_free; - } - } - - ret = clk_set_rate(ctx->clocks[FIMC_CLK_LCLK], ctx->clk_frequency); - if (ret < 0) - goto e_clk_free; - ret = clk_prepare_enable(ctx->clocks[FIMC_CLK_LCLK]); if (!ret) return ret; @@ -1605,57 +1198,118 @@ e_clk_free: return ret; } -static int fimc_parse_dt(struct fimc_context *ctx) +int exynos_drm_check_fimc_device(struct device *dev) { - struct device_node *node = ctx->ippdrv.dev->of_node; + unsigned int id = of_alias_get_id(dev->of_node, "fimc"); - /* Handle only devices that support the LCD Writeback data path */ - if (!of_property_read_bool(node, "samsung,lcd-wb")) - return -ENODEV; + if (id >= 0 && (BIT(id) & fimc_mask)) + return 0; + return -ENODEV; +} - if (of_property_read_u32(node, "clock-frequency", - &ctx->clk_frequency)) - ctx->clk_frequency = FIMC_DEFAULT_LCLK_FREQUENCY; +static const unsigned int fimc_formats[] = { + DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, + DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422, + DRM_FORMAT_YUV444, +}; - ctx->id = of_alias_get_id(node, "fimc"); +static const unsigned int fimc_tiled_formats[] = { + DRM_FORMAT_NV12, DRM_FORMAT_NV21, +}; - if (ctx->id < 0) { - dev_err(ctx->ippdrv.dev, "failed to get node alias id.\n"); - return -EINVAL; - } +static const struct drm_exynos_ipp_limit fimc_4210_limits_v1[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4224, 2 }, .v = { 16, 0, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1920 }, .v = { 128, 0 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; - return 0; -} +static const struct drm_exynos_ipp_limit fimc_4210_limits_v2[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 1920, 2 }, .v = { 16, 0, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1366 }, .v = { 128, 0 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; + +static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v1[] = { + { 
IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 128, 1920, 2 }, .v = { 128, 0, 2 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; + +static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v2[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 128, 1366, 2 }, .v = { 128, 0, 2 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; static int fimc_probe(struct platform_device *pdev) { + const struct drm_exynos_ipp_limit *limits; + struct exynos_drm_ipp_formats *formats; struct device *dev = &pdev->dev; struct fimc_context *ctx; struct resource *res; - struct exynos_drm_ippdrv *ippdrv; int ret; + int i, j, num_limits, num_formats; - if (!dev->of_node) { - dev_err(dev, "device tree node not found.\n"); + if (exynos_drm_check_fimc_device(dev) != 0) return -ENODEV; - } ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->ippdrv.dev = dev; + ctx->dev = dev; + ctx->id = of_alias_get_id(dev->of_node, "fimc"); - ret = fimc_parse_dt(ctx); - if (ret < 0) - return ret; + /* construct formats/limits array */ + num_formats = ARRAY_SIZE(fimc_formats) + ARRAY_SIZE(fimc_tiled_formats); + formats = devm_kzalloc(dev, sizeof(*formats) * num_formats, GFP_KERNEL); + if (!formats) + return -ENOMEM; + + /* linear formats */ + if (ctx->id < 3) { + limits = fimc_4210_limits_v1; + num_limits = ARRAY_SIZE(fimc_4210_limits_v1); + } else { + limits = fimc_4210_limits_v2; + num_limits = ARRAY_SIZE(fimc_4210_limits_v2); + } + for (i = 0; i < ARRAY_SIZE(fimc_formats); i++) { + formats[i].fourcc = fimc_formats[i]; + formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[i].limits = limits; + formats[i].num_limits = num_limits; + } - ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, - "samsung,sysreg"); - if (IS_ERR(ctx->sysreg)) { - dev_err(dev, "syscon regmap lookup failed.\n"); - return PTR_ERR(ctx->sysreg); + /* tiled formats */ + if (ctx->id < 3) { + limits = fimc_4210_limits_tiled_v1; + num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v1); + } else { + limits = fimc_4210_limits_tiled_v2; + num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v2); } + for (j = i, i = 0; i < ARRAY_SIZE(fimc_tiled_formats); j++, i++) { + formats[j].fourcc = fimc_tiled_formats[i]; + formats[j].modifier = DRM_FORMAT_MOD_SAMSUNG_64_32_TILE; + formats[j].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[j].limits = limits; + formats[j].num_limits = num_limits; + } + + ctx->formats = formats; + ctx->num_formats = num_formats; /* resource memory */ ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1670,9 +1324,8 @@ static int fimc_probe(struct platform_device *pdev) return -ENOENT; } - ctx->irq = res->start; - ret = devm_request_threaded_irq(dev, ctx->irq, NULL, fimc_irq_handler, - IRQF_ONESHOT, "drm_fimc", ctx); + ret = devm_request_irq(dev, res->start, fimc_irq_handler, + 0, dev_name(dev), ctx); if (ret < 0) { dev_err(dev, "failed to request irq.\n"); return ret; @@ -1682,39 +1335,24 @@ static int fimc_probe(struct platform_device *pdev) if (ret < 0) return ret; - ippdrv = &ctx->ippdrv; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &fimc_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &fimc_dst_ops; - ippdrv->check_property = fimc_ippdrv_check_property; - ippdrv->reset = 
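/*
 * The probe code below flattens two static fourcc lists (linear and
 * 64x32-tiled) into one formats[] table, attaching the right limits
 * array to each group. A compact sketch of that construction with
 * simplified stand-in types; the modifier value 1 is only a placeholder
 * for the Samsung tile modifier:
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct ipp_format {
	uint32_t fourcc;
	uint64_t modifier;
	const int *limits;	/* stands in for drm_exynos_ipp_limit */
	size_t num_limits;
};

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const uint32_t linear_fmts[] = { 0x34325258 /* XR24 */,
					0x36314752 /* RG16 */ };
static const uint32_t tiled_fmts[]  = { 0x3231564e /* NV12 */ };
static const int linear_limits[] = { 0 };
static const int tiled_limits[]  = { 0 };

int main(void)
{
	struct ipp_format formats[ARRAY_SIZE(linear_fmts) +
				  ARRAY_SIZE(tiled_fmts)];
	size_t i, j;

	for (i = 0; i < ARRAY_SIZE(linear_fmts); i++) {
		formats[i].fourcc = linear_fmts[i];
		formats[i].modifier = 0;	/* linear */
		formats[i].limits = linear_limits;
		formats[i].num_limits = ARRAY_SIZE(linear_limits);
	}
	/* continue filling at index j, same two-counter idiom as the probe */
	for (j = i, i = 0; i < ARRAY_SIZE(tiled_fmts); j++, i++) {
		formats[j].fourcc = tiled_fmts[i];
		formats[j].modifier = 1;	/* placeholder tile modifier */
		formats[j].limits = tiled_limits;
		formats[j].num_limits = ARRAY_SIZE(tiled_limits);
	}
	printf("%zu formats\n", j);	/* 3 */
	return 0;
}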
fimc_ippdrv_reset; - ippdrv->start = fimc_ippdrv_start; - ippdrv->stop = fimc_ippdrv_stop; - ret = fimc_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - goto err_put_clk; - } - - DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); - spin_lock_init(&ctx->lock); platform_set_drvdata(pdev, ctx); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, FIMC_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm fimc device.\n"); + ret = component_add(dev, &fimc_component_ops); + if (ret) goto err_pm_dis; - } dev_info(dev, "drm fimc registered successfully.\n"); return 0; err_pm_dis: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); -err_put_clk: fimc_put_clocks(ctx); return ret; @@ -1724,42 +1362,24 @@ static int fimc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - exynos_drm_ippdrv_unregister(ippdrv); + component_del(dev, &fimc_component_ops); + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); fimc_put_clocks(ctx); - pm_runtime_set_suspended(dev); - pm_runtime_disable(dev); return 0; } #ifdef CONFIG_PM -static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); - clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); - clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = true; - } - - return 0; -} - static int fimc_runtime_suspend(struct device *dev) { struct fimc_context *ctx = get_fimc_context(dev); DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - return fimc_clk_ctrl(ctx, false); + clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); + return 0; } static int fimc_runtime_resume(struct device *dev) @@ -1767,8 +1387,7 @@ static int fimc_runtime_resume(struct device *dev) struct fimc_context *ctx = get_fimc_context(dev); DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - return fimc_clk_ctrl(ctx, true); + return clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); } #endif @@ -1795,4 +1414,3 @@ struct platform_driver fimc_driver = { .pm = &fimc_pm_ops, }, }; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.h b/drivers/gpu/drm/exynos/exynos_drm_fimc.h deleted file mode 100644 index 127a424c5fdf..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim <chulspro.kim@samsung.com> - * Jinyoung Jeon <jy0.jeon@samsung.com> - * Sangmin Lee <lsmin.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_FIMC_H_ -#define _EXYNOS_DRM_FIMC_H_ - -/* - * TODO - * FIMD output interface notifier callback. 
- */ - -#endif /* _EXYNOS_DRM_FIMC_H_ */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index d42ae2bc3e56..01b1570d0c3a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -121,6 +121,12 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = { .has_limited_fmt = 1, }; +static struct fimd_driver_data s5pv210_fimd_driver_data = { + .timing_base = 0x0, + .has_shadowcon = 1, + .has_clksel = 1, +}; + static struct fimd_driver_data exynos3_fimd_driver_data = { .timing_base = 0x20000, .lcdblk_offset = 0x210, @@ -193,6 +199,8 @@ struct fimd_context { static const struct of_device_id fimd_driver_dt_match[] = { { .compatible = "samsung,s3c6400-fimd", .data = &s3c64xx_fimd_driver_data }, + { .compatible = "samsung,s5pv210-fimd", + .data = &s5pv210_fimd_driver_data }, { .compatible = "samsung,exynos3250-fimd", .data = &exynos3_fimd_driver_data }, { .compatible = "samsung,exynos4210-fimd", diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 11cc01b47bc0..6e1494fa71b4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -431,37 +431,24 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, return 0; } -int exynos_drm_gem_fault(struct vm_fault *vmf) +vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj); unsigned long pfn; pgoff_t page_offset; - int ret; page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) { DRM_ERROR("invalid page offset\n"); - ret = -EINVAL; - goto out; + return VM_FAULT_SIGBUS; } pfn = page_to_pfn(exynos_gem->pages[page_offset]); - ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); - -out: - switch (ret) { - case 0: - case -ERESTARTSYS: - case -EINTR: - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - default: - return VM_FAULT_SIGBUS; - } + return vmf_insert_mixed(vma, vmf->address, + __pfn_to_pfn_t(pfn, PFN_DEV)); } static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 5a4c7de80f65..9057d7f1d6ed 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -13,6 +13,7 @@ #define _EXYNOS_DRM_GEM_H_ #include <drm/drm_gem.h> +#include <linux/mm_types.h> #define to_exynos_gem(x) container_of(x, struct exynos_drm_gem, base) @@ -111,7 +112,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args); /* page fault handler and mmap fault address(virtual) to physical memory. */ -int exynos_drm_gem_fault(struct vm_fault *vmf); +vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf); /* set vm_flags and we can change the vm attribute to other one at here. 
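/*
 * The reworked exynos_drm_gem_fault() above returns a vm_fault_t
 * directly and bounds-checks the faulting page before inserting the
 * PFN. The offset arithmetic, as a plain sketch assuming 4 KiB pages:
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#define PAGE_SHIFT 12	/* 4 KiB pages, as on these SoCs */

/* true if the fault address maps to a page inside the object */
static bool fault_in_bounds(uintptr_t address, uintptr_t vm_start,
			    size_t obj_size)
{
	size_t page_offset = (address - vm_start) >> PAGE_SHIFT;

	return page_offset < (obj_size >> PAGE_SHIFT);
}

int main(void)
{
	uintptr_t vm_start = 0x40000000;
	size_t size = (size_t)16 << PAGE_SHIFT;	/* 16-page object */

	/* last valid page (offset 15) vs. one page past the end (16) */
	printf("%d\n", fault_in_bounds(vm_start + 0xf000, vm_start, size));  /* 1 */
	printf("%d\n", fault_in_bounds(vm_start + 0x10000, vm_start, size)); /* 0 */
	return 0;
}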
*/ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 0506b2b17ac1..e99dd1e4ba65 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -12,18 +12,20 @@ * */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/clk.h> #include <linux/pm_runtime.h> #include <linux/mfd/syscon.h> +#include <linux/of_device.h> #include <linux/regmap.h> #include <drm/drmP.h> #include <drm/exynos_drm.h> #include "regs-gsc.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" -#include "exynos_drm_gsc.h" /* * GSC stands for General SCaler and @@ -31,26 +33,10 @@ * input DMA reads image data from the memory. * output DMA writes image data to memory. * GSC supports image rotation and image effect functions. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> GSC H/W ----> Memory. - * Writeback operation : supports cloned screen with FIMD. - * FIMD ----> GSC H/W ----> Memory. - * Output operation : supports direct display using local path. - * Memory ----> GSC H/W ----> FIMD, Mixer. */ -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. added check_prepare api for right register. - * 5. need to add supported list in prop_list. - * 6. check prescaler/scaler optimization. - */ -#define GSC_MAX_DEVS 4 +#define GSC_MAX_CLOCKS 8 #define GSC_MAX_SRC 4 #define GSC_MAX_DST 16 #define GSC_RESET_TIMEOUT 50 @@ -65,8 +51,6 @@ #define GSC_SC_DOWN_RATIO_4_8 131072 #define GSC_SC_DOWN_RATIO_3_8 174762 #define GSC_SC_DOWN_RATIO_2_8 262144 -#define GSC_REFRESH_MIN 12 -#define GSC_REFRESH_MAX 60 #define GSC_CROP_MAX 8192 #define GSC_CROP_MIN 32 #define GSC_SCALE_MAX 4224 @@ -77,10 +61,9 @@ #define GSC_COEF_H_8T 8 #define GSC_COEF_V_4T 4 #define GSC_COEF_DEPTH 3 +#define GSC_AUTOSUSPEND_DELAY 2000 #define get_gsc_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct gsc_context, ippdrv); #define gsc_read(offset) readl(ctx->regs + (offset)) #define gsc_write(cfg, offset) writel(cfg, ctx->regs + (offset)) @@ -104,50 +87,47 @@ struct gsc_scaler { }; /* - * A structure of scaler capability. - * - * find user manual 49.2 features. - * @tile_w: tile mode or rotation width. - * @tile_h: tile mode or rotation height. - * @w: other cases width. - * @h: other cases height. - */ -struct gsc_capability { - /* tile or rotation */ - u32 tile_w; - u32 tile_h; - /* other cases */ - u32 w; - u32 h; -}; - -/* * A structure of gsc context. * - * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. - * @sysreg: handle to SYSREG block regmap. - * @lock: locking of operations. * @gsc_clk: gsc gate clock. * @sc: scaler infomations. * @id: gsc id. * @irq: irq number. * @rotation: supports rotation of src. - * @suspended: qos operations. 
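/*
 * The GSC_SC_DOWN_RATIO_* constants above are src/dst scale ratios in
 * 16.16 fixed point: e.g. 65536 * 8 / 3 = 174762 for a 3/8 downscale.
 * A one-liner to reproduce the table:
 */
#include <stdio.h>

int main(void)
{
	/* ratio = (src << 16) / dst, here for dst/src = n/8 */
	for (int n = 7; n >= 2; n--)
		printf("%d/8 -> %d\n", n, (8 << 16) / n);
	/* 4/8 -> 131072, 3/8 -> 174762, 2/8 -> 262144, matching the header */
	return 0;
}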
*/ struct gsc_context { - struct exynos_drm_ippdrv ippdrv; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + struct exynos_drm_ipp_task *task; + struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct resource *regs_res; void __iomem *regs; - struct regmap *sysreg; - struct mutex lock; - struct clk *gsc_clk; + const char **clk_names; + struct clk *clocks[GSC_MAX_CLOCKS]; + int num_clocks; struct gsc_scaler sc; int id; int irq; bool rotation; - bool suspended; +}; + +/** + * struct gsc_driverdata - per device type driver data for init time. + * + * @limits: picture size limits array + * @clk_names: names of clocks needed by this variant + * @num_clocks: the number of clocks needed by this variant + */ +struct gsc_driverdata { + const struct drm_exynos_ipp_limit *limits; + int num_limits; + const char *clk_names[GSC_MAX_CLOCKS]; + int num_clocks; }; /* 8-tap Filter Coefficient */ @@ -438,25 +418,6 @@ static int gsc_sw_reset(struct gsc_context *ctx) return 0; } -static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable) -{ - unsigned int gscblk_cfg; - - if (!ctx->sysreg) - return; - - regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg); - - if (enable) - gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) | - GSC_BLK_GSCL_WB_IN_SRC_SEL(ctx->id) | - GSC_BLK_SW_RESET_WB_DEST(ctx->id); - else - gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id); - - regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg); -} - static void gsc_handle_irq(struct gsc_context *ctx, bool enable, bool overflow, bool done) { @@ -487,10 +448,8 @@ static void gsc_handle_irq(struct gsc_context *ctx, bool enable, } -static int gsc_src_set_fmt(struct device *dev, u32 fmt) +static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -506,6 +465,7 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) cfg |= GSC_IN_RGB565; break; case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: cfg |= GSC_IN_XRGB8888; break; case DRM_FORMAT_BGRX8888: @@ -548,115 +508,84 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P); break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } gsc_write(cfg, GSC_IN_CON); - - return 0; } -static int gsc_src_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); - cfg = gsc_read(GSC_IN_CON); cfg &= ~GSC_IN_ROT_MASK; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_Y) cfg |= GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg |= GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_90: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_90_XFLIP; - else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_90_YFLIP; - else - cfg |= GSC_IN_ROT_90; + case DRM_MODE_ROTATE_90: + cfg |= GSC_IN_ROT_90; + if (rotation & DRM_MODE_REFLECT_Y) + cfg |= GSC_IN_ROT_XFLIP; + if (rotation & 
DRM_MODE_REFLECT_X) + cfg |= GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg |= GSC_IN_ROT_180; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg |= GSC_IN_ROT_270; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~GSC_IN_ROT_YFLIP; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } gsc_write(cfg, GSC_IN_CON); ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0; - *swap = ctx->rotation; - - return 0; } -static int gsc_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void gsc_src_set_size(struct gsc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_pos img_pos = *pos; struct gsc_scaler *sc = &ctx->sc; u32 cfg; - DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n", - swap, pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - } - /* pixel offset */ - cfg = (GSC_SRCIMG_OFFSET_X(img_pos.x) | - GSC_SRCIMG_OFFSET_Y(img_pos.y)); + cfg = (GSC_SRCIMG_OFFSET_X(buf->rect.x) | + GSC_SRCIMG_OFFSET_Y(buf->rect.y)); gsc_write(cfg, GSC_SRCIMG_OFFSET); /* cropped size */ - cfg = (GSC_CROPPED_WIDTH(img_pos.w) | - GSC_CROPPED_HEIGHT(img_pos.h)); + cfg = (GSC_CROPPED_WIDTH(buf->rect.w) | + GSC_CROPPED_HEIGHT(buf->rect.h)); gsc_write(cfg, GSC_CROPPED_SIZE); - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize); - /* original size */ cfg = gsc_read(GSC_SRCIMG_SIZE); cfg &= ~(GSC_SRCIMG_HEIGHT_MASK | GSC_SRCIMG_WIDTH_MASK); - cfg |= (GSC_SRCIMG_WIDTH(sz->hsize) | - GSC_SRCIMG_HEIGHT(sz->vsize)); + cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) | + GSC_SRCIMG_HEIGHT(buf->buf.height)); gsc_write(cfg, GSC_SRCIMG_SIZE); cfg = gsc_read(GSC_IN_CON); cfg &= ~GSC_IN_RGB_TYPE_MASK; - DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range); - - if (pos->w >= GSC_WIDTH_ITU_709) + if (buf->rect.w >= GSC_WIDTH_ITU_709) if (sc->range) cfg |= GSC_IN_RGB_HD_WIDE; else @@ -668,103 +597,39 @@ static int gsc_src_set_size(struct device *dev, int swap, cfg |= GSC_IN_RGB_SD_NARROW; gsc_write(cfg, GSC_IN_CON); - - return 0; } -static int gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id, + bool enqueue) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - bool masked; + bool masked = !enqueue; u32 cfg; u32 mask = 0x00000001 << buf_id; - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); - /* mask register set */ cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK); - switch (buf_type) { - case IPP_BUF_ENQUEUE: - masked = false; - break; - case IPP_BUF_DEQUEUE: - masked = true; - break; - default: - dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n"); - return -EINVAL; - } - /* sequence id */ cfg &= ~mask; cfg |= masked << buf_id; gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK); gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK); gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK); - - return 0; } -static int gsc_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) 
+static void gsc_src_set_addr(struct gsc_context *ctx, u32 buf_id, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EFAULT; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > GSC_MAX_SRC) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -EINVAL; - } - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y], - GSC_IN_BASE_ADDR_Y(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB], - GSC_IN_BASE_ADDR_CB(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR], - GSC_IN_BASE_ADDR_CR(buf_id)); - break; - case IPP_BUF_DEQUEUE: - gsc_write(0x0, GSC_IN_BASE_ADDR_Y(buf_id)); - gsc_write(0x0, GSC_IN_BASE_ADDR_CB(buf_id)); - gsc_write(0x0, GSC_IN_BASE_ADDR_CR(buf_id)); - break; - default: - /* bypass */ - break; - } + gsc_write(buf->dma_addr[0], GSC_IN_BASE_ADDR_Y(buf_id)); + gsc_write(buf->dma_addr[1], GSC_IN_BASE_ADDR_CB(buf_id)); + gsc_write(buf->dma_addr[2], GSC_IN_BASE_ADDR_CR(buf_id)); - return gsc_src_set_buf_seq(ctx, buf_id, buf_type); + gsc_src_set_buf_seq(ctx, buf_id, true); } -static struct exynos_drm_ipp_ops gsc_src_ops = { - .set_fmt = gsc_src_set_fmt, - .set_transf = gsc_src_set_transf, - .set_size = gsc_src_set_size, - .set_addr = gsc_src_set_addr, -}; - -static int gsc_dst_set_fmt(struct device *dev, u32 fmt) +static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -779,8 +644,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_RGB565: cfg |= GSC_OUT_RGB565; break; + case DRM_FORMAT_ARGB8888: case DRM_FORMAT_XRGB8888: - cfg |= GSC_OUT_XRGB8888; + cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_GLOBAL_ALPHA(0xff)); break; case DRM_FORMAT_BGRX8888: cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_RB_SWAP); @@ -819,69 +685,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P); break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } gsc_write(cfg, GSC_OUT_CON); - - return 0; -} - -static int gsc_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) -{ - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - u32 cfg; - - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); - - cfg = gsc_read(GSC_IN_CON); - cfg &= ~GSC_IN_ROT_MASK; - - switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_YFLIP; - break; - case EXYNOS_DRM_DEGREE_90: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_90_XFLIP; - else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_90_YFLIP; - else - cfg |= GSC_IN_ROT_90; - break; - case EXYNOS_DRM_DEGREE_180: - cfg |= GSC_IN_ROT_180; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg &= ~GSC_IN_ROT_YFLIP; - break; - case EXYNOS_DRM_DEGREE_270: - cfg |= GSC_IN_ROT_270; - 
if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg &= ~GSC_IN_ROT_YFLIP; - break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; - } - - gsc_write(cfg, GSC_IN_CON); - - ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0; - *swap = ctx->rotation; - - return 0; } static int gsc_get_ratio_shift(u32 src, u32 dst, u32 *ratio) @@ -919,9 +725,9 @@ static void gsc_get_prescaler_shfactor(u32 hratio, u32 vratio, u32 *shfactor) } static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc, - struct drm_exynos_pos *src, struct drm_exynos_pos *dst) + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; u32 src_w, src_h, dst_w, dst_h; int ret = 0; @@ -939,13 +745,13 @@ static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc, ret = gsc_get_ratio_shift(src_w, dst_w, &sc->pre_hratio); if (ret) { - dev_err(ippdrv->dev, "failed to get ratio horizontal.\n"); + dev_err(ctx->dev, "failed to get ratio horizontal.\n"); return ret; } ret = gsc_get_ratio_shift(src_h, dst_h, &sc->pre_vratio); if (ret) { - dev_err(ippdrv->dev, "failed to get ratio vertical.\n"); + dev_err(ctx->dev, "failed to get ratio vertical.\n"); return ret; } @@ -1039,47 +845,37 @@ static void gsc_set_scaler(struct gsc_context *ctx, struct gsc_scaler *sc) gsc_write(cfg, GSC_MAIN_V_RATIO); } -static int gsc_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void gsc_dst_set_size(struct gsc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_pos img_pos = *pos; struct gsc_scaler *sc = &ctx->sc; u32 cfg; - DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n", - swap, pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - } - /* pixel offset */ - cfg = (GSC_DSTIMG_OFFSET_X(pos->x) | - GSC_DSTIMG_OFFSET_Y(pos->y)); + cfg = (GSC_DSTIMG_OFFSET_X(buf->rect.x) | + GSC_DSTIMG_OFFSET_Y(buf->rect.y)); gsc_write(cfg, GSC_DSTIMG_OFFSET); /* scaled size */ - cfg = (GSC_SCALED_WIDTH(img_pos.w) | GSC_SCALED_HEIGHT(img_pos.h)); + if (ctx->rotation) + cfg = (GSC_SCALED_WIDTH(buf->rect.h) | + GSC_SCALED_HEIGHT(buf->rect.w)); + else + cfg = (GSC_SCALED_WIDTH(buf->rect.w) | + GSC_SCALED_HEIGHT(buf->rect.h)); gsc_write(cfg, GSC_SCALED_SIZE); - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize); - /* original size */ cfg = gsc_read(GSC_DSTIMG_SIZE); - cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | - GSC_DSTIMG_WIDTH_MASK); - cfg |= (GSC_DSTIMG_WIDTH(sz->hsize) | - GSC_DSTIMG_HEIGHT(sz->vsize)); + cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK); + cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) | + GSC_DSTIMG_HEIGHT(buf->buf.height); gsc_write(cfg, GSC_DSTIMG_SIZE); cfg = gsc_read(GSC_OUT_CON); cfg &= ~GSC_OUT_RGB_TYPE_MASK; - DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range); - - if (pos->w >= GSC_WIDTH_ITU_709) + if (buf->rect.w >= GSC_WIDTH_ITU_709) if (sc->range) cfg |= GSC_OUT_RGB_HD_WIDE; else @@ -1091,8 +887,6 @@ static int gsc_dst_set_size(struct device *dev, int swap, cfg |= GSC_OUT_RGB_SD_NARROW; gsc_write(cfg, GSC_OUT_CON); - - return 0; } static int gsc_dst_get_buf_seq(struct gsc_context *ctx) @@ -1111,35 +905,16 @@ static int gsc_dst_get_buf_seq(struct gsc_context *ctx) return buf_num; } -static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, - enum 
drm_exynos_ipp_buf_type buf_type) +static void gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, + bool enqueue) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - bool masked; + bool masked = !enqueue; u32 cfg; u32 mask = 0x00000001 << buf_id; - int ret = 0; - - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); - - mutex_lock(&ctx->lock); /* mask register set */ cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK); - switch (buf_type) { - case IPP_BUF_ENQUEUE: - masked = false; - break; - case IPP_BUF_DEQUEUE: - masked = true; - break; - default: - dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n"); - ret = -EINVAL; - goto err_unlock; - } - /* sequence id */ cfg &= ~mask; cfg |= masked << buf_id; @@ -1148,94 +923,29 @@ static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK); /* interrupt enable */ - if (buf_type == IPP_BUF_ENQUEUE && - gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START) + if (enqueue && gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START) gsc_handle_irq(ctx, true, false, true); /* interrupt disable */ - if (buf_type == IPP_BUF_DEQUEUE && - gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP) + if (!enqueue && gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP) gsc_handle_irq(ctx, false, false, true); - -err_unlock: - mutex_unlock(&ctx->lock); - return ret; } -static int gsc_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_dst_set_addr(struct gsc_context *ctx, + u32 buf_id, struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EFAULT; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > GSC_MAX_DST) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -EINVAL; - } - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y], - GSC_OUT_BASE_ADDR_Y(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB], - GSC_OUT_BASE_ADDR_CB(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR], - GSC_OUT_BASE_ADDR_CR(buf_id)); - break; - case IPP_BUF_DEQUEUE: - gsc_write(0x0, GSC_OUT_BASE_ADDR_Y(buf_id)); - gsc_write(0x0, GSC_OUT_BASE_ADDR_CB(buf_id)); - gsc_write(0x0, GSC_OUT_BASE_ADDR_CR(buf_id)); - break; - default: - /* bypass */ - break; - } + gsc_write(buf->dma_addr[0], GSC_OUT_BASE_ADDR_Y(buf_id)); + gsc_write(buf->dma_addr[1], GSC_OUT_BASE_ADDR_CB(buf_id)); + gsc_write(buf->dma_addr[2], GSC_OUT_BASE_ADDR_CR(buf_id)); - return gsc_dst_set_buf_seq(ctx, buf_id, buf_type); -} - -static struct exynos_drm_ipp_ops gsc_dst_ops = { - .set_fmt = gsc_dst_set_fmt, - .set_transf = gsc_dst_set_transf, - .set_size = gsc_dst_set_size, - .set_addr = gsc_dst_set_addr, -}; - -static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->gsc_clk); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->gsc_clk); - ctx->suspended = true; - } - - return 0; + gsc_dst_set_buf_seq(ctx, buf_id, true); } static int gsc_get_src_buf_index(struct gsc_context *ctx) { u32 cfg, curr_index, i; u32 buf_id = GSC_MAX_SRC; - int ret; DRM_DEBUG_KMS("gsc id[%d]\n", 
ctx->id); @@ -1249,19 +959,15 @@ static int gsc_get_src_buf_index(struct gsc_context *ctx) } } + DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, + curr_index, buf_id); + if (buf_id == GSC_MAX_SRC) { DRM_ERROR("failed to get in buffer index.\n"); return -EINVAL; } - ret = gsc_src_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - if (ret < 0) { - DRM_ERROR("failed to dequeue.\n"); - return ret; - } - - DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, - curr_index, buf_id); + gsc_src_set_buf_seq(ctx, buf_id, false); return buf_id; } @@ -1270,7 +976,6 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) { u32 cfg, curr_index, i; u32 buf_id = GSC_MAX_DST; - int ret; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); @@ -1289,11 +994,7 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) return -EINVAL; } - ret = gsc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - if (ret < 0) { - DRM_ERROR("failed to dequeue.\n"); - return ret; - } + gsc_dst_set_buf_seq(ctx, buf_id, false); DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, curr_index, buf_id); @@ -1304,215 +1005,55 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) static irqreturn_t gsc_irq_handler(int irq, void *dev_id) { struct gsc_context *ctx = dev_id; - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = - c_node->event_work; u32 status; - int buf_id[EXYNOS_DRM_OPS_MAX]; + int err = 0; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); status = gsc_read(GSC_IRQ); if (status & GSC_IRQ_STATUS_OR_IRQ) { - dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", + dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); - return IRQ_NONE; + err = -EINVAL; } if (status & GSC_IRQ_STATUS_OR_FRM_DONE) { - dev_dbg(ippdrv->dev, "occurred frame done at %d, status 0x%x.\n", - ctx->id, status); - - buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx); - if (buf_id[EXYNOS_DRM_OPS_SRC] < 0) - return IRQ_HANDLED; - - buf_id[EXYNOS_DRM_OPS_DST] = gsc_get_dst_buf_index(ctx); - if (buf_id[EXYNOS_DRM_OPS_DST] < 0) - return IRQ_HANDLED; - - DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", - buf_id[EXYNOS_DRM_OPS_SRC], buf_id[EXYNOS_DRM_OPS_DST]); - - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_SRC] = - buf_id[EXYNOS_DRM_OPS_SRC]; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = - buf_id[EXYNOS_DRM_OPS_DST]; - queue_work(ippdrv->event_workq, &event_work->work); - } - - return IRQ_HANDLED; -} - -static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->writeback = 1; - prop_list->refresh_min = GSC_REFRESH_MIN; - prop_list->refresh_max = GSC_REFRESH_MAX; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 1; - prop_list->crop = 1; - prop_list->crop_max.hsize = GSC_CROP_MAX; - prop_list->crop_max.vsize = GSC_CROP_MAX; - prop_list->crop_min.hsize = GSC_CROP_MIN; - prop_list->crop_min.vsize = GSC_CROP_MIN; - prop_list->scale = 1; - prop_list->scale_max.hsize = GSC_SCALE_MAX; - prop_list->scale_max.vsize = GSC_SCALE_MAX; - prop_list->scale_min.hsize = GSC_SCALE_MIN; - prop_list->scale_min.vsize = GSC_SCALE_MIN; - - return 0; -} - -static inline bool gsc_check_drm_flip(enum 
drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} - -static int gsc_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) -{ - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos *pos; - struct drm_exynos_sz *sz; - bool swap; - int i; - - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; + int src_buf_id, dst_buf_id; - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - /* check for flip */ - if (!gsc_check_drm_flip(config->flip)) { - DRM_ERROR("invalid flip.\n"); - goto err_property; - } - - /* check for degree */ - switch (config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - break; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - swap = false; - break; - default: - DRM_ERROR("invalid degree.\n"); - goto err_property; - } + dev_dbg(ctx->dev, "occurred frame done at %d, status 0x%x.\n", + ctx->id, status); - /* check for buffer bound */ - if ((pos->x + pos->w > sz->hsize) || - (pos->y + pos->h > sz->vsize)) { - DRM_ERROR("out of buf bound.\n"); - goto err_property; - } + src_buf_id = gsc_get_src_buf_index(ctx); + dst_buf_id = gsc_get_dst_buf_index(ctx); - /* check for crop */ - if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) { - if (swap) { - if ((pos->h < pp->crop_min.hsize) || - (sz->vsize > pp->crop_max.hsize) || - (pos->w < pp->crop_min.vsize) || - (sz->hsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->crop_min.hsize) || - (sz->hsize > pp->crop_max.hsize) || - (pos->h < pp->crop_min.vsize) || - (sz->vsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } - } + DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", src_buf_id, + dst_buf_id); - /* check for scale */ - if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) { - if (swap) { - if ((pos->h < pp->scale_min.hsize) || - (sz->vsize > pp->scale_max.hsize) || - (pos->w < pp->scale_min.vsize) || - (sz->hsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->scale_min.hsize) || - (sz->hsize > pp->scale_max.hsize) || - (pos->h < pp->scale_min.vsize) || - (sz->vsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } - } + if (src_buf_id < 0 || dst_buf_id < 0) + err = -EINVAL; } - return 0; - -err_property: - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n", - i ? 
"dst" : "src", config->flip, config->degree, - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize); + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, err); } - return -EINVAL; + return IRQ_HANDLED; } - -static int gsc_ippdrv_reset(struct device *dev) +static int gsc_reset(struct gsc_context *ctx) { - struct gsc_context *ctx = get_gsc_context(dev); struct gsc_scaler *sc = &ctx->sc; int ret; /* reset h/w block */ ret = gsc_sw_reset(ctx); if (ret < 0) { - dev_err(dev, "failed to reset hardware.\n"); + dev_err(ctx->dev, "failed to reset hardware.\n"); return ret; } @@ -1523,166 +1064,172 @@ static int gsc_ippdrv_reset(struct device *dev) return 0; } -static int gsc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void gsc_start(struct gsc_context *ctx) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX]; - struct drm_exynos_ipp_set_wb set_wb; u32 cfg; - int ret, i; - - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; gsc_handle_irq(ctx, true, false, true); - for_each_ipp_ops(i) { - config = &property->config[i]; - img_pos[i] = config->pos; - } + /* enable one shot */ + cfg = gsc_read(GSC_ENABLE); + cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK | + GSC_ENABLE_CLK_GATE_MODE_MASK); + cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT; + gsc_write(cfg, GSC_ENABLE); - switch (cmd) { - case IPP_CMD_M2M: - /* enable one shot */ - cfg = gsc_read(GSC_ENABLE); - cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK | - GSC_ENABLE_CLK_GATE_MODE_MASK); - cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT; - gsc_write(cfg, GSC_ENABLE); - - /* src dma memory */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= GSC_IN_PATH_MEMORY; - gsc_write(cfg, GSC_IN_CON); - - /* dst dma memory */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - case IPP_CMD_WB: - set_wb.enable = 1; - set_wb.refresh = property->refresh_rate; - gsc_set_gscblk_fimd_wb(ctx, set_wb.enable); - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - - /* src local path */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= (GSC_IN_PATH_LOCAL | GSC_IN_LOCAL_FIMD_WB); - gsc_write(cfg, GSC_IN_CON); - - /* dst dma memory */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - case IPP_CMD_OUTPUT: - /* src dma memory */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= GSC_IN_PATH_MEMORY; - gsc_write(cfg, GSC_IN_CON); - - /* dst local path */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - default: - ret = -EINVAL; - dev_err(dev, "invalid operations.\n"); - return ret; - } + /* src dma memory */ + cfg = gsc_read(GSC_IN_CON); + cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); + cfg |= GSC_IN_PATH_MEMORY; + gsc_write(cfg, GSC_IN_CON); - ret = gsc_set_prescaler(ctx, &ctx->sc, - &img_pos[EXYNOS_DRM_OPS_SRC], - &img_pos[EXYNOS_DRM_OPS_DST]); - if (ret) { - dev_err(dev, "failed to set prescaler.\n"); - return ret; - } + /* dst dma memory */ + cfg = gsc_read(GSC_OUT_CON); + cfg |= 
GSC_OUT_PATH_MEMORY; + gsc_write(cfg, GSC_OUT_CON); gsc_set_scaler(ctx, &ctx->sc); cfg = gsc_read(GSC_ENABLE); cfg |= GSC_ENABLE_ON; gsc_write(cfg, GSC_ENABLE); +} + +static int gsc_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct gsc_context *ctx = container_of(ipp, struct gsc_context, ipp); + int ret; + + pm_runtime_get_sync(ctx->dev); + ctx->task = task; + + ret = gsc_reset(ctx); + if (ret) { + pm_runtime_put_autosuspend(ctx->dev); + ctx->task = NULL; + return ret; + } + + gsc_src_set_fmt(ctx, task->src.buf.fourcc); + gsc_src_set_transf(ctx, task->transform.rotation); + gsc_src_set_size(ctx, &task->src); + gsc_src_set_addr(ctx, 0, &task->src); + gsc_dst_set_fmt(ctx, task->dst.buf.fourcc); + gsc_dst_set_size(ctx, &task->dst); + gsc_dst_set_addr(ctx, 0, &task->dst); + gsc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect); + gsc_start(ctx); return 0; } -static void gsc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void gsc_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_ipp_set_wb set_wb = {0, 0}; - u32 cfg; + struct gsc_context *ctx = + container_of(ipp, struct gsc_context, ipp); - DRM_DEBUG_KMS("cmd[%d]\n", cmd); + gsc_reset(ctx); + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - switch (cmd) { - case IPP_CMD_M2M: - /* bypass */ - break; - case IPP_CMD_WB: - gsc_set_gscblk_fimd_wb(ctx, set_wb.enable); - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - dev_err(dev, "invalid operations.\n"); - break; + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, -EIO); } +} - gsc_handle_irq(ctx, false, false, true); +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = gsc_commit, + .abort = gsc_abort, +}; - /* reset sequence */ - gsc_write(0xff, GSC_OUT_BASE_ADDR_Y_MASK); - gsc_write(0xff, GSC_OUT_BASE_ADDR_CB_MASK); - gsc_write(0xff, GSC_OUT_BASE_ADDR_CR_MASK); +static int gsc_bind(struct device *dev, struct device *master, void *data) +{ + struct gsc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; - cfg = gsc_read(GSC_ENABLE); - cfg &= ~GSC_ENABLE_ON; - gsc_write(cfg, GSC_ENABLE); + ctx->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + ctx->formats, ctx->num_formats, "gsc"); + + dev_info(dev, "The exynos gscaler has been probed successfully\n"); + + return 0; +} + +static void gsc_unbind(struct device *dev, struct device *master, + void *data) +{ + struct gsc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(drm_dev, dev); } +static const struct component_ops gsc_component_ops = { + .bind = gsc_bind, + .unbind = gsc_unbind, +}; + +static const unsigned int gsc_formats[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, DRM_FORMAT_BGRX8888, + DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422, +}; + static int gsc_probe(struct 
platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct gsc_driverdata *driver_data;
+	struct exynos_drm_ipp_formats *formats;
 	struct gsc_context *ctx;
 	struct resource *res;
-	struct exynos_drm_ippdrv *ippdrv;
-	int ret;
+	int ret, i;
 
 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	if (dev->of_node) {
-		ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
-							"samsung,sysreg");
-		if (IS_ERR(ctx->sysreg)) {
-			dev_warn(dev, "failed to get system register.\n");
-			ctx->sysreg = NULL;
-		}
+	formats = devm_kzalloc(dev, sizeof(*formats) *
+			       (ARRAY_SIZE(gsc_formats)), GFP_KERNEL);
+	if (!formats)
+		return -ENOMEM;
+
+	driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev);
+	ctx->dev = dev;
+	ctx->num_clocks = driver_data->num_clocks;
+	ctx->clk_names = driver_data->clk_names;
+
+	for (i = 0; i < ARRAY_SIZE(gsc_formats); i++) {
+		formats[i].fourcc = gsc_formats[i];
+		formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+				  DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+		formats[i].limits = driver_data->limits;
+		formats[i].num_limits = driver_data->num_limits;
 	}
+	ctx->formats = formats;
+	ctx->num_formats = ARRAY_SIZE(gsc_formats);
 
 	/* clock control */
-	ctx->gsc_clk = devm_clk_get(dev, "gscl");
-	if (IS_ERR(ctx->gsc_clk)) {
-		dev_err(dev, "failed to get gsc clock.\n");
-		return PTR_ERR(ctx->gsc_clk);
+	for (i = 0; i < ctx->num_clocks; i++) {
+		ctx->clocks[i] = devm_clk_get(dev, ctx->clk_names[i]);
+		if (IS_ERR(ctx->clocks[i])) {
+			dev_err(dev, "failed to get clock: %s\n",
+				ctx->clk_names[i]);
+			return PTR_ERR(ctx->clocks[i]);
+		}
 	}
 
 	/* resource memory */
@@ -1699,8 +1246,8 @@ static int gsc_probe(struct platform_device *pdev)
 	}
 	ctx->irq = res->start;
 
-	ret = devm_request_threaded_irq(dev, ctx->irq, NULL, gsc_irq_handler,
-					IRQF_ONESHOT, "drm_gsc", ctx);
+	ret = devm_request_irq(dev, ctx->irq, gsc_irq_handler, 0,
+			       dev_name(dev), ctx);
 	if (ret < 0) {
 		dev_err(dev, "failed to request irq.\n");
 		return ret;
@@ -1709,38 +1256,22 @@
 	/* context initialization */
 	ctx->id = pdev->id;
 
-	ippdrv = &ctx->ippdrv;
-	ippdrv->dev = dev;
-	ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &gsc_src_ops;
-	ippdrv->ops[EXYNOS_DRM_OPS_DST] = &gsc_dst_ops;
-	ippdrv->check_property = gsc_ippdrv_check_property;
-	ippdrv->reset = gsc_ippdrv_reset;
-	ippdrv->start = gsc_ippdrv_start;
-	ippdrv->stop = gsc_ippdrv_stop;
-	ret = gsc_init_prop_list(ippdrv);
-	if (ret < 0) {
-		dev_err(dev, "failed to init property list.\n");
-		return ret;
-	}
-
-	DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
-
-	mutex_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
 
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, GSC_AUTOSUSPEND_DELAY);
 	pm_runtime_enable(dev);
 
-	ret = exynos_drm_ippdrv_register(ippdrv);
-	if (ret < 0) {
-		dev_err(dev, "failed to register drm gsc device.\n");
-		goto err_ippdrv_register;
-	}
+	ret = component_add(dev, &gsc_component_ops);
+	if (ret)
+		goto err_pm_dis;
 
 	dev_info(dev, "drm gsc registered successfully.\n");
 
 	return 0;
 
-err_ippdrv_register:
+err_pm_dis:
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 	return ret;
 }
@@ -1748,13 +1279,8 @@ err_ippdrv_register:
 static int gsc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 
-	exynos_drm_ippdrv_unregister(ippdrv);
-	mutex_destroy(&ctx->lock);
-
-	pm_runtime_set_suspended(dev);
+	pm_runtime_dont_use_autosuspend(dev);
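+	/*
+	 * Editorial note: the teardown below intentionally mirrors
+	 * gsc_probe() in reverse: autosuspend is retired here before
+	 * pm_runtime_disable() on the next line, matching the
+	 * pm_runtime_use_autosuspend()/pm_runtime_enable() pair that
+	 * this patch adds to probe.
+	 */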
 	pm_runtime_disable(dev);
 
 	return 0;
@@ -1763,19 +1289,32 @@ static int gsc_remove(struct platform_device *pdev)
 static int __maybe_unused gsc_runtime_suspend(struct device *dev)
 {
 	struct gsc_context *ctx = get_gsc_context(dev);
+	int i;
 
 	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
 
-	return gsc_clk_ctrl(ctx, false);
+	for (i = ctx->num_clocks - 1; i >= 0; i--)
+		clk_disable_unprepare(ctx->clocks[i]);
+
+	return 0;
 }
 
 static int __maybe_unused gsc_runtime_resume(struct device *dev)
 {
 	struct gsc_context *ctx = get_gsc_context(dev);
+	int i, ret;
 
 	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
 
-	return gsc_clk_ctrl(ctx, true);
+	for (i = 0; i < ctx->num_clocks; i++) {
+		ret = clk_prepare_enable(ctx->clocks[i]);
+		if (ret) {
+			while (--i >= 0)
+				clk_disable_unprepare(ctx->clocks[i]);
+			return ret;
+		}
+	}
+	return 0;
 }
 
 static const struct dev_pm_ops gsc_pm_ops = {
@@ -1784,9 +1323,66 @@
 	SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
 };
 
+static const struct drm_exynos_ipp_limit gsc_5250_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) },
+	{ IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2048 }, .v = { 16, 2048 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+			  .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static const struct drm_exynos_ipp_limit gsc_5420_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) },
+	{ IPP_SIZE_LIMIT(ROTATED, .h = { 16, 2016 }, .v = { 8, 2016 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+			  .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static const struct drm_exynos_ipp_limit gsc_5433_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) },
+	{ IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+			  .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static struct gsc_driverdata gsc_exynos5250_drvdata = {
+	.clk_names = {"gscl"},
+	.num_clocks = 1,
+	.limits = gsc_5250_limits,
+	.num_limits = ARRAY_SIZE(gsc_5250_limits),
+};
+
+static struct gsc_driverdata gsc_exynos5420_drvdata = {
+	.clk_names = {"gscl"},
+	.num_clocks = 1,
+	.limits = gsc_5420_limits,
+	.num_limits = ARRAY_SIZE(gsc_5420_limits),
+};
+
+static struct gsc_driverdata gsc_exynos5433_drvdata = {
+	.clk_names = {"pclk", "aclk", "aclk_xiu", "aclk_gsclbend"},
+	.num_clocks = 4,
+	.limits = gsc_5433_limits,
+	.num_limits = ARRAY_SIZE(gsc_5433_limits),
+};
+
 static const struct of_device_id exynos_drm_gsc_of_match[] = {
-	{ .compatible = "samsung,exynos5-gsc" },
-	{ },
+	{
+		.compatible = "samsung,exynos5-gsc",
+		.data = &gsc_exynos5250_drvdata,
+	}, {
+		.compatible = "samsung,exynos5250-gsc",
+		.data = &gsc_exynos5250_drvdata,
+	}, {
+		.compatible = "samsung,exynos5420-gsc",
+		.data = &gsc_exynos5420_drvdata,
+	}, {
+		.compatible = "samsung,exynos5433-gsc",
+		.data = &gsc_exynos5433_drvdata,
+	}, {
+	},
 };
 MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match);
 
@@ -1800,4 +1396,3 @@ struct platform_driver gsc_driver = {
 		.of_match_table = of_match_ptr(exynos_drm_gsc_of_match),
 	},
 };
-
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.h b/drivers/gpu/drm/exynos/exynos_drm_gsc.h
deleted file mode 100644
index 29ec1c5efcf2..000000000000
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- *	Eunchul Kim <chulspro.kim@samsung.com>
- *	Jinyoung Jeon <jy0.jeon@samsung.com>
- *	Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_GSC_H_
-#define _EXYNOS_DRM_GSC_H_
-
-/*
- * TODO
- * FIMD output interface notifier callback.
- * Mixer output interface notifier callback.
- */
-
-#endif /* _EXYNOS_DRM_GSC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
new file mode 100644
index 000000000000..26374e58c557
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (C) 2017 Samsung Electronics Co.Ltd
+ * Authors:
+ *	Marek Szyprowski <m.szyprowski@samsung.com>
+ *
+ * Exynos DRM Image Post Processing (IPP) related functions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ */
+
+
+#include <drm/drmP.h>
+#include <drm/drm_mode.h>
+#include <uapi/drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_gem.h"
+#include "exynos_drm_ipp.h"
+
+static int num_ipp;
+static LIST_HEAD(ipp_list);
+
+/**
+ * exynos_drm_ipp_register - Register a new picture processor hardware module
+ * @dev: DRM device
+ * @ipp: ipp module to init
+ * @funcs: callbacks for the new ipp object
+ * @caps: bitmask of ipp capabilities (%DRM_EXYNOS_IPP_CAP_*)
+ * @formats: array of supported formats
+ * @num_formats: size of the supported formats array
+ * @name: name (for debugging purposes)
+ *
+ * Initializes an ipp module.
+ *
+ * Returns:
+ * Zero on success, error code on failure.
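+ *
+ * A minimal registration sketch (editorial illustration only, not part
+ * of this patch; my_funcs and my_formats are assumed driver-local names,
+ * modelled on the gsc_bind() call above):
+ *
+ *	exynos_drm_ipp_register(drm_dev, &ctx->ipp, &my_funcs,
+ *				DRM_EXYNOS_IPP_CAP_CROP |
+ *				DRM_EXYNOS_IPP_CAP_SCALE,
+ *				my_formats, ARRAY_SIZE(my_formats),
+ *				"my-ipp");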
+ */
+int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp,
+		const struct exynos_drm_ipp_funcs *funcs, unsigned int caps,
+		const struct exynos_drm_ipp_formats *formats,
+		unsigned int num_formats, const char *name)
+{
+	WARN_ON(!ipp);
+	WARN_ON(!funcs);
+	WARN_ON(!formats);
+	WARN_ON(!num_formats);
+
+	spin_lock_init(&ipp->lock);
+	INIT_LIST_HEAD(&ipp->todo_list);
+	init_waitqueue_head(&ipp->done_wq);
+	ipp->dev = dev;
+	ipp->funcs = funcs;
+	ipp->capabilities = caps;
+	ipp->name = name;
+	ipp->formats = formats;
+	ipp->num_formats = num_formats;
+
+	/* ipp_list modification is serialized by component framework */
+	list_add_tail(&ipp->head, &ipp_list);
+	ipp->id = num_ipp++;
+
+	DRM_DEBUG_DRIVER("Registered ipp %d\n", ipp->id);
+
+	return 0;
+}
+
+/**
+ * exynos_drm_ipp_unregister - Unregister the picture processor module
+ * @dev: DRM device
+ * @ipp: ipp module
+ */
+void exynos_drm_ipp_unregister(struct drm_device *dev,
+			       struct exynos_drm_ipp *ipp)
+{
+	WARN_ON(ipp->task);
+	WARN_ON(!list_empty(&ipp->todo_list));
+	list_del(&ipp->head);
+}
+
+/**
+ * exynos_drm_ipp_get_res_ioctl - enumerate all ipp modules
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a list of ipp ids.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv)
+{
+	struct drm_exynos_ioctl_ipp_get_res *resp = data;
+	struct exynos_drm_ipp *ipp;
+	uint32_t __user *ipp_ptr = (uint32_t __user *)
+						(unsigned long)resp->ipp_id_ptr;
+	unsigned int count = num_ipp, copied = 0;
+
+	/*
+	 * This ioctl is called twice, once to determine how much space is
+	 * needed, and the 2nd time to fill it.
+	 */
+	if (count && resp->count_ipps >= count) {
+		list_for_each_entry(ipp, &ipp_list, head) {
+			if (put_user(ipp->id, ipp_ptr + copied))
+				return -EFAULT;
+			copied++;
+		}
+	}
+	resp->count_ipps = count;
+
+	return 0;
+}
+
+static inline struct exynos_drm_ipp *__ipp_get(uint32_t id)
+{
+	struct exynos_drm_ipp *ipp;
+
+	list_for_each_entry(ipp, &ipp_list, head)
+		if (ipp->id == id)
+			return ipp;
+	return NULL;
+}
+
+/**
+ * exynos_drm_ipp_get_caps_ioctl - get ipp module capabilities and formats
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a structure describing ipp module capabilities.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv)
+{
+	struct drm_exynos_ioctl_ipp_get_caps *resp = data;
+	void __user *ptr = (void __user *)(unsigned long)resp->formats_ptr;
+	struct exynos_drm_ipp *ipp;
+	int i;
+
+	ipp = __ipp_get(resp->ipp_id);
+	if (!ipp)
+		return -ENOENT;
+
+	resp->ipp_id = ipp->id;
+	resp->capabilities = ipp->capabilities;
+
+	/*
+	 * This ioctl is called twice, once to determine how much space is
+	 * needed, and the 2nd time to fill it.
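+	 *
+	 * A userspace sketch of that two-pass pattern (illustrative only;
+	 * the ioctl request name below is an assumption, it is not defined
+	 * in this file):
+	 *
+	 *	caps.ipp_id = id;
+	 *	caps.formats_count = 0;
+	 *	ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_CAPS, &caps);
+	 *	caps.formats_ptr = (uintptr_t)calloc(caps.formats_count,
+	 *			sizeof(struct drm_exynos_ipp_format));
+	 *	ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_CAPS, &caps);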
+ */ + if (resp->formats_count >= ipp->num_formats) { + for (i = 0; i < ipp->num_formats; i++) { + struct drm_exynos_ipp_format tmp = { + .fourcc = ipp->formats[i].fourcc, + .type = ipp->formats[i].type, + .modifier = ipp->formats[i].modifier, + }; + + if (copy_to_user(ptr, &tmp, sizeof(tmp))) + return -EFAULT; + ptr += sizeof(tmp); + } + } + resp->formats_count = ipp->num_formats; + + return 0; +} + +static inline const struct exynos_drm_ipp_formats *__ipp_format_get( + struct exynos_drm_ipp *ipp, uint32_t fourcc, + uint64_t mod, unsigned int type) +{ + int i; + + for (i = 0; i < ipp->num_formats; i++) { + if ((ipp->formats[i].type & type) && + ipp->formats[i].fourcc == fourcc && + ipp->formats[i].modifier == mod) + return &ipp->formats[i]; + } + return NULL; +} + +/** + * exynos_drm_ipp_get_limits_ioctl - get ipp module limits + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a structure describing ipp module limitations for provided + * picture format. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_limits *resp = data; + void __user *ptr = (void __user *)(unsigned long)resp->limits_ptr; + const struct exynos_drm_ipp_formats *format; + struct exynos_drm_ipp *ipp; + + if (resp->type != DRM_EXYNOS_IPP_FORMAT_SOURCE && + resp->type != DRM_EXYNOS_IPP_FORMAT_DESTINATION) + return -EINVAL; + + ipp = __ipp_get(resp->ipp_id); + if (!ipp) + return -ENOENT; + + format = __ipp_format_get(ipp, resp->fourcc, resp->modifier, + resp->type); + if (!format) + return -EINVAL; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. 
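+	 *
+	 * The same two-pass pattern as in the get_caps ioctl above applies
+	 * here (sketch; the ioctl request name is an assumption):
+	 *
+	 *	lim.ipp_id = id;
+	 *	lim.fourcc = DRM_FORMAT_NV12;
+	 *	lim.type = DRM_EXYNOS_IPP_FORMAT_SOURCE;
+	 *	lim.limits_count = 0;
+	 *	ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_LIMITS, &lim);
+	 *	then allocate lim.limits_count entries, set lim.limits_ptr
+	 *	and repeat the ioctl to fetch the actual limits.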
+ */ + if (format->num_limits && resp->limits_count >= format->num_limits) + if (copy_to_user((void __user *)ptr, format->limits, + sizeof(*format->limits) * format->num_limits)) + return -EFAULT; + resp->limits_count = format->num_limits; + + return 0; +} + +struct drm_pending_exynos_ipp_event { + struct drm_pending_event base; + struct drm_exynos_ipp_event event; +}; + +static inline struct exynos_drm_ipp_task * + exynos_drm_ipp_task_alloc(struct exynos_drm_ipp *ipp) +{ + struct exynos_drm_ipp_task *task; + + task = kzalloc(sizeof(*task), GFP_KERNEL); + if (!task) + return NULL; + + task->dev = ipp->dev; + task->ipp = ipp; + + /* some defaults */ + task->src.rect.w = task->dst.rect.w = UINT_MAX; + task->src.rect.h = task->dst.rect.h = UINT_MAX; + task->transform.rotation = DRM_MODE_ROTATE_0; + + DRM_DEBUG_DRIVER("Allocated task %pK\n", task); + + return task; +} + +static const struct exynos_drm_param_map { + unsigned int id; + unsigned int size; + unsigned int offset; +} exynos_drm_ipp_params_maps[] = { + { + DRM_EXYNOS_IPP_TASK_BUFFER | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE, + sizeof(struct drm_exynos_ipp_task_buffer), + offsetof(struct exynos_drm_ipp_task, src.buf), + }, { + DRM_EXYNOS_IPP_TASK_BUFFER | + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION, + sizeof(struct drm_exynos_ipp_task_buffer), + offsetof(struct exynos_drm_ipp_task, dst.buf), + }, { + DRM_EXYNOS_IPP_TASK_RECTANGLE | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE, + sizeof(struct drm_exynos_ipp_task_rect), + offsetof(struct exynos_drm_ipp_task, src.rect), + }, { + DRM_EXYNOS_IPP_TASK_RECTANGLE | + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION, + sizeof(struct drm_exynos_ipp_task_rect), + offsetof(struct exynos_drm_ipp_task, dst.rect), + }, { + DRM_EXYNOS_IPP_TASK_TRANSFORM, + sizeof(struct drm_exynos_ipp_task_transform), + offsetof(struct exynos_drm_ipp_task, transform), + }, { + DRM_EXYNOS_IPP_TASK_ALPHA, + sizeof(struct drm_exynos_ipp_task_alpha), + offsetof(struct exynos_drm_ipp_task, alpha), + }, +}; + +static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, + struct drm_exynos_ioctl_ipp_commit *arg) +{ + const struct exynos_drm_param_map *map = exynos_drm_ipp_params_maps; + void __user *params = (void __user *)(unsigned long)arg->params_ptr; + unsigned int size = arg->params_size; + uint32_t id; + int i; + + while (size) { + if (get_user(id, (uint32_t __user *)params)) + return -EFAULT; + + for (i = 0; i < ARRAY_SIZE(exynos_drm_ipp_params_maps); i++) + if (map[i].id == id) + break; + if (i == ARRAY_SIZE(exynos_drm_ipp_params_maps) || + map[i].size > size) + return -EINVAL; + + if (copy_from_user((void *)task + map[i].offset, params, + map[i].size)) + return -EFAULT; + + params += map[i].size; + size -= map[i].size; + } + + DRM_DEBUG_DRIVER("Got task %pK configuration from userspace\n", task); + return 0; +} + +static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf, + struct drm_file *filp) +{ + int ret = 0; + int i; + + /* basic checks */ + if (buf->buf.width == 0 || buf->buf.height == 0) + return -EINVAL; + buf->format = drm_format_info(buf->buf.fourcc); + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int width = (i == 0) ? 
buf->buf.width : + DIV_ROUND_UP(buf->buf.width, buf->format->hsub); + + if (buf->buf.pitch[i] == 0) + buf->buf.pitch[i] = width * buf->format->cpp[i]; + if (buf->buf.pitch[i] < width * buf->format->cpp[i]) + return -EINVAL; + if (!buf->buf.gem_id[i]) + return -ENOENT; + } + + /* pitch for additional planes must match */ + if (buf->format->num_planes > 2 && + buf->buf.pitch[1] != buf->buf.pitch[2]) + return -EINVAL; + + /* get GEM buffers and check their size */ + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int height = (i == 0) ? buf->buf.height : + DIV_ROUND_UP(buf->buf.height, buf->format->vsub); + unsigned long size = height * buf->buf.pitch[i]; + struct drm_gem_object *obj = drm_gem_object_lookup(filp, + buf->buf.gem_id[i]); + if (!obj) { + ret = -ENOENT; + goto gem_free; + } + buf->exynos_gem[i] = to_exynos_gem(obj); + + if (size + buf->buf.offset[i] > buf->exynos_gem[i]->size) { + i++; + ret = -EINVAL; + goto gem_free; + } + buf->dma_addr[i] = buf->exynos_gem[i]->dma_addr + + buf->buf.offset[i]; + } + + return 0; +gem_free: + while (i--) { + drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base); + buf->exynos_gem[i] = NULL; + } + return ret; +} + +static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) +{ + int i; + + if (!buf->exynos_gem[0]) + return; + for (i = 0; i < buf->format->num_planes; i++) + drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base); +} + +static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + DRM_DEBUG_DRIVER("Freeing task %pK\n", task); + + exynos_drm_ipp_task_release_buf(&task->src); + exynos_drm_ipp_task_release_buf(&task->dst); + if (task->event) + drm_event_cancel_free(ipp->dev, &task->event->base); + kfree(task); +} + +struct drm_ipp_limit { + struct drm_exynos_ipp_limit_val h; + struct drm_exynos_ipp_limit_val v; +}; + +enum drm_ipp_size_id { + IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX +}; + +static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = { + [IPP_LIMIT_BUFFER] = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, + [IPP_LIMIT_AREA] = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, + [IPP_LIMIT_ROTATED] = { DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED, + DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, +}; + +static inline void __limit_set_val(unsigned int *ptr, unsigned int val) +{ + if (!*ptr) + *ptr = val; +} + +static void __get_size_limit(const struct drm_exynos_ipp_limit *limits, + unsigned int num_limits, enum drm_ipp_size_id id, + struct drm_ipp_limit *res) +{ + const struct drm_exynos_ipp_limit *l = limits; + int i = 0; + + memset(res, 0, sizeof(*res)); + for (i = 0; limit_id_fallback[id][i]; i++) + for (l = limits; l - limits < num_limits; l++) { + if (((l->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) != + DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE) || + ((l->type & DRM_EXYNOS_IPP_LIMIT_SIZE_MASK) != + limit_id_fallback[id][i])) + continue; + __limit_set_val(&res->h.min, l->h.min); + __limit_set_val(&res->h.max, l->h.max); + __limit_set_val(&res->h.align, l->h.align); + __limit_set_val(&res->v.min, l->v.min); + __limit_set_val(&res->v.max, l->v.max); + __limit_set_val(&res->v.align, l->v.align); + } +} + +static inline bool __align_check(unsigned int val, unsigned int align) +{ + if (align && (val & (align - 1))) { + DRM_DEBUG_DRIVER("Value %d exceeds HW limits (align %d)\n", + val, align); + return false; + } + return true; +} + +static inline bool __size_limit_check(unsigned int val, + struct 
drm_exynos_ipp_limit_val *l) +{ + if ((l->min && val < l->min) || (l->max && val > l->max)) { + DRM_DEBUG_DRIVER("Value %d exceeds HW limits (min %d, max %d)\n", + val, l->min, l->max); + return false; + } + return __align_check(val, l->align); +} + +static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf, + const struct drm_exynos_ipp_limit *limits, unsigned int num_limits, + bool rotate, bool swap) +{ + enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA; + struct drm_ipp_limit l; + struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v; + + if (!limits) + return 0; + + __get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l); + if (!__size_limit_check(buf->buf.width, &l.h) || + !__size_limit_check(buf->buf.height, &l.v)) + return -EINVAL; + + if (swap) { + lv = &l.h; + lh = &l.v; + } + __get_size_limit(limits, num_limits, id, &l); + if (!__size_limit_check(buf->rect.w, lh) || + !__align_check(buf->rect.x, lh->align) || + !__size_limit_check(buf->rect.h, lv) || + !__align_check(buf->rect.y, lv->align)) + return -EINVAL; + + return 0; +} + +static inline bool __scale_limit_check(unsigned int src, unsigned int dst, + unsigned int min, unsigned int max) +{ + if ((max && (dst << 16) > src * max) || + (min && (dst << 16) < src * min)) { + DRM_DEBUG_DRIVER("Scale from %d to %d exceeds HW limits (ratio min %d.%05d, max %d.%05d)\n", + src, dst, + min >> 16, 100000 * (min & 0xffff) / (1 << 16), + max >> 16, 100000 * (max & 0xffff) / (1 << 16)); + return false; + } + return true; +} + +static int exynos_drm_ipp_check_scale_limits( + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst, + const struct drm_exynos_ipp_limit *limits, + unsigned int num_limits, bool swap) +{ + const struct drm_exynos_ipp_limit_val *lh, *lv; + int dw, dh; + + for (; num_limits; limits++, num_limits--) + if ((limits->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) == + DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE) + break; + if (!num_limits) + return 0; + + lh = (!swap) ? &limits->h : &limits->v; + lv = (!swap) ? &limits->v : &limits->h; + dw = (!swap) ? dst->w : dst->h; + dh = (!swap) ? 
dst->h : dst->w; + + if (!__scale_limit_check(src->w, dw, lh->min, lh->max) || + !__scale_limit_check(src->h, dh, lv->min, lv->max)) + return -EINVAL; + + return 0; +} + +static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) +{ + struct exynos_drm_ipp *ipp = task->ipp; + const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt; + struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; + unsigned int rotation = task->transform.rotation; + int ret = 0; + bool swap = drm_rotation_90_or_270(rotation); + bool rotate = (rotation != DRM_MODE_ROTATE_0); + bool scale = false; + + DRM_DEBUG_DRIVER("Checking task %pK\n", task); + + if (src->rect.w == UINT_MAX) + src->rect.w = src->buf.width; + if (src->rect.h == UINT_MAX) + src->rect.h = src->buf.height; + if (dst->rect.w == UINT_MAX) + dst->rect.w = dst->buf.width; + if (dst->rect.h == UINT_MAX) + dst->rect.h = dst->buf.height; + + if (src->rect.x + src->rect.w > (src->buf.width) || + src->rect.y + src->rect.h > (src->buf.height) || + dst->rect.x + dst->rect.w > (dst->buf.width) || + dst->rect.y + dst->rect.h > (dst->buf.height)) { + DRM_DEBUG_DRIVER("Task %pK: defined area is outside provided buffers\n", + task); + return -EINVAL; + } + + if ((!swap && (src->rect.w != dst->rect.w || + src->rect.h != dst->rect.h)) || + (swap && (src->rect.w != dst->rect.h || + src->rect.h != dst->rect.w))) + scale = true; + + if ((!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CROP) && + (src->rect.x || src->rect.y || dst->rect.x || dst->rect.y)) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_ROTATE) && rotate) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && + src->buf.fourcc != dst->buf.fourcc)) { + DRM_DEBUG_DRIVER("Task %pK: hw capabilities exceeded\n", task); + return -EINVAL; + } + + src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier, + DRM_EXYNOS_IPP_FORMAT_SOURCE); + if (!src_fmt) { + DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task); + return -EINVAL; + } + ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits, + src_fmt->num_limits, + rotate, false); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + src_fmt->limits, + src_fmt->num_limits, swap); + if (ret) + return ret; + + dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier, + DRM_EXYNOS_IPP_FORMAT_DESTINATION); + if (!dst_fmt) { + DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task); + return -EINVAL; + } + ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits, + dst_fmt->num_limits, + false, swap); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + dst_fmt->limits, + dst_fmt->num_limits, swap); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Task %pK: all checks done.\n", task); + + return ret; +} + +static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, + struct drm_file *filp) +{ + struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; + int ret = 0; + + DRM_DEBUG_DRIVER("Setting buffer for task %pK\n", task); + + ret = exynos_drm_ipp_task_setup_buffer(src, filp); + if (ret) { + DRM_DEBUG_DRIVER("Task %pK: src buffer setup failed\n", task); + return ret; + } + ret = exynos_drm_ipp_task_setup_buffer(dst, filp); + if (ret) { + DRM_DEBUG_DRIVER("Task %pK: dst buffer setup failed\n", task); + return ret; + } + + DRM_DEBUG_DRIVER("Task %pK: buffers prepared.\n", task); + + return ret; +} + + +static int 
exynos_drm_ipp_event_create(struct exynos_drm_ipp_task *task, + struct drm_file *file_priv, uint64_t user_data) +{ + struct drm_pending_exynos_ipp_event *e = NULL; + int ret; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->event.base.type = DRM_EXYNOS_IPP_EVENT; + e->event.base.length = sizeof(e->event); + e->event.user_data = user_data; + + ret = drm_event_reserve_init(task->dev, file_priv, &e->base, + &e->event.base); + if (ret) + goto free; + + task->event = e; + return 0; +free: + kfree(e); + return ret; +} + +static void exynos_drm_ipp_event_send(struct exynos_drm_ipp_task *task) +{ + struct timespec64 now; + + ktime_get_ts64(&now); + task->event->event.tv_sec = now.tv_sec; + task->event->event.tv_usec = now.tv_nsec / NSEC_PER_USEC; + task->event->event.sequence = atomic_inc_return(&task->ipp->sequence); + + drm_send_event(task->dev, &task->event->base); +} + +static int exynos_drm_ipp_task_cleanup(struct exynos_drm_ipp_task *task) +{ + int ret = task->ret; + + if (ret == 0 && task->event) { + exynos_drm_ipp_event_send(task); + /* ensure event won't be canceled on task free */ + task->event = NULL; + } + + exynos_drm_ipp_task_free(task->ipp, task); + return ret; +} + +static void exynos_drm_ipp_cleanup_work(struct work_struct *work) +{ + struct exynos_drm_ipp_task *task = container_of(work, + struct exynos_drm_ipp_task, cleanup_work); + + exynos_drm_ipp_task_cleanup(task); +} + +static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp); + +/** + * exynos_drm_ipp_task_done - finish given task and set return code + * @task: ipp task to finish + * @ret: error code or 0 if operation has been performed successfully + */ +void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) +{ + struct exynos_drm_ipp *ipp = task->ipp; + unsigned long flags; + + DRM_DEBUG_DRIVER("ipp: %d, task %pK done: %d\n", ipp->id, task, ret); + + spin_lock_irqsave(&ipp->lock, flags); + if (ipp->task == task) + ipp->task = NULL; + task->flags |= DRM_EXYNOS_IPP_TASK_DONE; + task->ret = ret; + spin_unlock_irqrestore(&ipp->lock, flags); + + exynos_drm_ipp_next_task(ipp); + wake_up(&ipp->done_wq); + + if (task->flags & DRM_EXYNOS_IPP_TASK_ASYNC) { + INIT_WORK(&task->cleanup_work, exynos_drm_ipp_cleanup_work); + schedule_work(&task->cleanup_work); + } +} + +static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) +{ + struct exynos_drm_ipp_task *task; + unsigned long flags; + int ret; + + DRM_DEBUG_DRIVER("ipp: %d, try to run new task\n", ipp->id); + + spin_lock_irqsave(&ipp->lock, flags); + + if (ipp->task || list_empty(&ipp->todo_list)) { + spin_unlock_irqrestore(&ipp->lock, flags); + return; + } + + task = list_first_entry(&ipp->todo_list, struct exynos_drm_ipp_task, + head); + list_del_init(&task->head); + ipp->task = task; + + spin_unlock_irqrestore(&ipp->lock, flags); + + DRM_DEBUG_DRIVER("ipp: %d, selected task %pK to run\n", ipp->id, task); + + ret = ipp->funcs->commit(ipp, task); + if (ret) + exynos_drm_ipp_task_done(task, ret); +} + +static void exynos_drm_ipp_schedule_task(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + unsigned long flags; + + spin_lock_irqsave(&ipp->lock, flags); + list_add(&task->head, &ipp->todo_list); + spin_unlock_irqrestore(&ipp->lock, flags); + + exynos_drm_ipp_next_task(ipp); +} + +static void exynos_drm_ipp_task_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + unsigned long flags; + + spin_lock_irqsave(&ipp->lock, flags); + if (task->flags & DRM_EXYNOS_IPP_TASK_DONE) { + /* 
already completed task */ + exynos_drm_ipp_task_cleanup(task); + } else if (ipp->task != task) { + /* task has not been scheduled for execution yet */ + list_del_init(&task->head); + exynos_drm_ipp_task_cleanup(task); + } else { + /* + * currently processed task, call abort() and perform + * cleanup with async worker + */ + task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; + spin_unlock_irqrestore(&ipp->lock, flags); + if (ipp->funcs->abort) + ipp->funcs->abort(ipp, task); + return; + } + spin_unlock_irqrestore(&ipp->lock, flags); +} + +/** + * exynos_drm_ipp_commit_ioctl - perform image processing operation + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a ipp task from the set of properties provided from the user + * and try to schedule it to framebuffer processor hardware. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_commit *arg = data; + struct exynos_drm_ipp *ipp; + struct exynos_drm_ipp_task *task; + int ret = 0; + + if ((arg->flags & ~DRM_EXYNOS_IPP_FLAGS) || arg->reserved) + return -EINVAL; + + /* can't test and expect an event at the same time */ + if ((arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) && + (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT)) + return -EINVAL; + + ipp = __ipp_get(arg->ipp_id); + if (!ipp) + return -ENOENT; + + task = exynos_drm_ipp_task_alloc(ipp); + if (!task) + return -ENOMEM; + + ret = exynos_drm_ipp_task_set(task, arg); + if (ret) + goto free; + + ret = exynos_drm_ipp_task_check(task); + if (ret) + goto free; + + ret = exynos_drm_ipp_task_setup_buffers(task, file_priv); + if (ret || arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) + goto free; + + if (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT) { + ret = exynos_drm_ipp_event_create(task, file_priv, + arg->user_data); + if (ret) + goto free; + } + + /* + * Queue task for processing on the hardware. task object will be + * then freed after exynos_drm_ipp_task_done() + */ + if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { + DRM_DEBUG_DRIVER("ipp: %d, nonblocking processing task %pK\n", + ipp->id, task); + + task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; + exynos_drm_ipp_schedule_task(task->ipp, task); + ret = 0; + } else { + DRM_DEBUG_DRIVER("ipp: %d, processing task %pK\n", ipp->id, + task); + exynos_drm_ipp_schedule_task(ipp, task); + ret = wait_event_interruptible(ipp->done_wq, + task->flags & DRM_EXYNOS_IPP_TASK_DONE); + if (ret) + exynos_drm_ipp_task_abort(ipp, task); + else + ret = exynos_drm_ipp_task_cleanup(task); + } + return ret; +free: + exynos_drm_ipp_task_free(ipp, task); + + return ret; +} diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h new file mode 100644 index 000000000000..0b27d4a9bf94 --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2017 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */
+
+#ifndef _EXYNOS_DRM_IPP_H_
+#define _EXYNOS_DRM_IPP_H_
+
+#include <drm/drmP.h>
+
+struct exynos_drm_ipp;
+struct exynos_drm_ipp_task;
+
+/**
+ * struct exynos_drm_ipp_funcs - exynos_drm_ipp control functions
+ */
+struct exynos_drm_ipp_funcs {
+	/**
+	 * @commit:
+	 *
+	 * This is the main entry point to start framebuffer processing
+	 * in the hardware. The exynos_drm_ipp_task has already been
+	 * validated. This function must not wait until the device finishes
+	 * processing. When the driver finishes processing, it has to call
+	 * the exynos_drm_ipp_task_done() function.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or negative error codes in case of failure.
+	 */
+	int (*commit)(struct exynos_drm_ipp *ipp,
+		      struct exynos_drm_ipp_task *task);
+
+	/**
+	 * @abort:
+	 *
+	 * Informs the driver that it has to abort the currently running
+	 * task as soon as possible (i.e. as soon as it can stop the device
+	 * safely), even if the task has not been finished by then.
+	 * After the driver performs the necessary steps, it has to call
+	 * exynos_drm_ipp_task_done() (as if the task ended normally).
+	 * This function does not have to (and will usually not) wait
+	 * until the device enters a state in which it can be stopped.
+	 */
+	void (*abort)(struct exynos_drm_ipp *ipp,
+		      struct exynos_drm_ipp_task *task);
+};
+
+/**
+ * struct exynos_drm_ipp - central picture processor module structure
+ */
+struct exynos_drm_ipp {
+	struct drm_device *dev;
+	struct list_head head;
+	unsigned int id;
+
+	const char *name;
+	const struct exynos_drm_ipp_funcs *funcs;
+	unsigned int capabilities;
+	const struct exynos_drm_ipp_formats *formats;
+	unsigned int num_formats;
+	atomic_t sequence;
+
+	spinlock_t lock;
+	struct exynos_drm_ipp_task *task;
+	struct list_head todo_list;
+	wait_queue_head_t done_wq;
+};
+
+struct exynos_drm_ipp_buffer {
+	struct drm_exynos_ipp_task_buffer buf;
+	struct drm_exynos_ipp_task_rect rect;
+
+	struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER];
+	const struct drm_format_info *format;
+	dma_addr_t dma_addr[MAX_FB_BUFFER];
+};
+
+/**
+ * struct exynos_drm_ipp_task - a structure describing transformation that
+ * has to be performed by the picture processor hardware module
+ */
+struct exynos_drm_ipp_task {
+	struct drm_device *dev;
+	struct exynos_drm_ipp *ipp;
+	struct list_head head;
+
+	struct exynos_drm_ipp_buffer src;
+	struct exynos_drm_ipp_buffer dst;
+
+	struct drm_exynos_ipp_task_transform transform;
+	struct drm_exynos_ipp_task_alpha alpha;
+
+	struct work_struct cleanup_work;
+	unsigned int flags;
+	int ret;
+
+	struct drm_pending_exynos_ipp_event *event;
+};
+
+#define DRM_EXYNOS_IPP_TASK_DONE	(1 << 0)
+#define DRM_EXYNOS_IPP_TASK_ASYNC	(1 << 1)
+
+struct exynos_drm_ipp_formats {
+	uint32_t fourcc;
+	uint32_t type;
+	uint64_t modifier;
+	const struct drm_exynos_ipp_limit *limits;
+	unsigned int num_limits;
+};
+
+/* helper macros to set exynos_drm_ipp_formats structure and limits */
+#define IPP_SRCDST_MFORMAT(f, m, l) \
+	.fourcc = DRM_FORMAT_##f, .modifier = m, .limits = l, \
+	.num_limits = ARRAY_SIZE(l), \
+	.type = (DRM_EXYNOS_IPP_FORMAT_SOURCE | \
+		 DRM_EXYNOS_IPP_FORMAT_DESTINATION)
+
+#define IPP_SRCDST_FORMAT(f, l) IPP_SRCDST_MFORMAT(f, 0, l)
+
+#define IPP_SIZE_LIMIT(l, val...) \
+	.type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE | \
+		 DRM_EXYNOS_IPP_LIMIT_SIZE_##l), val
+
+#define IPP_SCALE_LIMIT(val...)
\ + .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE), val + +int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp, + const struct exynos_drm_ipp_funcs *funcs, unsigned int caps, + const struct exynos_drm_ipp_formats *formats, + unsigned int num_formats, const char *name); +void exynos_drm_ipp_unregister(struct drm_device *dev, + struct exynos_drm_ipp *ipp); + +void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret); + +#ifdef CONFIG_DRM_EXYNOS_IPP +int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); +#else +static inline int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_res *resp = data; + + resp->count_ipps = 0; + return 0; +} +static inline int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +static inline int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +static inline int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +#endif +#endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 79282a820ecc..1a76dd3d52e1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/err.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -22,29 +23,18 @@ #include <drm/exynos_drm.h> #include "regs-rotator.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" /* * Rotator supports image crop/rotator and input/output DMA operations. * input DMA reads image data from the memory. * output DMA writes image data to memory. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> Rotator H/W ----> Memory. */ -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. need to add supported list in prop_list. - */ +#define ROTATOR_AUTOSUSPEND_DELAY 2000 -#define get_rot_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct rot_context, ippdrv); -#define rot_read(offset) readl(rot->regs + (offset)) +#define rot_read(offset) readl(rot->regs + (offset)) #define rot_write(cfg, offset) writel(cfg, rot->regs + (offset)) enum rot_irq_status { @@ -52,54 +42,28 @@ enum rot_irq_status { ROT_IRQ_STATUS_ILLEGAL = 9, }; -/* - * A structure of limitation. - * - * @min_w: minimum width. - * @min_h: minimum height. - * @max_w: maximum width. - * @max_h: maximum height. - * @align: align size. - */ -struct rot_limit { - u32 min_w; - u32 min_h; - u32 max_w; - u32 max_h; - u32 align; -}; - -/* - * A structure of limitation table. - * - * @ycbcr420_2p: case of YUV. - * @rgb888: case of RGB. 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 79282a820ecc..1a76dd3d52e1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/component.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -22,29 +23,18 @@
 #include <drm/exynos_drm.h>
 #include "regs-rotator.h"
 #include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
 #include "exynos_drm_ipp.h"
 
 /*
- * Rotator supports image crop/rotator and input/output DMA operations.
+ * Rotator supports image crop/rotation and input/output DMA operations.
  * input DMA reads image data from the memory.
  * output DMA writes image data to memory.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> Rotator H/W ----> Memory.
  */
 
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. need to add supported list in prop_list.
- */
+#define ROTATOR_AUTOSUSPEND_DELAY	2000
 
-#define get_rot_context(dev)	platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv)	container_of(ippdrv,\
-					struct rot_context, ippdrv);
-#define rot_read(offset)	readl(rot->regs + (offset))
+#define rot_read(offset)	readl(rot->regs + (offset))
 #define rot_write(cfg, offset)	writel(cfg, rot->regs + (offset))
 
 enum rot_irq_status {
@@ -52,54 +42,28 @@ enum rot_irq_status {
 	ROT_IRQ_STATUS_ILLEGAL = 9,
 };
 
-/*
- * A structure of limitation.
- *
- * @min_w: minimum width.
- * @min_h: minimum height.
- * @max_w: maximum width.
- * @max_h: maximum height.
- * @align: align size.
- */
-struct rot_limit {
-    u32 min_w;
-    u32 min_h;
-    u32 max_w;
-    u32 max_h;
-    u32 align;
-};
-
-/*
- * A structure of limitation table.
- *
- * @ycbcr420_2p: case of YUV.
- * @rgb888: case of RGB.
- */
-struct rot_limit_table {
-    struct rot_limit ycbcr420_2p;
-    struct rot_limit rgb888;
+struct rot_variant {
+    const struct exynos_drm_ipp_formats *formats;
+    unsigned int num_formats;
 };
 
 /*
  * A structure of rotator context.
- * @ippdrv: prepare initialization using ippdrv.
- * @regs_res: register resources.
+ * @ipp: exynos_drm_ipp module backend structure.
+ * @drm_dev: DRM device the rotator is bound to.
+ * @dev: the rotator device.
  * @regs: memory mapped io registers.
  * @clock: rotator gate clock.
- * @limit_tbl: limitation of rotator.
- * @irq: irq number.
- * @cur_buf_id: current operation buffer id.
- * @suspended: suspended state.
+ * @formats: supported DRM formats with their hardware limits.
+ * @num_formats: number of entries in @formats.
+ * @task: currently processed task, NULL when the hardware is idle.
  */
 struct rot_context {
-    struct exynos_drm_ippdrv ippdrv;
-    struct resource *regs_res;
+    struct exynos_drm_ipp ipp;
+    struct drm_device *drm_dev;
+    struct device *dev;
     void __iomem *regs;
     struct clk *clock;
-    struct rot_limit_table *limit_tbl;
-    int irq;
-    int cur_buf_id[EXYNOS_DRM_OPS_MAX];
-    bool suspended;
+    const struct exynos_drm_ipp_formats *formats;
+    unsigned int num_formats;
+    struct exynos_drm_ipp_task *task;
 };
 
 static void rotator_reg_set_irq(struct rot_context *rot, bool enable)
@@ -114,15 +78,6 @@ static void rotator_reg_set_irq(struct rot_context *rot, bool enable)
 	rot_write(val, ROT_CONFIG);
 }
 
-static u32 rotator_reg_get_fmt(struct rot_context *rot)
-{
-    u32 val = rot_read(ROT_CONTROL);
-
-    val &= ROT_CONTROL_FMT_MASK;
-
-    return val;
-}
-
 static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot)
 {
 	u32 val = rot_read(ROT_STATUS);
@@ -138,9 +93,6 @@ static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot)
 static irqreturn_t rotator_irq_handler(int irq, void *arg)
 {
 	struct rot_context *rot = arg;
-    struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
-    struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-    struct drm_exynos_ipp_event_work *event_work = c_node->event_work;
 	enum rot_irq_status irq_status;
 	u32 val;
 
@@ -152,56 +104,21 @@ static irqreturn_t rotator_irq_handler(int irq, void *arg)
 	val |= ROT_STATUS_IRQ_PENDING((u32)irq_status);
 	rot_write(val, ROT_STATUS);
 
-    if (irq_status == ROT_IRQ_STATUS_COMPLETE) {
-        event_work->ippdrv = ippdrv;
-        event_work->buf_id[EXYNOS_DRM_OPS_DST] =
-            rot->cur_buf_id[EXYNOS_DRM_OPS_DST];
-        queue_work(ippdrv->event_workq, &event_work->work);
-    } else {
-        DRM_ERROR("the SFR is set illegally\n");
+    if (rot->task) {
+        struct exynos_drm_ipp_task *task = rot->task;
+
+        rot->task = NULL;
+        pm_runtime_mark_last_busy(rot->dev);
+        pm_runtime_put_autosuspend(rot->dev);
+        exynos_drm_ipp_task_done(task,
+            irq_status == ROT_IRQ_STATUS_COMPLETE ?
0 : -EINVAL); } return IRQ_HANDLED; } -static void rotator_align_size(struct rot_context *rot, u32 fmt, u32 *hsize, - u32 *vsize) +static void rotator_src_set_fmt(struct rot_context *rot, u32 fmt) { - struct rot_limit_table *limit_tbl = rot->limit_tbl; - struct rot_limit *limit; - u32 mask, val; - - /* Get size limit */ - if (fmt == ROT_CONTROL_FMT_RGB888) - limit = &limit_tbl->rgb888; - else - limit = &limit_tbl->ycbcr420_2p; - - /* Get mask for rounding to nearest aligned val */ - mask = ~((1 << limit->align) - 1); - - /* Set aligned width */ - val = ROT_ALIGN(*hsize, limit->align, mask); - if (val < limit->min_w) - *hsize = ROT_MIN(limit->min_w, mask); - else if (val > limit->max_w) - *hsize = ROT_MAX(limit->max_w, mask); - else - *hsize = val; - - /* Set aligned height */ - val = ROT_ALIGN(*vsize, limit->align, mask); - if (val < limit->min_h) - *vsize = ROT_MIN(limit->min_h, mask); - else if (val > limit->max_h) - *vsize = ROT_MAX(limit->max_h, mask); - else - *vsize = val; -} - -static int rotator_src_set_fmt(struct device *dev, u32 fmt) -{ - struct rot_context *rot = dev_get_drvdata(dev); u32 val; val = rot_read(ROT_CONTROL); @@ -214,515 +131,176 @@ static int rotator_src_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_XRGB8888: val |= ROT_CONTROL_FMT_RGB888; break; - default: - DRM_ERROR("invalid image format\n"); - return -EINVAL; } rot_write(val, ROT_CONTROL); - - return 0; } -static inline bool rotator_check_reg_fmt(u32 fmt) +static void rotator_src_set_buf(struct rot_context *rot, + struct exynos_drm_ipp_buffer *buf) { - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) || - (fmt == ROT_CONTROL_FMT_RGB888)) - return true; - - return false; -} - -static int rotator_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, - struct drm_exynos_sz *sz) -{ - struct rot_context *rot = dev_get_drvdata(dev); - u32 fmt, hsize, vsize; u32 val; - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Align buffer size */ - hsize = sz->hsize; - vsize = sz->vsize; - rotator_align_size(rot, fmt, &hsize, &vsize); - /* Set buffer size configuration */ - val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize); + val = ROT_SET_BUF_SIZE_H(buf->buf.height) | + ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]); rot_write(val, ROT_SRC_BUF_SIZE); /* Set crop image position configuration */ - val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x); + val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x); rot_write(val, ROT_SRC_CROP_POS); - val = ROT_SRC_CROP_SIZE_H(pos->h) | ROT_SRC_CROP_SIZE_W(pos->w); + val = ROT_SRC_CROP_SIZE_H(buf->rect.h) | + ROT_SRC_CROP_SIZE_W(buf->rect.w); rot_write(val, ROT_SRC_CROP_SIZE); - return 0; + /* Set buffer DMA address */ + rot_write(buf->dma_addr[0], ROT_SRC_BUF_ADDR(0)); + rot_write(buf->dma_addr[1], ROT_SRC_BUF_ADDR(1)); } -static int rotator_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, - u32 buf_id, enum drm_exynos_ipp_buf_type buf_type) +static void rotator_dst_set_transf(struct rot_context *rot, + unsigned int rotation) { - struct rot_context *rot = dev_get_drvdata(dev); - dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX]; - u32 val, fmt, hsize, vsize; - int i; - - /* Set current buf_id */ - rot->cur_buf_id[EXYNOS_DRM_OPS_SRC] = buf_id; - - switch (buf_type) { - case IPP_BUF_ENQUEUE: - /* Set address configuration */ - for_each_ipp_planar(i) - addr[i] = buf_info->base[i]; - - /* Get format */ - fmt = 
rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Re-set cb planar for NV12 format */ - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) && - !addr[EXYNOS_DRM_PLANAR_CB]) { - - val = rot_read(ROT_SRC_BUF_SIZE); - hsize = ROT_GET_BUF_SIZE_W(val); - vsize = ROT_GET_BUF_SIZE_H(val); - - /* Set cb planar */ - addr[EXYNOS_DRM_PLANAR_CB] = - addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize; - } - - for_each_ipp_planar(i) - rot_write(addr[i], ROT_SRC_BUF_ADDR(i)); - break; - case IPP_BUF_DEQUEUE: - for_each_ipp_planar(i) - rot_write(0x0, ROT_SRC_BUF_ADDR(i)); - break; - default: - /* Nothing to do */ - break; - } - - return 0; -} - -static int rotator_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) -{ - struct rot_context *rot = dev_get_drvdata(dev); u32 val; /* Set transform configuration */ val = rot_read(ROT_CONTROL); val &= ~ROT_CONTROL_FLIP_MASK; - switch (flip) { - case EXYNOS_DRM_FLIP_VERTICAL: - val |= ROT_CONTROL_FLIP_VERTICAL; - break; - case EXYNOS_DRM_FLIP_HORIZONTAL: + if (rotation & DRM_MODE_REFLECT_X) val |= ROT_CONTROL_FLIP_HORIZONTAL; - break; - default: - /* Flip None */ - break; - } + if (rotation & DRM_MODE_REFLECT_Y) + val |= ROT_CONTROL_FLIP_VERTICAL; val &= ~ROT_CONTROL_ROT_MASK; - switch (degree) { - case EXYNOS_DRM_DEGREE_90: + if (rotation & DRM_MODE_ROTATE_90) val |= ROT_CONTROL_ROT_90; - break; - case EXYNOS_DRM_DEGREE_180: + else if (rotation & DRM_MODE_ROTATE_180) val |= ROT_CONTROL_ROT_180; - break; - case EXYNOS_DRM_DEGREE_270: + else if (rotation & DRM_MODE_ROTATE_270) val |= ROT_CONTROL_ROT_270; - break; - default: - /* Rotation 0 Degree */ - break; - } rot_write(val, ROT_CONTROL); - - /* Check degree for setting buffer size swap */ - if ((degree == EXYNOS_DRM_DEGREE_90) || - (degree == EXYNOS_DRM_DEGREE_270)) - *swap = true; - else - *swap = false; - - return 0; } -static int rotator_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, - struct drm_exynos_sz *sz) +static void rotator_dst_set_buf(struct rot_context *rot, + struct exynos_drm_ipp_buffer *buf) { - struct rot_context *rot = dev_get_drvdata(dev); - u32 val, fmt, hsize, vsize; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Align buffer size */ - hsize = sz->hsize; - vsize = sz->vsize; - rotator_align_size(rot, fmt, &hsize, &vsize); + u32 val; /* Set buffer size configuration */ - val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize); + val = ROT_SET_BUF_SIZE_H(buf->buf.height) | + ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]); rot_write(val, ROT_DST_BUF_SIZE); /* Set crop image position configuration */ - val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x); + val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x); rot_write(val, ROT_DST_CROP_POS); - return 0; + /* Set buffer DMA address */ + rot_write(buf->dma_addr[0], ROT_DST_BUF_ADDR(0)); + rot_write(buf->dma_addr[1], ROT_DST_BUF_ADDR(1)); } -static int rotator_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, - u32 buf_id, enum drm_exynos_ipp_buf_type buf_type) +static void rotator_start(struct rot_context *rot) { - struct rot_context *rot = dev_get_drvdata(dev); - dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX]; - u32 val, fmt, hsize, vsize; - int i; - - /* Set current buf_id */ - rot->cur_buf_id[EXYNOS_DRM_OPS_DST] = buf_id; - - switch (buf_type) { - 
case IPP_BUF_ENQUEUE: - /* Set address configuration */ - for_each_ipp_planar(i) - addr[i] = buf_info->base[i]; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Re-set cb planar for NV12 format */ - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) && - !addr[EXYNOS_DRM_PLANAR_CB]) { - /* Get buf size */ - val = rot_read(ROT_DST_BUF_SIZE); - - hsize = ROT_GET_BUF_SIZE_W(val); - vsize = ROT_GET_BUF_SIZE_H(val); - - /* Set cb planar */ - addr[EXYNOS_DRM_PLANAR_CB] = - addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize; - } - - for_each_ipp_planar(i) - rot_write(addr[i], ROT_DST_BUF_ADDR(i)); - break; - case IPP_BUF_DEQUEUE: - for_each_ipp_planar(i) - rot_write(0x0, ROT_DST_BUF_ADDR(i)); - break; - default: - /* Nothing to do */ - break; - } + u32 val; - return 0; + /* Set interrupt enable */ + rotator_reg_set_irq(rot, true); + + val = rot_read(ROT_CONTROL); + val |= ROT_CONTROL_START; + rot_write(val, ROT_CONTROL); } -static struct exynos_drm_ipp_ops rot_src_ops = { - .set_fmt = rotator_src_set_fmt, - .set_size = rotator_src_set_size, - .set_addr = rotator_src_set_addr, -}; +static int rotator_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct rot_context *rot = + container_of(ipp, struct rot_context, ipp); -static struct exynos_drm_ipp_ops rot_dst_ops = { - .set_transf = rotator_dst_set_transf, - .set_size = rotator_dst_set_size, - .set_addr = rotator_dst_set_addr, -}; + pm_runtime_get_sync(rot->dev); + rot->task = task; -static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 0; - prop_list->crop = 0; - prop_list->scale = 0; + rotator_src_set_fmt(rot, task->src.buf.fourcc); + rotator_src_set_buf(rot, &task->src); + rotator_dst_set_transf(rot, task->transform.rotation); + rotator_dst_set_buf(rot, &task->dst); + rotator_start(rot); return 0; } -static inline bool rotator_check_drm_fmt(u32 fmt) -{ - switch (fmt) { - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_NV12: - return true; - default: - DRM_DEBUG_KMS("not support format\n"); - return false; - } -} - -static inline bool rotator_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} +static const struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = rotator_commit, +}; -static int rotator_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) +static int rotator_bind(struct device *dev, struct device *master, void *data) { - struct drm_exynos_ipp_config *src_config = - &property->config[EXYNOS_DRM_OPS_SRC]; - struct drm_exynos_ipp_config *dst_config = - &property->config[EXYNOS_DRM_OPS_DST]; - struct drm_exynos_pos *src_pos = &src_config->pos; - struct drm_exynos_pos *dst_pos = &dst_config->pos; - struct drm_exynos_sz *src_sz = &src_config->sz; - struct drm_exynos_sz *dst_sz = &dst_config->sz; - bool swap = false; - - /* Check format configuration */ - if (src_config->fmt != dst_config->fmt) { - DRM_DEBUG_KMS("not 
support csc feature\n"); - return -EINVAL; - } - - if (!rotator_check_drm_fmt(dst_config->fmt)) { - DRM_DEBUG_KMS("invalid format\n"); - return -EINVAL; - } - - /* Check transform configuration */ - if (src_config->degree != EXYNOS_DRM_DEGREE_0) { - DRM_DEBUG_KMS("not support source-side rotation\n"); - return -EINVAL; - } - - switch (dst_config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - /* No problem */ - break; - default: - DRM_DEBUG_KMS("invalid degree\n"); - return -EINVAL; - } - - if (src_config->flip != EXYNOS_DRM_FLIP_NONE) { - DRM_DEBUG_KMS("not support source-side flip\n"); - return -EINVAL; - } + struct rot_context *rot = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &rot->ipp; - if (!rotator_check_drm_flip(dst_config->flip)) { - DRM_DEBUG_KMS("invalid flip\n"); - return -EINVAL; - } + rot->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); - /* Check size configuration */ - if ((src_pos->x + src_pos->w > src_sz->hsize) || - (src_pos->y + src_pos->h > src_sz->vsize)) { - DRM_DEBUG_KMS("out of source buffer bound\n"); - return -EINVAL; - } + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE, + rot->formats, rot->num_formats, "rotator"); - if (swap) { - if ((dst_pos->x + dst_pos->h > dst_sz->vsize) || - (dst_pos->y + dst_pos->w > dst_sz->hsize)) { - DRM_DEBUG_KMS("out of destination buffer bound\n"); - return -EINVAL; - } - - if ((src_pos->w != dst_pos->h) || (src_pos->h != dst_pos->w)) { - DRM_DEBUG_KMS("not support scale feature\n"); - return -EINVAL; - } - } else { - if ((dst_pos->x + dst_pos->w > dst_sz->hsize) || - (dst_pos->y + dst_pos->h > dst_sz->vsize)) { - DRM_DEBUG_KMS("out of destination buffer bound\n"); - return -EINVAL; - } - - if ((src_pos->w != dst_pos->w) || (src_pos->h != dst_pos->h)) { - DRM_DEBUG_KMS("not support scale feature\n"); - return -EINVAL; - } - } + dev_info(dev, "The exynos rotator has been probed successfully\n"); return 0; } -static int rotator_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void rotator_unbind(struct device *dev, struct device *master, + void *data) { struct rot_context *rot = dev_get_drvdata(dev); - u32 val; - - if (rot->suspended) { - DRM_ERROR("suspended state\n"); - return -EPERM; - } - - if (cmd != IPP_CMD_M2M) { - DRM_ERROR("not support cmd: %d\n", cmd); - return -EINVAL; - } - - /* Set interrupt enable */ - rotator_reg_set_irq(rot, true); - - val = rot_read(ROT_CONTROL); - val |= ROT_CONTROL_START; - - rot_write(val, ROT_CONTROL); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &rot->ipp; - return 0; + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(rot->drm_dev, rot->dev); } -static struct rot_limit_table rot_limit_tbl_4210 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_64K, - .max_h = SZ_64K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_16K, - .max_h = SZ_16K, - .align = 2, - }, -}; - -static struct rot_limit_table rot_limit_tbl_4x12 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_32K, - .max_h = SZ_32K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_8K, - .max_h = SZ_8K, - .align = 2, - }, +static const struct component_ops rotator_component_ops = { + .bind = rotator_bind, + .unbind = rotator_unbind, }; -static struct rot_limit_table rot_limit_tbl_5250 = { - 
.ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_32K, - .max_h = SZ_32K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_8K, - .max_h = SZ_8K, - .align = 1, - }, -}; - -static const struct of_device_id exynos_rotator_match[] = { - { - .compatible = "samsung,exynos4210-rotator", - .data = &rot_limit_tbl_4210, - }, - { - .compatible = "samsung,exynos4212-rotator", - .data = &rot_limit_tbl_4x12, - }, - { - .compatible = "samsung,exynos5250-rotator", - .data = &rot_limit_tbl_5250, - }, - {}, -}; -MODULE_DEVICE_TABLE(of, exynos_rotator_match); - static int rotator_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct resource *regs_res; struct rot_context *rot; - struct exynos_drm_ippdrv *ippdrv; + const struct rot_variant *variant; + int irq; int ret; - if (!dev->of_node) { - dev_err(dev, "cannot find of_node.\n"); - return -ENODEV; - } - rot = devm_kzalloc(dev, sizeof(*rot), GFP_KERNEL); if (!rot) return -ENOMEM; - rot->limit_tbl = (struct rot_limit_table *) - of_device_get_match_data(dev); - rot->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rot->regs = devm_ioremap_resource(dev, rot->regs_res); + variant = of_device_get_match_data(dev); + rot->formats = variant->formats; + rot->num_formats = variant->num_formats; + rot->dev = dev; + regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rot->regs = devm_ioremap_resource(dev, regs_res); if (IS_ERR(rot->regs)) return PTR_ERR(rot->regs); - rot->irq = platform_get_irq(pdev, 0); - if (rot->irq < 0) { + irq = platform_get_irq(pdev, 0); + if (irq < 0) { dev_err(dev, "failed to get irq\n"); - return rot->irq; + return irq; } - ret = devm_request_threaded_irq(dev, rot->irq, NULL, - rotator_irq_handler, IRQF_ONESHOT, "drm_rotator", rot); + ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev), + rot); if (ret < 0) { dev_err(dev, "failed to request irq\n"); return ret; @@ -734,35 +312,19 @@ static int rotator_probe(struct platform_device *pdev) return PTR_ERR(rot->clock); } + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, ROTATOR_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - - ippdrv = &rot->ippdrv; - ippdrv->dev = dev; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &rot_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &rot_dst_ops; - ippdrv->check_property = rotator_ippdrv_check_property; - ippdrv->start = rotator_ippdrv_start; - ret = rotator_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - goto err_ippdrv_register; - } - - DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv); - platform_set_drvdata(pdev, rot); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm rotator device\n"); - goto err_ippdrv_register; - } - - dev_info(dev, "The exynos rotator is probed successfully\n"); + ret = component_add(dev, &rotator_component_ops); + if (ret) + goto err_component; return 0; -err_ippdrv_register: +err_component: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return ret; } @@ -770,45 +332,101 @@ err_ippdrv_register: static int rotator_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct rot_context *rot = dev_get_drvdata(dev); - struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv; - - exynos_drm_ippdrv_unregister(ippdrv); + component_del(dev, &rotator_component_ops); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return 0; } #ifdef CONFIG_PM -static int rotator_clk_crtl(struct rot_context *rot, bool 
enable)
-{
-    if (enable) {
-        clk_prepare_enable(rot->clock);
-        rot->suspended = false;
-    } else {
-        clk_disable_unprepare(rot->clock);
-        rot->suspended = true;
-    }
-
-    return 0;
-}
-
 static int rotator_runtime_suspend(struct device *dev)
 {
 	struct rot_context *rot = dev_get_drvdata(dev);
 
-    return  rotator_clk_crtl(rot, false);
+    clk_disable_unprepare(rot->clock);
+    return 0;
 }
 
 static int rotator_runtime_resume(struct device *dev)
 {
 	struct rot_context *rot = dev_get_drvdata(dev);
 
-    return rotator_clk_crtl(rot, true);
+    return clk_prepare_enable(rot->clock);
 }
 #endif
 
+static const struct drm_exynos_ipp_limit rotator_4210_rgb888_limits[] = {
+    { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) },
+    { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4412_rgb888_limits[] = {
+    { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) },
+    { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_5250_rgb888_limits[] = {
+    { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) },
+    { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4210_yuv_limits[] = {
+    { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) },
+    { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4412_yuv_limits[] = {
+    { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_32K }, .v = { 32, SZ_32K }) },
+    { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_4210_formats[] = {
+    { IPP_SRCDST_FORMAT(XRGB8888, rotator_4210_rgb888_limits) },
+    { IPP_SRCDST_FORMAT(NV12, rotator_4210_yuv_limits) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_4412_formats[] = {
+    { IPP_SRCDST_FORMAT(XRGB8888, rotator_4412_rgb888_limits) },
+    { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_5250_formats[] = {
+    { IPP_SRCDST_FORMAT(XRGB8888, rotator_5250_rgb888_limits) },
+    { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) },
+};
+
+static const struct rot_variant rotator_4210_data = {
+    .formats = rotator_4210_formats,
+    .num_formats = ARRAY_SIZE(rotator_4210_formats),
+};
+
+static const struct rot_variant rotator_4412_data = {
+    .formats = rotator_4412_formats,
+    .num_formats = ARRAY_SIZE(rotator_4412_formats),
+};
+
+static const struct rot_variant rotator_5250_data = {
+    .formats = rotator_5250_formats,
+    .num_formats = ARRAY_SIZE(rotator_5250_formats),
+};
+
+static const struct of_device_id exynos_rotator_match[] = {
+    {
+        .compatible = "samsung,exynos4210-rotator",
+        .data = &rotator_4210_data,
+    }, {
+        .compatible = "samsung,exynos4212-rotator",
+        .data = &rotator_4412_data,
+    }, {
+        .compatible = "samsung,exynos5250-rotator",
+        .data = &rotator_5250_data,
+    }, {
+    },
+};
+MODULE_DEVICE_TABLE(of, exynos_rotator_match);
+
 static const struct dev_pm_ops rotator_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
 				pm_runtime_force_resume)
@@ -820,7 +438,7 @@ struct platform_driver rotator_driver = {
 	.probe		= rotator_probe,
 	.remove		= rotator_remove,
 	.driver		= {
-        .name	= "exynos-rot",
+        .name	= "exynos-rotator",
 		.owner	= THIS_MODULE,
 		.pm	= &rotator_pm_ops,
 		.of_match_table = exynos_rotator_match,
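Note that both converted drivers now take the rotation as the core DRM bitmask (DRM_MODE_ROTATE_* | DRM_MODE_REFLECT_*) instead of the old driver-private EXYNOS_DRM_DEGREE_*/EXYNOS_DRM_FLIP_* enums, so the usual core helpers apply. For instance, a 90- or 270-degree rotation swaps the effective width and height, which a validation path could test like this (a sketch only; foo_dims_match is a hypothetical helper, not part of this patch):

    /* true when dst is exactly the rotated src, i.e. no scaling is needed */
    static bool foo_dims_match(const struct exynos_drm_ipp_task *task)
    {
        unsigned int w = task->src.rect.w, h = task->src.rect.h;

        if (drm_rotation_90_or_270(task->transform.rotation))
            swap(w, h);

        return task->dst.rect.w == w && task->dst.rect.h == h;
    }

The scaler driver below uses the same drm_rotation_90_or_270() helper when computing its scaling ratios.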
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
new file mode 100644
index 000000000000..63b05b7c846a
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 2017 Samsung Electronics Co.Ltd
+ * Author:
+ *	Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/component.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drmP.h>
+#include <drm/exynos_drm.h>
+#include "regs-scaler.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
+#include "exynos_drm_ipp.h"
+
+#define scaler_read(offset)	readl(scaler->regs + (offset))
+#define scaler_write(cfg, offset) writel(cfg, scaler->regs + (offset))
+#define SCALER_MAX_CLK		4
+#define SCALER_AUTOSUSPEND_DELAY	2000
+
+struct scaler_data {
+    const char *clk_name[SCALER_MAX_CLK];
+    unsigned int num_clk;
+    const struct exynos_drm_ipp_formats *formats;
+    unsigned int num_formats;
+};
+
+struct scaler_context {
+    struct exynos_drm_ipp ipp;
+    struct drm_device *drm_dev;
+    struct device *dev;
+    void __iomem *regs;
+    struct clk *clock[SCALER_MAX_CLK];
+    struct exynos_drm_ipp_task *task;
+    const struct scaler_data *scaler_data;
+};
+
+static u32 scaler_get_format(u32 drm_fmt)
+{
+    switch (drm_fmt) {
+    case DRM_FORMAT_NV21:
+        return SCALER_YUV420_2P_UV;
+    case DRM_FORMAT_NV12:
+        return SCALER_YUV420_2P_VU;
+    case DRM_FORMAT_YUV420:
+        return SCALER_YUV420_3P;
+    case DRM_FORMAT_YUYV:
+        return SCALER_YUV422_1P_YUYV;
+    case DRM_FORMAT_UYVY:
+        return SCALER_YUV422_1P_UYVY;
+    case DRM_FORMAT_YVYU:
+        return SCALER_YUV422_1P_YVYU;
+    case DRM_FORMAT_NV61:
+        return SCALER_YUV422_2P_UV;
+    case DRM_FORMAT_NV16:
+        return SCALER_YUV422_2P_VU;
+    case DRM_FORMAT_YUV422:
+        return SCALER_YUV422_3P;
+    case DRM_FORMAT_NV42:
+        return SCALER_YUV444_2P_UV;
+    case DRM_FORMAT_NV24:
+        return SCALER_YUV444_2P_VU;
+    case DRM_FORMAT_YUV444:
+        return SCALER_YUV444_3P;
+    case DRM_FORMAT_RGB565:
+        return SCALER_RGB_565;
+    case DRM_FORMAT_XRGB1555:
+        return SCALER_ARGB1555;
+    case DRM_FORMAT_ARGB1555:
+        return SCALER_ARGB1555;
+    case DRM_FORMAT_XRGB4444:
+        return SCALER_ARGB4444;
+    case DRM_FORMAT_ARGB4444:
+        return SCALER_ARGB4444;
+    case DRM_FORMAT_XRGB8888:
+        return SCALER_ARGB8888;
+    case DRM_FORMAT_ARGB8888:
+        return SCALER_ARGB8888;
+    case DRM_FORMAT_RGBX8888:
+        return SCALER_RGBA8888;
+    case DRM_FORMAT_RGBA8888:
+        return SCALER_RGBA8888;
+    default:
+        break;
+    }
+
+    return 0;
+}
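One subtlety in scaler_get_format(): it returns 0 for any fourcc it does not recognize, and 0 is also the valid hardware value SCALER_YUV420_2P_UV (see regs-scaler.h below). This is safe here only because, as the commit() kernel-doc above notes, a task arrives already validated against the format list the driver registers, so an unsupported fourcc never reaches this helper. If the helper were ever reused on unvalidated input, an explicit error path would be needed; an illustrative sketch (scaler_get_format_checked is hypothetical, not part of this patch):

    static int scaler_get_format_checked(u32 drm_fmt, u32 *hw_fmt)
    {
        switch (drm_fmt) {
        case DRM_FORMAT_NV21:
            *hw_fmt = SCALER_YUV420_2P_UV;
            return 0;
        case DRM_FORMAT_NV12:
            *hw_fmt = SCALER_YUV420_2P_VU;
            return 0;
        /* the remaining mappings follow the same pattern */
        default:
            return -EINVAL;
        }
    }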
+
+static inline void scaler_enable_int(struct scaler_context *scaler)
+{
+    u32 val;
+
+    val = SCALER_INT_EN_TIMEOUT |
+        SCALER_INT_EN_ILLEGAL_BLEND |
+        SCALER_INT_EN_ILLEGAL_RATIO |
+        SCALER_INT_EN_ILLEGAL_DST_HEIGHT |
+        SCALER_INT_EN_ILLEGAL_DST_WIDTH |
+        SCALER_INT_EN_ILLEGAL_DST_V_POS |
+        SCALER_INT_EN_ILLEGAL_DST_H_POS |
+        SCALER_INT_EN_ILLEGAL_DST_C_SPAN |
+        SCALER_INT_EN_ILLEGAL_DST_Y_SPAN |
+        SCALER_INT_EN_ILLEGAL_DST_CR_BASE |
+        SCALER_INT_EN_ILLEGAL_DST_CB_BASE |
+        SCALER_INT_EN_ILLEGAL_DST_Y_BASE |
+        SCALER_INT_EN_ILLEGAL_DST_COLOR |
+        SCALER_INT_EN_ILLEGAL_SRC_HEIGHT |
+        SCALER_INT_EN_ILLEGAL_SRC_WIDTH |
+        SCALER_INT_EN_ILLEGAL_SRC_CV_POS |
+        SCALER_INT_EN_ILLEGAL_SRC_CH_POS |
+        SCALER_INT_EN_ILLEGAL_SRC_YV_POS |
+        SCALER_INT_EN_ILLEGAL_SRC_YH_POS |
+        SCALER_INT_EN_ILLEGAL_DST_SPAN |
+        SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN |
+        SCALER_INT_EN_ILLEGAL_SRC_CR_BASE |
+        SCALER_INT_EN_ILLEGAL_SRC_CB_BASE |
+        SCALER_INT_EN_ILLEGAL_SRC_Y_BASE |
+        SCALER_INT_EN_ILLEGAL_SRC_COLOR |
+        SCALER_INT_EN_FRAME_END;
+    scaler_write(val, SCALER_INT_EN);
+}
+
+static inline void scaler_set_src_fmt(struct scaler_context *scaler,
+    u32 src_fmt)
+{
+    u32 val;
+
+    val = SCALER_SRC_CFG_SET_COLOR_FORMAT(src_fmt);
+    scaler_write(val, SCALER_SRC_CFG);
+}
+
+static inline void scaler_set_src_base(struct scaler_context *scaler,
+    struct exynos_drm_ipp_buffer *src_buf)
+{
+    static unsigned int bases[] = {
+        SCALER_SRC_Y_BASE,
+        SCALER_SRC_CB_BASE,
+        SCALER_SRC_CR_BASE,
+    };
+    int i;
+
+    for (i = 0; i < src_buf->format->num_planes; ++i)
+        scaler_write(src_buf->dma_addr[i], bases[i]);
+}
+
+static inline void scaler_set_src_span(struct scaler_context *scaler,
+    struct exynos_drm_ipp_buffer *src_buf)
+{
+    u32 val;
+
+    val = SCALER_SRC_SPAN_SET_Y_SPAN(src_buf->buf.pitch[0] /
+        src_buf->format->cpp[0]);
+
+    if (src_buf->format->num_planes > 1)
+        val |= SCALER_SRC_SPAN_SET_C_SPAN(src_buf->buf.pitch[1]);
+
+    scaler_write(val, SCALER_SRC_SPAN);
+}
+
+static inline void scaler_set_src_luma_pos(struct scaler_context *scaler,
+    struct drm_exynos_ipp_task_rect *src_pos)
+{
+    u32 val;
+
+    val = SCALER_SRC_Y_POS_SET_YH_POS(src_pos->x << 2);
+    val |= SCALER_SRC_Y_POS_SET_YV_POS(src_pos->y << 2);
+    scaler_write(val, SCALER_SRC_Y_POS);
+    scaler_write(val, SCALER_SRC_C_POS); /* ATTENTION: chroma gets the same position */
+}
+
+static inline void scaler_set_src_wh(struct scaler_context *scaler,
+    struct drm_exynos_ipp_task_rect *src_pos)
+{
+    u32 val;
+
+    val = SCALER_SRC_WH_SET_WIDTH(src_pos->w);
+    val |= SCALER_SRC_WH_SET_HEIGHT(src_pos->h);
+    scaler_write(val, SCALER_SRC_WH);
+}
+
+static inline void scaler_set_dst_fmt(struct scaler_context *scaler,
+    u32 dst_fmt)
+{
+    u32 val;
+
+    val = SCALER_DST_CFG_SET_COLOR_FORMAT(dst_fmt);
+    scaler_write(val, SCALER_DST_CFG);
+}
+
+static inline void scaler_set_dst_base(struct scaler_context *scaler,
+    struct exynos_drm_ipp_buffer *dst_buf)
+{
+    static unsigned int bases[] = {
+        SCALER_DST_Y_BASE,
+        SCALER_DST_CB_BASE,
+        SCALER_DST_CR_BASE,
+    };
+    int i;
+
+    for (i = 0; i < dst_buf->format->num_planes; ++i)
+        scaler_write(dst_buf->dma_addr[i], bases[i]);
+}
+
+static inline void scaler_set_dst_span(struct scaler_context *scaler,
+    struct exynos_drm_ipp_buffer *dst_buf)
+{
+    u32 val;
+
+    val = SCALER_DST_SPAN_SET_Y_SPAN(dst_buf->buf.pitch[0] /
+        dst_buf->format->cpp[0]);
+
+    if (dst_buf->format->num_planes > 1)
+        val |= SCALER_DST_SPAN_SET_C_SPAN(dst_buf->buf.pitch[1]);
+
+    scaler_write(val, SCALER_DST_SPAN);
+}
+
+static inline void scaler_set_dst_wh(struct scaler_context *scaler,
+    struct drm_exynos_ipp_task_rect *dst_pos)
+{
+    u32 val;
+
+    val = SCALER_DST_WH_SET_WIDTH(dst_pos->w);
+    val |= SCALER_DST_WH_SET_HEIGHT(dst_pos->h);
+    scaler_write(val, SCALER_DST_WH);
+}
+
+static inline void scaler_set_dst_pos(struct scaler_context *scaler,
+    struct drm_exynos_ipp_task_rect *dst_pos)
+{
+    u32 val;
+
+    val = SCALER_DST_POS_SET_H_POS(dst_pos->x);
+    val |= SCALER_DST_POS_SET_V_POS(dst_pos->y);
+    scaler_write(val, SCALER_DST_POS);
+}
+
+static inline void scaler_set_hv_ratio(struct scaler_context *scaler,
+    unsigned int rotation,
+    struct drm_exynos_ipp_task_rect *src_pos,
+    struct drm_exynos_ipp_task_rect *dst_pos)
+{
+    u32 val, h_ratio, v_ratio;
+
+    if (drm_rotation_90_or_270(rotation)) {
+        h_ratio = (src_pos->h << 16) / dst_pos->w;
+        v_ratio = (src_pos->w << 16) / dst_pos->h;
+    } else {
+        h_ratio = (src_pos->w << 16) / dst_pos->w;
+        v_ratio = (src_pos->h << 16) / dst_pos->h;
+    }
+
+    val = SCALER_H_RATIO_SET(h_ratio);
+    scaler_write(val, SCALER_H_RATIO);
+
+    val = SCALER_V_RATIO_SET(v_ratio);
+    scaler_write(val, SCALER_V_RATIO);
+}
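The ratio registers hold src/dst in 16.16 fixed point: 65536 means 1:1, larger values downscale and smaller values upscale. A quick worked example: scaling a 1920-pixel-wide source down to 1280 pixels gives h_ratio = (1920 << 16) / 1280 = 98304, i.e. 1.5 in 16.16 notation. The limit tables later in this file bound exactly this value to the range 65536 * 1 / 4 (4x upscale) through 65536 * 16 (16x downscale).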
+
+static inline void scaler_set_rotation(struct scaler_context *scaler,
+    unsigned int rotation)
+{
+    u32 val = 0;
+
+    if (rotation & DRM_MODE_ROTATE_90)
+        val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_90);
+    else if (rotation & DRM_MODE_ROTATE_180)
+        val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_180);
+    else if (rotation & DRM_MODE_ROTATE_270)
+        val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_270);
+    if (rotation & DRM_MODE_REFLECT_X)
+        val |= SCALER_ROT_CFG_FLIP_X_EN;
+    if (rotation & DRM_MODE_REFLECT_Y)
+        val |= SCALER_ROT_CFG_FLIP_Y_EN;
+    scaler_write(val, SCALER_ROT_CFG);
+}
+
+static inline void scaler_set_csc(struct scaler_context *scaler,
+    const struct drm_format_info *fmt)
+{
+    static const u32 csc_mtx[2][3][3] = {
+        { /* YCbCr to RGB */
+            {0x254, 0x000, 0x331},
+            {0x254, 0xf38, 0xe60},
+            {0x254, 0x409, 0x000},
+        },
+        { /* RGB to YCbCr */
+            {0x084, 0x102, 0x032},
+            {0xfb4, 0xf6b, 0x0e1},
+            {0x0e1, 0xf44, 0xfdc},
+        },
+    };
+    int i, j, dir;
+
+    switch (fmt->format) {
+    case DRM_FORMAT_RGB565:
+    case DRM_FORMAT_XRGB1555:
+    case DRM_FORMAT_ARGB1555:
+    case DRM_FORMAT_XRGB4444:
+    case DRM_FORMAT_ARGB4444:
+    case DRM_FORMAT_XRGB8888:
+    case DRM_FORMAT_ARGB8888:
+    case DRM_FORMAT_RGBX8888:
+    case DRM_FORMAT_RGBA8888:
+        dir = 1;
+        break;
+    default:
+        dir = 0;
+    }
+
+    for (i = 0; i < 3; i++)
+        for (j = 0; j < 3; j++)
+            scaler_write(csc_mtx[dir][i][j], SCALER_CSC_COEF(j, i));
+}
+
+static inline void scaler_set_timer(struct scaler_context *scaler,
+    unsigned int timer, unsigned int divider)
+{
+    u32 val;
+
+    val = SCALER_TIMEOUT_CTRL_TIMER_ENABLE;
+    val |= SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(timer);
+    val |= SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(divider);
+    scaler_write(val, SCALER_TIMEOUT_CTRL);
+}
+
+static inline void scaler_start_hw(struct scaler_context *scaler)
+{
+    scaler_write(SCALER_CFG_START_CMD, SCALER_CFG);
+}
+
+static int scaler_commit(struct exynos_drm_ipp *ipp,
+    struct exynos_drm_ipp_task *task)
+{
+    struct scaler_context *scaler =
+        container_of(ipp, struct scaler_context, ipp);
+
+    u32 src_fmt = scaler_get_format(task->src.buf.fourcc);
+    struct drm_exynos_ipp_task_rect *src_pos = &task->src.rect;
+
+    u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc);
+    struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect;
+
+    scaler->task = task;
+
+    pm_runtime_get_sync(scaler->dev);
+
+    scaler_set_src_fmt(scaler, src_fmt);
+    scaler_set_src_base(scaler, &task->src);
+    scaler_set_src_span(scaler, &task->src);
+    scaler_set_src_luma_pos(scaler, src_pos);
+    scaler_set_src_wh(scaler, src_pos);
+
+    scaler_set_dst_fmt(scaler, dst_fmt);
+    scaler_set_dst_base(scaler, &task->dst);
+    scaler_set_dst_span(scaler, &task->dst);
+    scaler_set_dst_wh(scaler, dst_pos);
+    scaler_set_dst_pos(scaler, dst_pos);
+
+    scaler_set_hv_ratio(scaler, task->transform.rotation, src_pos, dst_pos);
+    scaler_set_rotation(scaler, task->transform.rotation);
+
+    scaler_set_csc(scaler, task->src.format);
+
+    scaler_set_timer(scaler, 0xffff, 0xf);
+
+    scaler_enable_int(scaler);
+    scaler_start_hw(scaler);
+
+    return 0;
+}
+
+static const struct exynos_drm_ipp_funcs ipp_funcs = {
+    .commit = scaler_commit,
+};
+
+static inline void scaler_disable_int(struct scaler_context *scaler)
+{
+    scaler_write(0, SCALER_INT_EN);
+}
+
+static inline u32 scaler_get_int_status(struct scaler_context *scaler)
+{
+    return scaler_read(SCALER_INT_STATUS);
+}
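The CSC coefficients appear to be 12-bit two's-complement fixed-point values scaled by 512 (this reading is inferred from the numbers; the patch cites no datasheet): 0x254 = 596 ≈ 1.164 * 512, 0x331 = 817 ≈ 1.596 * 512 and 0x409 = 1033 ≈ 2.017 * 512 are the familiar BT.601 limited-range YCbCr-to-RGB factors, while negative entries wrap around, e.g. 0xf38 = 3896 - 4096 = -200 ≈ -0.391 * 512 and 0xe60 = 3680 - 4096 = -416 ≈ -0.813 * 512. The 12-bit width matches the SCALER_CSC_COEF_SET(v) field, which packs bits 11..0 (see regs-scaler.h below).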
+
+static inline int scaler_task_done(u32 val)
+{
+    return val & SCALER_INT_STATUS_FRAME_END ? 0 : -EINVAL;
+}
+
+static irqreturn_t scaler_irq_handler(int irq, void *arg)
+{
+    struct scaler_context *scaler = arg;
+
+    u32 val = scaler_get_int_status(scaler);
+
+    scaler_disable_int(scaler);
+
+    if (scaler->task) {
+        struct exynos_drm_ipp_task *task = scaler->task;
+
+        scaler->task = NULL;
+        pm_runtime_mark_last_busy(scaler->dev);
+        pm_runtime_put_autosuspend(scaler->dev);
+        exynos_drm_ipp_task_done(task, scaler_task_done(val));
+    }
+
+    return IRQ_HANDLED;
+}
+
+static int scaler_bind(struct device *dev, struct device *master, void *data)
+{
+    struct scaler_context *scaler = dev_get_drvdata(dev);
+    struct drm_device *drm_dev = data;
+    struct exynos_drm_ipp *ipp = &scaler->ipp;
+
+    scaler->drm_dev = drm_dev;
+    drm_iommu_attach_device(drm_dev, dev);
+
+    exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+            DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+            DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+            scaler->scaler_data->formats,
+            scaler->scaler_data->num_formats, "scaler");
+
+    dev_info(dev, "The exynos scaler has been probed successfully\n");
+
+    return 0;
+}
+
+static void scaler_unbind(struct device *dev, struct device *master,
+    void *data)
+{
+    struct scaler_context *scaler = dev_get_drvdata(dev);
+    struct drm_device *drm_dev = data;
+    struct exynos_drm_ipp *ipp = &scaler->ipp;
+
+    exynos_drm_ipp_unregister(drm_dev, ipp);
+    drm_iommu_detach_device(scaler->drm_dev, scaler->dev);
+}
+
+static const struct component_ops scaler_component_ops = {
+    .bind = scaler_bind,
+    .unbind = scaler_unbind,
+};
+
+static int scaler_probe(struct platform_device *pdev)
+{
+    struct device *dev = &pdev->dev;
+    struct resource *regs_res;
+    struct scaler_context *scaler;
+    int irq;
+    int ret, i;
+
+    scaler = devm_kzalloc(dev, sizeof(*scaler), GFP_KERNEL);
+    if (!scaler)
+        return -ENOMEM;
+
+    scaler->scaler_data =
+        (struct scaler_data *)of_device_get_match_data(dev);
+
+    scaler->dev = dev;
+    regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+    scaler->regs = devm_ioremap_resource(dev, regs_res);
+    if (IS_ERR(scaler->regs))
+        return PTR_ERR(scaler->regs);
+
+    irq = platform_get_irq(pdev, 0);
+    if (irq < 0) {
+        dev_err(dev, "failed to get irq\n");
+        return irq;
+    }
+
+    ret = devm_request_threaded_irq(dev, irq, NULL, scaler_irq_handler,
+                    IRQF_ONESHOT, "drm_scaler", scaler);
+    if (ret < 0) {
+        dev_err(dev, "failed to request irq\n");
+        return ret;
+    }
+
+    for (i = 0; i < scaler->scaler_data->num_clk; ++i) {
+        scaler->clock[i] = devm_clk_get(dev,
+                    scaler->scaler_data->clk_name[i]);
+        if (IS_ERR(scaler->clock[i])) {
+            dev_err(dev, "failed to get clock\n");
+            return PTR_ERR(scaler->clock[i]);
+        }
+    }
+
+    pm_runtime_use_autosuspend(dev);
+    pm_runtime_set_autosuspend_delay(dev, SCALER_AUTOSUSPEND_DELAY);
+    pm_runtime_enable(dev);
+    platform_set_drvdata(pdev, scaler);
+
+    ret = component_add(dev, &scaler_component_ops);
+    if (ret)
+        goto err_component;
+
+    return 0;
+
+err_component:
+    pm_runtime_dont_use_autosuspend(dev);
+    pm_runtime_disable(dev);
+    return ret;
+}
+
+static int scaler_remove(struct platform_device *pdev)
+{
+    struct device *dev = &pdev->dev;
+
+    component_del(dev, &scaler_component_ops);
+    pm_runtime_dont_use_autosuspend(dev);
+    pm_runtime_disable(dev);
+
+    return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int clk_disable_unprepare_wrapper(struct clk *clk)
+{
+    clk_disable_unprepare(clk);
+
+    return 0;
+}
+
+static int
scaler_clk_ctrl(struct scaler_context *scaler, bool enable) +{ + int (*clk_fun)(struct clk *clk), i; + + clk_fun = enable ? clk_prepare_enable : clk_disable_unprepare_wrapper; + + for (i = 0; i < scaler->scaler_data->num_clk; ++i) + clk_fun(scaler->clock[i]); + + return 0; +} + +static int scaler_runtime_suspend(struct device *dev) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + + return scaler_clk_ctrl(scaler, false); +} + +static int scaler_runtime_resume(struct device *dev) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + + return scaler_clk_ctrl(scaler, true); +} +#endif + +static const struct dev_pm_ops scaler_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(scaler_runtime_suspend, scaler_runtime_resume, NULL) +}; + +static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_hv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_h_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 1) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct drm_exynos_ipp_limit scaler_5420_one_pixel_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct exynos_drm_ipp_formats exynos5420_formats[] = { + /* SCALER_YUV420_2P_UV */ + { IPP_SRCDST_FORMAT(NV21, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV420_2P_VU */ + { IPP_SRCDST_FORMAT(NV12, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV420_3P */ + { IPP_SRCDST_FORMAT(YUV420, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV422_1P_YUYV */ + { IPP_SRCDST_FORMAT(YUYV, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_1P_UYVY */ + { IPP_SRCDST_FORMAT(UYVY, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_1P_YVYU */ + { IPP_SRCDST_FORMAT(YVYU, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_2P_UV */ + { IPP_SRCDST_FORMAT(NV61, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_2P_VU */ + { IPP_SRCDST_FORMAT(NV16, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_3P */ + { IPP_SRCDST_FORMAT(YUV422, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV444_2P_UV */ + { IPP_SRCDST_FORMAT(NV42, scaler_5420_one_pixel_limits) }, + + /* SCALER_YUV444_2P_VU */ + { IPP_SRCDST_FORMAT(NV24, scaler_5420_one_pixel_limits) }, + + /* SCALER_YUV444_3P */ + { IPP_SRCDST_FORMAT(YUV444, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGB_565 */ + { IPP_SRCDST_FORMAT(RGB565, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB1555 */ + { IPP_SRCDST_FORMAT(XRGB1555, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB1555 */ + { IPP_SRCDST_FORMAT(ARGB1555, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB4444 */ + { IPP_SRCDST_FORMAT(XRGB4444, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB4444 */ + { IPP_SRCDST_FORMAT(ARGB4444, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB8888 */ + { IPP_SRCDST_FORMAT(XRGB8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB8888 */ + { IPP_SRCDST_FORMAT(ARGB8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGBA8888 */ + { 
IPP_SRCDST_FORMAT(RGBX8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGBA8888 */ + { IPP_SRCDST_FORMAT(RGBA8888, scaler_5420_one_pixel_limits) }, +}; + +static const struct scaler_data exynos5420_data = { + .clk_name = {"mscl"}, + .num_clk = 1, + .formats = exynos5420_formats, + .num_formats = ARRAY_SIZE(exynos5420_formats), +}; + +static const struct scaler_data exynos5433_data = { + .clk_name = {"pclk", "aclk", "aclk_xiu"}, + .num_clk = 3, + .formats = exynos5420_formats, /* intentional */ + .num_formats = ARRAY_SIZE(exynos5420_formats), +}; + +static const struct of_device_id exynos_scaler_match[] = { + { + .compatible = "samsung,exynos5420-scaler", + .data = &exynos5420_data, + }, { + .compatible = "samsung,exynos5433-scaler", + .data = &exynos5433_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, exynos_scaler_match); + +struct platform_driver scaler_driver = { + .probe = scaler_probe, + .remove = scaler_remove, + .driver = { + .name = "exynos-scaler", + .owner = THIS_MODULE, + .pm = &scaler_pm_ops, + .of_match_table = exynos_scaler_match, + }, +}; diff --git a/drivers/gpu/drm/exynos/regs-scaler.h b/drivers/gpu/drm/exynos/regs-scaler.h new file mode 100644 index 000000000000..fc7ccad75e74 --- /dev/null +++ b/drivers/gpu/drm/exynos/regs-scaler.h @@ -0,0 +1,426 @@ +/* drivers/gpu/drm/exynos/regs-scaler.h + * + * Copyright (c) 2017 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com> + * + * Register definition file for Samsung scaler driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef EXYNOS_REGS_SCALER_H +#define EXYNOS_REGS_SCALER_H + +/* Register part */ + +/* Global setting */ +#define SCALER_STATUS 0x0 /* no shadow */ +#define SCALER_CFG 0x4 + +/* Interrupt */ +#define SCALER_INT_EN 0x8 /* no shadow */ +#define SCALER_INT_STATUS 0xc /* no shadow */ + +/* SRC */ +#define SCALER_SRC_CFG 0x10 +#define SCALER_SRC_Y_BASE 0x14 +#define SCALER_SRC_CB_BASE 0x18 +#define SCALER_SRC_CR_BASE 0x294 +#define SCALER_SRC_SPAN 0x1c +#define SCALER_SRC_Y_POS 0x20 +#define SCALER_SRC_WH 0x24 +#define SCALER_SRC_C_POS 0x28 + +/* DST */ +#define SCALER_DST_CFG 0x30 +#define SCALER_DST_Y_BASE 0x34 +#define SCALER_DST_CB_BASE 0x38 +#define SCALER_DST_CR_BASE 0x298 +#define SCALER_DST_SPAN 0x3c +#define SCALER_DST_WH 0x40 +#define SCALER_DST_POS 0x44 + +/* Ratio */ +#define SCALER_H_RATIO 0x50 +#define SCALER_V_RATIO 0x54 + +/* Rotation */ +#define SCALER_ROT_CFG 0x58 + +/* Coefficient */ +/* + * YHCOEF_{x}{A|B|C|D} CHCOEF_{x}{A|B|C|D} + * + * A B C D A B C D + * 0 60 64 68 6c 140 144 148 14c + * 1 70 74 78 7c 150 154 158 15c + * 2 80 84 88 8c 160 164 168 16c + * 3 90 94 98 9c 170 174 178 17c + * 4 a0 a4 a8 ac 180 184 188 18c + * 5 b0 b4 b8 bc 190 194 198 19c + * 6 c0 c4 c8 cc 1a0 1a4 1a8 1ac + * 7 d0 d4 d8 dc 1b0 1b4 1b8 1bc + * 8 e0 e4 e8 ec 1c0 1c4 1c8 1cc + * + * + * YVCOEF_{x}{A|B} CVCOEF_{x}{A|B} + * + * A B A B + * 0 f0 f4 1d0 1d4 + * 1 f8 fc 1d8 1dc + * 2 100 104 1e0 1e4 + * 3 108 10c 1e8 1ec + * 4 110 114 1f0 1f4 + * 5 118 11c 1f8 1fc + * 6 120 124 200 204 + * 7 128 12c 208 20c + * 8 130 134 210 214 + */ +#define _SCALER_HCOEF_DELTA(r, c) ((r) * 0x10 + (c) * 0x4) +#define _SCALER_VCOEF_DELTA(r, c) ((r) * 0x8 + (c) * 0x4) + +#define SCALER_YHCOEF(r, c) (0x60 + _SCALER_HCOEF_DELTA((r), (c))) +#define SCALER_YVCOEF(r, c) (0xf0 + _SCALER_VCOEF_DELTA((r), (c))) +#define 
SCALER_CHCOEF(r, c) (0x140 + _SCALER_HCOEF_DELTA((r), (c))) +#define SCALER_CVCOEF(r, c) (0x1d0 + _SCALER_VCOEF_DELTA((r), (c))) + + +/* Color Space Conversion */ +#define SCALER_CSC_COEF(x, y) (0x220 + (y) * 0xc + (x) * 0x4) + +/* Dithering */ +#define SCALER_DITH_CFG 0x250 + +/* Version Number */ +#define SCALER_VER 0x260 /* no shadow */ + +/* Cycle count and Timeout */ +#define SCALER_CYCLE_COUNT 0x278 /* no shadow */ +#define SCALER_TIMEOUT_CTRL 0x2c0 /* no shadow */ +#define SCALER_TIMEOUT_CNT 0x2c4 /* no shadow */ + +/* Blending */ +#define SCALER_SRC_BLEND_COLOR 0x280 +#define SCALER_SRC_BLEND_ALPHA 0x284 +#define SCALER_DST_BLEND_COLOR 0x288 +#define SCALER_DST_BLEND_ALPHA 0x28c + +/* Color Fill */ +#define SCALER_FILL_COLOR 0x290 + +/* Multiple Command Queue */ +#define SCALER_ADDR_Q_CONFIG 0x2a0 /* no shadow */ +#define SCALER_SRC_ADDR_Q_STATUS 0x2a4 /* no shadow */ +#define SCALER_SRC_ADDR_Q 0x2a8 /* no shadow */ + +/* CRC */ +#define SCALER_CRC_COLOR00_10 0x2b0 /* no shadow */ +#define SCALER_CRC_COLOR20_30 0x2b4 /* no shadow */ +#define SCALER_CRC_COLOR01_11 0x2b8 /* no shadow */ +#define SCALER_CRC_COLOR21_31 0x2bc /* no shadow */ + +/* Shadow Registers */ +#define SCALER_SHADOW_OFFSET 0x1000 + + +/* Bit definition part */ +#define SCALER_MASK(hi_b, lo_b) ((1 << ((hi_b) - (lo_b) + 1)) - 1) +#define SCALER_GET(reg, hi_b, lo_b) \ + (((reg) >> (lo_b)) & SCALER_MASK(hi_b, lo_b)) +#define SCALER_SET(val, hi_b, lo_b) \ + (((val) & SCALER_MASK(hi_b, lo_b)) << lo_b) + +/* SCALER_STATUS */ +#define SCALER_STATUS_SCALER_RUNNING (1 << 1) +#define SCALER_STATUS_SCALER_READY_CLK_DOWN (1 << 0) + +/* SCALER_CFG */ +#define SCALER_CFG_FILL_EN (1 << 24) +#define SCALER_CFG_BLEND_COLOR_DIVIDE_ALPHA_EN (1 << 17) +#define SCALER_CFG_BLEND_EN (1 << 16) +#define SCALER_CFG_CSC_Y_OFFSET_SRC_EN (1 << 10) +#define SCALER_CFG_CSC_Y_OFFSET_DST_EN (1 << 9) +#define SCALER_CFG_16_BURST_MODE (1 << 8) +#define SCALER_CFG_SOFT_RESET (1 << 1) +#define SCALER_CFG_START_CMD (1 << 0) + +/* SCALER_INT_EN */ +#define SCALER_INT_EN_TIMEOUT (1 << 31) +#define SCALER_INT_EN_ILLEGAL_BLEND (1 << 24) +#define SCALER_INT_EN_ILLEGAL_RATIO (1 << 23) +#define SCALER_INT_EN_ILLEGAL_DST_HEIGHT (1 << 22) +#define SCALER_INT_EN_ILLEGAL_DST_WIDTH (1 << 21) +#define SCALER_INT_EN_ILLEGAL_DST_V_POS (1 << 20) +#define SCALER_INT_EN_ILLEGAL_DST_H_POS (1 << 19) +#define SCALER_INT_EN_ILLEGAL_DST_C_SPAN (1 << 18) +#define SCALER_INT_EN_ILLEGAL_DST_Y_SPAN (1 << 17) +#define SCALER_INT_EN_ILLEGAL_DST_CR_BASE (1 << 16) +#define SCALER_INT_EN_ILLEGAL_DST_CB_BASE (1 << 15) +#define SCALER_INT_EN_ILLEGAL_DST_Y_BASE (1 << 14) +#define SCALER_INT_EN_ILLEGAL_DST_COLOR (1 << 13) +#define SCALER_INT_EN_ILLEGAL_SRC_HEIGHT (1 << 12) +#define SCALER_INT_EN_ILLEGAL_SRC_WIDTH (1 << 11) +#define SCALER_INT_EN_ILLEGAL_SRC_CV_POS (1 << 10) +#define SCALER_INT_EN_ILLEGAL_SRC_CH_POS (1 << 9) +#define SCALER_INT_EN_ILLEGAL_SRC_YV_POS (1 << 8) +#define SCALER_INT_EN_ILLEGAL_SRC_YH_POS (1 << 7) +#define SCALER_INT_EN_ILLEGAL_DST_SPAN (1 << 6) +#define SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN (1 << 5) +#define SCALER_INT_EN_ILLEGAL_SRC_CR_BASE (1 << 4) +#define SCALER_INT_EN_ILLEGAL_SRC_CB_BASE (1 << 3) +#define SCALER_INT_EN_ILLEGAL_SRC_Y_BASE (1 << 2) +#define SCALER_INT_EN_ILLEGAL_SRC_COLOR (1 << 1) +#define SCALER_INT_EN_FRAME_END (1 << 0) + +/* SCALER_INT_STATUS */ +#define SCALER_INT_STATUS_TIMEOUT (1 << 31) +#define SCALER_INT_STATUS_ILLEGAL_BLEND (1 << 24) +#define SCALER_INT_STATUS_ILLEGAL_RATIO (1 << 23) +#define SCALER_INT_STATUS_ILLEGAL_DST_HEIGHT (1 << 
22) +#define SCALER_INT_STATUS_ILLEGAL_DST_WIDTH (1 << 21) +#define SCALER_INT_STATUS_ILLEGAL_DST_V_POS (1 << 20) +#define SCALER_INT_STATUS_ILLEGAL_DST_H_POS (1 << 19) +#define SCALER_INT_STATUS_ILLEGAL_DST_C_SPAN (1 << 18) +#define SCALER_INT_STATUS_ILLEGAL_DST_Y_SPAN (1 << 17) +#define SCALER_INT_STATUS_ILLEGAL_DST_CR_BASE (1 << 16) +#define SCALER_INT_STATUS_ILLEGAL_DST_CB_BASE (1 << 15) +#define SCALER_INT_STATUS_ILLEGAL_DST_Y_BASE (1 << 14) +#define SCALER_INT_STATUS_ILLEGAL_DST_COLOR (1 << 13) +#define SCALER_INT_STATUS_ILLEGAL_SRC_HEIGHT (1 << 12) +#define SCALER_INT_STATUS_ILLEGAL_SRC_WIDTH (1 << 11) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CV_POS (1 << 10) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CH_POS (1 << 9) +#define SCALER_INT_STATUS_ILLEGAL_SRC_YV_POS (1 << 8) +#define SCALER_INT_STATUS_ILLEGAL_SRC_YH_POS (1 << 7) +#define SCALER_INT_STATUS_ILLEGAL_DST_SPAN (1 << 6) +#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_SPAN (1 << 5) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CR_BASE (1 << 4) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CB_BASE (1 << 3) +#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_BASE (1 << 2) +#define SCALER_INT_STATUS_ILLEGAL_SRC_COLOR (1 << 1) +#define SCALER_INT_STATUS_FRAME_END (1 << 0) + +/* SCALER_SRC_CFG */ +#define SCALER_SRC_CFG_TILE_EN (1 << 10) +#define SCALER_SRC_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5) +#define SCALER_SRC_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5) +#define SCALER_SRC_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0) +#define SCALER_SRC_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0) +#define SCALER_YUV420_2P_UV 0 +#define SCALER_YUV422_2P_UV 2 +#define SCALER_YUV444_2P_UV 3 +#define SCALER_RGB_565 4 +#define SCALER_ARGB1555 5 +#define SCALER_ARGB8888 6 +#define SCALER_ARGB8888_PRE 7 +#define SCALER_YUV422_1P_YVYU 9 +#define SCALER_YUV422_1P_YUYV 10 +#define SCALER_YUV422_1P_UYVY 11 +#define SCALER_ARGB4444 12 +#define SCALER_L8A8 13 +#define SCALER_RGBA8888 14 +#define SCALER_L8 15 +#define SCALER_YUV420_2P_VU 16 +#define SCALER_YUV422_2P_VU 18 +#define SCALER_YUV444_2P_VU 19 +#define SCALER_YUV420_3P 20 +#define SCALER_YUV422_3P 22 +#define SCALER_YUV444_3P 23 + +/* SCALER_SRC_SPAN */ +#define SCALER_SRC_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16) +#define SCALER_SRC_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16) +#define SCALER_SRC_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0) +#define SCALER_SRC_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0) + +/* SCALER_SRC_Y_POS */ +#define SCALER_SRC_Y_POS_GET_YH_POS(r) SCALER_GET(r, 31, 16) +#define SCALER_SRC_Y_POS_SET_YH_POS(v) SCALER_SET(v, 31, 16) +#define SCALER_SRC_Y_POS_GET_YV_POS(r) SCALER_GET(r, 15, 0) +#define SCALER_SRC_Y_POS_SET_YV_POS(v) SCALER_SET(v, 15, 0) + +/* SCALER_SRC_WH */ +#define SCALER_SRC_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16) +#define SCALER_SRC_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16) +#define SCALER_SRC_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0) +#define SCALER_SRC_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0) + +/* SCALER_SRC_C_POS */ +#define SCALER_SRC_C_POS_GET_CH_POS(r) SCALER_GET(r, 31, 16) +#define SCALER_SRC_C_POS_SET_CH_POS(v) SCALER_SET(v, 31, 16) +#define SCALER_SRC_C_POS_GET_CV_POS(r) SCALER_GET(r, 15, 0) +#define SCALER_SRC_C_POS_SET_CV_POS(v) SCALER_SET(v, 15, 0) + +/* SCALER_DST_CFG */ +#define SCALER_DST_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5) +#define SCALER_DST_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5) +#define SCALER_DST_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0) +#define SCALER_DST_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0) + +/* SCALER_DST_SPAN */ +#define SCALER_DST_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16) +#define 
SCALER_DST_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0) + +/* SCALER_DST_WH */ +#define SCALER_DST_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0) + +/* SCALER_DST_POS */ +#define SCALER_DST_POS_GET_H_POS(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_POS_SET_H_POS(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_POS_GET_V_POS(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_POS_SET_V_POS(v) SCALER_SET(v, 13, 0) + +/* SCALER_H_RATIO */ +#define SCALER_H_RATIO_GET(r) SCALER_GET(r, 18, 0) +#define SCALER_H_RATIO_SET(v) SCALER_SET(v, 18, 0) + +/* SCALER_V_RATIO */ +#define SCALER_V_RATIO_GET(r) SCALER_GET(r, 18, 0) +#define SCALER_V_RATIO_SET(v) SCALER_SET(v, 18, 0) + +/* SCALER_ROT_CFG */ +#define SCALER_ROT_CFG_FLIP_X_EN (1 << 3) +#define SCALER_ROT_CFG_FLIP_Y_EN (1 << 2) +#define SCALER_ROT_CFG_GET_ROTMODE(r) SCALER_GET(r, 1, 0) +#define SCALER_ROT_CFG_SET_ROTMODE(v) SCALER_SET(v, 1, 0) +#define SCALER_ROT_MODE_90 1 +#define SCALER_ROT_MODE_180 2 +#define SCALER_ROT_MODE_270 3 + +/* SCALER_HCOEF, SCALER_VCOEF */ +#define SCALER_COEF_SHIFT(i) (16 * (1 - (i) % 2)) +#define SCALER_COEF_GET(r, i) \ + (((r) >> SCALER_COEF_SHIFT(i)) & 0x1ff) +#define SCALER_COEF_SET(v, i) \ + (((v) & 0x1ff) << SCALER_COEF_SHIFT(i)) + +/* SCALER_CSC_COEFxy */ +#define SCALER_CSC_COEF_GET(r) SCALER_GET(r, 11, 0) +#define SCALER_CSC_COEF_SET(v) SCALER_SET(v, 11, 0) + +/* SCALER_DITH_CFG */ +#define SCALER_DITH_CFG_GET_R_TYPE(r) SCALER_GET(r, 8, 6) +#define SCALER_DITH_CFG_SET_R_TYPE(v) SCALER_SET(v, 8, 6) +#define SCALER_DITH_CFG_GET_G_TYPE(r) SCALER_GET(r, 5, 3) +#define SCALER_DITH_CFG_SET_G_TYPE(v) SCALER_SET(v, 5, 3) +#define SCALER_DITH_CFG_GET_B_TYPE(r) SCALER_GET(r, 2, 0) +#define SCALER_DITH_CFG_SET_B_TYPE(v) SCALER_SET(v, 2, 0) + +/* SCALER_TIMEOUT_CTRL */ +#define SCALER_TIMEOUT_CTRL_GET_TIMER_VALUE(r) SCALER_GET(r, 31, 16) +#define SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(v) SCALER_SET(v, 31, 16) +#define SCALER_TIMEOUT_CTRL_GET_TIMER_DIV(r) SCALER_GET(r, 7, 4) +#define SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(v) SCALER_SET(v, 7, 4) +#define SCALER_TIMEOUT_CTRL_TIMER_ENABLE (1 << 0) + +/* SCALER_TIMEOUT_CNT */ +#define SCALER_TIMEOUT_CTRL_GET_TIMER_COUNT(r) SCALER_GET(r, 31, 16) + +/* SCALER_SRC_BLEND_COLOR */ +#define SCALER_SRC_BLEND_COLOR_SEL_INV (1 << 31) +#define SCALER_SRC_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_SRC_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_SRC_BLEND_COLOR_OP_SEL_INV (1 << 28) +#define SCALER_SRC_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_SRC_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_SRC_BLEND_ALPHA */ +#define SCALER_SRC_BLEND_ALPHA_SEL_INV (1 << 31) +#define SCALER_SRC_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_SRC_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_SRC_BLEND_ALPHA_OP_SEL_INV (1 << 28) +#define 
SCALER_SRC_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_SRC_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_SRC_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0) +#define SCALER_SRC_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0) + +/* SCALER_DST_BLEND_COLOR */ +#define SCALER_DST_BLEND_COLOR_SEL_INV (1 << 31) +#define SCALER_DST_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_DST_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_DST_BLEND_COLOR_OP_SEL_INV (1 << 28) +#define SCALER_DST_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_DST_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_DST_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_DST_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_DST_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_DST_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_DST_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_DST_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_DST_BLEND_ALPHA */ +#define SCALER_DST_BLEND_ALPHA_SEL_INV (1 << 31) +#define SCALER_DST_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_DST_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_DST_BLEND_ALPHA_OP_SEL_INV (1 << 28) +#define SCALER_DST_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_DST_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_DST_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0) +#define SCALER_DST_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0) + +/* SCALER_FILL_COLOR */ +#define SCALER_FILL_COLOR_GET_ALPHA(r) SCALER_GET(r, 31, 24) +#define SCALER_FILL_COLOR_SET_ALPHA(v) SCALER_SET(v, 31, 24) +#define SCALER_FILL_COLOR_GET_FILL_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_FILL_COLOR_SET_FILL_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_FILL_COLOR_GET_FILL_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_FILL_COLOR_SET_FILL_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_FILL_COLOR_GET_FILL_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_FILL_COLOR_SET_FILL_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_ADDR_Q_CONFIG */ +#define SCALER_ADDR_Q_CONFIG_RST (1 << 0) + +/* SCALER_SRC_ADDR_Q_STATUS */ +#define SCALER_SRC_ADDR_Q_STATUS_Y_FULL (1 << 23) +#define SCALER_SRC_ADDR_Q_STATUS_Y_EMPTY (1 << 22) +#define SCALER_SRC_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16) +#define SCALER_SRC_ADDR_Q_STATUS_CB_FULL (1 << 15) +#define SCALER_SRC_ADDR_Q_STATUS_CB_EMPTY (1 << 14) +#define SCALER_SRC_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8) +#define SCALER_SRC_ADDR_Q_STATUS_CR_FULL (1 << 7) +#define SCALER_SRC_ADDR_Q_STATUS_CR_EMPTY (1 << 6) +#define SCALER_SRC_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0) + +/* SCALER_DST_ADDR_Q_STATUS */ +#define SCALER_DST_ADDR_Q_STATUS_Y_FULL (1 << 23) +#define SCALER_DST_ADDR_Q_STATUS_Y_EMPTY (1 << 22) +#define SCALER_DST_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16) +#define SCALER_DST_ADDR_Q_STATUS_CB_FULL (1 << 15) +#define SCALER_DST_ADDR_Q_STATUS_CB_EMPTY (1 << 14) +#define SCALER_DST_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8) +#define SCALER_DST_ADDR_Q_STATUS_CR_FULL (1 << 7) +#define SCALER_DST_ADDR_Q_STATUS_CR_EMPTY (1 << 6) +#define SCALER_DST_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0) + +/* SCALER_CRC_COLOR00_10 */ +#define SCALER_CRC_COLOR00_10_GET_00(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR00_10_GET_10(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR20_30 */ +#define 
SCALER_CRC_COLOR20_30_GET_20(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR20_30_GET_30(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR01_11 */ +#define SCALER_CRC_COLOR01_11_GET_01(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR01_11_GET_11(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR21_31 */ +#define SCALER_CRC_COLOR21_31_GET_21(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR21_31_GET_31(r) SCALER_GET(r, 15, 0) + +#endif /* EXYNOS_REGS_SCALER_H */ diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c index 3a3bf752e03a..34b85767e4da 100644 --- a/drivers/gpu/drm/gma500/cdv_device.c +++ b/drivers/gpu/drm/gma500/cdv_device.c @@ -485,7 +485,7 @@ void cdv_intel_attach_force_audio_property(struct drm_connector *connector) return; for (i = 0; i < ARRAY_SIZE(force_audio_names); i++) - drm_property_add_enum(prop, i, i-1, force_audio_names[i]); + drm_property_add_enum(prop, i-1, force_audio_names[i]); dev_priv->force_audio_property = prop; } @@ -514,7 +514,7 @@ void cdv_intel_attach_broadcast_rgb_property(struct drm_connector *connector) return; for (i = 0; i < ARRAY_SIZE(broadcast_rgb_names); i++) - drm_property_add_enum(prop, i, i, broadcast_rgb_names[i]); + drm_property_add_enum(prop, i, broadcast_rgb_names[i]); dev_priv->broadcast_rgb_property = prop; } diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c index b837e7a92196..cb5a14b7ec7f 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_crt.c +++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c @@ -64,7 +64,7 @@ static void cdv_intel_crt_dpms(struct drm_encoder *encoder, int mode) REG_WRITE(reg, temp); } -static int cdv_intel_crt_mode_valid(struct drm_connector *connector, +static enum drm_mode_status cdv_intel_crt_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if (mode->flags & DRM_MODE_FLAG_DBLSCAN) diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c index a4bb89b7878f..5ea785f07ba8 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_dp.c +++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c @@ -505,7 +505,7 @@ static void cdv_intel_edp_backlight_off (struct gma_encoder *intel_encoder) msleep(intel_dp->backlight_off_delay); } -static int +static enum drm_mode_status cdv_intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c index 563f193fcfac..f0878998526a 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c +++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c @@ -223,7 +223,7 @@ static int cdv_hdmi_get_modes(struct drm_connector *connector) return ret; } -static int cdv_hdmi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status cdv_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if (mode->clock > 165000) diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c index e64960db3224..de9531caaca0 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c +++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c @@ -244,7 +244,7 @@ static void cdv_intel_lvds_restore(struct drm_connector *connector) { } -static int cdv_intel_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status cdv_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c index 
acb3848ef1c9..fe020926ea4f 100644 --- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c +++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c @@ -346,7 +346,7 @@ static int mdfld_dsi_connector_get_modes(struct drm_connector *connector) return 0; } -static int mdfld_dsi_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status mdfld_dsi_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct mdfld_dsi_connector *dsi_connector = diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 8b2eb32ee988..78566a80ad25 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -509,7 +509,7 @@ static void oaktrail_hdmi_dpms(struct drm_encoder *encoder, int mode) HDMI_WRITE(HDMI_VIDEO_REG, temp); } -static int oaktrail_hdmi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status oaktrail_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if (mode->clock > 165000) diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h index e8e4ea14b12b..e05e5399af2d 100644 --- a/drivers/gpu/drm/gma500/psb_intel_drv.h +++ b/drivers/gpu/drm/gma500/psb_intel_drv.h @@ -255,7 +255,7 @@ extern int intelfb_remove(struct drm_device *dev, extern bool psb_intel_lvds_mode_fixup(struct drm_encoder *encoder, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); -extern int psb_intel_lvds_mode_valid(struct drm_connector *connector, +extern enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode); extern int psb_intel_lvds_set_property(struct drm_connector *connector, struct drm_property *property, diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index be3eefec5152..8baf6325c6e4 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -343,7 +343,7 @@ static void psb_intel_lvds_restore(struct drm_connector *connector) } } -int psb_intel_lvds_mode_valid(struct drm_connector *connector, +enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_psb_private *dev_priv = connector->dev->dev_private; diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c index 84507912be84..f2ee6aa10afa 100644 --- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c +++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c @@ -1157,7 +1157,7 @@ static void psb_intel_sdvo_dpms(struct drm_encoder *encoder, int mode) return; } -static int psb_intel_sdvo_mode_valid(struct drm_connector *connector, +static enum drm_mode_status psb_intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct psb_intel_sdvo *psb_intel_sdvo = intel_attached_sdvo(connector); @@ -2281,7 +2281,7 @@ static bool psb_intel_sdvo_tv_create_property(struct psb_intel_sdvo *psb_intel_s for (i = 0; i < psb_intel_sdvo_connector->format_supported_num; i++) drm_property_add_enum( - psb_intel_sdvo_connector->tv_format, i, + psb_intel_sdvo_connector->tv_format, i, tv_format_names[psb_intel_sdvo_connector->tv_format_supported[i]]); psb_intel_sdvo->tv_format_index = psb_intel_sdvo_connector->tv_format_supported[0]; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index f4eba87c96f3..d2f4749ebf8d 100644 --- 
a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -27,7 +27,7 @@ static int hibmc_connector_get_modes(struct drm_connector *connector) return drm_add_modes_noedid(connector, 800, 600); } -static int hibmc_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status hibmc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { return MODE_OK; diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index 9e67a7b4e3a4..421c8a72369e 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -1106,7 +1106,7 @@ static int tda998x_connector_get_modes(struct drm_connector *connector) return n; } -static int tda998x_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status tda998x_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { /* TDA19988 dotclock can go up to 165MHz */ diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 108d21f34777..9de8b1c51a5c 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -25,7 +25,8 @@ config DRM_I915_DEBUG select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y - select DRM_DEBUG_MM_SELFTEST + select STACKDEPOT if DRM=y # for DRM_DEBUG_MM + select DRM_DEBUG_SELFTEST select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST @@ -89,6 +90,18 @@ config DRM_I915_SW_FENCE_CHECK_DAG If in doubt, say "N". +config DRM_I915_DEBUG_GUC + bool "Enable additional driver debugging for GuC" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will help resolve GuC related issues. + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_SELFTEST bool "Enable selftests upon driver load" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4eee91a3a236..4c6adae23e18 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -12,12 +12,16 @@ # Note the danger in using -Wall -Wextra is that when CI updates gcc we # will most likely get a sudden build breakage... Hopefully we will fix # new warnings before CI updates! 
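The gma500, hibmc and tda998x hunks above all apply the same interface fix: the connector ->mode_valid() hook now returns enum drm_mode_status instead of a bare int. A minimal sketch of the updated callback shape, assuming a hypothetical connector with the 165 MHz dotclock ceiling that the cdv_hdmi and tda998x hunks use:

static enum drm_mode_status
foo_connector_mode_valid(struct drm_connector *connector,
			 struct drm_display_mode *mode)
{
	/* Reject dotclocks beyond what this (assumed) encoder can drive. */
	if (mode->clock > 165000)
		return MODE_CLOCK_HIGH;

	return MODE_OK;
}

Returning the enum rather than an untyped int lets the core report the precise rejection reason (MODE_CLOCK_HIGH, MODE_NO_DBLESCAN, ...) when filtering probed modes.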
-subdir-ccflags-y := -Wall -Wextra +subdir-ccflags-y := -Wall -Wextra -Wvla subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) subdir-ccflags-y += $(call cc-disable-warning, type-limits) subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) +# clang warnings +subdir-ccflags-y += $(call cc-disable-warning, sign-compare) +subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) +subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable @@ -43,7 +47,8 @@ i915-y := i915_drv.o \ intel_csr.o \ intel_device_info.o \ intel_pm.o \ - intel_runtime_pm.o + intel_runtime_pm.o \ + intel_workarounds.o i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o @@ -66,11 +71,11 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ i915_query.o \ i915_request.o \ + i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ @@ -79,7 +84,8 @@ i915-y += i915_cmd_parser.o \ intel_lrc.o \ intel_mocs.o \ intel_ringbuffer.o \ - intel_uncore.o + intel_uncore.o \ + intel_wopcm.o # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ @@ -152,7 +158,8 @@ i915-y += dvo_ch7017.o \ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ - selftests/i915_selftest.o + selftests/i915_selftest.o \ + selftests/igt_flush_test.o # virtual gpu code i915-y += i915_vgpu.o @@ -171,7 +178,8 @@ i915-y += i915_perf.o \ i915_oa_glk.o \ i915_oa_cflgt2.o \ i915_oa_cflgt3.o \ - i915_oa_cnl.o + i915_oa_cnl.o \ + i915_oa_icl.o ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d85939bd7b47..718ca08f9575 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -813,15 +813,31 @@ static inline bool is_force_nonpriv_mmio(unsigned int offset) } static int force_nonpriv_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index) + unsigned int offset, unsigned int index, char *cmd) { struct intel_gvt *gvt = s->vgpu->gvt; - unsigned int data = cmd_val(s, index + 1); + unsigned int data; + u32 ring_base; + u32 nopid; + struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + + if (!strcmp(cmd, "lri")) + data = cmd_val(s, index + 1); + else { + gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n", + offset, cmd); + return -EINVAL; + } + + ring_base = dev_priv->engine[s->ring_id]->mmio_base; + nopid = i915_mmio_reg_offset(RING_NOPID(ring_base)); - if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { + if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) && + data != nopid) { gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", offset, data); - return -EPERM; + patch_value(s, cmd_ptr(s, index), nopid); + return 0; } return 0; } @@ -869,7 +885,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, return -EINVAL; if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index)) + force_nonpriv_reg_handler(s, offset, index, cmd)) return -EPERM; if (offset == i915_mmio_reg_offset(DERRMR) || @@ -1604,7 +1620,8 @@ static int 
batch_buffer_needs_scan(struct parser_exec_state *s) if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { /* BDW decides privilege based on address space */ - if (cmd_val(s, 0) & (1 << 8)) + if (cmd_val(s, 0) & (1 << 8) && + !(s->vgpu->scan_nonprivbb & (1 << s->ring_id))) return 0; } return 1; @@ -1618,6 +1635,8 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; *bb_size = 0; @@ -1629,18 +1648,22 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) cmd = cmd_val(s, 0); info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } do { - if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + if (copy_gma_to_hva(s->vgpu, mm, gma, gma + 4, &cmd) < 0) return -EFAULT; info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } @@ -1666,6 +1689,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) unsigned long gma = 0; unsigned long bb_size; int ret = 0; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; + unsigned long gma_start_offset = 0; /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); @@ -1680,8 +1706,24 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (!bb) return -ENOMEM; + bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true; + + /* The gma_start_offset stores the batch buffer's start gma's + * offset relative to the page boundary. So for a non-privileged + * batch buffer, the shadowed gem object holds exactly the same page + * layout as the original gem object. This is for the convenience of + * replacing the whole non-privileged batch buffer page with this + * shadowed one in the PPGTT at the same gma address. (This replacing + * action is not implemented yet, but may be necessary in the + * future.) + * For a privileged batch buffer, we just change the start gma + * address to that of the shadowed page. + */ + if (bb->ppgtt) + gma_start_offset = gma & ~I915_GTT_PAGE_MASK; + bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv, - roundup(bb_size, PAGE_SIZE)); + roundup(bb_size + gma_start_offset, PAGE_SIZE)); if (IS_ERR(bb->obj)) { ret = PTR_ERR(bb->obj); goto err_free_bb; @@ -1702,9 +1744,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) bb->clflush &= ~CLFLUSH_BEFORE; } - ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + ret = copy_gma_to_hva(s->vgpu, mm, gma, gma + bb_size, - bb->va); + bb->va + gma_start_offset); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); ret = -EFAULT; @@ -1730,7 +1772,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) * buffer's gma in pair. After all, we don't want to pin the shadow * buffer here (too early). 
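 * As a worked example (assuming 4KiB GTT pages, so that
 * ~I915_GTT_PAGE_MASK == 0xfff): a non-privileged batch starting at
 * gma 0x12345678 gets gma_start_offset == 0x678, its shadow object is
 * sized roundup(bb_size + 0x678, PAGE_SIZE), the guest contents land
 * at bb->va + 0x678, and the scanner's ip_va below keeps the same
 * in-page offset as the guest buffer.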
*/ - s->ip_va = bb->va; + s->ip_va = bb->va + gma_start_offset; s->ip_gma = gma; return 0; err_unmap: @@ -2469,15 +2511,18 @@ static int cmd_parser_exec(struct parser_exec_state *s) info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } s->info = info; trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va, - cmd_length(s), s->buf_type); + cmd_length(s), s->buf_type, s->buf_addr_type, + s->workload, info->name); if (info->handler) { ret = info->handler(s); diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 32a66dfdf112..2ec89bcb59f1 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -122,18 +122,69 @@ static int vgpu_mmio_diff_show(struct seq_file *s, void *unused) seq_printf(s, "Total: %d, Diff: %d\n", param.total, param.diff); return 0; } +DEFINE_SHOW_ATTRIBUTE(vgpu_mmio_diff); -static int vgpu_mmio_diff_open(struct inode *inode, struct file *file) +static int +vgpu_scan_nonprivbb_get(void *data, u64 *val) { - return single_open(file, vgpu_mmio_diff_show, inode->i_private); + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + *val = vgpu->scan_nonprivbb; + return 0; } -static const struct file_operations vgpu_mmio_diff_fops = { - .open = vgpu_mmio_diff_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +/* + * set/unset bit engine_id of vgpu->scan_nonprivbb to turn on/off scanning + * of non-privileged batch buffer. e.g. + * if vgpu->scan_nonprivbb=3, then it will scan non-privileged batch buffer + * on engine 0 and 1. 
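+ * From userspace this is exercised through the per-vGPU debugfs file
+ * registered in intel_gvt_debugfs_add_vgpu() below, e.g. (debugfs
+ * mount point and vGPU directory assumed):
+ *   echo 0x3 > /sys/kernel/debug/.../vgpu1/scan_nonprivbb
+ * to enable non-privileged batch buffer scanning on engines 0 and 1.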
+ */ +static int +vgpu_scan_nonprivbb_set(void *data, u64 val) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + enum intel_engine_id id; + char buf[128], *s; + int len; + + val &= (1 << I915_NUM_ENGINES) - 1; + + if (vgpu->scan_nonprivbb == val) + return 0; + + if (!val) + goto done; + + len = sprintf(buf, + "gvt: vgpu %d turns on non-privileged batch buffers scanning on Engines:", + vgpu->id); + + s = buf + len; + + for (id = 0; id < I915_NUM_ENGINES; id++) { + struct intel_engine_cs *engine; + + engine = dev_priv->engine[id]; + if (engine && (val & (1 << id))) { + len = snprintf(s, 4, "%d, ", engine->id); + s += len; + } else + val &= ~(1 << id); + } + + if (val) + sprintf(s, "low performance expected."); + + pr_warn("%s\n", buf); + +done: + vgpu->scan_nonprivbb = val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, + vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set, + "0x%llx\n"); /** * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU @@ -162,6 +213,11 @@ int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) if (!ent) return -ENOMEM; + ent = debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, + vgpu, &vgpu_scan_nonprivbb_fops); + if (!ent) + return -ENOMEM; + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index efacd8abbedc..05d15a095310 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -99,7 +99,6 @@ struct intel_vgpu_fence { struct intel_vgpu_mmio { void *vreg; void *sreg; - bool disable_warn_untrack; }; #define INTEL_GVT_MAX_BAR_NUM 4 @@ -226,6 +225,7 @@ struct intel_vgpu { struct completion vblank_done; + u32 scan_nonprivbb; }; /* validating GM healthy status*/ diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a33c1c3e4a21..4b6532fb789a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -191,6 +191,8 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int max_fence = vgpu_fence_sz(vgpu); if (fence_num >= max_fence) { + gvt_vgpu_err("access oob fence reg %d/%d\n", + fence_num, max_fence); /* When guest access oob fence regs without access * pv_info first, we treat guest not supporting GVT, @@ -200,11 +202,6 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); - if (!vgpu->mmio.disable_warn_untrack) { - gvt_vgpu_err("found oob fence register access\n"); - gvt_vgpu_err("total fence %d, access fence %d\n", - max_fence, fence_num); - } memset(p_data, 0, bytes); return -EINVAL; } @@ -477,22 +474,28 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 reg_nonpriv = *(u32 *)p_data; + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); + u32 ring_base; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ret = -EINVAL; - if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", - vgpu->id, offset, bytes); + if ((bytes != 4) || ((offset & (bytes - 1)) != 0) || ring_id < 0) { + gvt_err("vgpu(%d) ring %d Invalid FORCE_NONPRIV offset %x(%dB)\n", + vgpu->id, ring_id, offset, bytes); return ret; } - if (in_whitelist(reg_nonpriv)) { + ring_base = dev_priv->engine[ring_id]->mmio_base; + + if (in_whitelist(reg_nonpriv) || + reg_nonpriv == i915_mmio_reg_offset(RING_NOPID(ring_base))) { ret = 
intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes); - } else { - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", - vgpu->id, reg_nonpriv); - } - return ret; + } else + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n", + vgpu->id, reg_nonpriv, offset); + + return 0; } static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, @@ -3092,9 +3095,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, */ mmio_info = find_mmio_info(gvt, offset); if (!mmio_info) { - if (!vgpu->mmio.disable_warn_untrack) - gvt_vgpu_err("untracked MMIO %08x len %d\n", - offset, bytes); + gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes); goto default_rw; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 11b71b33f1c0..e4960aff68bd 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -244,8 +244,6 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; - - vgpu->mmio.disable_warn_untrack = false; } else { #define GVT_GEN8_MMIO_RESET_OFFSET (0x44200) /* only reset the engine related, so starting with 0x44200 diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index a5bac83d53a9..0f949554d118 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -448,7 +448,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) { - u32 *reg_state = ctx->engine[ring_id].lrc_reg_state; + u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 75b7bc7b344c..d053cbe1dc94 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -53,7 +53,6 @@ struct vgpu_sched_data { bool active; ktime_t sched_in_time; - ktime_t sched_out_time; ktime_t sched_time; ktime_t left_ts; ktime_t allocated_ts; @@ -66,17 +65,22 @@ struct gvt_sched_data { struct hrtimer timer; unsigned long period; struct list_head lru_runq_head; + ktime_t expire_time; }; -static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu) +static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time) { ktime_t delta_ts; - struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data; + struct vgpu_sched_data *vgpu_data; - delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time; + if (!vgpu || vgpu == vgpu->gvt->idle_vgpu) + return; - vgpu_data->sched_time += delta_ts; - vgpu_data->left_ts -= delta_ts; + vgpu_data = vgpu->sched_data; + delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time); + vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts); + vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts); + vgpu_data->sched_in_time = cur_time; } #define GVT_TS_BALANCE_PERIOD_MS 100 @@ -150,11 +154,7 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) } cur_time = ktime_get(); - if (scheduler->current_vgpu) { - vgpu_data = scheduler->current_vgpu->sched_data; - vgpu_data->sched_out_time = cur_time; - vgpu_update_timeslice(scheduler->current_vgpu); - } + vgpu_update_timeslice(scheduler->current_vgpu, cur_time); vgpu_data = scheduler->next_vgpu->sched_data; vgpu_data->sched_in_time = cur_time; @@ -226,17 
+226,22 @@ out: void intel_gvt_schedule(struct intel_gvt *gvt) { struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; - static uint64_t timer_check; + ktime_t cur_time; mutex_lock(&gvt->lock); + cur_time = ktime_get(); if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, (void *)&gvt->service_request)) { - if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) + if (cur_time >= sched_data->expire_time) { gvt_balance_timeslice(sched_data); + sched_data->expire_time = ktime_add_ms( + cur_time, GVT_TS_BALANCE_PERIOD_MS); + } } clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request); + vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time); tbs_sched_func(sched_data); mutex_unlock(&gvt->lock); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 638abe84857c..c2d183b91500 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -58,7 +58,7 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -97,7 +97,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, i915_mmio_reg_offset(EU_PERF_CNTL6), }; - if (!workload || !reg_state || workload->ring_id != RCS) + if (workload->ring_id != RCS) return; if (save) { @@ -130,7 +130,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -283,7 +283,7 @@ static int shadow_context_status_change(struct notifier_block *nb, static void shadow_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc = 0; desc = ce->lrc_desc; @@ -389,7 +389,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) * shadow_ctx pages invalid. So gvt need to pin itself. After update * the guest context, gvt can unpin the shadow_ctx safely. 
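 * The pin taken here is paired with the intel_context_unpin() calls
 * on the error paths below and in complete_current_workload() once
 * the guest context update is done.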
*/ - ring = engine->context_pin(engine, shadow_ctx); + ring = intel_context_pin(shadow_ctx, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); gvt_vgpu_err("fail to pin shadow context\n"); @@ -403,7 +403,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); err_scan: @@ -437,7 +437,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); release_shadow_wa_ctx(&workload->wa_ctx); return ret; } @@ -452,12 +452,6 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) int ret; list_for_each_entry(bb, &workload->shadow_bb, list) { - bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0); - if (IS_ERR(bb->vma)) { - ret = PTR_ERR(bb->vma); - goto err; - } - /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va * is only updated into ring_scan_buffer, not real ring address * allocated in later copy_workload_to_ring_buffer. pls be noted @@ -469,25 +463,53 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb->bb_start_cmd_va = workload->shadow_ring_buffer_va + bb->bb_offset; - /* relocate shadow batch buffer */ - bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); - if (gmadr_bytes == 8) - bb->bb_start_cmd_va[2] = 0; + if (bb->ppgtt) { + /* for non-priv bb, scan&shadow is only for + * debugging purpose, so the content of shadow bb + * is the same as original bb. Therefore, + * here, rather than switch to shadow bb's gma + * address, we directly use original batch buffer's + * gma address, and send original bb to hardware + * directly + */ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, + bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + } else { + bb->vma = i915_gem_object_ggtt_pin(bb->obj, + NULL, 0, 0, 0); + if (IS_ERR(bb->vma)) { + ret = PTR_ERR(bb->vma); + goto err; + } - /* No one is going to touch shadow bb from now on. */ - if (bb->clflush & CLFLUSH_AFTER) { - drm_clflush_virt_range(bb->va, bb->obj->base.size); - bb->clflush &= ~CLFLUSH_AFTER; - } + /* relocate shadow batch buffer */ + bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); + if (gmadr_bytes == 8) + bb->bb_start_cmd_va[2] = 0; - ret = i915_gem_object_set_to_gtt_domain(bb->obj, false); - if (ret) - goto err; + /* No one is going to touch shadow bb from now on. 
*/ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, + bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } - i915_gem_obj_finish_shmem_access(bb->obj); - bb->accessing = false; + ret = i915_gem_object_set_to_gtt_domain(bb->obj, + false); + if (ret) + goto err; - i915_vma_move_to_active(bb->vma, workload->req, 0); + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + i915_vma_move_to_active(bb->vma, workload->req, 0); + } } return 0; err: @@ -504,7 +526,7 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) struct intel_vgpu_submission *s = &workload->vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -666,7 +688,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ret = prepare_workload(workload); if (ret) { - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); goto out; } @@ -749,7 +771,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ring_id = workload->ring_id; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; @@ -876,7 +898,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) } mutex_lock(&dev_priv->drm.struct_mutex); /* unpin shadow ctx as the shadow_ctx update is done */ - engine->context_unpin(engine, s->shadow_ctx); + intel_context_unpin(s->shadow_ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1134,9 +1156,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) if (IS_ERR(s->shadow_ctx)) return PTR_ERR(s->shadow_ctx); - if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv)) - s->shadow_ctx->priority = INT_MAX; - bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 486ed57a4ad1..6c644782193e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -125,6 +125,7 @@ struct intel_vgpu_shadow_bb { unsigned int clflush; bool accessing; unsigned long bb_offset; + bool ppgtt; }; #define workload_q_head(vgpu, ring_id) \ diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 82093f1e8612..1fd64202d74e 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -224,19 +224,25 @@ TRACE_EVENT(oos_sync, TP_printk("%s", __entry->buf) ); +#define GVT_CMD_STR_LEN 40 TRACE_EVENT(gvt_command, - TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, - u32 buf_type), + TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, + u32 cmd_len, u32 buf_type, u32 buf_addr_type, + void *workload, char *cmd_name), - TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type), + TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, + buf_addr_type, workload, cmd_name), TP_STRUCT__entry( __field(u8, vgpu_id) __field(u8, ring_id) __field(u32, ip_gma) __field(u32, buf_type) + __field(u32, buf_addr_type) __field(u32, cmd_len) + __field(void*, workload) __dynamic_array(u32, raw_cmd, cmd_len) + __array(char, cmd_name, 
GVT_CMD_STR_LEN) ), TP_fast_assign( @@ -244,17 +250,25 @@ TRACE_EVENT(gvt_command, __entry->ring_id = ring_id; __entry->ip_gma = ip_gma; __entry->buf_type = buf_type; + __entry->buf_addr_type = buf_addr_type; __entry->cmd_len = cmd_len; + __entry->workload = workload; + snprintf(__entry->cmd_name, GVT_CMD_STR_LEN, "%s", cmd_name); memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va)); ), - TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s", + TP_printk("vgpu%d ring %d: address_type %u, buf_type %u, ip_gma %08x,cmd (name=%s,len=%u,raw cmd=%s), workload=%p\n", __entry->vgpu_id, __entry->ring_id, + __entry->buf_addr_type, __entry->buf_type, __entry->ip_gma, - __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4)) + __entry->cmd_name, + __entry->cmd_len, + __print_array(__get_dynamic_array(raw_cmd), + __entry->cmd_len, 4), + __entry->workload) ); #define GVT_TEMP_STR_LEN 10 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 89f7ff2c652e..13e7b9e4a6e6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -377,16 +377,19 @@ static void print_batch_pool_stats(struct seq_file *m, print_file_stats(m, "[k]batch pool", stats); } -static int per_file_ctx_stats(int id, void *ptr, void *data) +static int per_file_ctx_stats(int idx, void *ptr, void *data) { struct i915_gem_context *ctx = ptr; - int n; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce = to_intel_context(ctx, engine); - for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { - if (ctx->engine[n].state) - per_file_stats(0, ctx->engine[n].state->obj, data); - if (ctx->engine[n].ring) - per_file_stats(0, ctx->engine[n].ring->vma->obj, data); + if (ce->state) + per_file_stats(0, ce->state->obj, data); + if (ce->ring) + per_file_stats(0, ce->ring->vma->obj, data); } return 0; @@ -1215,20 +1218,20 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || - IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); + INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(dev_priv) || - IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); + INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || - IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); + INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -1340,10 +1343,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", + seq_printf(m, "\tseqno = %x [current %x, last %x]\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine), - engine->timeline->inflight_seqnos); + intel_engine_last_submit(engine)); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? 
%s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, @@ -1796,9 +1798,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_rps *rps = &dev_priv->gt_pm.rps; - int ret = 0; - int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; + int gpu_freq, ia_freq; + int ret; if (!HAS_LLC(dev_priv)) return -ENODEV; @@ -1809,13 +1811,12 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (ret) goto out; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER; - max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER; - } else { - min_gpu_freq = rps->min_freq_softlimit; - max_gpu_freq = rps->max_freq_softlimit; + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; } seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); @@ -1828,7 +1829,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * (IS_GEN9_BC(dev_priv) || - IS_CANNONLAKE(dev_priv) ? + INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); @@ -1923,8 +1924,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) { - seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)", - ring->space, ring->head, ring->tail); + seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, emit: %u)", + ring->space, ring->head, ring->tail, ring->emit); } static int i915_context_status(struct seq_file *m, void *unused) @@ -1961,7 +1962,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); seq_printf(m, "%s: ", engine->name); if (ce->state) @@ -2326,30 +2328,45 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) return 0; } -static void i915_guc_log_info(struct seq_file *m, - struct drm_i915_private *dev_priv) +static const char * +stringify_guc_log_type(enum guc_log_buffer_type type) { - struct intel_guc *guc = &dev_priv->guc; + switch (type) { + case GUC_ISR_LOG_BUFFER: + return "ISR"; + case GUC_DPC_LOG_BUFFER: + return "DPC"; + case GUC_CRASH_DUMP_LOG_BUFFER: + return "CRASH"; + default: + MISSING_CASE(type); + } - seq_puts(m, "\nGuC logging stats:\n"); + return ""; +} - seq_printf(m, "\tISR: flush count %10u, overflow count %10u\n", - guc->log.flush_count[GUC_ISR_LOG_BUFFER], - guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]); +static void i915_guc_log_info(struct seq_file *m, + struct drm_i915_private *dev_priv) +{ + struct intel_guc_log *log = &dev_priv->guc.log; + enum guc_log_buffer_type type; - seq_printf(m, "\tDPC: flush count %10u, overflow count %10u\n", - guc->log.flush_count[GUC_DPC_LOG_BUFFER], - guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]); + if (!intel_guc_log_relay_enabled(log)) { + seq_puts(m, "GuC log relay disabled\n"); + return; + } - seq_printf(m, "\tCRASH: flush count %10u, overflow count %10u\n", - guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER], - 
guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]); + seq_puts(m, "GuC logging stats:\n"); - seq_printf(m, "\tTotal flush interrupt count: %u\n", - guc->log.flush_interrupt_count); + seq_printf(m, "\tRelay full count: %u\n", + log->relay.full_count); - seq_printf(m, "\tCapture miss count: %u\n", - guc->log.capture_miss_count); + for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + seq_printf(m, "\t%s:\tflush count %10u, overflow count %10u\n", + stringify_guc_log_type(type), + log->stats[type].flush, + log->stats[type].sampled_overflow); + } } static void i915_guc_client_info(struct seq_file *m, @@ -2379,14 +2396,19 @@ static int i915_guc_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; - if (!USES_GUC_SUBMISSION(dev_priv)) + if (!USES_GUC(dev_priv)) return -ENODEV; + i915_guc_log_info(m, dev_priv); + + if (!USES_GUC_SUBMISSION(dev_priv)) + return 0; + GEM_BUG_ON(!guc->execbuf_client); - seq_printf(m, "Doorbell map:\n"); + seq_printf(m, "\nDoorbell map:\n"); seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); - seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); + seq_printf(m, "Doorbell next cacheline: 0x%x\n", guc->db_cacheline); seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); i915_guc_client_info(m, dev_priv, guc->execbuf_client); @@ -2396,8 +2418,6 @@ static int i915_guc_info(struct seq_file *m, void *data) i915_guc_client_info(m, dev_priv, guc->preempt_client); } - i915_guc_log_info(m, dev_priv); - /* Add more as required ... */ return 0; @@ -2496,35 +2516,73 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) return 0; } -static int i915_guc_log_control_get(void *data, u64 *val) +static int i915_guc_log_level_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; - if (!HAS_GUC(dev_priv)) + if (!USES_GUC(dev_priv)) return -ENODEV; - if (!dev_priv->guc.log.vma) - return -EINVAL; - - *val = i915_modparams.guc_log_level; + *val = intel_guc_log_level_get(&dev_priv->guc.log); return 0; } -static int i915_guc_log_control_set(void *data, u64 val) +static int i915_guc_log_level_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; - if (!HAS_GUC(dev_priv)) + if (!USES_GUC(dev_priv)) return -ENODEV; - return intel_guc_log_control(&dev_priv->guc, val); + return intel_guc_log_level_set(&dev_priv->guc.log, val); } -DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops, - i915_guc_log_control_get, i915_guc_log_control_set, +DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_level_fops, + i915_guc_log_level_get, i915_guc_log_level_set, "%lld\n"); +static int i915_guc_log_relay_open(struct inode *inode, struct file *file) +{ + struct drm_i915_private *dev_priv = inode->i_private; + + if (!USES_GUC(dev_priv)) + return -ENODEV; + + file->private_data = &dev_priv->guc.log; + + return intel_guc_log_relay_open(&dev_priv->guc.log); +} + +static ssize_t +i915_guc_log_relay_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct intel_guc_log *log = filp->private_data; + + intel_guc_log_relay_flush(log); + + return cnt; +} + +static int i915_guc_log_relay_release(struct inode *inode, struct file *file) +{ + struct drm_i915_private *dev_priv = inode->i_private; + + intel_guc_log_relay_close(&dev_priv->guc.log); + + return 0; +} + +static const struct file_operations i915_guc_log_relay_fops = { + .owner = THIS_MODULE, + .open = i915_guc_log_relay_open, + .write = 
i915_guc_log_relay_write, + .release = i915_guc_log_relay_release, +}; + static const char *psr2_live_status(u32 val) { static const char * const live_status[] = { @@ -2548,6 +2606,26 @@ static const char *psr2_live_status(u32 val) return "unknown"; } +static const char *psr_sink_status(u8 val) +{ + static const char * const sink_status[] = { + "inactive", + "transition to active, capture and display", + "active, display from RFB", + "active, capture and display on sink device timings", + "transition to inactive, capture and display, timing re-sync", + "reserved", + "reserved", + "sink internal error" + }; + + val &= DP_PSR_SINK_STATE_MASK; + if (val < ARRAY_SIZE(sink_status)) + return sink_status[val]; + + return "unknown"; +} + static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -2569,14 +2647,13 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) mutex_lock(&dev_priv->psr.lock); seq_printf(m, "Enabled: %s\n", yesno((bool)dev_priv->psr.enabled)); - seq_printf(m, "Active: %s\n", yesno(dev_priv->psr.active)); seq_printf(m, "Busy frontbuffer bits: 0x%03x\n", dev_priv->psr.busy_frontbuffer_bits); seq_printf(m, "Re-enable work scheduled: %s\n", yesno(work_busy(&dev_priv->psr.work.work))); if (HAS_DDI(dev_priv)) { - if (dev_priv->psr.psr2_support) + if (dev_priv->psr.psr2_enabled) enabled = I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE; else enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE; @@ -2624,18 +2701,67 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "Performance_Counter: %u\n", psrperf); } - if (dev_priv->psr.psr2_support) { + if (dev_priv->psr.psr2_enabled) { u32 psr2 = I915_READ(EDP_PSR2_STATUS); seq_printf(m, "EDP_PSR2_STATUS: %x [%s]\n", psr2, psr2_live_status(psr2)); } + + if (dev_priv->psr.enabled) { + struct drm_dp_aux *aux = &dev_priv->psr.enabled->aux; + u8 val; + + if (drm_dp_dpcd_readb(aux, DP_PSR_STATUS, &val) == 1) + seq_printf(m, "Sink PSR status: 0x%x [%s]\n", val, + psr_sink_status(val)); + } mutex_unlock(&dev_priv->psr.lock); + if (READ_ONCE(dev_priv->psr.debug)) { + seq_printf(m, "Last attempted entry at: %lld\n", + dev_priv->psr.last_entry_attempt); + seq_printf(m, "Last exit at: %lld\n", + dev_priv->psr.last_exit); + } + intel_runtime_pm_put(dev_priv); return 0; } +static int +i915_edp_psr_debug_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + + if (!CAN_PSR(dev_priv)) + return -ENODEV; + + DRM_DEBUG_KMS("PSR debug %s\n", enableddisabled(val)); + + intel_runtime_pm_get(dev_priv); + intel_psr_irq_control(dev_priv, !!val); + intel_runtime_pm_put(dev_priv); + + return 0; +} + +static int +i915_edp_psr_debug_get(void *data, u64 *val) +{ + struct drm_i915_private *dev_priv = data; + + if (!CAN_PSR(dev_priv)) + return -ENODEV; + + *val = READ_ONCE(dev_priv->psr.debug); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops, + i915_edp_psr_debug_get, i915_edp_psr_debug_set, + "%llu\n"); + static int i915_sink_crc(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -3231,7 +3357,8 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) for (i = 0; i < dev_priv->num_shared_dpll; i++) { struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i]; - seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->name, pll->id); + seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->info->name, + pll->info->id); seq_printf(m, " crtc_mask: 0x%08x, active: 0x%x, on: %s\n", 
pll->state.crtc_mask, pll->active_mask, yesno(pll->on)); seq_printf(m, " tracked hardware state:\n"); @@ -3241,6 +3368,28 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) seq_printf(m, " fp0: 0x%08x\n", pll->state.hw_state.fp0); seq_printf(m, " fp1: 0x%08x\n", pll->state.hw_state.fp1); seq_printf(m, " wrpll: 0x%08x\n", pll->state.hw_state.wrpll); + seq_printf(m, " cfgcr0: 0x%08x\n", pll->state.hw_state.cfgcr0); + seq_printf(m, " cfgcr1: 0x%08x\n", pll->state.hw_state.cfgcr1); + seq_printf(m, " mg_refclkin_ctl: 0x%08x\n", + pll->state.hw_state.mg_refclkin_ctl); + seq_printf(m, " mg_clktop2_coreclkctl1: 0x%08x\n", + pll->state.hw_state.mg_clktop2_coreclkctl1); + seq_printf(m, " mg_clktop2_hsclkctl: 0x%08x\n", + pll->state.hw_state.mg_clktop2_hsclkctl); + seq_printf(m, " mg_pll_div0: 0x%08x\n", + pll->state.hw_state.mg_pll_div0); + seq_printf(m, " mg_pll_div1: 0x%08x\n", + pll->state.hw_state.mg_pll_div1); + seq_printf(m, " mg_pll_lf: 0x%08x\n", + pll->state.hw_state.mg_pll_lf); + seq_printf(m, " mg_pll_frac_lock: 0x%08x\n", + pll->state.hw_state.mg_pll_frac_lock); + seq_printf(m, " mg_pll_ssc: 0x%08x\n", + pll->state.hw_state.mg_pll_ssc); + seq_printf(m, " mg_pll_bias: 0x%08x\n", + pll->state.hw_state.mg_pll_bias); + seq_printf(m, " mg_pll_tdc_coldst_bias: 0x%08x\n", + pll->state.hw_state.mg_pll_tdc_coldst_bias); } drm_modeset_unlock_all(dev); @@ -3249,24 +3398,13 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) static int i915_wa_registers(struct seq_file *m, void *unused) { - int i; - int ret; - struct intel_engine_cs *engine; struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; struct i915_workarounds *workarounds = &dev_priv->workarounds; - enum intel_engine_id id; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + int i; intel_runtime_pm_get(dev_priv); seq_printf(m, "Workarounds applied: %d\n", workarounds->count); - for_each_engine(engine, dev_priv, id) - seq_printf(m, "HW whitelist count for %s: %d\n", - engine->name, workarounds->hw_whitelist_count[id]); for (i = 0; i < workarounds->count; ++i) { i915_reg_t addr; u32 mask, value, read; @@ -3282,7 +3420,6 @@ static int i915_wa_registers(struct seq_file *m, void *unused) } intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -3567,7 +3704,8 @@ static ssize_t i915_displayport_test_active_write(struct file *file, static int i915_displayport_test_active_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; + struct drm_device *dev = &dev_priv->drm; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; @@ -3601,10 +3739,8 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data) static int i915_displayport_test_active_open(struct inode *inode, struct file *file) { - struct drm_i915_private *dev_priv = inode->i_private; - return single_open(file, i915_displayport_test_active_show, - &dev_priv->drm); + inode->i_private); } static const struct file_operations i915_displayport_test_active_fops = { @@ -3618,7 +3754,8 @@ static const struct file_operations i915_displayport_test_active_fops = { static int i915_displayport_test_data_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; + struct drm_device *dev = &dev_priv->drm; struct drm_connector *connector; struct 
drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; @@ -3657,26 +3794,12 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) return 0; } -static int i915_displayport_test_data_open(struct inode *inode, - struct file *file) -{ - struct drm_i915_private *dev_priv = inode->i_private; - - return single_open(file, i915_displayport_test_data_show, - &dev_priv->drm); -} - -static const struct file_operations i915_displayport_test_data_fops = { - .owner = THIS_MODULE, - .open = i915_displayport_test_data_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release -}; +DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_data); static int i915_displayport_test_type_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; + struct drm_device *dev = &dev_priv->drm; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; @@ -3703,23 +3826,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) return 0; } - -static int i915_displayport_test_type_open(struct inode *inode, - struct file *file) -{ - struct drm_i915_private *dev_priv = inode->i_private; - - return single_open(file, i915_displayport_test_type_show, - &dev_priv->drm); -} - -static const struct file_operations i915_displayport_test_type_fops = { - .owner = THIS_MODULE, - .open = i915_displayport_test_type_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release -}; +DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_type); static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) { @@ -3987,8 +4094,8 @@ i915_wedged_set(void *data, u64 val) engine->hangcheck.stalled = true; } - i915_handle_error(i915, val, "Manually set wedged engine mask = %llx", - val); + i915_handle_error(i915, val, I915_ERROR_CAPTURE, + "Manually set wedged engine mask = %llx", val); wait_on_bit(&i915->gpu_error.flags, I915_RESET_HANDOFF, @@ -4152,119 +4259,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops, "0x%08llx\n"); static int -i915_max_freq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); - return 0; -} - -static int -i915_max_freq_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 hw_max, hw_min; - int ret; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - return ret; - - /* - * Turbo will still be enabled, but won't go above the set value. 
- */ - val = intel_freq_opcode(dev_priv, val); - - hw_max = rps->max_freq; - hw_min = rps->min_freq; - - if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - return -EINVAL; - } - - rps->max_freq_softlimit = val; - - if (intel_set_rps(dev_priv, val)) - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - - mutex_unlock(&dev_priv->pcu_lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops, - i915_max_freq_get, i915_max_freq_set, - "%llu\n"); - -static int -i915_min_freq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); - return 0; -} - -static int -i915_min_freq_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 hw_max, hw_min; - int ret; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - return ret; - - /* - * Turbo will still be enabled, but won't go below the set value. - */ - val = intel_freq_opcode(dev_priv, val); - - hw_max = rps->max_freq; - hw_min = rps->min_freq; - - if (val < hw_min || - val > hw_max || val > rps->max_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - return -EINVAL; - } - - rps->min_freq_softlimit = val; - - if (intel_set_rps(dev_priv, val)) - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - - mutex_unlock(&dev_priv->pcu_lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops, - i915_min_freq_get, i915_min_freq_set, - "%llu\n"); - -static int i915_cache_sharing_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; @@ -4316,9 +4310,10 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops, static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { - int ss_max = 2; +#define SS_MAX 2 + const int ss_max = SS_MAX; + u32 sig1[SS_MAX], sig2[SS_MAX]; int ss; - u32 sig1[ss_max], sig2[ss_max]; sig1[0] = I915_READ(CHV_POWER_SS0_SIG1); sig1[1] = I915_READ(CHV_POWER_SS1_SIG1); @@ -4342,15 +4337,16 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, sseu->eu_per_subslice = max_t(unsigned int, sseu->eu_per_subslice, eu_cnt); } +#undef SS_MAX } static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { +#define SS_MAX 6 const struct intel_device_info *info = INTEL_INFO(dev_priv); + u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; int s, ss; - u32 s_reg[info->sseu.max_slices]; - u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; for (s = 0; s < info->sseu.max_slices; s++) { /* @@ -4397,15 +4393,16 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, eu_cnt); } } +#undef SS_MAX } static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { +#define SS_MAX 3 const struct intel_device_info *info = INTEL_INFO(dev_priv); + u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; int s, ss; - u32 s_reg[info->sseu.max_slices]; - u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; for (s = 0; s < info->sseu.max_slices; s++) { s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s)); @@ -4452,6 +4449,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, eu_cnt); } } +#undef SS_MAX } static void 
broadwell_sseu_device_status(struct drm_i915_private *dev_priv, @@ -4703,6 +4701,67 @@ static int i915_drrs_ctl_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(i915_drrs_ctl_fops, NULL, i915_drrs_ctl_set, "%llu\n"); +static ssize_t +i915_fifo_underrun_reset_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct drm_i915_private *dev_priv = filp->private_data; + struct intel_crtc *intel_crtc; + struct drm_device *dev = &dev_priv->drm; + int ret; + bool reset; + + ret = kstrtobool_from_user(ubuf, cnt, &reset); + if (ret) + return ret; + + if (!reset) + return cnt; + + for_each_intel_crtc(dev, intel_crtc) { + struct drm_crtc_commit *commit; + struct intel_crtc_state *crtc_state; + + ret = drm_modeset_lock_single_interruptible(&intel_crtc->base.mutex); + if (ret) + return ret; + + crtc_state = to_intel_crtc_state(intel_crtc->base.state); + commit = crtc_state->base.commit; + if (commit) { + ret = wait_for_completion_interruptible(&commit->hw_done); + if (!ret) + ret = wait_for_completion_interruptible(&commit->flip_done); + } + + if (!ret && crtc_state->base.active) { + DRM_DEBUG_KMS("Re-arming FIFO underruns on pipe %c\n", + pipe_name(intel_crtc->pipe)); + + intel_crtc_arm_fifo_underrun(intel_crtc, crtc_state); + } + + drm_modeset_unlock(&intel_crtc->base.mutex); + + if (ret) + return ret; + } + + ret = intel_fbc_reset_underrun(dev_priv); + if (ret) + return ret; + + return cnt; +} + +static const struct file_operations i915_fifo_underrun_reset_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = i915_fifo_underrun_reset_write, + .llseek = default_llseek, +}; + static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -4760,8 +4819,6 @@ static const struct i915_debugfs_files { const struct file_operations *fops; } i915_debugfs_files[] = { {"i915_wedged", &i915_wedged_fops}, - {"i915_max_freq", &i915_max_freq_fops}, - {"i915_min_freq", &i915_min_freq_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, {"i915_ring_test_irq", &i915_ring_test_irq_fops}, @@ -4770,6 +4827,7 @@ static const struct i915_debugfs_files { {"i915_error_state", &i915_error_state_fops}, {"i915_gpu_info", &i915_gpu_info_fops}, #endif + {"i915_fifo_underrun_reset", &i915_fifo_underrun_reset_ops}, {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, {"i915_pri_wm_latency", &i915_pri_wm_latency_fops}, @@ -4779,10 +4837,12 @@ static const struct i915_debugfs_files { {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, - {"i915_guc_log_control", &i915_guc_log_control_fops}, + {"i915_guc_log_level", &i915_guc_log_level_fops}, + {"i915_guc_log_relay", &i915_guc_log_relay_fops}, {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops}, {"i915_ipc_status", &i915_ipc_status_fops}, - {"i915_drrs_ctl", &i915_drrs_ctl_fops} + {"i915_drrs_ctl", &i915_drrs_ctl_fops}, + {"i915_edp_psr_debug", &i915_edp_psr_debug_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) @@ -4876,19 +4936,7 @@ static int i915_dpcd_show(struct seq_file *m, void *data) return 0; } - -static int i915_dpcd_open(struct inode *inode, struct file *file) -{ - return single_open(file, i915_dpcd_show, inode->i_private); -} - -static const struct file_operations i915_dpcd_fops = { - .owner = 
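
The new i915_fifo_underrun_reset file above follows the usual write-only debugfs idiom: simple_open() stashes inode->i_private in file->private_data, kstrtobool_from_user() accepts the "1"/"y"/"0"/"n" spellings, and a false value is accepted but does nothing. Stripped to its skeleton (names here are illustrative, not from the patch):

static ssize_t example_reset_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	bool reset;
	int ret;

	ret = kstrtobool_from_user(ubuf, cnt, &reset);
	if (ret)
		return ret;

	if (!reset)		/* writing "0"/"n" is a valid no-op */
		return cnt;

	/* one-shot action goes here */

	return cnt;
}

static const struct file_operations example_reset_ops = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.write = example_reset_write,
	.llseek = default_llseek,
};
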
THIS_MODULE, - .open = i915_dpcd_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(i915_dpcd); static int i915_panel_show(struct seq_file *m, void *data) { @@ -4910,19 +4958,7 @@ static int i915_panel_show(struct seq_file *m, void *data) return 0; } - -static int i915_panel_open(struct inode *inode, struct file *file) -{ - return single_open(file, i915_panel_show, inode->i_private); -} - -static const struct file_operations i915_panel_fops = { - .owner = THIS_MODULE, - .open = i915_panel_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(i915_panel); /** * i915_debugfs_connector_add - add i915 specific connector debugfs files diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3b4daafebdcb..9c449b8d8eab 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -101,7 +101,13 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, __builtin_return_address(0), &vaf); if (is_error && !shown_bug_once) { - dev_notice(kdev, "%s", FDO_BUG_MSG); + /* + * Ask the user to file a bug report for the error, except + * if they may have caused the bug by fiddling with unsafe + * module parameters. + */ + if (!test_taint(TAINT_USER)) + dev_notice(kdev, "%s", FDO_BUG_MSG); shown_bug_once = true; } @@ -377,9 +383,9 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; break; case I915_PARAM_HUC_STATUS: - intel_runtime_pm_get(dev_priv); - value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; - intel_runtime_pm_put(dev_priv); + value = intel_huc_check_status(&dev_priv->huc); + if (value < 0) + return value; break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all @@ -695,11 +701,9 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_irq; - intel_uc_init_fw(dev_priv); - ret = i915_gem_init(dev_priv); if (ret) - goto cleanup_uc; + goto cleanup_irq; intel_setup_overlay(dev_priv); @@ -719,8 +723,6 @@ cleanup_gem: if (i915_gem_suspend(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); i915_gem_fini(dev_priv); -cleanup_uc: - intel_uc_fini_fw(dev_priv); cleanup_irq: drm_irq_uninstall(dev); intel_teardown_gmbus(dev_priv); @@ -922,16 +924,21 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->wm.wm_mutex); mutex_init(&dev_priv->pps_mutex); - intel_uc_init_early(dev_priv); i915_memcpy_init_early(dev_priv); ret = i915_workqueues_init(dev_priv); if (ret < 0) goto err_engines; + ret = i915_gem_init_early(dev_priv); + if (ret < 0) + goto err_workqueues; + /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(dev_priv); + intel_wopcm_init_early(&dev_priv->wopcm); + intel_uc_init_early(dev_priv); intel_pm_setup(dev_priv); intel_init_dpio(dev_priv); intel_power_domains_init(dev_priv); @@ -940,18 +947,13 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_init_display_hooks(dev_priv); intel_init_clock_gating_hooks(dev_priv); intel_init_audio_hooks(dev_priv); - ret = i915_gem_load_init(dev_priv); - if (ret < 0) - goto err_irq; - intel_display_crc_init(dev_priv); intel_detect_preproduction_hw(dev_priv); return 0; -err_irq: - intel_irq_fini(dev_priv); +err_workqueues: i915_workqueues_cleanup(dev_priv); err_engines: i915_engines_cleanup(dev_priv); @@ -964,8 +966,9 @@ err_engines: */ static void 
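
The test_taint() check above consults the global kernel taint mask; the matching producer side is add_taint(). A sketch of both halves (where exactly i915 sets TAINT_USER for unsafe module parameters is an assumption, not shown in this hunk):

/* Assumed: parameter validation flags the kernel when the user
 * overrides something unsupported. */
add_taint(TAINT_USER, LOCKDEP_STILL_OK);

/* Error path: only solicit bug reports from untainted systems. */
if (!test_taint(TAINT_USER))
	dev_notice(kdev, "%s", FDO_BUG_MSG);
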
i915_driver_cleanup_early(struct drm_i915_private *dev_priv) { - i915_gem_load_cleanup(dev_priv); intel_irq_fini(dev_priv); + intel_uc_cleanup_early(dev_priv); + i915_gem_cleanup_early(dev_priv); i915_workqueues_cleanup(dev_priv); i915_engines_cleanup(dev_priv); } @@ -1035,6 +1038,10 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) intel_uncore_init(dev_priv); + intel_device_info_init_mmio(dev_priv); + + intel_uncore_prune(dev_priv); + intel_uc_init_mmio(dev_priv); ret = intel_engines_init_mmio(dev_priv); @@ -1077,8 +1084,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) i915_modparams.enable_ppgtt); DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915_modparams.enable_ppgtt); - intel_uc_sanitize_options(dev_priv); - intel_gvt_sanitize_options(dev_priv); } @@ -1244,7 +1249,6 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) /* Reveal our presence to userspace */ if (drm_dev_register(dev, 0) == 0) { i915_debugfs_register(dev_priv); - i915_guc_log_register(dev_priv); i915_setup_sysfs(dev_priv); /* Depends on sysfs having been initialized */ @@ -1304,7 +1308,6 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); - i915_guc_log_unregister(dev_priv); drm_dev_unregister(&dev_priv->drm); i915_gem_shrinker_unregister(dev_priv); @@ -1463,7 +1466,6 @@ void i915_driver_unload(struct drm_device *dev) i915_reset_error_state(dev_priv); i915_gem_fini(dev_priv); - intel_uc_fini_fw(dev_priv); intel_fbc_cleanup_cfb(dev_priv); intel_power_domains_fini(dev_priv); @@ -1876,7 +1878,8 @@ static int i915_resume_switcheroo(struct drm_device *dev) /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset - * @flags: Instructions + * @stalled_mask: mask of the stalled engines with the guilty requests + * @reason: user error message for why we are resetting * * Reset the chip. Useful if a hang is detected. Marks the device as wedged * on failure. @@ -1891,12 +1894,16 @@ static int i915_resume_switcheroo(struct drm_device *dev) * - re-init interrupt state * - re-init display */ -void i915_reset(struct drm_i915_private *i915, unsigned int flags) +void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason) { struct i915_gpu_error *error = &i915->gpu_error; int ret; int i; + GEM_TRACE("flags=%lx\n", error->flags); + might_sleep(); lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); @@ -1908,8 +1915,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) if (!i915_gem_unset_wedged(i915)) goto wakeup; - if (!(flags & I915_RESET_QUIET)) - dev_notice(i915->drm.dev, "Resetting chip after gpu hang\n"); + if (reason) + dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; disable_irq(i915->drm.irq); @@ -1952,7 +1959,7 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) goto error; } - i915_gem_reset(i915); + i915_gem_reset(i915, stalled_mask); intel_overlay_reset(i915); /* @@ -1998,7 +2005,6 @@ taint: error: i915_gem_set_wedged(i915); i915_retire_requests(i915); - intel_gpu_reset(i915, ALL_ENGINES); goto finish; } @@ -2011,7 +2017,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, /** * i915_reset_engine - reset GPU engine to recover from a hang * @engine: engine to reset - * @flags: options + * @msg: reason for GPU reset; or NULL for no dev_notice() * * Reset a specific GPU engine. 
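
With the flags argument gone from the reset entry points, the reason string doubles as the verbosity switch: a NULL message is the old I915_RESET_QUIET. Illustrative call sites (the reason strings are made up):

i915_reset_engine(engine, "hang detected on rcs0");	/* dev_notice() fires */
i915_reset_engine(engine, NULL);			/* silent, e.g. selftests */

i915_reset(i915, stalled_mask, "GPU hang");		/* full-chip fallback */
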
Useful if a hang is detected. * Returns zero on successful reset or otherwise an error code. @@ -2021,12 +2027,13 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, * - reset engine (which will force the engine to idle) * - re-init/configure engine */ -int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) +int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) { struct i915_gpu_error *error = &engine->i915->gpu_error; struct i915_request *active_request; int ret; + GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); active_request = i915_gem_reset_prepare_engine(engine); @@ -2036,10 +2043,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) goto out; } - if (!(flags & I915_RESET_QUIET)) { + if (msg) dev_notice(engine->i915->drm.dev, - "Resetting %s after gpu hang\n", engine->name); - } + "Resetting %s for %s\n", engine->name, msg); error->reset_engine_count[engine->id]++; if (!engine->i915->guc.execbuf_client) @@ -2059,7 +2065,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. */ - i915_gem_reset_engine(engine, active_request); + i915_gem_reset_engine(engine, active_request, true); /* * The engine and its registers (and workarounds in case of render) @@ -2468,10 +2474,13 @@ static void vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, /* * RC6 transitioning can be delayed up to 2 msec (see * valleyview_enable_rps), use 3 msec for safety. + * + * This can fail to turn off the rc6 if the GPU is stuck after a failed + * reset and we are trying to force the machine to sleep. 
*/ if (vlv_wait_for_pw_status(dev_priv, mask, val)) - DRM_ERROR("timeout waiting for GT wells to go %s\n", - onoff(wait_for_on)); + DRM_DEBUG_DRIVER("timeout waiting for GT wells to go %s\n", + onoff(wait_for_on)); } static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) @@ -2822,10 +2831,10 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id_ioctl, 0), DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce18b6cf6e68..34c125e2d90c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -64,6 +64,7 @@ #include "intel_opregion.h" #include "intel_ringbuffer.h" #include "intel_uncore.h" +#include "intel_wopcm.h" #include "intel_uc.h" #include "i915_gem.h" @@ -71,9 +72,10 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_timeline.h" - +#include "i915_gpu_error.h" #include "i915_request.h" +#include "i915_scheduler.h" +#include "i915_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -83,8 +85,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20180308" -#define DRIVER_TIMESTAMP 1520513379 +#define DRIVER_DATE "20180514" +#define DRIVER_TIMESTAMP 1526300884 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -261,6 +263,7 @@ enum hpd_pin { HPD_PORT_C, HPD_PORT_D, HPD_PORT_E, + HPD_PORT_F, HPD_NUM_PINS }; @@ -453,172 +456,6 @@ struct intel_csr { uint32_t allowed_dc_mask; }; -struct intel_display_error_state; - -struct i915_gpu_state { - struct kref ref; - ktime_t time; - ktime_t boottime; - ktime_t uptime; - - struct drm_i915_private *i915; - - char error_msg[128]; - bool simulated; - bool awake; - bool wakelock; - bool suspended; - int iommu; - u32 reset_count; - u32 suspend_count; - struct intel_device_info device_info; - struct intel_driver_caps driver_caps; - struct i915_params params; - - struct i915_error_uc { - struct intel_uc_fw guc_fw; - struct intel_uc_fw huc_fw; - struct drm_i915_error_object *guc_log; - } uc; - - /* Generic register state */ - u32 eir; - u32 pgtbl_er; - u32 ier; - u32 gtier[4], ngtier; - u32 ccid; - u32 derrmr; - u32 forcewake; - u32 error; /* gen6+ */ - u32 
err_int; /* gen7 */ - u32 fault_data0; /* gen8, gen9 */ - u32 fault_data1; /* gen8, gen9 */ - u32 done_reg; - u32 gac_eco; - u32 gam_ecochk; - u32 gab_ctl; - u32 gfx_mode; - - u32 nfence; - u64 fence[I915_MAX_NUM_FENCES]; - struct intel_overlay_error_state *overlay; - struct intel_display_error_state *display; - - struct drm_i915_error_engine { - int engine_id; - /* Software tracked state */ - bool idle; - bool waiting; - int num_waiters; - unsigned long hangcheck_timestamp; - bool hangcheck_stalled; - enum intel_engine_hangcheck_action hangcheck_action; - struct i915_address_space *vm; - int num_requests; - u32 reset_count; - - /* position of active request inside the ring */ - u32 rq_head, rq_post, rq_tail; - - /* our own tracking of ring head and tail */ - u32 cpu_ring_head; - u32 cpu_ring_tail; - - u32 last_seqno; - - /* Register state */ - u32 start; - u32 tail; - u32 head; - u32 ctl; - u32 mode; - u32 hws; - u32 ipeir; - u32 ipehr; - u32 bbstate; - u32 instpm; - u32 instps; - u32 seqno; - u64 bbaddr; - u64 acthd; - u32 fault_reg; - u64 faddr; - u32 rc_psmi; /* sleep state */ - u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; - struct intel_instdone instdone; - - struct drm_i915_error_context { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 handle; - u32 hw_id; - int priority; - int ban_score; - int active; - int guilty; - bool bannable; - } context; - - struct drm_i915_error_object { - u64 gtt_offset; - u64 gtt_size; - int page_count; - int unused; - u32 *pages[0]; - } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; - - struct drm_i915_error_object **user_bo; - long user_bo_count; - - struct drm_i915_error_object *wa_ctx; - struct drm_i915_error_object *default_state; - - struct drm_i915_error_request { - long jiffies; - pid_t pid; - u32 context; - int priority; - int ban_score; - u32 seqno; - u32 head; - u32 tail; - } *requests, execlist[EXECLIST_MAX_PORTS]; - unsigned int num_ports; - - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - - struct { - u32 gfx_mode; - union { - u64 pdp[4]; - u32 pp_dir_base; - }; - } vm_info; - } engine[I915_NUM_ENGINES]; - - struct drm_i915_error_buffer { - u32 size; - u32 name; - u32 rseqno[I915_NUM_ENGINES], wseqno; - u64 gtt_offset; - u32 read_domains; - u32 write_domain; - s32 fence_reg:I915_MAX_NUM_FENCE_BITS; - u32 tiling:2; - u32 dirty:1; - u32 purgeable:1; - u32 userptr:1; - s32 engine:4; - u32 cache_level:3; - } *active_bo[I915_NUM_ENGINES], *pinned_bo; - u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; - struct i915_address_space *active_vm[I915_NUM_ENGINES]; -}; - enum i915_cache_level { I915_CACHE_NONE = 0, I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */ @@ -766,12 +603,16 @@ struct i915_psr { bool active; struct delayed_work work; unsigned busy_frontbuffer_bits; - bool psr2_support; - bool aux_frame_sync; + bool sink_psr2_support; bool link_standby; - bool y_cord_support; bool colorimetry_support; bool alpm; + bool has_hw_tracking; + bool psr2_enabled; + u8 sink_sync_latency; + bool debug; + ktime_t last_entry_attempt; + ktime_t last_exit; void (*enable_source)(struct intel_dp *, const struct intel_crtc_state *); @@ -1146,16 +987,6 @@ struct i915_gem_mm { u32 object_count; }; -struct drm_i915_error_state_buf { - struct drm_i915_private *i915; - unsigned bytes; - unsigned size; - int err; - u8 *buf; - loff_t start; - loff_t pos; -}; - #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ @@ -1164,102 +995,6 @@ 
struct drm_i915_error_state_buf { #define I915_ENGINE_DEAD_TIMEOUT (4 * HZ) /* Seqno, head and subunits dead */ #define I915_SEQNO_DEAD_TIMEOUT (12 * HZ) /* Seqno dead with active head */ -struct i915_gpu_error { - /* For hangcheck timer */ -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ -#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) - - struct delayed_work hangcheck_work; - - /* For reset and error_state handling. */ - spinlock_t lock; - /* Protected by the above dev->gpu_error.lock. */ - struct i915_gpu_state *first_error; - - atomic_t pending_fb_pin; - - unsigned long missed_irq_rings; - - /** - * State variable controlling the reset flow and count - * - * This is a counter which gets incremented when reset is triggered, - * - * Before the reset commences, the I915_RESET_BACKOFF bit is set - * meaning that any waiters holding onto the struct_mutex should - * relinquish the lock immediately in order for the reset to start. - * - * If reset is not completed succesfully, the I915_WEDGE bit is - * set meaning that hardware is terminally sour and there is no - * recovery. All waiters on the reset_queue will be woken when - * that happens. - * - * This counter is used by the wait_seqno code to notice that reset - * event happened and it needs to restart the entire ioctl (since most - * likely the seqno it waited for won't ever signal anytime soon). - * - * This is important for lock-free wait paths, where no contended lock - * naturally enforces the correct ordering between the bail-out of the - * waiter and the gpu reset work code. - */ - unsigned long reset_count; - - /** - * flags: Control various stages of the GPU reset - * - * #I915_RESET_BACKOFF - When we start a reset, we want to stop any - * other users acquiring the struct_mutex. To do this we set the - * #I915_RESET_BACKOFF bit in the error flags when we detect a reset - * and then check for that bit before acquiring the struct_mutex (in - * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a - * secondary role in preventing two concurrent global reset attempts. - * - * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the - * struct_mutex. We try to acquire the struct_mutex in the reset worker, - * but it may be held by some long running waiter (that we cannot - * interrupt without causing trouble). Once we are ready to do the GPU - * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If - * they already hold the struct_mutex and want to participate they can - * inspect the bit and do the reset directly, otherwise the worker - * waits for the struct_mutex. - * - * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to - * acquire the struct_mutex to reset an engine, we need an explicit - * flag to prevent two concurrent reset attempts in the same engine. - * As the number of engines continues to grow, allocate the flags from - * the most significant bits. - * - * #I915_WEDGED - If reset fails and we can no longer use the GPU, - * we set the #I915_WEDGED bit. Prior to command submission, e.g. - * i915_request_alloc(), this bit is checked and the sequence - * aborted (with -EIO reported to userspace) if set. 
- */ - unsigned long flags; -#define I915_RESET_BACKOFF 0 -#define I915_RESET_HANDOFF 1 -#define I915_RESET_MODESET 2 -#define I915_WEDGED (BITS_PER_LONG - 1) -#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES) - - /** Number of times an engine has been reset */ - u32 reset_engine_count[I915_NUM_ENGINES]; - - /** - * Waitqueue to signal when a hang is detected. Used to for waiters - * to release the struct_mutex for the reset to procede. - */ - wait_queue_head_t wait_queue; - - /** - * Waitqueue to signal when the reset has completed. Used by clients - * that wait for dev_priv->mm.wedged to settle. - */ - wait_queue_head_t reset_queue; - - /* For missed irq/seqno simulation. */ - unsigned long test_irq_rings; -}; - enum modeset_restore { MODESET_ON_LID_OPEN, MODESET_DONE, @@ -1338,6 +1073,7 @@ struct intel_vbt_data { } edp; struct { + bool enable; bool full_link; bool require_aux_wakeup; int idle_frames; @@ -1451,11 +1187,13 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1, } struct skl_ddb_allocation { - struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* packed/uv */ - struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES]; + /* packed/y */ + struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; + struct skl_ddb_entry uv_plane[I915_MAX_PIPES][I915_MAX_PLANES]; + u8 enabled_slices; /* GEN11 has configurable 2 slices */ }; -struct skl_wm_values { +struct skl_ddb_values { unsigned dirty_pipes; struct skl_ddb_allocation ddb; }; @@ -1470,6 +1208,7 @@ struct skl_wm_level { struct skl_wm_params { bool x_tiled, y_tiled; bool rc_surface; + bool is_planar; uint32_t width; uint8_t cpp; uint32_t plane_pixel_rate; @@ -1564,7 +1303,6 @@ struct i915_wa_reg { struct i915_workarounds { struct i915_wa_reg reg[I915_MAX_WA_REGS]; u32 count; - u32 hw_whitelist_count[I915_NUM_ENGINES]; }; struct i915_virtual_gpu { @@ -1860,6 +1598,8 @@ struct drm_i915_private { struct intel_gvt *gvt; + struct intel_wopcm wopcm; + struct intel_huc huc; struct intel_guc guc; @@ -2152,7 +1892,7 @@ struct drm_i915_private { /* current hardware state */ union { struct ilk_wm_values hw; - struct skl_wm_values skl_hw; + struct skl_ddb_values skl_hw; struct vlv_wm_values vlv; struct g4x_wm_values g4x; }; @@ -2321,8 +2061,11 @@ struct drm_i915_private { void (*cleanup_engine)(struct intel_engine_cs *engine); struct list_head timelines; - struct i915_gem_timeline global_timeline; + + struct list_head active_rings; + struct list_head closed_vma; u32 active_requests; + u32 request_serial; /** * Is the GPU currently considered idle, or busy executing @@ -2392,6 +2135,11 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) return to_i915(dev_get_drvdata(kdev)); } +static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm) +{ + return container_of(wopcm, struct drm_i915_private, wopcm); +} + static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) { return container_of(guc, struct drm_i915_private, guc); @@ -2411,8 +2159,10 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc) /* Iterator over subset of engines selected by mask */ #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ - for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \ - tmp__ ? (engine__ = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; ) + for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->ring_mask; \ + (tmp__) ? 
\ + ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \ + 0;) enum hdmi_force_audio { HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */ @@ -2720,6 +2470,15 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_CNL_REVID(p, since, until) \ (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) +#define ICL_REVID_A0 0x0 +#define ICL_REVID_A2 0x1 +#define ICL_REVID_B0 0x3 +#define ICL_REVID_B2 0x4 +#define ICL_REVID_C0 0x5 + +#define IS_ICL_REVID(p, since, until) \ + (IS_ICELAKE(p) && IS_REVID(p, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks @@ -2963,10 +2722,11 @@ extern void i915_driver_unload(struct drm_device *dev); extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); -#define I915_RESET_QUIET BIT(0) -extern void i915_reset(struct drm_i915_private *i915, unsigned int flags); +extern void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason); extern int i915_reset_engine(struct intel_engine_cs *engine, - unsigned int flags); + const char *reason); extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); extern int intel_reset_guc(struct drm_i915_private *dev_priv); @@ -3014,10 +2774,12 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) &dev_priv->gpu_error.hangcheck_work, delay); } -__printf(3, 4) +__printf(4, 5) void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, + unsigned long flags, const char *fmt, ...); +#define I915_ERROR_CAPTURE BIT(0) extern void intel_irq_init(struct drm_i915_private *dev_priv); extern void intel_irq_fini(struct drm_i915_private *dev_priv); @@ -3132,8 +2894,8 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void i915_gem_sanitize(struct drm_i915_private *i915); -int i915_gem_load_init(struct drm_i915_private *dev_priv); -void i915_gem_load_cleanup(struct drm_i915_private *dev_priv); +int i915_gem_init_early(struct drm_i915_private *dev_priv); +void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); int i915_gem_freeze_late(struct drm_i915_private *dev_priv); @@ -3388,13 +3150,15 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); -void i915_gem_reset(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv, + unsigned int stalled_mask); void i915_gem_reset_finish_engine(struct intel_engine_cs *engine); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct i915_request *request); + struct i915_request *request, + bool stalled); void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); @@ -3412,7 +3176,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps); int 
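
The extra parentheses added to for_each_engine_masked() above matter once callers pass expressions rather than plain identifiers for mask__, which is exactly what the new stalled_mask plumbing does. Usage sketch (tmp is the mandatory scratch variable; skipped is a placeholder):

struct intel_engine_cs *engine;
unsigned int tmp;

for_each_engine_masked(engine, i915, stalled_mask & ~skipped, tmp)
	DRM_DEBUG_DRIVER("%s holds a guilty request\n", engine->name);
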
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int priority); + const struct i915_sched_attr *attr); #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX int __must_check @@ -3481,16 +3245,6 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -static inline struct intel_timeline * -i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct i915_address_space *vm; - - vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; - return &vm->timeline.engine[engine->id]; -} - int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, @@ -3589,64 +3343,6 @@ static inline int i915_debugfs_connector_add(struct drm_connector *connector) static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} #endif -/* i915_gpu_error.c */ -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -__printf(2, 3) -void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, - const struct i915_gpu_state *gpu); -int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, - struct drm_i915_private *i915, - size_t count, loff_t pos); -static inline void i915_error_state_buf_release( - struct drm_i915_error_state_buf *eb) -{ - kfree(eb->buf); -} - -struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); -void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *error_msg); - -static inline struct i915_gpu_state * -i915_gpu_state_get(struct i915_gpu_state *gpu) -{ - kref_get(&gpu->ref); - return gpu; -} - -void __i915_gpu_state_free(struct kref *kref); -static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) -{ - if (gpu) - kref_put(&gpu->ref, __i915_gpu_state_free); -} - -struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); -void i915_reset_error_state(struct drm_i915_private *i915); - -#else - -static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *error_msg) -{ -} - -static inline struct i915_gpu_state * -i915_first_error_state(struct drm_i915_private *i915) -{ - return NULL; -} - -static inline void i915_reset_error_state(struct drm_i915_private *i915) -{ -} - -#endif - const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7b5a9d7c9593..0a2070112b66 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" +#include "intel_workarounds.h" #include "i915_gemfs.h" #include <linux/dma-fence-array.h> #include <linux/kthread.h> @@ -136,6 +137,102 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) return 0; } +static u32 __i915_gem_park(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); + + if (!i915->gt.awake) + return I915_EPOCH_INVALID; + + GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); + + /* + * Be paranoid and flush a concurrent interrupt to make sure + * we don't reactivate any irq tasklets after parking. 
+ * + * FIXME: Note that even though we have waited for execlists to be idle, + * there may still be an in-flight interrupt even though the CSB + * is now empty. synchronize_irq() makes sure that a residual interrupt + * is completed before we continue, but it doesn't prevent the HW from + * raising a spurious interrupt later. To complete the shield we should + * coordinate disabling the CS irq with flushing the interrupts. + */ + synchronize_irq(i915->drm.irq); + + intel_engines_park(i915); + i915_timelines_park(i915); + + i915_pmu_gt_parked(i915); + i915_vma_parked(i915); + + i915->gt.awake = false; + + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); + + intel_runtime_pm_put(i915); + + return i915->gt.epoch; +} + +void i915_gem_park(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + + if (!i915->gt.awake) + return; + + /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ + mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); +} + +void i915_gem_unpark(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915->gt.active_requests); + + if (i915->gt.awake) + return; + + intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. 
+ */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + + i915->gt.awake = true; + if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ + i915->gt.epoch = 1; + + intel_enable_gt_powersave(i915); + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + + intel_engines_unpark(i915); + + i915_queue_hangcheck(i915); + + queue_delayed_work(i915->wq, + &i915->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -469,7 +566,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, return timeout; } -static void __fence_set_priority(struct dma_fence *fence, int prio) +static void __fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { struct i915_request *rq; struct intel_engine_cs *engine; @@ -480,13 +578,16 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) rq = to_request(fence); engine = rq->engine; - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(rq, prio); + engine->schedule(rq, attr); rcu_read_unlock(); + local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } -static void fence_set_priority(struct dma_fence *fence, int prio) +static void fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { @@ -494,16 +595,16 @@ static void fence_set_priority(struct dma_fence *fence, int prio) int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], prio); + __fence_set_priority(array->fences[i], attr); } else { - __fence_set_priority(fence, prio); + __fence_set_priority(fence, attr); } } int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int prio) + const struct i915_sched_attr *attr) { struct dma_fence *excl; @@ -518,7 +619,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], prio); + fence_set_priority(shared[i], attr); dma_fence_put(shared[i]); } @@ -528,7 +629,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, prio); + fence_set_priority(excl, attr); dma_fence_put(excl); } return 0; @@ -2879,8 +2980,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. 
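
Wrapping the bare priority in struct i915_sched_attr leaves room for more scheduling parameters without another signature churn, and the local_bh_disable()/local_bh_enable() pair around engine->schedule() ensures the softirq tasklets run promptly if the reprioritisation unblocked anything. Caller sketch:

struct i915_sched_attr attr = {
	.priority = I915_PRIORITY_DISPLAY,
};

/* Bump everything this object depends on, e.g. ahead of a flip. */
i915_gem_object_wait_priority(obj, 0, &attr);
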
*/ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->requests, link) { + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { if (__i915_request_completed(request, request->global_seqno)) continue; @@ -2891,25 +2992,11 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); return active; } -static bool engine_stalled(struct intel_engine_cs *engine) -{ - if (!engine->hangcheck.stalled) - return false; - - /* Check for possible seqno movement after hang declaration */ - if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) { - DRM_DEBUG_DRIVER("%s pardoned\n", engine->name); - return false; - } - - return true; -} - /* * Ensure irq handler finishes, and not run again. * Also return the active request so that we only search for it once. @@ -2998,6 +3085,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) } i915_gem_revoke_fences(dev_priv); + intel_uc_sanitize(dev_priv); return err; } @@ -3025,15 +3113,15 @@ static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; - struct intel_timeline *timeline; + struct i915_timeline *timeline = request->timeline; unsigned long flags; - timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); + GEM_BUG_ON(timeline == &engine->timeline); - spin_lock_irqsave(&engine->timeline->lock, flags); - spin_lock(&timeline->lock); + spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); - list_for_each_entry_continue(request, &engine->timeline->requests, link) + list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->ctx == hung_ctx) skip_request(request); @@ -3041,13 +3129,14 @@ static void engine_skip_context(struct i915_request *request) skip_request(request); spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } /* Returns the request if it was guilty of the hang */ static struct i915_request * i915_gem_reset_request(struct intel_engine_cs *engine, - struct i915_request *request) + struct i915_request *request, + bool stalled) { /* The guilty request will get skipped on a hung engine. * @@ -3070,7 +3159,15 @@ i915_gem_reset_request(struct intel_engine_cs *engine, * subsequent hangs. 
*/ - if (engine_stalled(engine)) { + if (i915_request_completed(request)) { + GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", + engine->name, request->global_seqno, + request->fence.context, request->fence.seqno, + intel_engine_get_seqno(engine)); + stalled = false; + } + + if (stalled) { i915_gem_context_mark_guilty(request->ctx); skip_request(request); @@ -3089,11 +3186,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine, dma_fence_set_error(&request->fence, -EAGAIN); /* Rewind the engine to replay the incomplete rq */ - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); request = list_prev_entry(request, link); - if (&request->link == &engine->timeline->requests) + if (&request->link == &engine->timeline.requests) request = NULL; - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } } @@ -3101,7 +3198,8 @@ i915_gem_reset_request(struct intel_engine_cs *engine, } void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct i915_request *request) + struct i915_request *request, + bool stalled) { /* * Make sure this write is visible before we re-enable the interrupt @@ -3111,7 +3209,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, smp_store_mb(engine->irq_posted, 0); if (request) - request = i915_gem_reset_request(engine, request); + request = i915_gem_reset_request(engine, request, stalled); if (request) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", @@ -3122,7 +3220,8 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, engine->reset_hw(engine, request); } -void i915_gem_reset(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv, + unsigned int stalled_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -3134,10 +3233,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct i915_gem_context *ctx; - i915_gem_reset_engine(engine, engine->hangcheck.active_request); + i915_gem_reset_engine(engine, + engine->hangcheck.active_request, + stalled_mask & ENGINE_MASK(id)); ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); /* * Ostensibily, we always want a context loaded for powersaving, @@ -3160,13 +3261,6 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) } i915_gem_restore_fences(dev_priv); - - if (dev_priv->gt.awake) { - intel_sanitize_gt_powersave(dev_priv); - intel_enable_gt_powersave(dev_priv); - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_busy(dev_priv); - } } void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) @@ -3192,6 +3286,9 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct i915_request *request) { + GEM_TRACE("%s fence %llx:%d -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); i915_request_submit(request); @@ -3201,12 +3298,15 @@ static void nop_complete_submit_request(struct i915_request *request) { unsigned long flags; + GEM_TRACE("%s fence %llx:%d -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&request->engine->timeline->lock, flags); + spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); - 
spin_unlock_irqrestore(&request->engine->timeline->lock, flags); + spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -3214,7 +3314,9 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; - if (drm_debug & DRM_UT_DRIVER) { + GEM_TRACE("start\n"); + + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); for_each_engine(engine, i915, id) @@ -3237,6 +3339,9 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) } i915->caps.scheduler = 0; + /* Even if the GPU reset fails, it should still stop the engines */ + intel_gpu_reset(i915, ALL_ENGINES); + /* * Make sure no one is running the old callback before we proceed with * cancelling requests and resetting the completion tracking. Otherwise @@ -3270,27 +3375,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); intel_engine_init_global_seqno(engine, intel_engine_last_submit(engine)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); i915_gem_reset_finish_engine(engine); } + GEM_TRACE("end\n"); + wake_up_all(&i915->gpu_error.reset_queue); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) { - struct i915_gem_timeline *tl; - int i; + struct i915_timeline *tl; lockdep_assert_held(&i915->drm.struct_mutex); if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) return true; - /* Before unwedging, make sure that all pending operations + GEM_TRACE("start\n"); + + /* + * Before unwedging, make sure that all pending operations * are flushed and errored out - we may have requests waiting upon * third party fences. We marked all inflight requests as EIO, and * every execbuf since returned EIO, for consistency we want all @@ -3300,31 +3409,33 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * No more can be submitted until we reset the wedged bit. */ list_for_each_entry(tl, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - struct i915_request *rq; + struct i915_request *rq; - rq = i915_gem_active_peek(&tl->engine[i].last_request, - &i915->drm.struct_mutex); - if (!rq) - continue; + rq = i915_gem_active_peek(&tl->last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; - /* We can't use our normal waiter as we want to - * avoid recursively trying to handle the current - * reset. The basic dma_fence_default_wait() installs - * a callback for dma_fence_signal(), which is - * triggered by our nop handler (indirectly, the - * callback enables the signaler thread which is - * woken by the nop_submit_request() advancing the seqno - * and when the seqno passes the fence, the signaler - * then signals the fence waking us up). - */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) - return false; - } + /* + * We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). 
+ */ + if (dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT) < 0) + return false; } + i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); - /* Undo nop_submit_request. We prevent all new i915 requests from + /* + * Undo nop_submit_request. We prevent all new i915 requests from * being queued (by disallowing execbuf whilst wedged) so having * waited for all active requests above, we know the system is idle * and do not have to worry about a thread being inside @@ -3335,6 +3446,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) intel_engines_reset_default_submission(i915); i915_gem_contexts_lost(i915); + GEM_TRACE("end\n"); + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, &i915->gpu_error.flags); @@ -3473,36 +3586,9 @@ i915_gem_idle_work_handler(struct work_struct *work) if (new_requests_since_last_retire(dev_priv)) goto out_unlock; - /* - * Be paranoid and flush a concurrent interrupt to make sure - * we don't reactivate any irq tasklets after parking. - * - * FIXME: Note that even though we have waited for execlists to be idle, - * there may still be an in-flight interrupt even though the CSB - * is now empty. synchronize_irq() makes sure that a residual interrupt - * is completed before we continue, but it doesn't prevent the HW from - * raising a spurious interrupt later. To complete the shield we should - * coordinate disabling the CS irq with flushing the interrupts. - */ - synchronize_irq(dev_priv->drm.irq); - - intel_engines_park(dev_priv); - i915_gem_timelines_park(dev_priv); + epoch = __i915_gem_park(dev_priv); - i915_pmu_gt_parked(dev_priv); - - GEM_BUG_ON(!dev_priv->gt.awake); - dev_priv->gt.awake = false; - epoch = dev_priv->gt.epoch; - GEM_BUG_ON(epoch == I915_EPOCH_INVALID); rearm_hangcheck = false; - - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_idle(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); - - intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3648,17 +3734,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) +static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) { - int ret, i; - - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); - if (ret) - return ret; - } - - return 0; + return i915_gem_active_wait(&tl->last_request, flags); } static int wait_for_engines(struct drm_i915_private *i915) @@ -3666,16 +3744,7 @@ static int wait_for_engines(struct drm_i915_private *i915) if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { dev_err(i915->drm.dev, "Failed to idle engines, declaring wedged!\n"); - if (drm_debug & DRM_UT_DRIVER) { - struct drm_printer p = drm_debug_printer(__func__); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) - intel_engine_dump(engine, &p, - "%s\n", engine->name); - } - + GEM_TRACE_DUMP(); i915_gem_set_wedged(i915); return -EIO; } @@ -3685,30 +3754,37 @@ static int wait_for_engines(struct drm_i915_private *i915) int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { - int ret; - /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) return 0; if (flags & I915_WAIT_LOCKED) { - struct i915_gem_timeline *tl; + struct i915_timeline *tl; + int err; 
lockdep_assert_held(&i915->drm.struct_mutex); list_for_each_entry(tl, &i915->gt.timelines, link) { - ret = wait_for_timeline(tl, flags); - if (ret) - return ret; + err = wait_for_timeline(tl, flags); + if (err) + return err; } i915_retire_requests(i915); - ret = wait_for_engines(i915); + return wait_for_engines(i915); } else { - ret = wait_for_timeline(&i915->gt.global_timeline, flags); - } + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - return ret; + for_each_engine(engine, i915, id) { + err = wait_for_timeline(&engine->timeline, flags); + if (err) + return err; + } + + return 0; + } } static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) @@ -4088,9 +4164,10 @@ out: } /* - * Prepare buffer for display plane (scanout, cursors, etc). - * Can be called from an uninterruptible phase (modesetting) and allows - * any flushes to be pipelined (for pageflips). + * Prepare buffer for display plane (scanout, cursors, etc). Can be called from + * an uninterruptible phase (modesetting) and allows any flushes to be pipelined + * (for pageflips). We only flush the caches while preparing the buffer for + * display, the callers are responsible for frontbuffer flush. */ struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, @@ -4146,9 +4223,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ __i915_gem_object_flush_for_display(obj); - intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. @@ -4723,7 +4798,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, &obj->vma_list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_close(vma); + i915_vma_destroy(vma); } GEM_BUG_ON(!list_empty(&obj->vma_list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); @@ -4878,7 +4953,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) enum intel_engine_id id; for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); GEM_BUG_ON(engine->last_retired_context != kernel_context); } } @@ -4973,6 +5048,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. 
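
Per the updated comment above, pin_to_display_plane() still flushes caches but no longer issues the frontbuffer flush itself; callers are now expected to pair it explicitly. Sketch of a caller (argument list abbreviated to the era's three-parameter form):

vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
if (IS_ERR(vma))
	return PTR_ERR(vma);

/* Treat this as an end-of-frame, as the helper itself used to do. */
intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
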
*/ + intel_uc_sanitize(dev_priv); i915_gem_sanitize(dev_priv); intel_runtime_pm_put(dev_priv); @@ -5118,6 +5194,8 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) } } + intel_gt_workarounds_apply(dev_priv); + i915_gem_init_swizzling(dev_priv); /* @@ -5140,6 +5218,12 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) goto out; } + ret = intel_wopcm_init_hw(&dev_priv->wopcm); + if (ret) { + DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); + goto out; + } + /* We can't enable contexts until all firmware is loaded */ ret = intel_uc_init_hw(dev_priv); if (ret) { @@ -5207,7 +5291,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { struct i915_vma *state; - state = ctx->engine[id].state; + state = to_intel_context(ctx, engine)->state; if (!state) continue; @@ -5297,6 +5381,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; + ret = intel_wopcm_init(&dev_priv->wopcm); + if (ret) + return ret; + ret = intel_uc_init_misc(dev_priv); if (ret) return ret; @@ -5478,8 +5566,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); } -int -i915_gem_load_init(struct drm_i915_private *dev_priv) +int i915_gem_init_early(struct drm_i915_private *dev_priv) { int err = -ENOMEM; @@ -5512,12 +5599,9 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); - err = i915_gem_timeline_init__global(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (err) - goto err_priorities; + INIT_LIST_HEAD(&dev_priv->gt.active_rings); + INIT_LIST_HEAD(&dev_priv->gt.closed_vma); i915_gem_init__mm(dev_priv); @@ -5538,8 +5622,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) return 0; -err_priorities: - kmem_cache_destroy(dev_priv->priorities); err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: @@ -5554,17 +5636,13 @@ err_out: return err; } -void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) +void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) { i915_gem_drain_freed_objects(dev_priv); GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - - mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_timeline_fini(&dev_priv->gt.global_timeline); WARN_ON(!list_empty(&dev_priv->gt.timelines)); - mutex_unlock(&dev_priv->drm.struct_mutex); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index f54c4ff74ded..525920404ede 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -27,7 +27,12 @@ #include <linux/bug.h> +struct drm_i915_private; + #ifdef CONFIG_DRM_I915_DEBUG_GEM + +#define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) + #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ pr_err("%s:%d GEM_BUG_ON(%s)\n", \ __func__, __LINE__, __stringify(condition)); \ @@ -43,6 +48,9 @@ #define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) #else + +#define GEM_SHOW_DEBUG() (0) + #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) @@ -53,10 +61,15 @@ #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) #define GEM_TRACE(...) 
trace_printk(__VA_ARGS__) +#define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL) #else #define GEM_TRACE(...) do { } while (0) +#define GEM_TRACE_DUMP() do { } while (0) #endif #define I915_NUM_ENGINES 8 +void i915_gem_park(struct drm_i915_private *i915); +void i915_gem_unpark(struct drm_i915_private *i915); + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index d3cbe8432f48..f3890b664e3f 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -1,29 +1,11 @@ /* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2014-2018 Intel Corporation */ -#include "i915_drv.h" #include "i915_gem_batch_pool.h" +#include "i915_drv.h" /** * DOC: batch pool @@ -41,11 +23,11 @@ /** * i915_gem_batch_pool_init() - initialize a batch buffer pool - * @engine: the associated request submission engine * @pool: the batch buffer pool + * @engine: the associated request submission engine */ -void i915_gem_batch_pool_init(struct intel_engine_cs *engine, - struct i915_gem_batch_pool *pool) +void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool, + struct intel_engine_cs *engine) { int n; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h index 10d5ac4c00d3..56947daaaf65 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h @@ -1,31 +1,13 @@ /* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2014-2018 Intel Corporation */ #ifndef I915_GEM_BATCH_POOL_H #define I915_GEM_BATCH_POOL_H -#include "i915_drv.h" +#include <linux/types.h> struct intel_engine_cs; @@ -34,9 +16,8 @@ struct i915_gem_batch_pool { struct list_head cache_list[4]; }; -/* i915_gem_batch_pool.c */ -void i915_gem_batch_pool_init(struct intel_engine_cs *engine, - struct i915_gem_batch_pool *pool); +void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool, + struct intel_engine_cs *engine); void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); struct drm_i915_gem_object* i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f2cbea7cf940..33f8a4b3c981 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -90,6 +90,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_trace.h" +#include "intel_workarounds.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 @@ -116,15 +117,15 @@ static void lut_close(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - int i; + unsigned int n; lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); i915_ppgtt_put(ctx->ppgtt); - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_context *ce = &ctx->engine[i]; + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; if (!ce->state) continue; @@ -280,7 +281,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, kref_init(&ctx->ref); list_add_tail(&ctx->link, &dev_priv->contexts.list); ctx->i915 = dev_priv; - ctx->priority = I915_PRIORITY_NORMAL; + ctx->sched.priority = I915_PRIORITY_NORMAL; INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -318,12 +319,13 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->desc_template = default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); - /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not + /* + * GuC requires the ring to be placed in Non-WOPCM memory. If GuC is not * present or not in use we still need a small bias as ring wraparound * at offset 0 sometimes hangs. No idea why. 
*/ if (USES_GUC(dev_priv)) - ctx->ggtt_offset_bias = GUC_WOPCM_TOP; + ctx->ggtt_offset_bias = dev_priv->guc.ggtt_pin_bias; else ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; @@ -429,7 +431,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; i915_gem_context_clear_bannable(ctx); - ctx->priority = prio; + ctx->sched.priority = prio; ctx->ring_size = PAGE_SIZE; GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -458,11 +460,16 @@ static bool needs_preempt_context(struct drm_i915_private *i915) int i915_gem_contexts_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; + int ret; /* Reassure ourselves we are only called once */ GEM_BUG_ON(dev_priv->kernel_context); GEM_BUG_ON(dev_priv->preempt_context); + ret = intel_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + INIT_LIST_HEAD(&dev_priv->contexts.list); INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); init_llist_head(&dev_priv->contexts.free_list); @@ -514,7 +521,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) if (!engine->last_retired_context) continue; - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); engine->last_retired_context = NULL; } } @@ -570,19 +577,29 @@ void i915_gem_context_close(struct drm_file *file) idr_destroy(&file_priv->context_idr); } -static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +static struct i915_request * +last_request_on_engine(struct i915_timeline *timeline, + struct intel_engine_cs *engine) { - struct i915_gem_timeline *timeline; + struct i915_request *rq; - list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { - struct intel_timeline *tl; + if (timeline == &engine->timeline) + return NULL; - if (timeline == &engine->i915->gt.global_timeline) - continue; + rq = i915_gem_active_raw(&timeline->last_request, + &engine->i915->drm.struct_mutex); + if (rq && rq->engine == engine) + return rq; + + return NULL; +} - tl = &timeline->engine[engine->id]; - if (i915_gem_active_peek(&tl->last_request, - &engine->i915->drm.struct_mutex)) +static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_timeline *timeline; + + list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { + if (last_request_on_engine(timeline, engine)) return false; } @@ -592,7 +609,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; enum intel_engine_id id; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -612,11 +629,8 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, - &dev_priv->drm.struct_mutex); + prev = last_request_on_engine(timeline, engine); if (prev) i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, @@ -746,7 +760,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_bannable(ctx); break; case I915_CONTEXT_PARAM_PRIORITY: - args->value = ctx->priority; + args->value = ctx->sched.priority; break; default: ret = -EINVAL; @@ -819,7 +833,7 @@ int 
i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, !capable(CAP_SYS_NICE)) ret = -EPERM; else - ctx->priority = priority; + ctx->sched.priority = priority; } break; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 7854262ddfd9..ace3b129c189 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -137,18 +137,7 @@ struct i915_gem_context { */ u32 user_handle; - /** - * @priority: execution and service priority - * - * All clients are equal, but some are more equal than others! - * - * Requests from a context with a greater (more positive) value of - * @priority will be executed before those with a lower @priority - * value, forming a simple QoS. - * - * The &drm_i915_private.kernel_context is assigned the lowest priority. - */ - int priority; + struct i915_sched_attr sched; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; @@ -160,7 +149,7 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - } engine[I915_NUM_ENGINES]; + } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; @@ -267,6 +256,34 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) return !ctx->file_priv; } +static inline struct intel_context * +to_intel_context(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + return &ctx->__engine[engine->id]; +} + +static inline struct intel_ring * +intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + return engine->context_pin(engine, ctx); +} + +static inline void __intel_context_pin(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + struct intel_context *ce = to_intel_context(ctx, engine); + + GEM_BUG_ON(!ce->pin_count); + ce->pin_count++; +} + +static inline void intel_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + engine->context_unpin(engine, ctx); +} + /* i915_gem_context.c */ int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0414228cd2b5..f627a8c47c58 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -81,6 +81,35 @@ enum { * but this remains just a hint as the kernel may choose a new location for * any object in the future. * + * At the level of talking to the hardware, submitting a batchbuffer for the + * GPU to execute is to add content to a buffer from which the HW + * command streamer is reading. + * + * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e. + * Execlists, this command is not placed on the same buffer as the + * remaining items. + * + * 2. Add a command to invalidate caches to the buffer. + * + * 3. Add a batchbuffer start command to the buffer; the start command is + * essentially a token together with the GPU address of the batchbuffer + * to be executed. + * + * 4. Add a pipeline flush to the buffer. + * + * 5. Add a memory write command to the buffer to record when the GPU + * is done executing the batchbuffer. 
The memory write writes the + * global sequence number of the request, ``i915_request::global_seqno``; + * the i915 driver uses the current value in the register to determine + * if the GPU has completed the batchbuffer. + * + * 6. Add a user interrupt command to the buffer. This command instructs + * the GPU to issue an interrupt when the command, pipeline flush and + * memory write are completed. + * + * 7. Inform the hardware of the additional commands added to the buffer + * (by updating the tail pointer). + * * Processing an execbuf ioctl is conceptually split up into a few phases. * * 1. Validation - Ensure all the pointers, handles and flags are valid. @@ -733,7 +762,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) } /* transfer ref to ctx */ - vma->open_count++; + if (!vma->open_count++) + i915_vma_reopen(vma); list_add(&lut->obj_link, &obj->lut_list); list_add(&lut->ctx_link, &eb->ctx->handles_list); lut->ctx = eb->ctx; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 21d72f695adb..996ab2ad6c45 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -110,7 +110,8 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma); static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) { - /* Note that as an uncached mmio write, this should flush the + /* + * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. */ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); @@ -1161,6 +1162,27 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, vaddr[idx.pde] |= GEN8_PDE_IPS_64K; kunmap_atomic(vaddr); page_size = I915_GTT_PAGE_SIZE_64K; + + /* + * We write all 4K page entries, even when using 64K + * pages. In order to verify that the HW isn't cheating + * by using the 4K PTE instead of the 64K PTE, we want + * to remove all the surplus entries. If the HW skipped + * the 64K PTE, it will read/write into the scratch page + * instead - which we detect as missing results during + * selftests. + */ + if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + u16 i; + + encode = pte_encode | vma->vm->scratch_page.daddr; + vaddr = kmap_atomic_px(pd->page_table[idx.pde]); + + for (i = 1; i < index; i += 16) + memset64(vaddr + i, encode, 15); + + kunmap_atomic(vaddr); + } } vma->page_sizes.gtt |= page_size; @@ -2111,8 +2133,6 @@ static void i915_address_space_init(struct i915_address_space *vm, struct drm_i915_private *dev_priv, const char *name) { - i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; @@ -2129,7 +2149,6 @@ static void i915_address_space_fini(struct i915_address_space *vm) if (pagevec_count(&vm->free_pages)) vm_free_pages_release(vm, true); - i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); } @@ -2140,15 +2159,15 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. 
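Stepping back to the batchbuffer submission sequence documented in i915_gem_execbuffer.c above, the seven steps can be made concrete with a small self-contained sketch. Everything in it is illustrative: the opcodes are invented placeholders and a plain array stands in for the HW ring; only the ordering of the steps is taken from the documentation.

#include <stdint.h>

/* Invented placeholder opcodes; the real encodings are per-generation. */
enum { OP_LOAD_CTX = 1, OP_INVALIDATE, OP_BB_START, OP_FLUSH,
       OP_STORE_DW, OP_USER_IRQ };

struct sketch_ring {
	uint32_t cs[256];	/* command stream the HW would read */
	unsigned int tail;	/* next free dword */
};

static void emit(struct sketch_ring *ring, uint32_t dw)
{
	ring->cs[ring->tail++] = dw;
}

static void sketch_submit_batch(struct sketch_ring *ring,
				uint32_t batch_addr, uint32_t seqno)
{
	emit(ring, OP_LOAD_CTX);	/* 1: load the HW context */
	emit(ring, OP_INVALIDATE);	/* 2: invalidate caches */
	emit(ring, OP_BB_START);	/* 3: start token ... */
	emit(ring, batch_addr);		/*    ... plus the batch GPU address */
	emit(ring, OP_FLUSH);		/* 4: pipeline flush */
	emit(ring, OP_STORE_DW);	/* 5: record the global seqno ... */
	emit(ring, seqno);		/*    ... i.e. i915_request::global_seqno */
	emit(ring, OP_USER_IRQ);	/* 6: interrupt once 4-6 have completed */
	/* 7: publish the new tail so the HW sees the added commands */
}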
*/ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv)) - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + else if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); /* * To support 64K PTEs we need to first enable the use of the @@ -2222,6 +2241,12 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, void i915_ppgtt_close(struct i915_address_space *vm) { + GEM_BUG_ON(vm->closed); + vm->closed = true; +} + +static void ppgtt_destroy_vma(struct i915_address_space *vm) +{ struct list_head *phases[] = { &vm->active_list, &vm->inactive_list, @@ -2229,15 +2254,12 @@ void i915_ppgtt_close(struct i915_address_space *vm) NULL, }, **phase; - GEM_BUG_ON(vm->closed); vm->closed = true; - for (phase = phases; *phase; phase++) { struct i915_vma *vma, *vn; list_for_each_entry_safe(vma, vn, *phase, vm_link) - if (!i915_vma_is_closed(vma)) - i915_vma_close(vma); + i915_vma_destroy(vma); } } @@ -2248,7 +2270,8 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); - /* vmas should already be unbound and destroyed */ + ppgtt_destroy_vma(&ppgtt->base); + GEM_BUG_ON(!list_empty(&ppgtt->base.active_list)); GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list)); GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list)); @@ -2417,11 +2440,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, for_each_sgt_dma(addr, sgt_iter, vma->pages) gen8_set_pte(gtt_entries++, pte_encode | addr); - wmb(); - - /* This next bit makes the above posting read even more important. We - * want to flush the TLBs only after we're certain all the PTE updates - * have finished. + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. */ ggtt->invalidate(vm->i915); } @@ -2459,11 +2480,10 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, dma_addr_t addr; for_each_sgt_dma(addr, iter, vma->pages) iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); - wmb(); - /* This next bit makes the above posting read even more important. We - * want to flush the TLBs only after we're certain all the PTE updates - * have finished. + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. 
*/ ggtt->invalidate(vm->i915); } @@ -3325,14 +3345,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - - if (INTEL_GEN(dev_priv) >= 9) { - size = gen8_get_total_gtt_size(snb_gmch_ctl); - } else if (IS_CHERRYVIEW(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) size = chv_get_total_gtt_size(snb_gmch_ctl); - } else { + else size = gen8_get_total_gtt_size(snb_gmch_ctl); - } ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; ggtt->base.cleanup = gen6_gmch_remove; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6efc017e8bb3..aec4f73574f4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,10 +38,9 @@ #include <linux/mm.h> #include <linux/pagevec.h> -#include "i915_gem_timeline.h" - #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" #define I915_GTT_PAGE_SIZE_4K BIT(12) #define I915_GTT_PAGE_SIZE_64K BIT(16) @@ -257,7 +256,6 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; - struct i915_gem_timeline timeline; struct drm_i915_private *i915; struct device *dma; /* Every address space belongs to a struct file - except for the global @@ -344,6 +342,7 @@ struct i915_address_space { void (*clear_pages)(struct i915_vma *vma); I915_SELFTEST_DECLARE(struct fault_attr fault_attr); + I915_SELFTEST_DECLARE(bool scrub_64K); }; #define i915_is_ggtt(V) (!(V)->file) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 62aa67960bf4..ad949cc30928 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -51,6 +51,10 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, if (!drm_mm_initialized(&dev_priv->mm.stolen)) return -ENODEV; + /* WaSkipStolenMemoryFirstPage:bdw+ */ + if (INTEL_GEN(dev_priv) >= 8 && start < 4096) + start = 4096; + mutex_lock(&dev_priv->mm.stolen_lock); ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, size, alignment, 0, @@ -121,8 +125,8 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { - DRM_DEBUG_KMS("GTT within stolen memory at %pR\n", &ggtt_res); - DRM_DEBUG_KMS("Stolen memory adjusted to %pR\n", dsm); + DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res); + DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm); } } @@ -174,18 +178,19 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? - CTG_STOLEN_RESERVED : - ELK_STOLEN_RESERVED); + u32 reg_val = I915_READ(IS_GM45(dev_priv) ? + CTG_STOLEN_RESERVED : + ELK_STOLEN_RESERVED); resource_size_t stolen_top = dev_priv->dsm.end + 1; - if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", + IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); + + if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) return; - } /* * Whether ILK really reuses the ELK register for this is unclear. @@ -193,30 +198,25 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, */ WARN(IS_GEN5(dev_priv), "ILK stolen reserved found? 
0x%08x\n", reg_val); - *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; + if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) + return; + *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); - /* On these platforms, the register doesn't have a size field, so the - * size is the distance between the base and the top of the stolen - * memory. We also have the genuine case where base is zero and there's - * nothing reserved. */ - if (*base == 0) - *size = 0; - else - *size = stolen_top - *base; + *size = stolen_top - *base; } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; - } *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; @@ -239,17 +239,44 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) +static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; + + switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { + default: + MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); + case GEN7_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; } + /* + * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the + * reserved location as (top - size). 
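Worked through with illustrative numbers (the values below are examples, not read from any platform), the deduction described above is plain subtraction:

/* Illustrative only: VLV leaves the ADDR_MASK bits zero, so the
 * reserved block is assumed to sit at the very top of stolen memory. */
resource_size_t stolen_top = 0x80000000;	/* example: end of stolen + 1 */
resource_size_t size = 1024 * 1024;		/* GEN7_STOLEN_RESERVED_1M */
resource_size_t base = stolen_top - size;	/* 0x7ff00000 */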
+ */ + *base = stolen_top - *size; +} + +static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK; switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { @@ -266,15 +293,15 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; - } *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; @@ -298,36 +325,28 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top; + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; - if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; - } - stolen_top = dev_priv->dsm.end + 1; + if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK)) + return; *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; - - /* On these platforms, the register doesn't have a size field, so the - * size is the distance between the base and the top of the stolen - * memory. We also have the genuine case where base is zero and there's - * nothing reserved. */ - if (*base == 0) - *size = 0; - else - *size = stolen_top - *base; + *size = stolen_top - *base; } int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; - resource_size_t stolen_usable_start; mutex_init(&dev_priv->mm.stolen_lock); @@ -353,7 +372,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); stolen_top = dev_priv->dsm.end + 1; - reserved_base = 0; + reserved_base = stolen_top; reserved_size = 0; switch (INTEL_GEN(dev_priv)) { @@ -373,8 +392,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) &reserved_base, &reserved_size); break; case 7: - gen7_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); + if (IS_VALLEYVIEW(dev_priv)) + vlv_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + else + gen7_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); break; default: if (IS_LP(dev_priv)) @@ -386,11 +409,16 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) break; } - /* It is possible for the reserved base to be zero, but the register - * field for size doesn't have a zero option. */ - if (reserved_base == 0) { - reserved_size = 0; + /* + * Our expectation is that the reserved space is at the top of the + * stolen region and *never* at the bottom. 
If we see !reserved_base, + it likely means we failed to read the registers correctly. + */ + if (!reserved_base) { + DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n", + &reserved_base, &reserved_size); reserved_base = stolen_top; + reserved_size = 0; } dev_priv->dsm_reserved = @@ -406,21 +434,15 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; - DRM_DEBUG_KMS("Memory reserved for graphics device: %lluK, usable: %lluK\n", - (u64)resource_size(&dev_priv->dsm) >> 10, - ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); - - stolen_usable_start = 0; - /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(dev_priv) >= 8) - stolen_usable_start = 4096; + DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", + (u64)resource_size(&dev_priv->dsm) >> 10, + ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; + resource_size(&dev_priv->dsm) - reserved_total; /* Basic memrange allocator for stolen space. */ - drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, - dev_priv->stolen_usable_size); + drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); return 0; } @@ -580,8 +602,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv lockdep_assert_held(&dev_priv->drm.struct_mutex); - DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", - &stolen_offset, &gtt_offset, &size); + DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", + &stolen_offset, &gtt_offset, &size); /* KISS and expect everything to be page-aligned */ if (WARN_ON(size == 0) || @@ -599,14 +621,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen); mutex_unlock(&dev_priv->mm.stolen_lock); if (ret) { - DRM_DEBUG_KMS("failed to allocate stolen space\n"); + DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); kfree(stolen); return NULL; } obj = _i915_gem_object_create_stolen(dev_priv, stolen); if (obj == NULL) { - DRM_DEBUG_KMS("failed to allocate stolen object\n"); + DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); i915_gem_stolen_remove_node(dev_priv, stolen); kfree(stolen); return NULL; } @@ -635,7 +657,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv size, gtt_offset, obj->cache_level, 0); if (ret) { - DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); + DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); goto err_pages; } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c deleted file mode 100644 index e9fd87604067..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission
notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "i915_syncmap.h" - -static void __intel_timeline_init(struct intel_timeline *tl, - struct i915_gem_timeline *parent, - u64 context, - struct lock_class_key *lockclass, - const char *lockname) -{ - tl->fence_context = context; - tl->common = parent; - spin_lock_init(&tl->lock); - lockdep_set_class_and_name(&tl->lock, lockclass, lockname); - init_request_active(&tl->last_request, NULL); - INIT_LIST_HEAD(&tl->requests); - i915_syncmap_init(&tl->sync); -} - -static void __intel_timeline_fini(struct intel_timeline *tl) -{ - GEM_BUG_ON(!list_empty(&tl->requests)); - - i915_syncmap_free(&tl->sync); -} - -static int __i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name, - struct lock_class_key *lockclass, - const char *lockname) -{ - unsigned int i; - u64 fences; - - lockdep_assert_held(&i915->drm.struct_mutex); - - /* - * Ideally we want a set of engines on a single leaf as we expect - * to mostly be tracking synchronisation between engines. It is not - * a huge issue if this is not the case, but we may want to mitigate - * any page crossing penalties if they become an issue. - */ - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); - - timeline->i915 = i915; - timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); - if (!timeline->name) - return -ENOMEM; - - list_add(&timeline->link, &i915->gt.timelines); - - /* Called during early_init before we know how many engines there are */ - fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_init(&timeline->engine[i], - timeline, fences++, - lockclass, lockname); - - return 0; -} - -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, timeline, name, - &class, "&timeline->lock"); -} - -int i915_gem_timeline_init__global(struct drm_i915_private *i915) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, - &i915->gt.global_timeline, - "[execution]", - &class, "&global_timeline->lock"); -} - -/** - * i915_gem_timelines_park - called when the driver idles - * @i915: the drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. 
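The short-circuit that makes discarding the map safe can be sketched with a hypothetical helper (the real lookup goes through i915_syncmap; only dma_fence_is_signaled() and dma_fence_wait() below are real kernel APIs):

#include <linux/dma-fence.h>

/* Sketch: after parking, every previously tracked fence is already
 * signaled, so a waiter returns before it would consult the sync map. */
static long sketch_wait_on_sync_point(struct dma_fence *fence)
{
	if (dma_fence_is_signaled(fence))
		return 0;	/* the common case once idle */

	return dma_fence_wait(fence, true);	/* interruptible wait */
}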
- */ -void i915_gem_timelines_park(struct drm_i915_private *i915) -{ - struct i915_gem_timeline *timeline; - int i; - - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. - */ - i915_syncmap_free(&tl->sync); - } - } -} - -void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) -{ - int i; - - lockdep_assert_held(&timeline->i915->drm.struct_mutex); - - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_fini(&timeline->engine[i]); - - list_del(&timeline->link); - kfree(timeline->name); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_timeline.c" -#include "selftests/i915_gem_timeline.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f89ac7a8f95f..df234dc23274 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -32,6 +32,7 @@ #include <linux/zlib.h> #include <drm/drm_print.h> +#include "i915_gpu_error.h" #include "i915_drv.h" static inline const struct intel_engine_cs * @@ -403,16 +404,17 @@ static const char *bannable(const struct drm_i915_error_context *ctx) static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - const struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq, + const unsigned long epoch) { if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->priority, - jiffies_to_msecs(jiffies - erq->jiffies), - erq->head, erq->tail); + erq->context, erq->seqno, erq->sched_attr.priority, + jiffies_to_msecs(erq->jiffies - epoch), + erq->start, erq->head, erq->tail); } static void error_print_context(struct drm_i915_error_state_buf *m, @@ -421,12 +423,13 @@ static void error_print_context(struct drm_i915_error_state_buf *m, { err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->priority, ctx->ban_score, bannable(ctx), + ctx->sched_attr.priority, ctx->ban_score, bannable(ctx), ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, - const struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee, + const unsigned long epoch) { int n; @@ -496,14 +499,15 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); err_printf(m, " hangcheck action: %s\n", hangcheck_action_to_str(ee->hangcheck_action)); - err_printf(m, " hangcheck action timestamp: %lu, %u ms ago\n", + err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", + jiffies_to_msecs(ee->hangcheck_timestamp - epoch), ee->hangcheck_timestamp, - jiffies_to_msecs(jiffies - ee->hangcheck_timestamp)); + ee->hangcheck_timestamp == epoch ? 
"; epoch" : ""); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { err_printf(m, " ELSP[%d]:", n); - error_print_request(m, " ", &ee->execlist[n]); + error_print_request(m, " ", &ee->execlist[n], epoch); } error_print_context(m, " Active context: ", &ee->context); @@ -649,6 +653,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ts = ktime_to_timespec64(error->uptime); err_printf(m, "Uptime: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); + err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); + err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", + error->capture, + jiffies_to_msecs(jiffies - error->capture), + jiffies_to_msecs(error->capture - error->epoch)); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && @@ -709,7 +718,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].engine_id != -1) - error_print_engine(m, &error->engine[i]); + error_print_engine(m, &error->engine[i], error->epoch); } for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { @@ -768,7 +777,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, dev_priv->engine[i]->name, ee->num_requests); for (j = 0; j < ee->num_requests; j++) - error_print_request(m, " ", &ee->requests[j]); + error_print_request(m, " ", + &ee->requests[j], + error->epoch); } if (IS_ERR(ee->waiters)) { @@ -1277,10 +1288,11 @@ static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; - erq->priority = request->priotree.priority; + erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; + erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; @@ -1298,7 +1310,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) + list_for_each_entry_from(request, &engine->timeline.requests, link) count++; if (!count) return; @@ -1311,7 +1323,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) { + list_for_each_entry_from(request, &engine->timeline.requests, link) { if (count >= ee->num_requests) { /* * If the ring request list was changed in @@ -1371,7 +1383,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; - e->priority = ctx->priority; + e->sched_attr = ctx->sched; e->ban_score = atomic_read(&ctx->ban_score); e->bannable = i915_gem_context_is_bannable(ctx); e->guilty = atomic_read(&ctx->guilty_count); @@ -1471,7 +1483,8 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->ctx = i915_error_object_create(i915, - request->ctx->engine[i].state); + to_intel_context(request->ctx, + engine)->state); error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1734,6 +1747,22 @@ static void capture_params(struct i915_gpu_state *error) #undef DUP } +static unsigned long capture_find_epoch(const struct i915_gpu_state *error) +{ + unsigned long epoch = error->capture; + int i; + + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + const struct drm_i915_error_engine *ee 
= &error->engine[i]; + + if (ee->hangcheck_stalled && + time_before(ee->hangcheck_timestamp, epoch)) + epoch = ee->hangcheck_timestamp; + } + + return epoch; +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1742,6 +1771,7 @@ static int capture(void *data) error->boottime = ktime_get_boottime(); error->uptime = ktime_sub(ktime_get(), error->i915->gt.last_init_time); + error->capture = jiffies; capture_params(error); capture_gen_state(error); @@ -1755,6 +1785,8 @@ static int capture(void *data) error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); + error->epoch = capture_find_epoch(error); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h new file mode 100644 index 000000000000..dac0f8c4c1cf --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -0,0 +1,366 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008-2018 Intel Corporation + */ + +#ifndef _I915_GPU_ERROR_H_ +#define _I915_GPU_ERROR_H_ + +#include <linux/kref.h> +#include <linux/ktime.h> +#include <linux/sched.h> + +#include <drm/drm_mm.h> + +#include "intel_device_info.h" +#include "intel_ringbuffer.h" +#include "intel_uc_fw.h" + +#include "i915_gem.h" +#include "i915_gem_gtt.h" +#include "i915_params.h" +#include "i915_scheduler.h" + +struct drm_i915_private; +struct intel_overlay_error_state; +struct intel_display_error_state; + +struct i915_gpu_state { + struct kref ref; + ktime_t time; + ktime_t boottime; + ktime_t uptime; + unsigned long capture; + unsigned long epoch; + + struct drm_i915_private *i915; + + char error_msg[128]; + bool simulated; + bool awake; + bool wakelock; + bool suspended; + int iommu; + u32 reset_count; + u32 suspend_count; + struct intel_device_info device_info; + struct intel_driver_caps driver_caps; + struct i915_params params; + + struct i915_error_uc { + struct intel_uc_fw guc_fw; + struct intel_uc_fw huc_fw; + struct drm_i915_error_object *guc_log; + } uc; + + /* Generic register state */ + u32 eir; + u32 pgtbl_er; + u32 ier; + u32 gtier[4], ngtier; + u32 ccid; + u32 derrmr; + u32 forcewake; + u32 error; /* gen6+ */ + u32 err_int; /* gen7 */ + u32 fault_data0; /* gen8, gen9 */ + u32 fault_data1; /* gen8, gen9 */ + u32 done_reg; + u32 gac_eco; + u32 gam_ecochk; + u32 gab_ctl; + u32 gfx_mode; + + u32 nfence; + u64 fence[I915_MAX_NUM_FENCES]; + struct intel_overlay_error_state *overlay; + struct intel_display_error_state *display; + + struct drm_i915_error_engine { + int engine_id; + /* Software tracked state */ + bool idle; + bool waiting; + int num_waiters; + unsigned long hangcheck_timestamp; + bool hangcheck_stalled; + enum intel_engine_hangcheck_action hangcheck_action; + struct i915_address_space *vm; + int num_requests; + u32 reset_count; + + /* position of active request inside the ring */ + u32 rq_head, rq_post, rq_tail; + + /* our own tracking of ring head and tail */ + u32 cpu_ring_head; + u32 cpu_ring_tail; + + u32 last_seqno; + + /* Register state */ + u32 start; + u32 tail; + u32 head; + u32 ctl; + u32 mode; + u32 hws; + u32 ipeir; + u32 ipehr; + u32 bbstate; + u32 instpm; + u32 instps; + u32 seqno; + u64 bbaddr; + u64 acthd; + u32 fault_reg; + u64 faddr; + u32 rc_psmi; /* sleep state */ + u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; + struct intel_instdone instdone; + + struct drm_i915_error_context { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 handle; + u32 hw_id; + int ban_score; + int active; + int guilty;
+ bool bannable; + struct i915_sched_attr sched_attr; + } context; + + struct drm_i915_error_object { + u64 gtt_offset; + u64 gtt_size; + int page_count; + int unused; + u32 *pages[0]; + } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; + + struct drm_i915_error_object **user_bo; + long user_bo_count; + + struct drm_i915_error_object *wa_ctx; + struct drm_i915_error_object *default_state; + + struct drm_i915_error_request { + long jiffies; + pid_t pid; + u32 context; + int ban_score; + u32 seqno; + u32 start; + u32 head; + u32 tail; + struct i915_sched_attr sched_attr; + } *requests, execlist[EXECLIST_MAX_PORTS]; + unsigned int num_ports; + + struct drm_i915_error_waiter { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 seqno; + } *waiters; + + struct { + u32 gfx_mode; + union { + u64 pdp[4]; + u32 pp_dir_base; + }; + } vm_info; + } engine[I915_NUM_ENGINES]; + + struct drm_i915_error_buffer { + u32 size; + u32 name; + u32 rseqno[I915_NUM_ENGINES], wseqno; + u64 gtt_offset; + u32 read_domains; + u32 write_domain; + s32 fence_reg:I915_MAX_NUM_FENCE_BITS; + u32 tiling:2; + u32 dirty:1; + u32 purgeable:1; + u32 userptr:1; + s32 engine:4; + u32 cache_level:3; + } *active_bo[I915_NUM_ENGINES], *pinned_bo; + u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; + struct i915_address_space *active_vm[I915_NUM_ENGINES]; +}; + +struct i915_gpu_error { + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ +#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) + + struct delayed_work hangcheck_work; + + /* For reset and error_state handling. */ + spinlock_t lock; + /* Protected by the above dev->gpu_error.lock. */ + struct i915_gpu_state *first_error; + + atomic_t pending_fb_pin; + + unsigned long missed_irq_rings; + + /** + * State variable controlling the reset flow and count + * + * This is a counter which gets incremented when reset is triggered, + * + * Before the reset commences, the I915_RESET_BACKOFF bit is set + * meaning that any waiters holding onto the struct_mutex should + * relinquish the lock immediately in order for the reset to start. + * + * If reset is not completed successfully, the I915_WEDGE bit is + * set meaning that hardware is terminally sour and there is no + * recovery. All waiters on the reset_queue will be woken when + * that happens. + * + * This counter is used by the wait_seqno code to notice that reset + * event happened and it needs to restart the entire ioctl (since most + * likely the seqno it waited for won't ever signal anytime soon). + * + * This is important for lock-free wait paths, where no contended lock + * naturally enforces the correct ordering between the bail-out of the + * waiter and the gpu reset work code. + */ + unsigned long reset_count; + + /** + * flags: Control various stages of the GPU reset + * + * #I915_RESET_BACKOFF - When we start a reset, we want to stop any + * other users acquiring the struct_mutex. To do this we set the + * #I915_RESET_BACKOFF bit in the error flags when we detect a reset + * and then check for that bit before acquiring the struct_mutex (in + * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a + * secondary role in preventing two concurrent global reset attempts. + * + * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the + * struct_mutex. We try to acquire the struct_mutex in the reset worker, + * but it may be held by some long running waiter (that we cannot + * interrupt without causing trouble). 
Once we are ready to do the GPU + * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If + * they already hold the struct_mutex and want to participate they can + * inspect the bit and do the reset directly, otherwise the worker + * waits for the struct_mutex. + * + * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to + * acquire the struct_mutex to reset an engine, we need an explicit + * flag to prevent two concurrent reset attempts in the same engine. + * As the number of engines continues to grow, allocate the flags from + * the most significant bits. + * + * #I915_WEDGED - If reset fails and we can no longer use the GPU, + * we set the #I915_WEDGED bit. Prior to command submission, e.g. + * i915_request_alloc(), this bit is checked and the sequence + * aborted (with -EIO reported to userspace) if set. + */ + unsigned long flags; +#define I915_RESET_BACKOFF 0 +#define I915_RESET_HANDOFF 1 +#define I915_RESET_MODESET 2 +#define I915_WEDGED (BITS_PER_LONG - 1) +#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES) + + /** Number of times an engine has been reset */ + u32 reset_engine_count[I915_NUM_ENGINES]; + + /** Set of stalled engines with guilty requests, in the current reset */ + u32 stalled_mask; + + /** Reason for the current *global* reset */ + const char *reason; + + /** + * Waitqueue to signal when a hang is detected. Used to for waiters + * to release the struct_mutex for the reset to procede. + */ + wait_queue_head_t wait_queue; + + /** + * Waitqueue to signal when the reset has completed. Used by clients + * that wait for dev_priv->mm.wedged to settle. + */ + wait_queue_head_t reset_queue; + + /* For missed irq/seqno simulation. */ + unsigned long test_irq_rings; +}; + +struct drm_i915_error_state_buf { + struct drm_i915_private *i915; + unsigned int bytes; + unsigned int size; + int err; + u8 *buf; + loff_t start; + loff_t pos; +}; + +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +__printf(2, 3) +void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); +int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, + const struct i915_gpu_state *gpu); +int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, + struct drm_i915_private *i915, + size_t count, loff_t pos); + +static inline void +i915_error_state_buf_release(struct drm_i915_error_state_buf *eb) +{ + kfree(eb->buf); +} + +struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); +void i915_capture_error_state(struct drm_i915_private *dev_priv, + u32 engine_mask, + const char *error_msg); + +static inline struct i915_gpu_state * +i915_gpu_state_get(struct i915_gpu_state *gpu) +{ + kref_get(&gpu->ref); + return gpu; +} + +void __i915_gpu_state_free(struct kref *kref); +static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +{ + if (gpu) + kref_put(&gpu->ref, __i915_gpu_state_free); +} + +struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +void i915_reset_error_state(struct drm_i915_private *i915); + +#else + +static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, + u32 engine_mask, + const char *error_msg) +{ +} + +static inline struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) +{ + return NULL; +} + +static inline void i915_reset_error_state(struct drm_i915_private *i915) +{ +} + +#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ + +#endif /* _I915_GPU_ERROR_H_ */ diff --git a/drivers/gpu/drm/i915/i915_irq.c 
b/drivers/gpu/drm/i915/i915_irq.c index 633c18785c1e..f9bc3aaa90d0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -243,6 +243,41 @@ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, spin_unlock_irq(&dev_priv->irq_lock); } +static u32 +gen11_gt_engine_identity(struct drm_i915_private * const i915, + const unsigned int bank, const unsigned int bit); + +bool gen11_reset_one_iir(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int bit) +{ + void __iomem * const regs = i915->regs; + u32 dw; + + lockdep_assert_held(&i915->irq_lock); + + dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + if (dw & BIT(bit)) { + /* + * According to the BSpec, DW_IIR bits cannot be cleared without + * first servicing the Selector & Shared IIR registers. + */ + gen11_gt_engine_identity(i915, bank, bit); + + /* + * We locked GT INT DW by reading it. If we want to (try + * to) recover from this successfully, we need to clear + * our bit, otherwise we are locking the register for + * everybody. + */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), BIT(bit)); + + return true; + } + + return false; +} + /** * ilk_update_display_irq - update DEIMR * @dev_priv: driver private @@ -308,17 +343,29 @@ void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv) { + WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11); + return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; } static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv) { - return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR; + if (INTEL_GEN(dev_priv) >= 11) + return GEN11_GPM_WGBOXPERF_INTR_MASK; + else if (INTEL_GEN(dev_priv) >= 8) + return GEN8_GT_IMR(2); + else + return GEN6_PMIMR; } static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv) { - return INTEL_GEN(dev_priv) >= 8 ?
GEN8_GT_IER(2) : GEN6_PMIER; + if (INTEL_GEN(dev_priv) >= 11) + return GEN11_GPM_WGBOXPERF_INTR_ENABLE; + else if (INTEL_GEN(dev_priv) >= 8) + return GEN8_GT_IER(2); + else + return GEN6_PMIER; } /** @@ -400,6 +447,18 @@ static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_m /* though a barrier is missing here, but don't really need a one */ } +void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv) +{ + spin_lock_irq(&dev_priv->irq_lock); + + while (gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM)) + ; + + dev_priv->gt_pm.rps.pm_iir = 0; + + spin_unlock_irq(&dev_priv->irq_lock); +} + void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) { spin_lock_irq(&dev_priv->irq_lock); @@ -415,12 +474,14 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) if (READ_ONCE(rps->interrupts_enabled)) return; - if (WARN_ON_ONCE(IS_GEN11(dev_priv))) - return; - spin_lock_irq(&dev_priv->irq_lock); WARN_ON_ONCE(rps->pm_iir); - WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); + + if (INTEL_GEN(dev_priv) >= 11) + WARN_ON_ONCE(gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM)); + else + WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); + rps->interrupts_enabled = true; gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); @@ -434,9 +495,6 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) if (!READ_ONCE(rps->interrupts_enabled)) return; - if (WARN_ON_ONCE(IS_GEN11(dev_priv))) - return; - spin_lock_irq(&dev_priv->irq_lock); rps->interrupts_enabled = false; @@ -453,7 +511,10 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) * state of the worker can be discarded. */ cancel_work_sync(&rps->work); - gen6_reset_rps_interrupts(dev_priv); + if (INTEL_GEN(dev_priv) >= 11) + gen11_reset_rps_interrupts(dev_priv); + else + gen6_reset_rps_interrupts(dev_priv); } void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv) @@ -1399,19 +1460,18 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, } static void -gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) +gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) { struct intel_engine_execlists * const execlists = &engine->execlists; bool tasklet = false; - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - if (READ_ONCE(engine->execlists.active)) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; - } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { + if (READ_ONCE(engine->execlists.active)) + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted); } - if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { + if (iir & GT_RENDER_USER_INTERRUPT) { notify_ring(engine); tasklet |= USES_GUC_SUBMISSION(engine->i915); } @@ -1466,21 +1526,21 @@ static void gen8_gt_irq_handler(struct drm_i915_private *i915, { if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { gen8_cs_irq_handler(i915->engine[RCS], - gt_iir[0], GEN8_RCS_IRQ_SHIFT); + gt_iir[0] >> GEN8_RCS_IRQ_SHIFT); gen8_cs_irq_handler(i915->engine[BCS], - gt_iir[0], GEN8_BCS_IRQ_SHIFT); + gt_iir[0] >> GEN8_BCS_IRQ_SHIFT); } if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { gen8_cs_irq_handler(i915->engine[VCS], - gt_iir[1], GEN8_VCS1_IRQ_SHIFT); + gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT); gen8_cs_irq_handler(i915->engine[VCS2], - gt_iir[1], GEN8_VCS2_IRQ_SHIFT); + gt_iir[1] >> GEN8_VCS2_IRQ_SHIFT); } if (master_ctl & GEN8_GT_VECS_IRQ) { gen8_cs_irq_handler(i915->engine[VECS], - 
gt_iir[3], GEN8_VECS_IRQ_SHIFT); + gt_iir[3] >> GEN8_VECS_IRQ_SHIFT); } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { @@ -1627,7 +1687,7 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, int head, tail; spin_lock(&pipe_crc->lock); - if (pipe_crc->source) { + if (pipe_crc->source && !crtc->base.crc.opened) { if (!pipe_crc->entries) { spin_unlock(&pipe_crc->lock); DRM_DEBUG_KMS("spurious interrupt\n"); @@ -1667,7 +1727,7 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, * On GEN8+ sometimes the second CRC is bonkers as well, so * don't trust that one either. */ - if (pipe_crc->skipped == 0 || + if (pipe_crc->skipped <= 0 || (INTEL_GEN(dev_priv) >= 8 && pipe_crc->skipped == 1)) { pipe_crc->skipped++; spin_unlock(&pipe_crc->lock); @@ -1766,37 +1826,8 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { - if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) { - /* Sample the log buffer flush related bits & clear them out now - * itself from the message identity register to minimize the - * probability of losing a flush interrupt, when there are back - * to back flush interrupts. - * There can be a new flush interrupt, for different log buffer - * type (like for ISR), whilst Host is handling one (for DPC). - * Since same bit is used in message register for ISR & DPC, it - * could happen that GuC sets the bit for 2nd interrupt but Host - * clears out the bit on handling the 1st interrupt. - */ - u32 msg, flush; - - msg = I915_READ(SOFT_SCRATCH(15)); - flush = msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED | - INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER); - if (flush) { - /* Clear the message bits that are handled */ - I915_WRITE(SOFT_SCRATCH(15), msg & ~flush); - - /* Handle flush interrupt in bottom half */ - queue_work(dev_priv->guc.log.runtime.flush_wq, - &dev_priv->guc.log.runtime.flush_work); - - dev_priv->guc.log.flush_interrupt_count++; - } else { - /* Not clearing of unhandled event bits won't result in - * re-triggering of the interrupt. 
- */ - } - } + if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) + intel_guc_to_host_event_handler(&dev_priv->guc); } static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) @@ -2433,6 +2464,13 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, if (de_iir & DE_ERR_INT_IVB) ivb_err_int_handler(dev_priv); + if (de_iir & DE_EDP_PSR_INT_HSW) { + u32 psr_iir = I915_READ(EDP_PSR_IIR); + + intel_psr_irq_handler(dev_priv, psr_iir); + I915_WRITE(EDP_PSR_IIR, psr_iir); + } + if (de_iir & DE_AUX_CHANNEL_A_IVB) dp_aux_irq_handler(dev_priv); @@ -2562,11 +2600,25 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (master_ctl & GEN8_DE_MISC_IRQ) { iir = I915_READ(GEN8_DE_MISC_IIR); if (iir) { + bool found = false; + I915_WRITE(GEN8_DE_MISC_IIR, iir); ret = IRQ_HANDLED; - if (iir & GEN8_DE_MISC_GSE) + + if (iir & GEN8_DE_MISC_GSE) { intel_opregion_asle_intr(dev_priv); - else + found = true; + } + + if (iir & GEN8_DE_EDP_PSR) { + u32 psr_iir = I915_READ(EDP_PSR_IIR); + + intel_psr_irq_handler(dev_priv, psr_iir); + I915_WRITE(EDP_PSR_IIR, psr_iir); + found = true; + } + + if (!found) DRM_ERROR("Unexpected DE Misc interrupt\n"); } else @@ -2762,58 +2814,16 @@ static void __fini_wedge(struct wedge_me *w) (W)->i915; \ __fini_wedge((W))) -static __always_inline void -gen11_cs_irq_handler(struct intel_engine_cs * const engine, const u32 iir) -{ - gen8_cs_irq_handler(engine, iir, 0); -} - -static void -gen11_gt_engine_irq_handler(struct drm_i915_private * const i915, - const unsigned int bank, - const unsigned int engine_n, - const u16 iir) -{ - struct intel_engine_cs ** const engine = i915->engine; - - switch (bank) { - case 0: - switch (engine_n) { - - case GEN11_RCS0: - return gen11_cs_irq_handler(engine[RCS], iir); - - case GEN11_BCS: - return gen11_cs_irq_handler(engine[BCS], iir); - } - case 1: - switch (engine_n) { - - case GEN11_VCS(0): - return gen11_cs_irq_handler(engine[_VCS(0)], iir); - case GEN11_VCS(1): - return gen11_cs_irq_handler(engine[_VCS(1)], iir); - case GEN11_VCS(2): - return gen11_cs_irq_handler(engine[_VCS(2)], iir); - case GEN11_VCS(3): - return gen11_cs_irq_handler(engine[_VCS(3)], iir); - - case GEN11_VECS(0): - return gen11_cs_irq_handler(engine[_VECS(0)], iir); - case GEN11_VECS(1): - return gen11_cs_irq_handler(engine[_VECS(1)], iir); - } - } -} - static u32 -gen11_gt_engine_intr(struct drm_i915_private * const i915, - const unsigned int bank, const unsigned int bit) +gen11_gt_engine_identity(struct drm_i915_private * const i915, + const unsigned int bank, const unsigned int bit) { void __iomem * const regs = i915->regs; u32 timeout_ts; u32 ident; + lockdep_assert_held(&i915->irq_lock); + raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit)); /* @@ -2835,42 +2845,101 @@ gen11_gt_engine_intr(struct drm_i915_private * const i915, raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank), GEN11_INTR_DATA_VALID); - return ident & GEN11_INTR_ENGINE_MASK; + return ident; } static void -gen11_gt_irq_handler(struct drm_i915_private * const i915, - const u32 master_ctl) +gen11_other_irq_handler(struct drm_i915_private * const i915, + const u8 instance, const u16 iir) +{ + if (instance == OTHER_GTPM_INSTANCE) + return gen6_rps_irq_handler(i915, iir); + + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", + instance, iir); +} + +static void +gen11_engine_irq_handler(struct drm_i915_private * const i915, + const u8 class, const u8 instance, const u16 iir) +{ + struct intel_engine_cs *engine; + + if (instance <= 
MAX_ENGINE_INSTANCE) + engine = i915->engine_class[class][instance]; + else + engine = NULL; + + if (likely(engine)) + return gen8_cs_irq_handler(engine, iir); + + WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", + class, instance); +} + +static void +gen11_gt_identity_handler(struct drm_i915_private * const i915, + const u32 identity) +{ + const u8 class = GEN11_INTR_ENGINE_CLASS(identity); + const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity); + const u16 intr = GEN11_INTR_ENGINE_INTR(identity); + + if (unlikely(!intr)) + return; + + if (class <= COPY_ENGINE_CLASS) + return gen11_engine_irq_handler(i915, class, instance, intr); + + if (class == OTHER_CLASS) + return gen11_other_irq_handler(i915, instance, intr); + + WARN_ONCE(1, "unknown interrupt class=0x%x, instance=0x%x, intr=0x%x\n", + class, instance, intr); +} + +static void +gen11_gt_bank_handler(struct drm_i915_private * const i915, + const unsigned int bank) { void __iomem * const regs = i915->regs; - unsigned int bank; + unsigned long intr_dw; + unsigned int bit; - for (bank = 0; bank < 2; bank++) { - unsigned long intr_dw; - unsigned int bit; + lockdep_assert_held(&i915->irq_lock); - if (!(master_ctl & GEN11_GT_DW_IRQ(bank))) - continue; + intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); - intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + if (unlikely(!intr_dw)) { + DRM_ERROR("GT_INTR_DW%u blank!\n", bank); + return; + } - if (unlikely(!intr_dw)) { - DRM_ERROR("GT_INTR_DW%u blank!\n", bank); - continue; - } + for_each_set_bit(bit, &intr_dw, 32) { + const u32 ident = gen11_gt_engine_identity(i915, + bank, bit); - for_each_set_bit(bit, &intr_dw, 32) { - const u16 iir = gen11_gt_engine_intr(i915, bank, bit); + gen11_gt_identity_handler(i915, ident); + } - if (unlikely(!iir)) - continue; + /* Clear must be after shared has been served for engine */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); +} - gen11_gt_engine_irq_handler(i915, bank, bit, iir); - } +static void +gen11_gt_irq_handler(struct drm_i915_private * const i915, + const u32 master_ctl) +{ + unsigned int bank; - /* Clear must be after shared has been served for engine */ - raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); + spin_lock(&i915->irq_lock); + + for (bank = 0; bank < 2; bank++) { + if (master_ctl & GEN11_GT_DW_IRQ(bank)) + gen11_gt_bank_handler(i915, bank); } + + spin_unlock(&i915->irq_lock); } static irqreturn_t gen11_irq_handler(int irq, void *arg) @@ -2912,15 +2981,11 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -/** - * i915_reset_device - do process context error handling work - * @dev_priv: i915 device private - * - * Fire an error uevent so userspace can see that a hang or error - * was detected. 
- */ -static void i915_reset_device(struct drm_i915_private *dev_priv) +static void i915_reset_device(struct drm_i915_private *dev_priv, + u32 engine_mask, + const char *reason) { + struct i915_gpu_error *error = &dev_priv->gpu_error; struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; @@ -2936,29 +3001,35 @@ static void i915_reset_device(struct drm_i915_private *dev_priv) i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { intel_prepare_reset(dev_priv); + error->reason = reason; + error->stalled_mask = engine_mask; + /* Signal that locked waiters should reset the GPU */ - set_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags); - wake_up_all(&dev_priv->gpu_error.wait_queue); + smp_mb__before_atomic(); + set_bit(I915_RESET_HANDOFF, &error->flags); + wake_up_all(&error->wait_queue); /* Wait for anyone holding the lock to wakeup, without * blocking indefinitely on struct_mutex. */ do { if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - i915_reset(dev_priv, 0); + i915_reset(dev_priv, engine_mask, reason); mutex_unlock(&dev_priv->drm.struct_mutex); } - } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags, + } while (wait_on_bit_timeout(&error->flags, I915_RESET_HANDOFF, TASK_UNINTERRUPTIBLE, 1)); + error->stalled_mask = 0; + error->reason = NULL; + intel_finish_reset(dev_priv); } - if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) - kobject_uevent_env(kobj, - KOBJ_CHANGE, reset_done_event); + if (!test_bit(I915_WEDGED, &error->flags)) + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); } static void i915_clear_error_registers(struct drm_i915_private *dev_priv) @@ -2990,6 +3061,7 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) * i915_handle_error - handle a gpu error * @dev_priv: i915 device private * @engine_mask: mask representing engines that are hung + * @flags: control flags * @fmt: Error message format string * * Do some basic checking of register state at error time and @@ -3000,16 +3072,23 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) */ void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, + unsigned long flags, const char *fmt, ...) { struct intel_engine_cs *engine; unsigned int tmp; - va_list args; char error_msg[80]; + char *msg = NULL; - va_start(args, fmt); - vscnprintf(error_msg, sizeof(error_msg), fmt, args); - va_end(args); + if (fmt) { + va_list args; + + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + msg = error_msg; + } /* * In most cases it's guaranteed that we get here with an RPM @@ -3020,8 +3099,12 @@ void i915_handle_error(struct drm_i915_private *dev_priv, */ intel_runtime_pm_get(dev_priv); - i915_capture_error_state(dev_priv, engine_mask, error_msg); - i915_clear_error_registers(dev_priv); + engine_mask &= INTEL_INFO(dev_priv)->ring_mask; + + if (flags & I915_ERROR_CAPTURE) { + i915_capture_error_state(dev_priv, engine_mask, msg); + i915_clear_error_registers(dev_priv); + } /* * Try engine reset when available. 
We fall back to full reset if @@ -3034,7 +3117,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, &dev_priv->gpu_error.flags)) continue; - if (i915_reset_engine(engine, 0) == 0) + if (i915_reset_engine(engine, msg) == 0) engine_mask &= ~intel_engine_flag(engine); clear_bit(I915_RESET_ENGINE + engine->id, @@ -3064,7 +3147,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, TASK_UNINTERRUPTIBLE); } - i915_reset_device(dev_priv); + i915_reset_device(dev_priv, engine_mask, msg); for_each_engine(engine, dev_priv, tmp) { clear_bit(I915_RESET_ENGINE + engine->id, @@ -3286,6 +3369,11 @@ static void ironlake_irq_reset(struct drm_device *dev) if (IS_GEN7(dev_priv)) I915_WRITE(GEN7_ERR_INT, 0xffffffff); + if (IS_HASWELL(dev_priv)) { + I915_WRITE(EDP_PSR_IMR, 0xffffffff); + I915_WRITE(EDP_PSR_IIR, 0xffffffff); + } + gen5_gt_irq_reset(dev_priv); ibx_irq_reset(dev_priv); @@ -3324,6 +3412,9 @@ static void gen8_irq_reset(struct drm_device *dev) gen8_gt_irq_reset(dev_priv); + I915_WRITE(EDP_PSR_IMR, 0xffffffff); + I915_WRITE(EDP_PSR_IIR, 0xffffffff); + for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) @@ -3349,6 +3440,9 @@ static void gen11_gt_irq_reset(struct drm_i915_private *dev_priv) I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~0); I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~0); I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~0); + + I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); + I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); } static void gen11_irq_reset(struct drm_device *dev) @@ -3697,6 +3791,12 @@ static int ironlake_irq_postinstall(struct drm_device *dev) DE_DP_A_HOTPLUG); } + if (IS_HASWELL(dev_priv)) { + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + display_mask |= DE_EDP_PSR_INT_HSW; + } + dev_priv->irq_mask = ~display_mask; ibx_irq_pre_postinstall(dev); @@ -3807,7 +3907,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) uint32_t de_pipe_enables; u32 de_port_masked = GEN8_AUX_CHANNEL_A; u32 de_port_enables; - u32 de_misc_masked = GEN8_DE_MISC_GSE; + u32 de_misc_masked = GEN8_DE_MISC_GSE | GEN8_DE_EDP_PSR; enum pipe pipe; if (INTEL_GEN(dev_priv) >= 9) { @@ -3832,6 +3932,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + for_each_pipe(dev_priv, pipe) { dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; @@ -3887,7 +3990,14 @@ static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~(irqs | irqs << 16)); I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~(irqs | irqs << 16)); - dev_priv->pm_imr = 0xffffffff; /* TODO */ + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. + */ + dev_priv->pm_ier = 0x0; + dev_priv->pm_imr = ~dev_priv->pm_ier; + I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); + I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); } static int gen11_irq_postinstall(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_oa_icl.c b/drivers/gpu/drm/i915/i915_oa_icl.c new file mode 100644 index 000000000000..a5667926e3de --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_icl.c @@ -0,0 +1,118 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_icl.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x0000ffff }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x0000ffff }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x0000ffff }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0xd04), 0x00000200 }, + { _MMIO(0x9840), 0x00000000 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x10060000 }, + { _MMIO(0x9888), 0x22060000 }, + { _MMIO(0x9888), 0x16060000 }, + { _MMIO(0x9888), 0x24060000 }, + { _MMIO(0x9888), 0x18060000 }, + { _MMIO(0x9888), 0x1a060000 }, + { _MMIO(0x9888), 0x12060000 }, + { _MMIO(0x9888), 0x14060000 }, + { _MMIO(0x9888), 0x10060000 }, + { _MMIO(0x9888), 0x22060000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x16130000 }, + { _MMIO(0x9888), 0x24000001 }, + { _MMIO(0x9888), 0x0e130056 }, + { _MMIO(0x9888), 0x10130000 }, + { _MMIO(0x9888), 0x1a130000 }, + { _MMIO(0x9888), 0x541f0001 }, + { _MMIO(0x9888), 0x181f0000 }, + { _MMIO(0x9888), 0x4c1f0000 }, + { _MMIO(0x9888), 0x301f0000 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.oa.test_config.uuid, + "a291665e-244b-4b76-9b9a-01de9d3c8068", + sizeof(dev_priv->perf.oa.test_config.uuid)); + dev_priv->perf.oa.test_config.id = 1; + + dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; + 
dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.oa.test_config.sysfs_metric.name = "a291665e-244b-4b76-9b9a-01de9d3c8068"; + dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + + dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/i915_oa_icl.h b/drivers/gpu/drm/i915/i915_oa_icl.h new file mode 100644 index 000000000000..ae1c24aafe4f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_icl.h @@ -0,0 +1,34 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_ICL_H__ +#define __I915_OA_ICL_H__ + +extern void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 08108ce5be21..66ea3552c63e 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -164,6 +164,9 @@ i915_param_named_unsafe(guc_firmware_path, charp, 0400, i915_param_named_unsafe(huc_firmware_path, charp, 0400, "HuC firmware path to use instead of the default one"); +i915_param_named_unsafe(dmc_firmware_path, charp, 0400, + "DMC firmware path to use instead of the default one"); + i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks. 
(default: true)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 430f5f9d0ff4..6684025b7af8 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -48,9 +48,10 @@ struct drm_printer; param(int, enable_ips, 1) \ param(int, invert_brightness, 0) \ param(int, enable_guc, 0) \ - param(int, guc_log_level, 0) \ + param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ + param(char *, dmc_firmware_path, NULL) \ param(int, mmio_debug, 0) \ param(int, edp_vswing, 0) \ param(int, reset, 2) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 062e91b39085..4364922e935d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -602,6 +602,7 @@ static const struct intel_device_info intel_icelake_11_info = { PLATFORM(INTEL_ICELAKE), .is_alpha_support = 1, .has_resource_streamer = 0, + .ring_mask = RENDER_RING | BLT_RING | VEBOX_RING | BSD_RING | BSD3_RING, }; #undef GEN diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index abaca6edeb71..019bd2d073ad 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -209,6 +209,7 @@ #include "i915_oa_cflgt2.h" #include "i915_oa_cflgt3.h" #include "i915_oa_cnl.h" +#include "i915_oa_icl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1042,7 +1043,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, I915_WRITE(GEN7_OASTATUS2, ((head & GEN7_OASTATUS2_HEAD_MASK) | - OA_MEM_SELECT_GGTT)); + GEN7_OASTATUS2_MEM_SELECT_GGTT)); dev_priv->perf.oa.oa_buffer.head = head; spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); @@ -1233,7 +1234,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - ring = engine->context_pin(engine, stream->ctx); + ring = intel_context_pin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -1245,7 +1246,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * with gen8+ and execlists */ dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(stream->ctx->engine[engine->id].state); + i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state); } return 0; @@ -1270,7 +1271,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) mutex_lock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - engine->context_unpin(engine, stream->ctx); + intel_context_unpin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1332,7 +1333,8 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 */ - I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */ + I915_WRITE(GEN7_OASTATUS2, + gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */ dev_priv->perf.oa.oa_buffer.head = gtt_offset; I915_WRITE(GEN7_OABUFFER, gtt_offset); @@ -1392,7 +1394,7 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * bit." 
*/ I915_WRITE(GEN8_OABUFFER, gtt_offset | - OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT); + OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ @@ -1693,7 +1695,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr const struct i915_oa_config *oa_config) { struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; struct i915_request *rq; int ret; @@ -1714,15 +1716,11 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, + prev = i915_gem_active_raw(&timeline->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - GFP_KERNEL); + i915_request_await_dma_fence(rq, &prev->fence); } i915_request_add(rq); @@ -1757,6 +1755,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_context *ctx; int ret; unsigned int wait_flags = I915_WAIT_LOCKED; @@ -1787,7 +1786,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = &ctx->engine[RCS]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 *regs; /* OA settings will be set upon first use */ @@ -1840,7 +1839,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, * be read back from automatically triggered reports, as part of the * RPT_ID field. */ - if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) { + if (IS_GEN(dev_priv, 9, 11)) { I915_WRITE(GEN8_OA_DEBUG, _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); @@ -1870,7 +1869,6 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & ~GT_NOA_ENABLE)); - } static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) @@ -1885,6 +1883,13 @@ static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) static void gen7_oa_enable(struct drm_i915_private *dev_priv) { + struct i915_gem_context *ctx = + dev_priv->perf.oa.exclusive_stream->ctx; + u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; + bool periodic = dev_priv->perf.oa.periodic; + u32 period_exponent = dev_priv->perf.oa.period_exponent; + u32 report_format = dev_priv->perf.oa.oa_buffer.format; + /* * Reset buf pointers so we don't forward reports from before now. 
* @@ -1896,25 +1901,14 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv) */ gen7_init_oa_buffer(dev_priv); - if (dev_priv->perf.oa.exclusive_stream->enabled) { - struct i915_gem_context *ctx = - dev_priv->perf.oa.exclusive_stream->ctx; - u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; - - bool periodic = dev_priv->perf.oa.periodic; - u32 period_exponent = dev_priv->perf.oa.period_exponent; - u32 report_format = dev_priv->perf.oa.oa_buffer.format; - - I915_WRITE(GEN7_OACONTROL, - (ctx_id & GEN7_OACONTROL_CTX_MASK) | - (period_exponent << - GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | - (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | - (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | - (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | - GEN7_OACONTROL_ENABLE); - } else - I915_WRITE(GEN7_OACONTROL, 0); + I915_WRITE(GEN7_OACONTROL, + (ctx_id & GEN7_OACONTROL_CTX_MASK) | + (period_exponent << + GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | + (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | + (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | + (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | + GEN7_OACONTROL_ENABLE); } static void gen8_oa_enable(struct drm_i915_private *dev_priv) @@ -1966,11 +1960,19 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) static void gen7_oa_disable(struct drm_i915_private *dev_priv) { I915_WRITE(GEN7_OACONTROL, 0); + if (intel_wait_for_register(dev_priv, + GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); } static void gen8_oa_disable(struct drm_i915_private *dev_priv) { I915_WRITE(GEN8_OACONTROL, 0); + if (intel_wait_for_register(dev_priv, + GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); } /** @@ -2099,13 +2101,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (stream->ctx) { ret = oa_get_render_ctx_id(stream); - if (ret) + if (ret) { + DRM_DEBUG("Invalid context id to filter with\n"); return ret; + } } ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); - if (ret) + if (ret) { + DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); goto err_config; + } /* PRM - observability performance counters: * @@ -2132,8 +2138,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv, stream->oa_config); - if (ret) + if (ret) { + DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; + } stream->ops = &i915_oa_stream_ops; @@ -2745,7 +2753,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->ctx_handle = value; break; case DRM_I915_PERF_PROP_SAMPLE_OA: - props->sample_flags |= SAMPLE_OA_REPORT; + if (value) + props->sample_flags |= SAMPLE_OA_REPORT; break; case DRM_I915_PERF_PROP_OA_METRICS_SET: if (value == 0) { @@ -2935,6 +2944,8 @@ void i915_perf_register(struct drm_i915_private *dev_priv) i915_perf_load_test_config_cflgt3(dev_priv); } else if (IS_CANNONLAKE(dev_priv)) { i915_perf_load_test_config_cnl(dev_priv); + } else if (IS_ICELAKE(dev_priv)) { + i915_perf_load_test_config_icl(dev_priv); } if (dev_priv->perf.oa.test_config.id == 0) @@ -3292,6 +3303,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev_priv->perf.metrics_lock); + DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); + return oa_config->id; sysfs_err: @@ -3348,6 +3361,9 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, &oa_config->sysfs_metric); 
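/*
 * For illustration, a sketch of the userspace side of this pair of
 * ioctls (uAPI struct as assumed from i915_drm.h, not from this patch):
 *
 *	struct drm_i915_perf_oa_config cfg = {
 *		.n_mux_regs = n_mux,
 *		.mux_regs_ptr = (uintptr_t)mux_regs,
 *	};
 *	memcpy(cfg.uuid, "01234567-0123-0123-0123-0123456789ab",
 *	       sizeof(cfg.uuid));
 *	u64 id = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &cfg);
 *	...
 *	ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &id);
 *
 * The DRM_DEBUG lines added here make both ends of that config lifetime
 * visible in the kernel log.
 */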
idr_remove(&dev_priv->perf.metrics_idr, *arg); + + DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); + put_oa_config(dev_priv, oa_config); config_err: @@ -3467,7 +3483,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); } - } else if (IS_GEN10(dev_priv)) { + } else if (IS_GEN(dev_priv, 10, 11)) { dev_priv->perf.oa.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; dev_priv->perf.oa.ops.is_valid_mux_reg = diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index f0519e31543a..dc87797db500 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1,33 +1,12 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ -#include <linux/perf_event.h> -#include <linux/pm_runtime.h> - -#include "i915_drv.h" #include "i915_pmu.h" #include "intel_ringbuffer.h" +#include "i915_drv.h" /* Frequency for the sampling timer for events which need it. */ #define FREQUENCY 200 diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index aa1b1a987ea1..2ba735299f7c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -1,29 +1,19 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ + #ifndef __I915_PMU_H__ #define __I915_PMU_H__ +#include <linux/hrtimer.h> +#include <linux/perf_event.h> +#include <linux/spinlock_types.h> +#include <drm/i915_drm.h> + +struct drm_i915_private; + enum { __I915_SAMPLE_FREQ_ACT = 0, __I915_SAMPLE_FREQ_REQ, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8a69a9275e28..f11bb213ec07 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -153,9 +153,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) #define _PLL(pll, a, b) ((a) + (pll)*((b)-(a))) #define _MMIO_PLL(pll, a, b) _MMIO(_PLL(pll, a, b)) -#define _MMIO_PORT6(port, a, b, c, d, e, f) _MMIO(_PICK(port, a, b, c, d, e, f)) -#define _MMIO_PORT6_LN(port, ln, a0, a1, b, c, d, e, f) \ - _MMIO(_PICK(port, a0, b, c, d, e, f) + (ln * (a1 - a0))) #define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) @@ -191,6 +188,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OTHER_CLASS 4 #define MAX_ENGINE_CLASS 4 +#define OTHER_GTPM_INSTANCE 1 #define MAX_ENGINE_INSTANCE 3 /* PCI config space */ @@ -304,6 +302,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN6_GRDOM_VECS (1 << 4) #define GEN9_GRDOM_GUC (1 << 5) #define GEN8_GRDOM_MEDIA2 (1 << 7) +/* GEN11 changed all bit defs except for FULL & RENDER */ +#define GEN11_GRDOM_FULL GEN6_GRDOM_FULL +#define GEN11_GRDOM_RENDER GEN6_GRDOM_RENDER +#define GEN11_GRDOM_BLT (1 << 2) +#define GEN11_GRDOM_GUC (1 << 3) +#define GEN11_GRDOM_MEDIA (1 << 5) +#define GEN11_GRDOM_MEDIA2 (1 << 6) +#define GEN11_GRDOM_MEDIA3 (1 << 7) +#define GEN11_GRDOM_MEDIA4 (1 << 8) +#define GEN11_GRDOM_VECS (1 << 13) +#define GEN11_GRDOM_VECS2 (1 << 14) #define RING_PP_DIR_BASE(engine) _MMIO((engine)->mmio_base+0x228) #define RING_PP_DIR_BASE_READ(engine) _MMIO((engine)->mmio_base+0x518) @@ -430,145 +439,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VGA_CR_INDEX_CGA 0x3d4 #define VGA_CR_DATA_CGA 0x3d5 -/* - * Instruction field definitions used by the command parser - */ -#define INSTR_CLIENT_SHIFT 29 -#define INSTR_MI_CLIENT 0x0 -#define INSTR_BC_CLIENT 0x2 -#define INSTR_RC_CLIENT 0x3 -#define INSTR_SUBCLIENT_SHIFT 27 -#define INSTR_SUBCLIENT_MASK 0x18000000 -#define INSTR_MEDIA_SUBCLIENT 0x2 -#define INSTR_26_TO_24_MASK 0x7000000 -#define INSTR_26_TO_24_SHIFT 24 - -/* - * Memory interface instructions used by the kernel - */ -#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) -/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */ -#define MI_GLOBAL_GTT (1<<22) - -#define MI_NOOP MI_INSTR(0, 0) -#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) -#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) -#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) -#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) -#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) -#define MI_FLUSH MI_INSTR(0x04, 0) -#define MI_READ_FLUSH (1 << 0) -#define MI_EXE_FLUSH (1 << 1) -#define MI_NO_WRITE_FLUSH (1 << 2) -#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */ -#define 
MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ -#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */ -#define MI_REPORT_HEAD MI_INSTR(0x07, 0) -#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) -#define MI_ARB_ENABLE (1<<0) -#define MI_ARB_DISABLE (0<<0) -#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) -#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) -#define MI_SUSPEND_FLUSH_EN (1<<0) -#define MI_SET_APPID MI_INSTR(0x0e, 0) -#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0) -#define MI_OVERLAY_CONTINUE (0x0<<21) -#define MI_OVERLAY_ON (0x1<<21) -#define MI_OVERLAY_OFF (0x2<<21) -#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0) -#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) -#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) -#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) -/* IVB has funny definitions for which plane to flip. */ -#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19) -#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) -#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) -/* SKL ones */ -#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8) -#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */ -#define MI_SEMAPHORE_GLOBAL_GTT (1<<22) -#define MI_SEMAPHORE_UPDATE (1<<21) -#define MI_SEMAPHORE_COMPARE (1<<20) -#define MI_SEMAPHORE_REGISTER (1<<18) -#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */ -#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */ -#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */ -#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */ -#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */ -#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */ -#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */ -#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */ -#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */ -#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */ -#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */ -#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */ -#define MI_SEMAPHORE_SYNC_INVALID (3<<16) -#define MI_SEMAPHORE_SYNC_MASK (3<<16) -#define MI_SET_CONTEXT MI_INSTR(0x18, 0) -#define MI_MM_SPACE_GTT (1<<8) -#define MI_MM_SPACE_PHYSICAL (0<<8) -#define MI_SAVE_EXT_STATE_EN (1<<3) -#define MI_RESTORE_EXT_STATE_EN (1<<2) -#define MI_FORCE_RESTORE (1<<1) -#define MI_RESTORE_INHIBIT (1<<0) -#define HSW_MI_RS_SAVE_STATE_EN (1<<3) -#define HSW_MI_RS_RESTORE_STATE_EN (1<<2) -#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ -#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) -#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ -#define MI_SEMAPHORE_POLL (1<<15) -#define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) -#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) -#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) -#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ -#define MI_USE_GGTT (1 << 22) /* g4x+ */ -#define MI_STORE_DWORD_INDEX 
MI_INSTR(0x21, 1) -#define MI_STORE_DWORD_INDEX_SHIFT 2 -/* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: - * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw - * simply ignores the register load under certain conditions. - * - One can actually load arbitrary many arbitrary registers: Simply issue x - * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! - */ -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) -#define MI_LRI_FORCE_POSTED (1<<12) -#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) -#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) -#define MI_SRM_LRM_GLOBAL_GTT (1<<22) -#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ -#define MI_FLUSH_DW_STORE_INDEX (1<<21) -#define MI_INVALIDATE_TLB (1<<18) -#define MI_FLUSH_DW_OP_STOREDW (1<<14) -#define MI_FLUSH_DW_OP_MASK (3<<14) -#define MI_FLUSH_DW_NOTIFY (1<<8) -#define MI_INVALIDATE_BSD (1<<7) -#define MI_FLUSH_DW_USE_GTT (1<<2) -#define MI_FLUSH_DW_USE_PPGTT (0<<2) -#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) -#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) -#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) -#define MI_BATCH_NON_SECURE (1) -/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ -#define MI_BATCH_NON_SECURE_I965 (1<<8) -#define MI_BATCH_PPGTT_HSW (1<<8) -#define MI_BATCH_NON_SECURE_HSW (1<<13) -#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) -#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ -#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) -#define MI_BATCH_RESOURCE_STREAMER (1<<10) - #define MI_PREDICATE_SRC0 _MMIO(0x2400) #define MI_PREDICATE_SRC0_UDW _MMIO(0x2400 + 4) #define MI_PREDICATE_SRC1 _MMIO(0x2408) @@ -579,130 +449,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define LOWER_SLICE_DISABLED (0<<0) /* - * 3D instructions used by the kernel - */ -#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags)) - -#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4) -#define GEN9_MEDIA_POOL_ENABLE (1 << 31) -#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24)) -#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19)) -#define SC_UPDATE_SCISSOR (0x1<<1) -#define SC_ENABLE_MASK (0x1<<0) -#define SC_ENABLE (0x1<<0) -#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16)) -#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1)) -#define SCI_YMIN_MASK (0xffff<<16) -#define SCI_XMIN_MASK (0xffff<<0) -#define SCI_YMAX_MASK (0xffff<<16) -#define SCI_XMAX_MASK (0xffff<<0) -#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19)) -#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1) -#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0) -#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) -#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4) -#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0) -#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) -#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) -#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) - -#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) -#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) -#define BLT_WRITE_A (2<<20) -#define BLT_WRITE_RGB (1<<20) -#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) -#define BLT_DEPTH_8 (0<<24) -#define BLT_DEPTH_16_565 (1<<24) -#define BLT_DEPTH_16_1555 
(2<<24) -#define BLT_DEPTH_32 (3<<24) -#define BLT_ROP_SRC_COPY (0xcc<<16) -#define BLT_ROP_COLOR_COPY (0xf0<<16) -#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */ -#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */ -#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) -#define ASYNC_FLIP (1<<22) -#define DISPLAY_PLANE_A (0<<20) -#define DISPLAY_PLANE_B (1<<20) -#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) -#define PIPE_CONTROL_FLUSH_L3 (1<<27) -#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ -#define PIPE_CONTROL_MMIO_WRITE (1<<23) -#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) -#define PIPE_CONTROL_CS_STALL (1<<20) -#define PIPE_CONTROL_TLB_INVALIDATE (1<<18) -#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) -#define PIPE_CONTROL_QW_WRITE (1<<14) -#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) -#define PIPE_CONTROL_DEPTH_STALL (1<<13) -#define PIPE_CONTROL_WRITE_FLUSH (1<<12) -#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ -#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on Ironlake */ -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ -#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) -#define PIPE_CONTROL_NOTIFY (1<<8) -#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ -#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) -#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) -#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) -#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ - -/* - * Commands used only by the command parser - */ -#define MI_SET_PREDICATE MI_INSTR(0x01, 0) -#define MI_ARB_CHECK MI_INSTR(0x05, 0) -#define MI_RS_CONTROL MI_INSTR(0x06, 0) -#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0) -#define MI_PREDICATE MI_INSTR(0x0C, 0) -#define MI_RS_CONTEXT MI_INSTR(0x0F, 0) -#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0) -#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0) -#define MI_URB_CLEAR MI_INSTR(0x19, 0) -#define MI_UPDATE_GTT MI_INSTR(0x23, 0) -#define MI_CLFLUSH MI_INSTR(0x27, 0) -#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) -#define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) -#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) -#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) -#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) -#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) - -#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) -#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) -#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) -#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) -#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) -#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) -#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16)) -#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16)) -#define GFX_OP_3DSTATE_SO_DECL_LIST \ - ((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16)) - -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16)) 
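/*
 * A worked example of the MI_INSTR() encoding used throughout the block
 * of definitions being removed here (values taken from the removed
 * lines above; their destination header is assumed to be elsewhere in
 * this series): MI_STORE_DWORD_IMM_GEN4 = MI_INSTR(0x20, 2) =
 * (0x20 << 23) | 2, and a typical gen8+ emission is:
 *
 *	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 *	*cs++ = lower_32_bits(gtt_offset);
 *	*cs++ = upper_32_bits(gtt_offset);
 *	*cs++ = value;
 */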
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16)) - -#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16)) - -#define COLOR_BLT ((0x2<<29)|(0x40<<22)) -#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) - -/* * Registers used only by the command parser */ #define BCS_SWCTRL _MMIO(0x22200) @@ -802,6 +548,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_OABUFFER_UDW _MMIO(0x23b4) #define GEN8_OABUFFER _MMIO(0x2b14) +#define GEN8_OABUFFER_MEM_SELECT_GGTT (1 << 0) /* 0: PPGTT, 1: GGTT */ #define GEN7_OASTATUS1 _MMIO(0x2364) #define GEN7_OASTATUS1_TAIL_MASK 0xffffffc0 @@ -810,7 +557,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN7_OASTATUS1_REPORT_LOST (1<<0) #define GEN7_OASTATUS2 _MMIO(0x2368) -#define GEN7_OASTATUS2_HEAD_MASK 0xffffffc0 +#define GEN7_OASTATUS2_HEAD_MASK 0xffffffc0 +#define GEN7_OASTATUS2_MEM_SELECT_GGTT (1 << 0) /* 0: PPGTT, 1: GGTT */ #define GEN8_OASTATUS _MMIO(0x2b08) #define GEN8_OASTATUS_OVERRUN_STATUS (1<<3) @@ -832,8 +580,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OABUFFER_SIZE_8M (6<<3) #define OABUFFER_SIZE_16M (7<<3) -#define OA_MEM_SELECT_GGTT (1<<0) - /* * Flexible, Aggregate EU Counter Registers. * Note: these aren't contiguous @@ -1127,6 +873,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0 #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 #define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 #define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) @@ -1948,79 +1700,100 @@ enum i915_power_well_id { #define _CNL_PORT_PCS_DW1_LN0_C 0x162C04 #define _CNL_PORT_PCS_DW1_LN0_D 0x162E04 #define _CNL_PORT_PCS_DW1_LN0_F 0x162804 -#define CNL_PORT_PCS_DW1_GRP(port) _MMIO_PORT6(port, \ +#define CNL_PORT_PCS_DW1_GRP(port) _MMIO(_PICK(port, \ _CNL_PORT_PCS_DW1_GRP_AE, \ _CNL_PORT_PCS_DW1_GRP_B, \ _CNL_PORT_PCS_DW1_GRP_C, \ _CNL_PORT_PCS_DW1_GRP_D, \ _CNL_PORT_PCS_DW1_GRP_AE, \ - _CNL_PORT_PCS_DW1_GRP_F) -#define CNL_PORT_PCS_DW1_LN0(port) _MMIO_PORT6(port, \ + _CNL_PORT_PCS_DW1_GRP_F)) + +#define CNL_PORT_PCS_DW1_LN0(port) _MMIO(_PICK(port, \ _CNL_PORT_PCS_DW1_LN0_AE, \ _CNL_PORT_PCS_DW1_LN0_B, \ _CNL_PORT_PCS_DW1_LN0_C, \ _CNL_PORT_PCS_DW1_LN0_D, \ _CNL_PORT_PCS_DW1_LN0_AE, \ - _CNL_PORT_PCS_DW1_LN0_F) + _CNL_PORT_PCS_DW1_LN0_F)) +#define _ICL_PORT_PCS_DW1_GRP_A 0x162604 +#define _ICL_PORT_PCS_DW1_GRP_B 0x6C604 +#define _ICL_PORT_PCS_DW1_LN0_A 0x162804 +#define _ICL_PORT_PCS_DW1_LN0_B 0x6C804 +#define ICL_PORT_PCS_DW1_GRP(port) _MMIO_PORT(port,\ + _ICL_PORT_PCS_DW1_GRP_A, \ + _ICL_PORT_PCS_DW1_GRP_B) +#define ICL_PORT_PCS_DW1_LN0(port) _MMIO_PORT(port, \ + _ICL_PORT_PCS_DW1_LN0_A, \ + _ICL_PORT_PCS_DW1_LN0_B) #define COMMON_KEEPER_EN (1 << 26) -#define _CNL_PORT_TX_DW2_GRP_AE 0x162348 -#define _CNL_PORT_TX_DW2_GRP_B 0x1623C8 -#define _CNL_PORT_TX_DW2_GRP_C 0x162B48 -#define _CNL_PORT_TX_DW2_GRP_D 0x162BC8 -#define _CNL_PORT_TX_DW2_GRP_F 0x162A48 -#define _CNL_PORT_TX_DW2_LN0_AE 0x162448 
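/*
 * A quick arithmetic check on the consolidated _CNL_PORT_TX_DW_GRP()
 * macro introduced just below (sketch, values from the surrounding
 * lines): it computes a per-port base plus 4*(dw), so for port B, dw 2:
 *
 *	0x1623C0 + 4 * 2 = 0x1623C8
 *
 * which matches the removed per-DW define _CNL_PORT_TX_DW2_GRP_B above.
 */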
-#define _CNL_PORT_TX_DW2_LN0_B 0x162648 -#define _CNL_PORT_TX_DW2_LN0_C 0x162C48 -#define _CNL_PORT_TX_DW2_LN0_D 0x162E48 -#define _CNL_PORT_TX_DW2_LN0_F 0x162848 -#define CNL_PORT_TX_DW2_GRP(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW2_GRP_AE, \ - _CNL_PORT_TX_DW2_GRP_B, \ - _CNL_PORT_TX_DW2_GRP_C, \ - _CNL_PORT_TX_DW2_GRP_D, \ - _CNL_PORT_TX_DW2_GRP_AE, \ - _CNL_PORT_TX_DW2_GRP_F) -#define CNL_PORT_TX_DW2_LN0(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW2_LN0_AE, \ - _CNL_PORT_TX_DW2_LN0_B, \ - _CNL_PORT_TX_DW2_LN0_C, \ - _CNL_PORT_TX_DW2_LN0_D, \ - _CNL_PORT_TX_DW2_LN0_AE, \ - _CNL_PORT_TX_DW2_LN0_F) -#define SWING_SEL_UPPER(x) ((x >> 3) << 15) +/* CNL Port TX registers */ +#define _CNL_PORT_TX_AE_GRP_OFFSET 0x162340 +#define _CNL_PORT_TX_B_GRP_OFFSET 0x1623C0 +#define _CNL_PORT_TX_C_GRP_OFFSET 0x162B40 +#define _CNL_PORT_TX_D_GRP_OFFSET 0x162BC0 +#define _CNL_PORT_TX_F_GRP_OFFSET 0x162A40 +#define _CNL_PORT_TX_AE_LN0_OFFSET 0x162440 +#define _CNL_PORT_TX_B_LN0_OFFSET 0x162640 +#define _CNL_PORT_TX_C_LN0_OFFSET 0x162C40 +#define _CNL_PORT_TX_D_LN0_OFFSET 0x162E40 +#define _CNL_PORT_TX_F_LN0_OFFSET 0x162840 +#define _CNL_PORT_TX_DW_GRP(port, dw) (_PICK((port), \ + _CNL_PORT_TX_AE_GRP_OFFSET, \ + _CNL_PORT_TX_B_GRP_OFFSET, \ + _CNL_PORT_TX_B_GRP_OFFSET, \ + _CNL_PORT_TX_D_GRP_OFFSET, \ + _CNL_PORT_TX_AE_GRP_OFFSET, \ + _CNL_PORT_TX_F_GRP_OFFSET) + \ + 4*(dw)) +#define _CNL_PORT_TX_DW_LN0(port, dw) (_PICK((port), \ + _CNL_PORT_TX_AE_LN0_OFFSET, \ + _CNL_PORT_TX_B_LN0_OFFSET, \ + _CNL_PORT_TX_B_LN0_OFFSET, \ + _CNL_PORT_TX_D_LN0_OFFSET, \ + _CNL_PORT_TX_AE_LN0_OFFSET, \ + _CNL_PORT_TX_F_LN0_OFFSET) + \ + 4*(dw)) + +#define CNL_PORT_TX_DW2_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 2)) +#define CNL_PORT_TX_DW2_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 2)) +#define _ICL_PORT_TX_DW2_GRP_A 0x162688 +#define _ICL_PORT_TX_DW2_GRP_B 0x6C688 +#define _ICL_PORT_TX_DW2_LN0_A 0x162888 +#define _ICL_PORT_TX_DW2_LN0_B 0x6C888 +#define ICL_PORT_TX_DW2_GRP(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW2_GRP_A, \ + _ICL_PORT_TX_DW2_GRP_B) +#define ICL_PORT_TX_DW2_LN0(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW2_LN0_A, \ + _ICL_PORT_TX_DW2_LN0_B) +#define SWING_SEL_UPPER(x) (((x) >> 3) << 15) #define SWING_SEL_UPPER_MASK (1 << 15) -#define SWING_SEL_LOWER(x) ((x & 0x7) << 11) +#define SWING_SEL_LOWER(x) (((x) & 0x7) << 11) #define SWING_SEL_LOWER_MASK (0x7 << 11) #define RCOMP_SCALAR(x) ((x) << 0) #define RCOMP_SCALAR_MASK (0xFF << 0) -#define _CNL_PORT_TX_DW4_GRP_AE 0x162350 -#define _CNL_PORT_TX_DW4_GRP_B 0x1623D0 -#define _CNL_PORT_TX_DW4_GRP_C 0x162B50 -#define _CNL_PORT_TX_DW4_GRP_D 0x162BD0 -#define _CNL_PORT_TX_DW4_GRP_F 0x162A50 #define _CNL_PORT_TX_DW4_LN0_AE 0x162450 #define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 -#define _CNL_PORT_TX_DW4_LN0_B 0x162650 -#define _CNL_PORT_TX_DW4_LN0_C 0x162C50 -#define _CNL_PORT_TX_DW4_LN0_D 0x162E50 -#define _CNL_PORT_TX_DW4_LN0_F 0x162850 -#define CNL_PORT_TX_DW4_GRP(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW4_GRP_AE, \ - _CNL_PORT_TX_DW4_GRP_B, \ - _CNL_PORT_TX_DW4_GRP_C, \ - _CNL_PORT_TX_DW4_GRP_D, \ - _CNL_PORT_TX_DW4_GRP_AE, \ - _CNL_PORT_TX_DW4_GRP_F) -#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO_PORT6_LN(port, ln, \ - _CNL_PORT_TX_DW4_LN0_AE, \ - _CNL_PORT_TX_DW4_LN1_AE, \ - _CNL_PORT_TX_DW4_LN0_B, \ - _CNL_PORT_TX_DW4_LN0_C, \ - _CNL_PORT_TX_DW4_LN0_D, \ - _CNL_PORT_TX_DW4_LN0_AE, \ - _CNL_PORT_TX_DW4_LN0_F) +#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 4)) +#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4)) +#define 
CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4) + \ + (ln * (_CNL_PORT_TX_DW4_LN1_AE - \ + _CNL_PORT_TX_DW4_LN0_AE))) +#define _ICL_PORT_TX_DW4_GRP_A 0x162690 +#define _ICL_PORT_TX_DW4_GRP_B 0x6C690 +#define _ICL_PORT_TX_DW4_LN0_A 0x162890 +#define _ICL_PORT_TX_DW4_LN1_A 0x162990 +#define _ICL_PORT_TX_DW4_LN0_B 0x6C890 +#define ICL_PORT_TX_DW4_GRP(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW4_GRP_A, \ + _ICL_PORT_TX_DW4_GRP_B) +#define ICL_PORT_TX_DW4_LN(port, ln) _MMIO(_PORT(port, \ + _ICL_PORT_TX_DW4_LN0_A, \ + _ICL_PORT_TX_DW4_LN0_B) + \ + (ln * (_ICL_PORT_TX_DW4_LN1_A - \ + _ICL_PORT_TX_DW4_LN0_A))) #define LOADGEN_SELECT (1 << 31) #define POST_CURSOR_1(x) ((x) << 12) #define POST_CURSOR_1_MASK (0x3F << 12) @@ -2029,64 +1802,147 @@ enum i915_power_well_id { #define CURSOR_COEFF(x) ((x) << 0) #define CURSOR_COEFF_MASK (0x3F << 0) -#define _CNL_PORT_TX_DW5_GRP_AE 0x162354 -#define _CNL_PORT_TX_DW5_GRP_B 0x1623D4 -#define _CNL_PORT_TX_DW5_GRP_C 0x162B54 -#define _CNL_PORT_TX_DW5_GRP_D 0x162BD4 -#define _CNL_PORT_TX_DW5_GRP_F 0x162A54 -#define _CNL_PORT_TX_DW5_LN0_AE 0x162454 -#define _CNL_PORT_TX_DW5_LN0_B 0x162654 -#define _CNL_PORT_TX_DW5_LN0_C 0x162C54 -#define _CNL_PORT_TX_DW5_LN0_D 0x162E54 -#define _CNL_PORT_TX_DW5_LN0_F 0x162854 -#define CNL_PORT_TX_DW5_GRP(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW5_GRP_AE, \ - _CNL_PORT_TX_DW5_GRP_B, \ - _CNL_PORT_TX_DW5_GRP_C, \ - _CNL_PORT_TX_DW5_GRP_D, \ - _CNL_PORT_TX_DW5_GRP_AE, \ - _CNL_PORT_TX_DW5_GRP_F) -#define CNL_PORT_TX_DW5_LN0(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW5_LN0_AE, \ - _CNL_PORT_TX_DW5_LN0_B, \ - _CNL_PORT_TX_DW5_LN0_C, \ - _CNL_PORT_TX_DW5_LN0_D, \ - _CNL_PORT_TX_DW5_LN0_AE, \ - _CNL_PORT_TX_DW5_LN0_F) +#define CNL_PORT_TX_DW5_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 5)) +#define CNL_PORT_TX_DW5_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 5)) +#define _ICL_PORT_TX_DW5_GRP_A 0x162694 +#define _ICL_PORT_TX_DW5_GRP_B 0x6C694 +#define _ICL_PORT_TX_DW5_LN0_A 0x162894 +#define _ICL_PORT_TX_DW5_LN0_B 0x6C894 +#define ICL_PORT_TX_DW5_GRP(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW5_GRP_A, \ + _ICL_PORT_TX_DW5_GRP_B) +#define ICL_PORT_TX_DW5_LN0(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW5_LN0_A, \ + _ICL_PORT_TX_DW5_LN0_B) #define TX_TRAINING_EN (1 << 31) +#define TAP2_DISABLE (1 << 30) #define TAP3_DISABLE (1 << 29) #define SCALING_MODE_SEL(x) ((x) << 18) #define SCALING_MODE_SEL_MASK (0x7 << 18) #define RTERM_SELECT(x) ((x) << 3) #define RTERM_SELECT_MASK (0x7 << 3) -#define _CNL_PORT_TX_DW7_GRP_AE 0x16235C -#define _CNL_PORT_TX_DW7_GRP_B 0x1623DC -#define _CNL_PORT_TX_DW7_GRP_C 0x162B5C -#define _CNL_PORT_TX_DW7_GRP_D 0x162BDC -#define _CNL_PORT_TX_DW7_GRP_F 0x162A5C -#define _CNL_PORT_TX_DW7_LN0_AE 0x16245C -#define _CNL_PORT_TX_DW7_LN0_B 0x16265C -#define _CNL_PORT_TX_DW7_LN0_C 0x162C5C -#define _CNL_PORT_TX_DW7_LN0_D 0x162E5C -#define _CNL_PORT_TX_DW7_LN0_F 0x16285C -#define CNL_PORT_TX_DW7_GRP(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW7_GRP_AE, \ - _CNL_PORT_TX_DW7_GRP_B, \ - _CNL_PORT_TX_DW7_GRP_C, \ - _CNL_PORT_TX_DW7_GRP_D, \ - _CNL_PORT_TX_DW7_GRP_AE, \ - _CNL_PORT_TX_DW7_GRP_F) -#define CNL_PORT_TX_DW7_LN0(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW7_LN0_AE, \ - _CNL_PORT_TX_DW7_LN0_B, \ - _CNL_PORT_TX_DW7_LN0_C, \ - _CNL_PORT_TX_DW7_LN0_D, \ - _CNL_PORT_TX_DW7_LN0_AE, \ - _CNL_PORT_TX_DW7_LN0_F) +#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 7)) +#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 7)) #define N_SCALAR(x) ((x) << 24) #define N_SCALAR_MASK 
(0x7F << 24) +#define _ICL_MG_PHY_PORT_LN(port, ln, ln0p1, ln0p2, ln1p1) \ + _MMIO(_PORT((port) - PORT_C, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1))) + +#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT1 0x16812C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT1 0x16852C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT2 0x16912C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT2 0x16952C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT3 0x16A12C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT3 0x16A52C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT4 0x16B12C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT4 0x16B52C +#define ICL_PORT_MG_TX1_LINK_PARAMS(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT1, \ + _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT2, \ + _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT1) + +#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT1 0x1680AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT1 0x1684AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT2 0x1690AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT2 0x1694AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT3 0x16A0AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT3 0x16A4AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT4 0x16B0AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT4 0x16B4AC +#define ICL_PORT_MG_TX2_LINK_PARAMS(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT1, \ + _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT2, \ + _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT1) +#define CRI_USE_FS32 (1 << 5) + +#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT1 0x16814C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT1 0x16854C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT2 0x16914C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT2 0x16954C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT3 0x16A14C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT3 0x16A54C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT4 0x16B14C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT4 0x16B54C +#define ICL_PORT_MG_TX1_PISO_READLOAD(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT1, \ + _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT2, \ + _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT1) + +#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT1 0x1680CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT1 0x1684CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT2 0x1690CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT2 0x1694CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT3 0x16A0CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT3 0x16A4CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT4 0x16B0CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT4 0x16B4CC +#define ICL_PORT_MG_TX2_PISO_READLOAD(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT1, \ + _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT2, \ + _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT1) +#define CRI_CALCINIT (1 << 1) + +#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT1 0x168148 +#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT1 0x168548 +#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT2 0x169148 +#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT2 0x169548 +#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT3 0x16A148 +#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT3 0x16A548 +#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT4 0x16B148 +#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT4 0x16B548 +#define ICL_PORT_MG_TX1_SWINGCTRL(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT1, \ + _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT2, \ + _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT1) + +#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT1 0x1680C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT1 0x1684C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT2 
0x1690C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT2 0x1694C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT3 0x16A0C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT3 0x16A4C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT4 0x16B0C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT4 0x16B4C8 +#define ICL_PORT_MG_TX2_SWINGCTRL(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT1, \ + _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT2, \ + _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT1) +#define CRI_TXDEEMPH_OVERRIDE_17_12(x) ((x) << 0) +#define CRI_TXDEEMPH_OVERRIDE_17_12_MASK (0x3F << 0) + +#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT1 0x168144 +#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT1 0x168544 +#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT2 0x169144 +#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT2 0x169544 +#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT3 0x16A144 +#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT3 0x16A544 +#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT4 0x16B144 +#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT4 0x16B544 +#define ICL_PORT_MG_TX1_DRVCTRL(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_DRVCTRL_TX1LN0_PORT1, \ + _ICL_MG_TX_DRVCTRL_TX1LN0_PORT2, \ + _ICL_MG_TX_DRVCTRL_TX1LN1_PORT1) + +#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT1 0x1680C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT1 0x1684C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT2 0x1690C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT2 0x1694C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT3 0x16A0C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT3 0x16A4C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT4 0x16B0C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT4 0x16B4C4 +#define ICL_PORT_MG_TX2_DRVCTRL(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_DRVCTRL_TX2LN0_PORT1, \ + _ICL_MG_TX_DRVCTRL_TX2LN0_PORT2, \ + _ICL_MG_TX_DRVCTRL_TX2LN1_PORT1) +#define CRI_TXDEEMPH_OVERRIDE_11_6(x) ((x) << 24) +#define CRI_TXDEEMPH_OVERRIDE_11_6_MASK (0x3F << 24) +#define CRI_TXDEEMPH_OVERRIDE_EN (1 << 22) +#define CRI_TXDEEMPH_OVERRIDE_5_0(x) ((x) << 16) +#define CRI_TXDEEMPH_OVERRIDE_5_0_MASK (0x3F << 16) + /* The spec defines this only for BXT PHY0, but lets assume that this * would exist for PHY1 too if it had a second channel. 
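 *
 * As an illustrative cross-check of the address packing introduced
 * above (all offsets exactly as defined in this patch, nothing new
 * assumed):
 *
 *   CNL_PORT_TX_DW4_GRP(PORT_C)
 *     = _MMIO(_CNL_PORT_TX_C_GRP_OFFSET + 4 * 4)
 *     = _MMIO(0x162B50), matching the old _CNL_PORT_TX_DW4_GRP_C;
 *
 *   ICL_PORT_MG_TX1_DRVCTRL(PORT_D, 1)
 *     = _MMIO(_ICL_MG_TX_DRVCTRL_TX1LN0_PORT2 + (0x168544 - 0x168144))
 *     = _MMIO(0x169544), i.e. _ICL_MG_TX_DRVCTRL_TX1LN1_PORT2.
 *
 * In short: dwords are packed 4 bytes apart, MG PHY lanes 0x400 apart
 * and MG PHY port instances 0x1000 apart.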
*/ @@ -2473,6 +2329,10 @@ enum i915_power_well_id { #define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3) #define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24) #define GEN8_MCR_SUBSLICE_MASK GEN8_MCR_SUBSLICE(3) +#define GEN11_MCR_SLICE(slice) (((slice) & 0xf) << 27) +#define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) +#define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) +#define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) #define RING_IPEIR(base) _MMIO((base)+0x64) #define RING_IPEHR(base) _MMIO((base)+0x68) /* @@ -2867,6 +2727,19 @@ enum i915_power_well_id { #define GEN10_EU_DISABLE3 _MMIO(0x9140) #define GEN10_EU_DIS_SS_MASK 0xff +#define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) +#define GEN11_GT_VDBOX_DISABLE_MASK 0xff +#define GEN11_GT_VEBOX_DISABLE_SHIFT 16 +#define GEN11_GT_VEBOX_DISABLE_MASK (0xff << GEN11_GT_VEBOX_DISABLE_SHIFT) + +#define GEN11_EU_DISABLE _MMIO(0x9134) +#define GEN11_EU_DIS_MASK 0xFF + +#define GEN11_GT_SLICE_ENABLE _MMIO(0x9138) +#define GEN11_GT_S_ENA_MASK 0xFF + +#define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) @@ -3951,6 +3824,9 @@ enum { #define _CLKGATE_DIS_PSL_A 0x46520 #define _CLKGATE_DIS_PSL_B 0x46524 #define _CLKGATE_DIS_PSL_C 0x46528 +#define DUPS1_GATING_DIS (1 << 15) +#define DUPS2_GATING_DIS (1 << 19) +#define DUPS3_GATING_DIS (1 << 23) #define DPF_GATING_DIS (1 << 10) #define DPF_RAM_GATING_DIS (1 << 9) #define DPFR_GATING_DIS (1 << 8) @@ -3964,6 +3840,7 @@ enum { #define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) +#define MSCUNIT_CLKGATE_DIS (1 << 10) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define GWUNIT_CLKGATE_DIS (1 << 16) @@ -3971,6 +3848,9 @@ enum { #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) +#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) +#define CGPSF_CLKGATE_DIS (1 << 3) + /* * Display engine regs */ @@ -4150,7 +4030,20 @@ enum { #define EDP_PSR_TP1_TIME_0us (3<<4) #define EDP_PSR_IDLE_FRAME_SHIFT 0 +/* Bspec claims those aren't shifted but stay at 0x64800 */ +#define EDP_PSR_IMR _MMIO(0x64834) +#define EDP_PSR_IIR _MMIO(0x64838) +#define EDP_PSR_ERROR(trans) (1 << (((trans) * 8 + 10) & 31)) +#define EDP_PSR_POST_EXIT(trans) (1 << (((trans) * 8 + 9) & 31)) +#define EDP_PSR_PRE_ENTRY(trans) (1 << (((trans) * 8 + 8) & 31)) + #define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) +#define EDP_PSR_AUX_CTL_TIME_OUT_MASK (3 << 26) +#define EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK (0x1f << 20) +#define EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK (0xf << 16) +#define EDP_PSR_AUX_CTL_ERROR_INTERRUPT (1 << 11) +#define EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK (0x7ff) + #define EDP_PSR_AUX_DATA(i) _MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */ #define EDP_PSR_STATUS _MMIO(dev_priv->psr_mmio_base + 0x40) @@ -4180,17 +4073,19 @@ enum { #define EDP_PSR_PERF_CNT _MMIO(dev_priv->psr_mmio_base + 0x44) #define EDP_PSR_PERF_CNT_MASK 0xffffff -#define EDP_PSR_DEBUG _MMIO(dev_priv->psr_mmio_base + 0x60) +#define EDP_PSR_DEBUG _MMIO(dev_priv->psr_mmio_base + 0x60) /* PSR_MASK on SKL+ */ #define EDP_PSR_DEBUG_MASK_MAX_SLEEP (1<<28) #define EDP_PSR_DEBUG_MASK_LPSP (1<<27) #define EDP_PSR_DEBUG_MASK_MEMUP (1<<26) #define EDP_PSR_DEBUG_MASK_HPD (1<<25) #define EDP_PSR_DEBUG_MASK_DISP_REG_WRITE (1<<16) -#define EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1<<15) +#define 
EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1<<15) /* SKL+ */ #define EDP_PSR2_CTL _MMIO(0x6f900) #define EDP_PSR2_ENABLE (1<<31) #define EDP_SU_TRACK_ENABLE (1<<30) +#define EDP_Y_COORDINATE_VALID (1<<26) /* GLK and CNL+ */ +#define EDP_Y_COORDINATE_ENABLE (1<<25) /* GLK and CNL+ */ #define EDP_MAX_SU_DISABLE_TIME(t) ((t)<<20) #define EDP_MAX_SU_DISABLE_TIME_MASK (0x1f<<20) #define EDP_PSR2_TP2_TIME_500 (0<<8) @@ -4200,8 +4095,32 @@ enum { #define EDP_PSR2_TP2_TIME_MASK (3<<8) #define EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4 #define EDP_PSR2_FRAME_BEFORE_SU_MASK (0xf<<4) -#define EDP_PSR2_IDLE_MASK 0xf #define EDP_PSR2_FRAME_BEFORE_SU(a) ((a)<<4) +#define EDP_PSR2_IDLE_FRAME_MASK 0xf +#define EDP_PSR2_IDLE_FRAME_SHIFT 0 + +#define _PSR_EVENT_TRANS_A 0x60848 +#define _PSR_EVENT_TRANS_B 0x61848 +#define _PSR_EVENT_TRANS_C 0x62848 +#define _PSR_EVENT_TRANS_D 0x63848 +#define _PSR_EVENT_TRANS_EDP 0x6F848 +#define PSR_EVENT(trans) _MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A) +#define PSR_EVENT_PSR2_WD_TIMER_EXPIRE (1 << 17) +#define PSR_EVENT_PSR2_DISABLED (1 << 16) +#define PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN (1 << 15) +#define PSR_EVENT_SU_CRC_FIFO_UNDERRUN (1 << 14) +#define PSR_EVENT_GRAPHICS_RESET (1 << 12) +#define PSR_EVENT_PCH_INTERRUPT (1 << 11) +#define PSR_EVENT_MEMORY_UP (1 << 10) +#define PSR_EVENT_FRONT_BUFFER_MODIFY (1 << 9) +#define PSR_EVENT_WD_TIMER_EXPIRE (1 << 8) +#define PSR_EVENT_PIPE_REGISTERS_UPDATE (1 << 6) +#define PSR_EVENT_REGISTER_UPDATE (1 << 5) +#define PSR_EVENT_HDCP_ENABLE (1 << 4) +#define PSR_EVENT_KVMR_SESSION_ENABLE (1 << 3) +#define PSR_EVENT_VBI_ENABLE (1 << 2) +#define PSR_EVENT_LPSP_MODE_EXIT (1 << 1) +#define PSR_EVENT_PSR_DISABLE (1 << 0) #define EDP_PSR2_STATUS _MMIO(0x6f940) #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) @@ -5265,8 +5184,6 @@ enum { #define DP_LINK_TRAIN_OFF (3 << 28) #define DP_LINK_TRAIN_MASK (3 << 28) #define DP_LINK_TRAIN_SHIFT 28 -#define DP_LINK_TRAIN_PAT_3_CHV (1 << 14) -#define DP_LINK_TRAIN_MASK_CHV ((3 << 28)|(1<<14)) /* CPT Link training mode */ #define DP_LINK_TRAIN_PAT_1_CPT (0 << 8) @@ -6009,6 +5926,7 @@ enum { #define CURSIZE _MMIO(0x700a0) /* 845/865 */ #define _CUR_FBC_CTL_A 0x700a0 /* ivb+ */ #define CUR_FBC_CTL_EN (1 << 31) +#define _CURASURFLIVE 0x700ac /* g4x+ */ #define _CURBCNTR 0x700c0 #define _CURBBASE 0x700c4 #define _CURBPOS 0x700c8 @@ -6025,6 +5943,7 @@ enum { #define CURBASE(pipe) _CURSOR2(pipe, _CURABASE) #define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS) #define CUR_FBC_CTL(pipe) _CURSOR2(pipe, _CUR_FBC_CTL_A) +#define CURSURFLIVE(pipe) _CURSOR2(pipe, _CURASURFLIVE) #define CURSOR_A_OFFSET 0x70080 #define CURSOR_B_OFFSET 0x700c0 @@ -6492,9 +6411,9 @@ enum { #define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ #define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ #define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ -#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-ICL */ #define PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE (1 << 28) -#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) /* Pre-ICL */ #define PLANE_COLOR_CSC_MODE_BYPASS (0 << 17) #define PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709 (1 << 17) #define PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709 (2 << 17) @@ -6589,6 +6508,9 @@ enum { #define _PLANE_BUF_CFG_1_B 0x7127c #define _PLANE_BUF_CFG_2_B 0x7137c +#define SKL_DDB_ENTRY_MASK 0x3FF +#define ICL_DDB_ENTRY_MASK 0x7FF +#define DDB_ENTRY_END_SHIFT 16 #define _PLANE_BUF_CFG_1(pipe) \ _PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B) #define 
_PLANE_BUF_CFG_2(pipe) \ @@ -6779,6 +6701,8 @@ enum { #define PS_SCALER_MODE_MASK (3 << 28) #define PS_SCALER_MODE_DYN (0 << 28) #define PS_SCALER_MODE_HQ (1 << 28) +#define SKL_PS_SCALER_MODE_NV12 (2 << 28) +#define PS_SCALER_MODE_PLANAR (1 << 29) #define PS_PLANE_SEL_MASK (7 << 25) #define PS_PLANE_SEL(plane) (((plane) + 1) << 25) #define PS_FILTER_MASK (3 << 23) @@ -6950,6 +6874,7 @@ enum { #define DE_PCH_EVENT_IVB (1<<28) #define DE_DP_A_HOTPLUG_IVB (1<<27) #define DE_AUX_CHANNEL_A_IVB (1<<26) +#define DE_EDP_PSR_INT_HSW (1<<19) #define DE_SPRITEC_FLIP_DONE_IVB (1<<14) #define DE_PLANEC_FLIP_DONE_IVB (1<<13) #define DE_PIPEC_VBLANK_IVB (1<<10) @@ -7074,6 +6999,7 @@ enum { #define GEN8_DE_MISC_IIR _MMIO(0x44468) #define GEN8_DE_MISC_IER _MMIO(0x4446c) #define GEN8_DE_MISC_GSE (1 << 27) +#define GEN8_DE_EDP_PSR (1 << 19) #define GEN8_PCU_ISR _MMIO(0x444e0) #define GEN8_PCU_IMR _MMIO(0x444e4) @@ -7117,7 +7043,9 @@ enum { #define GEN11_INTR_IDENTITY_REG0 _MMIO(0x190060) #define GEN11_INTR_IDENTITY_REG1 _MMIO(0x190064) #define GEN11_INTR_DATA_VALID (1 << 31) -#define GEN11_INTR_ENGINE_MASK (0xffff) +#define GEN11_INTR_ENGINE_CLASS(x) (((x) & GENMASK(18, 16)) >> 16) +#define GEN11_INTR_ENGINE_INSTANCE(x) (((x) & GENMASK(25, 20)) >> 20) +#define GEN11_INTR_ENGINE_INTR(x) ((x) & 0xffff) #define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + (x * 4)) @@ -7197,6 +7125,7 @@ enum { #define CHICKEN_TRANS_A 0x420c0 #define CHICKEN_TRANS_B 0x420c4 #define CHICKEN_TRANS(trans) _MMIO_TRANS(trans, CHICKEN_TRANS_A, CHICKEN_TRANS_B) +#define VSC_DATA_SEL_SOFTWARE_CONTROL (1<<25) /* GLK and CNL+ */ #define DDI_TRAINING_OVERRIDE_ENABLE (1<<19) #define DDI_TRAINING_OVERRIDE_VALUE (1<<18) #define DDIE_TRAINING_OVERRIDE_ENABLE (1<<17) /* CHICKEN_TRANS_A only */ @@ -7301,18 +7230,22 @@ enum { #define GEN7_L3CNTLREG3 _MMIO(0xB024) #define GEN7_L3_CHICKEN_MODE_REGISTER _MMIO(0xB030) -#define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xB114) +#define GEN11_I2M_WRITE_DISABLE (1 << 28) #define GEN7_L3SQCREG4 _MMIO(0xb034) #define L3SQ_URB_READ_CAM_MATCH_DISABLE (1<<27) #define GEN8_L3SQCREG4 _MMIO(0xb118) -#define GEN8_LQSC_RO_PERF_DIS (1<<27) -#define GEN8_LQSC_FLUSH_COHERENT_LINES (1<<21) +#define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) +#define GEN8_LQSC_RO_PERF_DIS (1 << 27) +#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) #define CNL_HDC_CHICKEN0 _MMIO(0xE5F0) +#define ICL_HDC_MODE _MMIO(0xE5F4) #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15) #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) @@ -8327,8 +8260,30 @@ enum { #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1<<4) #define GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE (1<<6) -#define GEN8_GARBCNTL _MMIO(0xB004) -#define GEN9_GAPS_TSV_CREDIT_DISABLE (1<<7) +#define GEN8_GARBCNTL _MMIO(0xB004) +#define GEN9_GAPS_TSV_CREDIT_DISABLE (1 << 7) +#define GEN11_ARBITRATION_PRIO_ORDER_MASK (0x3f << 22) +#define GEN11_HASH_CTRL_EXCL_MASK (0x7f << 0) +#define GEN11_HASH_CTRL_EXCL_BIT0 (1 << 0) + +#define GEN11_GLBLINVL _MMIO(0xB404) +#define GEN11_BANK_HASH_ADDR_EXCL_MASK (0x7f << 5) +#define GEN11_BANK_HASH_ADDR_EXCL_BIT0 (1 << 5) + +#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550) +#define DFR_DISABLE (1 << 9) + +#define GEN11_GACB_PERF_CTRL _MMIO(0x4B80) +#define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0) +#define GEN11_HASH_CTRL_BIT0 (1 << 0) +#define GEN11_HASH_CTRL_BIT4 (1 << 12) + +#define 
GEN11_LSN_UNSLCVC _MMIO(0xB43C) +#define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9) +#define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) + +#define GAMW_ECO_DEV_RW_IA_REG _MMIO(0x4080) +#define GAMW_ECO_DEV_CTX_RELOAD_DISABLE (1 << 7) /* IVYBRIDGE DPF */ #define GEN7_L3CDERRST1(slice) _MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */ @@ -8837,6 +8792,12 @@ enum skl_power_gate { #define PORT_CLK_SEL_NONE (7<<29) #define PORT_CLK_SEL_MASK (7<<29) +/* On ICL+ this is the same as PORT_CLK_SEL, but all bits change. */ +#define DDI_CLK_SEL(port) PORT_CLK_SEL(port) +#define DDI_CLK_SEL_NONE (0x0 << 28) +#define DDI_CLK_SEL_MG (0x8 << 28) +#define DDI_CLK_SEL_MASK (0xF << 28) + /* Transcoder clock selection */ #define _TRANS_CLK_SEL_A 0x46140 #define _TRANS_CLK_SEL_B 0x46144 @@ -8967,6 +8928,7 @@ enum skl_power_gate { * CNL Clocks */ #define DPCLKA_CFGCR0 _MMIO(0x6C200) +#define DPCLKA_CFGCR0_ICL _MMIO(0x164280) #define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 23 : \ (port)+10)) #define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ @@ -8983,10 +8945,141 @@ enum skl_power_gate { #define PLL_POWER_STATE (1 << 26) #define CNL_DPLL_ENABLE(pll) _MMIO_PLL(pll, DPLL0_ENABLE, DPLL1_ENABLE) +#define _MG_PLL1_ENABLE 0x46030 +#define _MG_PLL2_ENABLE 0x46034 +#define _MG_PLL3_ENABLE 0x46038 +#define _MG_PLL4_ENABLE 0x4603C +/* Bits are the same as DPLL0_ENABLE */ +#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ + _MG_PLL2_ENABLE) + +#define _MG_REFCLKIN_CTL_PORT1 0x16892C +#define _MG_REFCLKIN_CTL_PORT2 0x16992C +#define _MG_REFCLKIN_CTL_PORT3 0x16A92C +#define _MG_REFCLKIN_CTL_PORT4 0x16B92C +#define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) +#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ + _MG_REFCLKIN_CTL_PORT1, \ + _MG_REFCLKIN_CTL_PORT2) + +#define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT3 0x16A8D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT4 0x16B8D8 +#define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO(x) ((x) << 16) +#define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) +#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ + _MG_CLKTOP2_CORECLKCTL1_PORT2) + +#define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT3 0x16A8D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT4 0x16B8D4 +#define MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(x) ((x) << 16) +#define MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(x) ((x) << 14) +#define MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(x) ((x) << 12) +#define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) +#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ + _MG_CLKTOP2_HSCLKCTL_PORT1, \ + _MG_CLKTOP2_HSCLKCTL_PORT2) + +#define _MG_PLL_DIV0_PORT1 0x168A00 +#define _MG_PLL_DIV0_PORT2 0x169A00 +#define _MG_PLL_DIV0_PORT3 0x16AA00 +#define _MG_PLL_DIV0_PORT4 0x16BA00 +#define MG_PLL_DIV0_FRACNEN_H (1 << 30) +#define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) +#define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) +#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ + _MG_PLL_DIV0_PORT2) + +#define _MG_PLL_DIV1_PORT1 0x168A04 +#define _MG_PLL_DIV1_PORT2 0x169A04 +#define _MG_PLL_DIV1_PORT3 0x16AA04 +#define _MG_PLL_DIV1_PORT4 0x16BA04 +#define MG_PLL_DIV1_IREF_NDIVRATIO(x) ((x) << 16) +#define MG_PLL_DIV1_DITHER_DIV_1 (0 << 12) +#define MG_PLL_DIV1_DITHER_DIV_2 (1 << 12) +#define MG_PLL_DIV1_DITHER_DIV_4 (2 << 12) +#define 
MG_PLL_DIV1_DITHER_DIV_8 (3 << 12) +#define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) +#define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) +#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ + _MG_PLL_DIV1_PORT2) + +#define _MG_PLL_LF_PORT1 0x168A08 +#define _MG_PLL_LF_PORT2 0x169A08 +#define _MG_PLL_LF_PORT3 0x16AA08 +#define _MG_PLL_LF_PORT4 0x16BA08 +#define MG_PLL_LF_TDCTARGETCNT(x) ((x) << 24) +#define MG_PLL_LF_AFCCNTSEL_256 (0 << 20) +#define MG_PLL_LF_AFCCNTSEL_512 (1 << 20) +#define MG_PLL_LF_GAINCTRL(x) ((x) << 16) +#define MG_PLL_LF_INT_COEFF(x) ((x) << 8) +#define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) +#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ + _MG_PLL_LF_PORT2) + +#define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C +#define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C +#define _MG_PLL_FRAC_LOCK_PORT3 0x16AA0C +#define _MG_PLL_FRAC_LOCK_PORT4 0x16BA0C +#define MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 (1 << 18) +#define MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 (1 << 16) +#define MG_PLL_FRAC_LOCK_LOCKTHRESH(x) ((x) << 11) +#define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) +#define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) +#define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) +#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ + _MG_PLL_FRAC_LOCK_PORT1, \ + _MG_PLL_FRAC_LOCK_PORT2) + +#define _MG_PLL_SSC_PORT1 0x168A10 +#define _MG_PLL_SSC_PORT2 0x169A10 +#define _MG_PLL_SSC_PORT3 0x16AA10 +#define _MG_PLL_SSC_PORT4 0x16BA10 +#define MG_PLL_SSC_EN (1 << 28) +#define MG_PLL_SSC_TYPE(x) ((x) << 26) +#define MG_PLL_SSC_STEPLENGTH(x) ((x) << 16) +#define MG_PLL_SSC_STEPNUM(x) ((x) << 10) +#define MG_PLL_SSC_FLLEN (1 << 9) +#define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) +#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ + _MG_PLL_SSC_PORT2) + +#define _MG_PLL_BIAS_PORT1 0x168A14 +#define _MG_PLL_BIAS_PORT2 0x169A14 +#define _MG_PLL_BIAS_PORT3 0x16AA14 +#define _MG_PLL_BIAS_PORT4 0x16BA14 +#define MG_PLL_BIAS_BIAS_GB_SEL(x) ((x) << 30) +#define MG_PLL_BIAS_INIT_DCOAMP(x) ((x) << 24) +#define MG_PLL_BIAS_BIAS_BONUS(x) ((x) << 16) +#define MG_PLL_BIAS_BIASCAL_EN (1 << 15) +#define MG_PLL_BIAS_CTRIM(x) ((x) << 8) +#define MG_PLL_BIAS_VREF_RDAC(x) ((x) << 5) +#define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) +#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ + _MG_PLL_BIAS_PORT2) + +#define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT3 0x16AA18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT4 0x16BA18 +#define MG_PLL_TDC_COLDST_IREFINT_EN (1 << 27) +#define MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(x) ((x) << 17) +#define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) +#define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) +#define MG_PLL_TDC_TDCSEL(x) ((x) << 0) +#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ + _MG_PLL_TDC_COLDST_BIAS_PORT2) + #define _CNL_DPLL0_CFGCR0 0x6C000 #define _CNL_DPLL1_CFGCR0 0x6C080 #define DPLL_CFGCR0_HDMI_MODE (1 << 30) #define DPLL_CFGCR0_SSC_ENABLE (1 << 29) +#define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) #define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) #define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) #define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) @@ -9020,8 +9113,19 @@ enum skl_power_gate { #define DPLL_CFGCR1_PDIV_5 (4 << 2) #define DPLL_CFGCR1_PDIV_7 (8 << 2) #define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) +#define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) #define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, 
_CNL_DPLL1_CFGCR1) +#define _ICL_DPLL0_CFGCR0 0x164000 +#define _ICL_DPLL1_CFGCR0 0x164080 +#define ICL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR0, \ + _ICL_DPLL1_CFGCR0) + +#define _ICL_DPLL0_CFGCR1 0x164004 +#define _ICL_DPLL1_CFGCR1 0x164084 +#define ICL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \ + _ICL_DPLL1_CFGCR1) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ @@ -9793,6 +9897,13 @@ enum skl_power_gate { #define GEN9_MFX1_MOCS(i) _MMIO(0xca00 + (i) * 4) /* Media 1 MOCS registers */ #define GEN9_VEBOX_MOCS(i) _MMIO(0xcb00 + (i) * 4) /* Video MOCS registers */ #define GEN9_BLT_MOCS(i) _MMIO(0xcc00 + (i) * 4) /* Blitter MOCS registers */ +/* Media decoder 2 MOCS registers */ +#define GEN11_MFX2_MOCS(i) _MMIO(0x10000 + (i) * 4) + +#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) +#define PMFLUSHDONE_LNICRSDROP (1 << 20) +#define PMFLUSH_GAPL3UNBLOCK (1 << 21) +#define PMFLUSHDONE_LNEBLK (1 << 22) /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 282f57630cc1..8928894dd9c7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -49,7 +49,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->timeline->common->name; + return to_request(fence)->timeline->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -59,11 +59,7 @@ static bool i915_fence_signaled(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence) { - if (i915_fence_signaled(fence)) - return false; - - intel_engine_enable_signaling(to_request(fence), true); - return !i915_fence_signaled(fence); + return intel_engine_enable_signaling(to_request(fence), true); } static signed long i915_fence_wait(struct dma_fence *fence, @@ -129,22 +125,22 @@ i915_dependency_free(struct drm_i915_private *i915, } static void -__i915_priotree_add_dependency(struct i915_priotree *pt, - struct i915_priotree *signal, - struct i915_dependency *dep, - unsigned long flags) +__i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags) { INIT_LIST_HEAD(&dep->dfs_link); list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &pt->signalers_list); + list_add(&dep->signal_link, &node->signalers_list); dep->signaler = signal; dep->flags = flags; } static int -i915_priotree_add_dependency(struct drm_i915_private *i915, - struct i915_priotree *pt, - struct i915_priotree *signal) +i915_sched_node_add_dependency(struct drm_i915_private *i915, + struct i915_sched_node *node, + struct i915_sched_node *signal) { struct i915_dependency *dep; @@ -152,16 +148,18 @@ i915_priotree_add_dependency(struct drm_i915_private *i915, if (!dep) return -ENOMEM; - __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); + __i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_ALLOC); return 0; } static void -i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) +i915_sched_node_fini(struct drm_i915_private *i915, + struct i915_sched_node *node) { - struct i915_dependency *dep, *next; + struct i915_dependency *dep, *tmp; - GEM_BUG_ON(!list_empty(&pt->link)); + GEM_BUG_ON(!list_empty(&node->link)); /* * Everyone we depended 
upon (the fences we wait to be signaled) @@ -169,8 +167,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) * However, retirement is run independently on each timeline and * so we may be called out-of-order. */ - list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { - GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { + GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->wait_link); @@ -179,8 +177,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != pt); + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != node); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->signal_link); @@ -190,17 +188,18 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } static void -i915_priotree_init(struct i915_priotree *pt) +i915_sched_node_init(struct i915_sched_node *node) { - INIT_LIST_HEAD(&pt->signalers_list); - INIT_LIST_HEAD(&pt->waiters_list); - INIT_LIST_HEAD(&pt->link); - pt->priority = I915_PRIORITY_INVALID; + INIT_LIST_HEAD(&node->signalers_list); + INIT_LIST_HEAD(&node->waiters_list); + INIT_LIST_HEAD(&node->link); + node->attr.priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct intel_engine_cs *engine; + struct i915_timeline *timeline; enum intel_engine_id id; int ret; @@ -211,30 +210,37 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) if (ret) return ret; + GEM_BUG_ON(i915->gt.active_requests); + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { - struct i915_gem_timeline *timeline; - struct intel_timeline *tl = engine->timeline; + GEM_TRACE("%s seqno %d (current %d) -> %d\n", + engine->name, + engine->timeline.seqno, + intel_engine_get_seqno(engine), + seqno); - if (!i915_seqno_passed(seqno, tl->seqno)) { + if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); + intel_engine_init_hangcheck(engine); GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } /* Check we are idle before we fiddle with hw state! 
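 *
 * (i915_seqno_passed() above is the usual wrap-safe u32 comparison,
 * roughly:
 *
 *   static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 *   {
 *           return (s32)(seq1 - seq2) >= 0;
 *   }
 *
 * so, for example, seq1 = 2 still counts as having passed
 * seq2 = 0xfffffffe across the wrap.)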
*/ GEM_BUG_ON(!intel_engine_is_idle(engine)); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); /* Finally reset hw state */ intel_engine_init_global_seqno(engine, seqno); - tl->seqno = seqno; - - list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].global_sync, 0, - sizeof(timeline->engine[id].global_sync)); + engine->timeline.seqno = seqno; } + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); + + i915->gt.request_serial = seqno; + return 0; } @@ -251,83 +257,37 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) return reset_all_global_seqno(i915, seqno - 1); } -static void mark_busy(struct drm_i915_private *i915) +static int reserve_gt(struct drm_i915_private *i915) { - if (i915->gt.awake) - return; - - GEM_BUG_ON(!i915->gt.active_requests); - - intel_runtime_pm_get_noresume(i915); + int ret; /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. + * Reservation is fine until we may need to wrap around * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. + * By incrementing the serial for every request, we know that no + * individual engine may exceed that serial (as each is reset to 0 + * on any wrap). This protects even the most pessimistic of migrations + * of every request from all engines onto just one. 
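+ *
+ * As an illustrative corner case: with request_serial at 0xffffffff,
+ * the increment below wraps it to 0, reset_all_global_seqno() rewinds
+ * every engine timeline (and request_serial itself) to 0, and the
+ * retried increment then hands out serial 1.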
*/ - intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - - i915->gt.awake = true; - if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ - i915->gt.epoch = 1; - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -static int reserve_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; - int ret; - - /* Reservation is fine until we need to wrap around */ - if (unlikely(add_overflows(seqno, active))) { + while (unlikely(++i915->gt.request_serial == 0)) { ret = reset_all_global_seqno(i915, 0); if (ret) { - engine->timeline->inflight_seqnos--; + i915->gt.request_serial--; return ret; } } if (!i915->gt.active_requests++) - mark_busy(i915); + i915_gem_unpark(i915); return 0; } -static void unreserve_engine(struct intel_engine_cs *engine) +static void unreserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - - if (!--i915->gt.active_requests) { - /* Cancel the mark_busy() from our reserve_engine() */ - GEM_BUG_ON(!i915->gt.awake); - mod_delayed_work(i915->wq, - &i915->gt.idle_work, - msecs_to_jiffies(100)); - } - - GEM_BUG_ON(!engine->timeline->inflight_seqnos); - engine->timeline->inflight_seqnos--; + GEM_BUG_ON(!i915->gt.active_requests); + if (!--i915->gt.active_requests) + i915_gem_park(i915); } void i915_gem_retire_noop(struct i915_gem_active *active, @@ -338,6 +298,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active, static void advance_ring(struct i915_request *request) { + struct intel_ring *ring = request->ring; unsigned int tail; /* @@ -349,7 +310,8 @@ static void advance_ring(struct i915_request *request) * Note this requires that we are always called in request * completion order. */ - if (list_is_last(&request->ring_link, &request->ring->request_list)) { + GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); + if (list_is_last(&request->ring_link, &ring->request_list)) { /* * We may race here with execlists resubmitting this request * as we retire it. The resubmission will move the ring->tail @@ -358,13 +320,14 @@ static void advance_ring(struct i915_request *request) * is just about to be. Either works, if we miss the last two * noops - they are safe to be replayed on a reset. 
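 *
 * In outline: ring->head advances to request->postfix while newer
 * requests still occupy the ring, and all the way to the request's
 * own tail once it is the last request queued, handing the whole
 * ring back as free space.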
*/ - tail = READ_ONCE(request->ring->tail); + tail = READ_ONCE(request->tail); + list_del(&ring->active_link); } else { tail = request->postfix; } - list_del(&request->ring_link); + list_del_init(&request->ring_link); - request->ring->head = tail; + ring->head = tail; } static void free_capture_list(struct i915_request *request) @@ -380,25 +343,84 @@ static void free_capture_list(struct i915_request *request) } } +static void __retire_engine_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n", + __func__, engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(engine)); + + GEM_BUG_ON(!i915_request_completed(rq)); + + local_irq_disable(); + + spin_lock(&engine->timeline.lock); + GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); + list_del_init(&rq->link); + spin_unlock(&engine->timeline.lock); + + spin_lock(&rq->lock); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + dma_fence_signal_locked(&rq->fence); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) + intel_engine_cancel_signaling(rq); + if (rq->waitboost) { + GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); + atomic_dec(&rq->i915->gt_pm.rps.num_waiters); + } + spin_unlock(&rq->lock); + + local_irq_enable(); + + /* + * The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. + */ + if (engine->last_retired_context) + intel_context_unpin(engine->last_retired_context, engine); + engine->last_retired_context = rq->ctx; +} + +static void __retire_engine_upto(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + struct i915_request *tmp; + + if (list_empty(&rq->link)) + return; + + do { + tmp = list_first_entry(&engine->timeline.requests, + typeof(*tmp), link); + + GEM_BUG_ON(tmp->engine != engine); + __retire_engine_request(engine, tmp); + } while (tmp != rq); +} + static void i915_request_retire(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", + request->engine->name, + request->fence.context, request->fence.seqno, + request->global_seqno, + intel_engine_get_seqno(request->engine)); + lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); GEM_BUG_ON(!i915_request_completed(request)); - GEM_BUG_ON(!request->i915->gt.active_requests); trace_i915_request_retire(request); - spin_lock_irq(&engine->timeline->lock); - list_del_init(&request->link); - spin_unlock_irq(&engine->timeline->lock); - - unreserve_engine(request->engine); advance_ring(request); - free_capture_list(request); /* @@ -434,73 +456,74 @@ static void i915_request_retire(struct i915_request *request) /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->ctx->ban_score); + intel_context_unpin(request->ctx, request->engine); - /* - * The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. 
However, since we - * cannot take the required locks at i915_request_submit() we - * defer the unpinning of the active context to now, retirement of - * the subsequent request. - */ - if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); - engine->last_retired_context = request->ctx; + __retire_engine_upto(request->engine, request); - spin_lock_irq(&request->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) - dma_fence_signal_locked(&request->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); - if (request->waitboost) { - GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); - atomic_dec(&request->i915->gt_pm.rps.num_waiters); - } - spin_unlock_irq(&request->lock); + unreserve_gt(request->i915); - i915_priotree_fini(request->i915, &request->priotree); + i915_sched_node_fini(request->i915, &request->sched); i915_request_put(request); } void i915_request_retire_upto(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; + struct intel_ring *ring = rq->ring; struct i915_request *tmp; + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", + rq->engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(rq->engine)); + lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->link)) + if (list_empty(&rq->ring_link)) return; do { - tmp = list_first_entry(&engine->timeline->requests, - typeof(*tmp), link); + tmp = list_first_entry(&ring->request_list, + typeof(*tmp), ring_link); i915_request_retire(tmp); } while (tmp != rq); } -static u32 timeline_get_seqno(struct intel_timeline *tl) +static u32 timeline_get_seqno(struct i915_timeline *tl) { return ++tl->seqno; } +static void move_to_timeline(struct i915_request *request, + struct i915_timeline *timeline) +{ + GEM_BUG_ON(request->timeline == &request->engine->timeline); + lockdep_assert_held(&request->engine->timeline.lock); + + spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); + list_move_tail(&request->link, &timeline->requests); + spin_unlock(&request->timeline->lock); +} + void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; u32 seqno; + GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", + engine->name, + request->fence.context, request->fence.seqno, + engine->timeline.seqno + 1, + intel_engine_get_seqno(engine)); + GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); - /* Transfer from per-context onto the global per-engine timeline */ - timeline = engine->timeline; - GEM_BUG_ON(timeline == request->timeline); GEM_BUG_ON(request->global_seqno); - seqno = timeline_get_seqno(timeline); + seqno = timeline_get_seqno(&engine->timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); @@ -514,9 +537,8 @@ void __i915_request_submit(struct i915_request *request) engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); - spin_lock(&request->timeline->lock); - list_move_tail(&request->link, &timeline->requests); - spin_unlock(&request->timeline->lock); + /* Transfer from per-context onto the global per-engine timeline */ + move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); @@ -529,30 +551,35 @@ void 
i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; + + GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n", + engine->name, + request->fence.context, request->fence.seqno, + request->global_seqno, + intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); /* * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), request->global_seqno)); - engine->timeline->seqno--; + engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -562,12 +589,7 @@ void __i915_request_unsubmit(struct i915_request *request) spin_unlock(&request->lock); /* Transfer back from the global per-engine timeline to per-context */ - timeline = request->timeline; - GEM_BUG_ON(timeline == engine->timeline); - - spin_lock(&timeline->lock); - list_move(&request->link, &timeline->requests); - spin_unlock(&timeline->lock); + move_to_timeline(request, request->timeline); /* * We don't need to wake_up any waiters on request->execute, they @@ -584,11 +606,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static int __i915_sw_fence_call @@ -659,12 +681,12 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ring = engine->context_pin(engine, ctx); + ring = intel_context_pin(ctx, engine); if (IS_ERR(ring)) return ERR_CAST(ring); GEM_BUG_ON(!ring); - ret = reserve_engine(engine); + ret = reserve_gt(i915); if (ret) goto err_unpin; @@ -672,10 +694,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unreserve; - /* Move the oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry_or_null(&engine->timeline->requests, - typeof(*rq), link); - if (rq && i915_request_completed(rq)) + /* Move our oldest request to the slab-cache (if not in use!) 
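+ *
+ * A rough picture: ring->request_list is ordered oldest first, so
+ * when its head has already completed (and is not the only request
+ * outstanding), retiring it here returns one request to the slab
+ * that the kmem_cache_alloc() below can hand straight back.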
*/ + rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ring->request_list) && + i915_request_completed(rq)) i915_request_retire(rq); /* @@ -735,8 +757,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) } } - rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); - GEM_BUG_ON(rq->timeline == engine->timeline); + INIT_LIST_HEAD(&rq->active_list); + rq->i915 = i915; + rq->engine = engine; + rq->ctx = ctx; + rq->ring = ring; + rq->timeline = ring->timeline; + GEM_BUG_ON(rq->timeline == &engine->timeline); spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, @@ -749,13 +776,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); init_waitqueue_head(&rq->execute); - i915_priotree_init(&rq->priotree); - - INIT_LIST_HEAD(&rq->active_list); - rq->i915 = i915; - rq->engine = engine; - rq->ctx = ctx; - rq->ring = ring; + i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; @@ -792,6 +813,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unwind; + /* Keep a second pin for the dual retirement along engine and ring */ + __intel_context_pin(rq->ctx, engine); + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; @@ -801,14 +825,14 @@ err_unwind: /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&rq->active_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); + GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); + GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); kmem_cache_free(i915->requests, rq); err_unreserve: - unreserve_engine(engine); + unreserve_gt(i915); err_unpin: - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); return ERR_PTR(ret); } @@ -824,9 +848,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; if (to->engine->schedule) { - ret = i915_priotree_add_dependency(to->i915, - &to->priotree, - &from->priotree); + ret = i915_sched_node_add_dependency(to->i915, + &to->sched, + &from->sched); if (ret < 0) return ret; } @@ -904,7 +928,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context != rq->i915->mm.unordered_timeline && - intel_timeline_sync_is_later(rq->timeline, fence)) + i915_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) @@ -918,7 +942,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Record the latest fence used against each timeline */ if (fence->context != rq->i915->mm.unordered_timeline) - intel_timeline_sync_set(rq->timeline, fence); + i915_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; @@ -995,11 +1019,14 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; struct intel_ring *ring = request->ring; - struct intel_timeline *timeline = request->timeline; + struct i915_timeline *timeline = request->timeline; struct i915_request *prev; u32 *cs; int err; + GEM_TRACE("%s fence %llx:%d\n", + engine->name, request->fence.context, request->fence.seqno); + 
lockdep_assert_held(&request->i915->drm.struct_mutex); trace_i915_request_add(request); @@ -1054,10 +1081,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); if (engine->schedule) - __i915_priotree_add_dependency(&request->priotree, - &prev->priotree, - &request->dep, - 0); + __i915_sched_node_add_dependency(&request->sched, + &prev->sched, + &request->dep, + 0); } spin_lock_irq(&timeline->lock); @@ -1068,6 +1095,8 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); + if (list_is_first(&request->ring_link, &ring->request_list)) + list_add(&ring->active_link, &request->i915->gt.active_rings); request->emitted_jiffies = jiffies; /* @@ -1081,12 +1110,11 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(request, request->ctx->priority); + engine->schedule(request, &request->ctx->sched); rcu_read_unlock(); - - local_bh_disable(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ @@ -1206,11 +1234,13 @@ static bool __i915_spin_request(const struct i915_request *rq, static bool __i915_wait_request_check_and_reset(struct i915_request *request) { - if (likely(!i915_reset_handoff(&request->i915->gpu_error))) + struct i915_gpu_error *error = &request->i915->gpu_error; + + if (likely(!i915_reset_handoff(error))) return false; __set_current_state(TASK_RUNNING); - i915_reset(request->i915, 0); + i915_reset(request->i915, error->stalled_mask, error->reason); return true; } @@ -1373,38 +1403,30 @@ complete: return timeout; } -static void engine_retire_requests(struct intel_engine_cs *engine) +static void ring_retire_requests(struct intel_ring *ring) { struct i915_request *request, *next; - u32 seqno = intel_engine_get_seqno(engine); - LIST_HEAD(retire); - spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, - &engine->timeline->requests, link) { - if (!i915_seqno_passed(seqno, request->global_seqno)) + &ring->request_list, ring_link) { + if (!i915_request_completed(request)) break; - list_move_tail(&request->link, &retire); - } - spin_unlock_irq(&engine->timeline->lock); - - list_for_each_entry_safe(request, next, &retire, link) i915_request_retire(request); + } } void i915_retire_requests(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_ring *ring, *tmp; lockdep_assert_held(&i915->drm.struct_mutex); if (!i915->gt.active_requests) return; - for_each_engine(engine, i915, id) - engine_retire_requests(engine); + list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) + ring_retire_requests(ring); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7d6eb82eeb91..eddbd4245cb3 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,13 +28,16 @@ #include <linux/dma-fence.h> #include "i915_gem.h" +#include "i915_scheduler.h" #include "i915_sw_fence.h" +#include "i915_scheduler.h" #include 
<uapi/drm/i915_drm.h> struct drm_file; struct drm_i915_gem_object; struct i915_request; +struct i915_timeline; struct intel_wait { struct rb_node node; @@ -48,44 +51,6 @@ struct intel_signal_node { struct list_head link; }; -struct i915_dependency { - struct i915_priotree *signaler; - struct list_head signal_link; - struct list_head wait_link; - struct list_head dfs_link; - unsigned long flags; -#define I915_DEPENDENCY_ALLOC BIT(0) -}; - -/* - * "People assume that time is a strict progression of cause to effect, but - * actually, from a nonlinear, non-subjective viewpoint, it's more like a big - * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 - * - * Requests exist in a complex web of interdependencies. Each request - * has to wait for some other request to complete before it is ready to be run - * (e.g. we have to wait until the pixels have been rendering into a texture - * before we can copy from it). We track the readiness of a request in terms - * of fences, but we also need to keep the dependency tree for the lifetime - * of the request (beyond the life of an individual fence). We use the tree - * at various points to reorder the requests whilst keeping the requests - * in order with respect to their various dependencies. - */ -struct i915_priotree { - struct list_head signalers_list; /* those before us, we depend upon */ - struct list_head waiters_list; /* those after us, they depend upon us */ - struct list_head link; - int priority; -}; - -enum { - I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, - I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, - I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - - I915_PRIORITY_INVALID = INT_MIN -}; - struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; @@ -131,7 +96,7 @@ struct i915_request { struct i915_gem_context *ctx; struct intel_engine_cs *engine; struct intel_ring *ring; - struct intel_timeline *timeline; + struct i915_timeline *timeline; struct intel_signal_node signaling; /* @@ -154,7 +119,7 @@ struct i915_request { * to retirement), i.e. bidirectional dependency information for the * request not tied to individual fences. */ - struct i915_priotree priotree; + struct i915_sched_node sched; struct i915_dependency dep; /** @@ -343,10 +308,10 @@ static inline bool i915_request_started(const struct i915_request *rq) seqno - 1); } -static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) { const struct i915_request *rq = - container_of(pt, const struct i915_request, priotree); + container_of(node, const struct i915_request, sched); return i915_request_completed(rq); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h new file mode 100644 index 000000000000..70a42220358d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef _I915_SCHEDULER_H_ +#define _I915_SCHEDULER_H_ + +#include <linux/bitops.h> + +#include <uapi/drm/i915_drm.h> + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN +}; + +struct i915_sched_attr { + /** + * @priority: execution and service priority + * + * All clients are equal, but some are more equal than others! 
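+ *
+ * For scale: userspace may request values between
+ * I915_CONTEXT_MIN_USER_PRIORITY (-1023) and
+ * I915_CONTEXT_MAX_USER_PRIORITY (1023), so the kernel-internal
+ * I915_PRIORITY_MIN/I915_PRIORITY_MAX above sit just outside the
+ * range any client can ask for.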
+ * + * Requests from a context with a greater (more positive) value of + * @priority will be executed before those with a lower @priority + * value, forming a simple QoS. + * + * The &drm_i915_private.kernel_context is assigned the lowest priority. + */ + int priority; +}; + +/* + * "People assume that time is a strict progression of cause to effect, but + * actually, from a nonlinear, non-subjective viewpoint, it's more like a big + * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 + * + * Requests exist in a complex web of interdependencies. Each request + * has to wait for some other request to complete before it is ready to be run + * (e.g. we have to wait until the pixels have been rendering into a texture + * before we can copy from it). We track the readiness of a request in terms + * of fences, but we also need to keep the dependency tree for the lifetime + * of the request (beyond the life of an individual fence). We use the tree + * at various points to reorder the requests whilst keeping the requests + * in order with respect to their various dependencies. + * + * There is no active component to the "scheduler". As we know the dependency + * DAG of each request, we are able to insert it into a sorted queue when it + * is ready, and are able to reorder its portion of the graph to accommodate + * dynamic priority changes. + */ +struct i915_sched_node { + struct list_head signalers_list; /* those before us, we depend upon */ + struct list_head waiters_list; /* those after us, they depend upon us */ + struct list_head link; + struct i915_sched_attr attr; +}; + +struct i915_dependency { + struct i915_sched_node *signaler; + struct list_head signal_link; + struct list_head wait_link; + struct list_head dfs_link; + unsigned long flags; +#define I915_DEPENDENCY_ALLOC BIT(0) +}; + +#endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c new file mode 100644 index 000000000000..4667cc08c416 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016-2018 Intel Corporation + */ + +#include "i915_drv.h" + +#include "i915_timeline.h" +#include "i915_syncmap.h" + +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + + timeline->name = name; + + list_add(&timeline->link, &i915->gt.timelines); + + /* Called during early_init before we know how many engines there are */ + + timeline->fence_context = dma_fence_context_alloc(1); + + spin_lock_init(&timeline->lock); + + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); +} + +/** + * i915_timelines_park - called when the driver idles + * @i915: the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. 
Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void i915_timelines_park(struct drm_i915_private *i915) +{ + struct i915_timeline *timeline; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. + */ + i915_syncmap_free(&timeline->sync); + } +} + +void i915_timeline_fini(struct i915_timeline *timeline) +{ + GEM_BUG_ON(!list_empty(&timeline->requests)); + + i915_syncmap_free(&timeline->sync); + + list_del(&timeline->link); +} + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name) +{ + struct i915_timeline *timeline; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + i915_timeline_init(i915, timeline, name); + kref_init(&timeline->kref); + + return timeline; +} + +void __i915_timeline_free(struct kref *kref) +{ + struct i915_timeline *timeline = + container_of(kref, typeof(*timeline), kref); + + i915_timeline_fini(timeline); + kfree(timeline); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_timeline.c" +#include "selftests/i915_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 33e01bf6aa36..dc2a4632faa7 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -22,27 +22,20 @@ * */ -#ifndef I915_GEM_TIMELINE_H -#define I915_GEM_TIMELINE_H +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H #include <linux/list.h> +#include <linux/kref.h> #include "i915_request.h" #include "i915_syncmap.h" #include "i915_utils.h" -struct i915_gem_timeline; - -struct intel_timeline { +struct i915_timeline { u64 fence_context; u32 seqno; - /** - * Count of outstanding requests, from the time they are constructed - * to the moment they are retired. Loosely coupled to hardware. 
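For context, a short sketch of the new timeline lifetime (illustrative, not part of the patch; it assumes struct_mutex is held as i915_timeline_init() asserts, and uses only the functions added above plus the kref helpers declared in the header below):

static int example_timeline_use(struct drm_i915_private *i915)
{
	struct i915_timeline *tl;

	/* Allocates, initialises and links the timeline onto gt.timelines */
	tl = i915_timeline_create(i915, "example");
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	/* ... build and submit requests against tl ... */

	/* Dropping the final reference triggers __i915_timeline_free() */
	i915_timeline_put(tl);
	return 0;
}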
- */ - u32 inflight_seqnos; - spinlock_t lock; /** @@ -77,47 +70,57 @@ struct intel_timeline { */ u32 global_sync[I915_NUM_ENGINES]; - struct i915_gem_timeline *common; -}; - -struct i915_gem_timeline { struct list_head link; - - struct drm_i915_private *i915; const char *name; - struct intel_timeline engine[I915_NUM_ENGINES]; + struct kref kref; }; -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *tl, - const char *name); -int i915_gem_timeline_init__global(struct drm_i915_private *i915); -void i915_gem_timelines_park(struct drm_i915_private *i915); -void i915_gem_timeline_fini(struct i915_gem_timeline *tl); +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name); +void i915_timeline_fini(struct i915_timeline *tl); + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name); -static inline int __intel_timeline_sync_set(struct intel_timeline *tl, - u64 context, u32 seqno) +static inline struct i915_timeline * +i915_timeline_get(struct i915_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __i915_timeline_free(struct kref *kref); +static inline void i915_timeline_put(struct i915_timeline *timeline) +{ + kref_put(&timeline->kref, __i915_timeline_free); +} + +static inline int __i915_timeline_sync_set(struct i915_timeline *tl, + u64 context, u32 seqno) { return i915_syncmap_set(&tl->sync, context, seqno); } -static inline int intel_timeline_sync_set(struct intel_timeline *tl, - const struct dma_fence *fence) +static inline int i915_timeline_sync_set(struct i915_timeline *tl, + const struct dma_fence *fence) { - return __intel_timeline_sync_set(tl, fence->context, fence->seqno); + return __i915_timeline_sync_set(tl, fence->context, fence->seqno); } -static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, - u64 context, u32 seqno) +static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, + u64 context, u32 seqno) { return i915_syncmap_is_later(&tl->sync, context, seqno); } -static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, - const struct dma_fence *fence) +static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, + const struct dma_fence *fence) { - return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); + return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +void i915_timelines_park(struct drm_i915_private *i915); + #endif diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 408827bf5d96..8cc3a256f29d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -679,45 +679,68 @@ DEFINE_EVENT(i915_request, i915_request_execute, TP_ARGS(rq) ); -DECLARE_EVENT_CLASS(i915_request_hw, - TP_PROTO(struct i915_request *rq, unsigned int port), - TP_ARGS(rq, port), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, hw_id) - __field(u32, ring) - __field(u32, ctx) - __field(u32, seqno) - __field(u32, global_seqno) - __field(u32, port) - ), - - TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; - __entry->ring = rq->engine->id; - __entry->ctx = rq->fence.context; - __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; - __entry->port = port; - ), - - TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", - __entry->dev, __entry->hw_id, __entry->ring, - __entry->ctx, 
__entry->seqno, - __entry->global_seqno, __entry->port) -); +TRACE_EVENT(i915_request_in, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, hw_id) + __field(u32, ring) + __field(u32, ctx) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, port) + __field(u32, prio) + ), + + TP_fast_assign( + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->prio = rq->sched.attr.priority; + __entry->port = port; + ), -DEFINE_EVENT(i915_request_hw, i915_request_in, - TP_PROTO(struct i915_request *rq, unsigned int port), - TP_ARGS(rq, port) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, prio=%u, global=%u, port=%u", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->prio, __entry->global_seqno, + __entry->port) ); -DEFINE_EVENT(i915_request, i915_request_out, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) +TRACE_EVENT(i915_request_out, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, hw_id) + __field(u32, ring) + __field(u32, ctx) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, completed) + ), + + TP_fast_assign( + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->completed = i915_request_completed(rq); + ), + + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, completed?=%u", + __entry->dev, __entry->hw_id, __entry->ring, + __entry->ctx, __entry->seqno, + __entry->global_seqno, __entry->completed) ); + #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void @@ -811,42 +834,6 @@ DEFINE_EVENT(i915_request, i915_request_wait_end, TP_ARGS(rq) ); -TRACE_EVENT(i915_flip_request, - TP_PROTO(int plane, struct drm_i915_gem_object *obj), - - TP_ARGS(plane, obj), - - TP_STRUCT__entry( - __field(int, plane) - __field(struct drm_i915_gem_object *, obj) - ), - - TP_fast_assign( - __entry->plane = plane; - __entry->obj = obj; - ), - - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) -); - -TRACE_EVENT(i915_flip_complete, - TP_PROTO(int plane, struct drm_i915_gem_object *obj), - - TP_ARGS(plane, obj), - - TP_STRUCT__entry( - __field(int, plane) - __field(struct drm_i915_gem_object *, obj) - ), - - TP_fast_assign( - __entry->plane = plane; - __entry->obj = obj; - ), - - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) -); - TRACE_EVENT_CONDITION(i915_reg_rw, TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace), diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 51dbfe5bb418..00165ad55fb3 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -40,8 +40,8 @@ #undef WARN_ON_ONCE #define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")") -#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ - (long)(x), __func__) +#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ + __stringify(x), (long)(x)) #if GCC_VERSION >= 70000 #define add_overflows(A, B) \ @@ -120,6 +120,12 @@ static inline u64 ptr_to_u64(const void *ptr) #include <linux/list.h> +static 
inline int list_is_first(const struct list_head *list, + const struct list_head *head) +{ + return head->next == list; +} + static inline void __list_del_many(struct list_head *head, struct list_head *first) { diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4bda3bd29bf5..9324d476e0a7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -46,8 +46,6 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) - WARN_ON(i915_vma_unbind(vma)); GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) @@ -232,7 +230,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj, if (!vma) vma = vma_create(obj, vm, view); - GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; @@ -684,13 +681,43 @@ err_unpin: return ret; } -static void i915_vma_destroy(struct i915_vma *vma) +void i915_vma_close(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + /* + * We defer actually closing, unbinding and destroying the VMA until + * the next idle point, or if the object is freed in the meantime. By + * postponing the unbind, we allow for it to be resurrected by the + * client, avoiding the work required to rebind the VMA. This is + * advantageous for DRI, where the client/server pass objects + * between themselves, temporarily opening a local VMA to the + * object, and then closing it again. The same object is then reused + * on the next frame (or two, depending on the depth of the swap queue) + * causing us to rebind the VMA once more. This ends up being a lot + * of wasted work for the steady state. 
+ */ + list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma); +} + +void i915_vma_reopen(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + if (vma->flags & I915_VMA_CLOSED) { + vma->flags &= ~I915_VMA_CLOSED; + list_del(&vma->closed_link); + } +} + +static void __i915_vma_destroy(struct i915_vma *vma) { int i; GEM_BUG_ON(vma->node.allocated); - GEM_BUG_ON(i915_vma_is_active(vma)); - GEM_BUG_ON(!i915_vma_is_closed(vma)); GEM_BUG_ON(vma->fence); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) @@ -699,6 +726,7 @@ static void i915_vma_destroy(struct i915_vma *vma) list_del(&vma->obj_link); list_del(&vma->vm_link); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -706,15 +734,30 @@ static void i915_vma_destroy(struct i915_vma *vma) kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); } -void i915_vma_close(struct i915_vma *vma) +void i915_vma_destroy(struct i915_vma *vma) { - GEM_BUG_ON(i915_vma_is_closed(vma)); - vma->flags |= I915_VMA_CLOSED; + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(i915_vma_is_pinned(vma)); + + if (i915_vma_is_closed(vma)) + list_del(&vma->closed_link); + + WARN_ON(i915_vma_unbind(vma)); + __i915_vma_destroy(vma); +} + +void i915_vma_parked(struct drm_i915_private *i915) +{ + struct i915_vma *vma, *next; - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) - WARN_ON(i915_vma_unbind(vma)); + list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { + GEM_BUG_ON(!i915_vma_is_closed(vma)); + i915_vma_destroy(vma); + } + + GEM_BUG_ON(!list_empty(&i915->gt.closed_vma)); } static void __i915_vma_iounmap(struct i915_vma *vma) @@ -804,7 +847,7 @@ int i915_vma_unbind(struct i915_vma *vma) return -EBUSY; if (!drm_mm_node_allocated(&vma->node)) - goto destroy; + return 0; GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); @@ -841,10 +884,6 @@ int i915_vma_unbind(struct i915_vma *vma) i915_vma_remove(vma); -destroy: - if (unlikely(i915_vma_is_closed(vma))) - i915_vma_destroy(vma); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 8c5022095418..fc4294cfaa91 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -119,6 +119,8 @@ struct i915_vma { /** This vma's place in the eviction list */ struct list_head evict_link; + struct list_head closed_link; + /** * Used for performing relocations during execbuffer insertion. 
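A compact sketch of the deferred-destruction flow implemented above (illustration only, not part of the patch; the reused flag is hypothetical, and struct_mutex is assumed held as the lockdep assertions require):

static void example_vma_close(struct drm_i915_private *i915,
			      struct i915_vma *vma, bool reused)
{
	/* Defer the unbind: vma is parked on i915->gt.closed_vma */
	i915_vma_close(vma);

	if (reused) {
		/* The client came back before we idled: cheap resurrection */
		i915_vma_reopen(vma);
	} else {
		/* On the next idle point the driver reaps all closed vmas */
		i915_vma_parked(i915);
	}
}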
*/ @@ -285,6 +287,8 @@ void i915_vma_revoke_mmap(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); +void i915_vma_reopen(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); @@ -408,6 +412,8 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } +void i915_vma_parked(struct drm_i915_private *i915); + #define for_each_until(cond) if (cond) break; else /** diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index e9fb692076d7..40285d1b91b7 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -227,6 +227,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct drm_atomic_state *drm_state = crtc_state->base.state; + struct intel_atomic_state *intel_state = to_intel_atomic_state(drm_state); int num_scalers_need; int i, j; @@ -304,8 +305,8 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, continue; } - plane_state = intel_atomic_get_existing_plane_state(drm_state, - intel_plane); + plane_state = intel_atomic_get_new_plane_state(intel_state, + intel_plane); scaler_id = &plane_state->scaler_id; } @@ -328,8 +329,18 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, } /* set scaler mode */ - if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - scaler_state->scalers[*scaler_id].mode = 0; + if ((INTEL_GEN(dev_priv) >= 9) && + plane_state && plane_state->base.fb && + plane_state->base.fb->format->format == + DRM_FORMAT_NV12) { + if (INTEL_GEN(dev_priv) == 9 && + !IS_GEMINILAKE(dev_priv) && + !IS_SKYLAKE(dev_priv)) + scaler_state->scalers[*scaler_id].mode = + SKL_PS_SCALER_MODE_NV12; + else + scaler_state->scalers[*scaler_id].mode = + PS_SCALER_MODE_PLANAR; } else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { /* * when only 1 scaler is in use on either pipe A or B, diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 7481ce85746b..6d068786eb41 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -183,11 +183,16 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ } /* FIXME pre-g4x don't work like this */ - if (intel_state->base.visible) + if (state->visible) crtc_state->active_planes |= BIT(intel_plane->id); else crtc_state->active_planes &= ~BIT(intel_plane->id); + if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) + crtc_state->nv12_planes |= BIT(intel_plane->id); + else + crtc_state->nv12_planes &= ~BIT(intel_plane->id); + return intel_plane_atomic_calc_changes(old_crtc_state, &crtc_state->base, old_plane_state, diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 447b721c3be9..54270bdde100 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -530,6 +530,7 @@ parse_driver_features(struct drm_i915_private *dev_priv, */ if (!driver->drrs_enabled) dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED; + dev_priv->vbt.psr.enable = driver->psr_enabled; } static void @@ -1215,10 +1216,8 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, { struct child_device_config *it, *child = NULL; struct 
ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; - uint8_t hdmi_level_shift; int i, j; bool is_dvi, is_hdmi, is_dp, is_edp, is_crt; - uint8_t aux_channel, ddc_pin; /* Each DDI port can have more than one value on the "DVO Port" field, * so look for all the possible values for each port. */ @@ -1255,8 +1254,6 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (!child) return; - aux_channel = child->aux_channel; - is_dvi = child->device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; is_dp = child->device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT; is_crt = child->device_type & DEVICE_TYPE_ANALOG_OUTPUT; @@ -1270,13 +1267,6 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, is_hdmi = false; } - if (port == PORT_A && is_dvi) { - DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", - is_hdmi ? "/HDMI" : ""); - is_dvi = false; - is_hdmi = false; - } - info->supports_dvi = is_dvi; info->supports_hdmi = is_hdmi; info->supports_dp = is_dp; @@ -1302,6 +1292,8 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); if (is_dvi) { + u8 ddc_pin; + ddc_pin = map_ddc_pin(dev_priv, child->ddc_pin); if (intel_gmbus_is_valid_pin(dev_priv, ddc_pin)) { info->alternate_ddc_pin = ddc_pin; @@ -1314,14 +1306,14 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, } if (is_dp) { - info->alternate_aux_channel = aux_channel; + info->alternate_aux_channel = child->aux_channel; sanitize_aux_ch(dev_priv, port); } if (bdb_version >= 158) { /* The VBT HDMI level shift values match the table we have. */ - hdmi_level_shift = child->hdmi_level_shifter_value; + u8 hdmi_level_shift = child->hdmi_level_shifter_value; DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", port_name(port), hdmi_level_shift); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 1f79e7a47433..18e643df523e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -82,7 +82,7 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); intel_engine_dump(engine, &p, @@ -130,11 +130,12 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) static void intel_breadcrumbs_fake_irq(struct timer_list *t) { - struct intel_engine_cs *engine = from_timer(engine, t, - breadcrumbs.fake_irq); + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* The timer persists in case we cannot enable interrupts, + /* + * The timer persists in case we cannot enable interrupts, * or if we have previously seen seqno/interrupt incoherency * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). 
* Here the worker will wake up every jiffie in order to kick the @@ -148,6 +149,12 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) if (!b->irq_armed) return; + /* If the user has disabled the fake-irq, restore the hangchecking */ + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { + mod_timer(&b->hangcheck, wait_timeout()); + return; + } + mod_timer(&b->fake_irq, jiffies + 1); } @@ -730,10 +737,11 @@ static void insert_signal(struct intel_breadcrumbs *b, list_add(&request->signaling.link, &iter->signaling.link); } -void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) +bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_wait *wait = &request->signaling.wait; u32 seqno; /* @@ -750,12 +758,12 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) seqno = i915_request_global_seqno(request); if (!seqno) /* will be enabled later upon execution */ - return; + return true; - GEM_BUG_ON(request->signaling.wait.seqno); - request->signaling.wait.tsk = b->signaler; - request->signaling.wait.request = request; - request->signaling.wait.seqno = seqno; + GEM_BUG_ON(wait->seqno); + wait->tsk = b->signaler; + wait->request = request; + wait->seqno = seqno; /* * Add ourselves into the list of waiters, but registering our @@ -768,11 +776,15 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) */ spin_lock(&b->rb_lock); insert_signal(b, request, seqno); - wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); + wakeup &= __intel_engine_add_wait(engine, wait); spin_unlock(&b->rb_lock); - if (wakeup) + if (wakeup) { wake_up_process(b->signaler); + return !intel_wait_complete(wait); + } + + return true; } void intel_engine_cancel_signaling(struct i915_request *request) @@ -826,8 +838,8 @@ static void cancel_fake_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ del_timer_sync(&b->hangcheck); - del_timer_sync(&b->fake_irq); clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } @@ -835,15 +847,22 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - cancel_fake_irq(engine); spin_lock_irq(&b->irq_lock); + /* + * Leave the fake_irq timer enabled (if it is running), but clear the + * bit so that it turns itself off on its next wake up and goes back + * to the long hangcheck interval if still required. + */ + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + if (b->irq_enabled) irq_enable(engine); else irq_disable(engine); - /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + /* + * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the * GPU is active and may have already executed the MI_USER_INTERRUPT * before the CPU is ready to receive. 
However, the engine is currently * idle (we haven't started it yet), there is no possibility for a @@ -852,9 +871,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) */ clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - if (b->irq_armed) - enable_fake_irq(b); - spin_unlock_irq(&b->irq_lock); } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index c0a8805b277f..de0e22322c76 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -748,6 +748,11 @@ intel_crt_detect(struct drm_connector *connector, connector->base.id, connector->name, force); + if (i915_modparams.load_detect_test) { + intel_display_power_get(dev_priv, intel_encoder->power_domain); + goto load_detect; + } + /* Skip machines without VGA that falsely report hotplug events */ if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; @@ -776,11 +781,12 @@ intel_crt_detect(struct drm_connector *connector, * broken monitor (without edid) to work behind a broken kvm (that fails * to have the right resistors for HP detection) needs to fix this up. * For now just bail out. */ - if (I915_HAS_HOTPLUG(dev_priv) && !i915_modparams.load_detect_test) { + if (I915_HAS_HOTPLUG(dev_priv)) { status = connector_status_disconnected; goto out; } +load_detect: if (!force) { status = connector->status; goto out; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index f9550ea46c26..cf9b600cca79 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -298,7 +298,10 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, csr->version = css_header->version; - if (IS_CANNONLAKE(dev_priv)) { + if (csr->fw_path == i915_modparams.dmc_firmware_path) { + /* Bypass version check for firmware override. 
*/ + required_version = csr->version; + } else if (IS_CANNONLAKE(dev_priv)) { required_version = CNL_CSR_VERSION_REQUIRED; } else if (IS_GEMINILAKE(dev_priv)) { required_version = GLK_CSR_VERSION_REQUIRED; @@ -453,7 +456,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (!HAS_CSR(dev_priv)) return; - if (IS_CANNONLAKE(dev_priv)) + if (i915_modparams.dmc_firmware_path) + csr->fw_path = i915_modparams.dmc_firmware_path; + else if (IS_CANNONLAKE(dev_priv)) csr->fw_path = I915_CSR_CNL; else if (IS_GEMINILAKE(dev_priv)) csr->fw_path = I915_CSR_GLK; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 8c2d778560f0..b98ac0541f19 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -493,6 +493,125 @@ static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ }; +struct icl_combo_phy_ddi_buf_trans { + u32 dw2_swing_select; + u32 dw2_swing_scalar; + u32 dw4_scaling; +}; + +/* Voltage Swing Programming for VccIO 0.85V for DP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_85V[] = { + /* Voltage mV db */ + { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ + { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ + { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ + { 0x2, 0x98, 0x900F }, /* 400 9.5 */ + { 0xB, 0x70, 0x0018 }, /* 600 0.0 */ + { 0xB, 0x70, 0x3015 }, /* 600 3.5 */ + { 0xB, 0x70, 0x6012 }, /* 600 6.0 */ + { 0x5, 0x00, 0x0018 }, /* 800 0.0 */ + { 0x5, 0x00, 0x3015 }, /* 800 3.5 */ + { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +}; + +/* FIXME - After table is updated in Bspec */ +/* Voltage Swing Programming for VccIO 0.85V for eDP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_85V[] = { + /* Voltage mV db */ + { 0x0, 0x00, 0x00 }, /* 200 0.0 */ + { 0x0, 0x00, 0x00 }, /* 200 1.5 */ + { 0x0, 0x00, 0x00 }, /* 200 4.0 */ + { 0x0, 0x00, 0x00 }, /* 200 6.0 */ + { 0x0, 0x00, 0x00 }, /* 250 0.0 */ + { 0x0, 0x00, 0x00 }, /* 250 1.5 */ + { 0x0, 0x00, 0x00 }, /* 250 4.0 */ + { 0x0, 0x00, 0x00 }, /* 300 0.0 */ + { 0x0, 0x00, 0x00 }, /* 300 1.5 */ + { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 0.95V for DP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_95V[] = { + /* Voltage mV db */ + { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ + { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ + { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ + { 0x2, 0x98, 0x900F }, /* 400 9.5 */ + { 0x4, 0x98, 0x0018 }, /* 600 0.0 */ + { 0x4, 0x98, 0x3015 }, /* 600 3.5 */ + { 0x4, 0x98, 0x6012 }, /* 600 6.0 */ + { 0x5, 0x76, 0x0018 }, /* 800 0.0 */ + { 0x5, 0x76, 0x3015 }, /* 800 3.5 */ + { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +}; + +/* FIXME - After table is updated in Bspec */ +/* Voltage Swing Programming for VccIO 0.95V for eDP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_95V[] = { + /* Voltage mV db */ + { 0x0, 0x00, 0x00 }, /* 200 0.0 */ + { 0x0, 0x00, 0x00 }, /* 200 1.5 */ + { 0x0, 0x00, 0x00 }, /* 200 4.0 */ + { 0x0, 0x00, 0x00 }, /* 200 6.0 */ + { 0x0, 0x00, 0x00 }, /* 250 0.0 */ + { 0x0, 0x00, 0x00 }, /* 250 1.5 */ + { 0x0, 0x00, 0x00 }, /* 250 4.0 */ + { 0x0, 0x00, 0x00 }, /* 300 0.0 */ + { 0x0, 0x00, 0x00 }, /* 300 1.5 */ + { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 1.05V for DP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_1_05V[] = { + /* Voltage mV db */ 
+ { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ + { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ + { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ + { 0x2, 0x98, 0x900F }, /* 400 9.5 */ + { 0x4, 0x98, 0x0018 }, /* 600 0.0 */ + { 0x4, 0x98, 0x3015 }, /* 600 3.5 */ + { 0x4, 0x98, 0x6012 }, /* 600 6.0 */ + { 0x5, 0x71, 0x0018 }, /* 800 0.0 */ + { 0x5, 0x71, 0x3015 }, /* 800 3.5 */ + { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +}; + +/* FIXME - After table is updated in Bspec */ +/* Voltage Swing Programming for VccIO 1.05V for eDP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_1_05V[] = { + /* Voltage mV db */ + { 0x0, 0x00, 0x00 }, /* 200 0.0 */ + { 0x0, 0x00, 0x00 }, /* 200 1.5 */ + { 0x0, 0x00, 0x00 }, /* 200 4.0 */ + { 0x0, 0x00, 0x00 }, /* 200 6.0 */ + { 0x0, 0x00, 0x00 }, /* 250 0.0 */ + { 0x0, 0x00, 0x00 }, /* 250 1.5 */ + { 0x0, 0x00, 0x00 }, /* 250 4.0 */ + { 0x0, 0x00, 0x00 }, /* 300 0.0 */ + { 0x0, 0x00, 0x00 }, /* 300 1.5 */ + { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +}; + +struct icl_mg_phy_ddi_buf_trans { + u32 cri_txdeemph_override_5_0; + u32 cri_txdeemph_override_11_6; + u32 cri_txdeemph_override_17_12; +}; + +static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations[] = { + /* Voltage swing pre-emphasis */ + { 0x0, 0x1B, 0x00 }, /* 0 0 */ + { 0x0, 0x23, 0x08 }, /* 0 1 */ + { 0x0, 0x2D, 0x12 }, /* 0 2 */ + { 0x0, 0x00, 0x00 }, /* 0 3 */ + { 0x0, 0x23, 0x00 }, /* 1 0 */ + { 0x0, 0x2B, 0x09 }, /* 1 1 */ + { 0x0, 0x2E, 0x11 }, /* 1 2 */ + { 0x0, 0x2F, 0x00 }, /* 2 0 */ + { 0x0, 0x33, 0x0C }, /* 2 1 */ + { 0x0, 0x00, 0x00 }, /* 3 0 */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -751,6 +870,45 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } } +static const struct icl_combo_phy_ddi_buf_trans * +icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, + int type, int *n_entries) +{ + u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK; + + if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { + switch (voltage) { + case VOLTAGE_INFO_0_85V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V); + return icl_combo_phy_ddi_translations_edp_0_85V; + case VOLTAGE_INFO_0_95V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V); + return icl_combo_phy_ddi_translations_edp_0_95V; + case VOLTAGE_INFO_1_05V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V); + return icl_combo_phy_ddi_translations_edp_1_05V; + default: + MISSING_CASE(voltage); + return NULL; + } + } else { + switch (voltage) { + case VOLTAGE_INFO_0_85V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V); + return icl_combo_phy_ddi_translations_dp_hdmi_0_85V; + case VOLTAGE_INFO_0_95V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V); + return icl_combo_phy_ddi_translations_dp_hdmi_0_95V; + case VOLTAGE_INFO_1_05V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V); + return icl_combo_phy_ddi_translations_dp_hdmi_1_05V; + default: + MISSING_CASE(voltage); + return NULL; + } + } +} + static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) { int n_entries, level, default_entry; @@ -875,7 +1033,7 @@ static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) { - switch (pll->id) { + switch (pll->info->id) { case DPLL_ID_WRPLL1: return 
PORT_CLK_SEL_WRPLL1; case DPLL_ID_WRPLL2: @@ -889,11 +1047,30 @@ static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) case DPLL_ID_LCPLL_2700: return PORT_CLK_SEL_LCPLL_2700; default: - MISSING_CASE(pll->id); + MISSING_CASE(pll->info->id); return PORT_CLK_SEL_NONE; } } +static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, + const struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + + switch (id) { + default: + MISSING_CASE(id); + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + return DDI_CLK_SEL_NONE; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + return DDI_CLK_SEL_MG; + } +} + /* Starting with Haswell, different DDI ports can work in FDI mode for * connection to the PCH-located connectors. For this, it is necessary to train * both the DDI port and PCH receiver for the desired DDI buffer settings. @@ -1906,7 +2083,13 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) enum port port = encoder->port; int n_entries; - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (port == PORT_A || port == PORT_B) + icl_get_combo_buf_trans(dev_priv, port, encoder->type, + &n_entries); + else + n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); + } else if (IS_CANNONLAKE(dev_priv)) { if (encoder->type == INTEL_OUTPUT_EDP) cnl_get_buf_trans_edp(dev_priv, &n_entries); else @@ -2063,6 +2246,146 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); } +static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, + u32 level, enum port port, int type) +{ + const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL; + u32 n_entries, val; + int ln; + + ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type, + &n_entries); + if (!ddi_translations) + return; + + if (level >= n_entries) { + DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); + level = n_entries - 1; + } + + /* Set PORT_TX_DW5 Rterm Sel to 110b. */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val &= ~RTERM_SELECT_MASK; + val |= RTERM_SELECT(0x6); + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW5 */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + /* Set DisableTap2 and DisableTap3 if MIPI DSI + * Clear DisableTap2 and DisableTap3 for all other Ports + */ + if (type == INTEL_OUTPUT_DSI) { + val |= TAP2_DISABLE; + val |= TAP3_DISABLE; + } else { + val &= ~TAP2_DISABLE; + val &= ~TAP3_DISABLE; + } + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW2 */ + val = I915_READ(ICL_PORT_TX_DW2_LN0(port)); + val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | + RCOMP_SCALAR_MASK); + val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select); + val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select); + /* Program Rcomp scalar for every table entry */ + val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar); + I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val); + + /* Program PORT_TX_DW4 */ + /* We cannot write to GRP. It would overwrite individual loadgen. 
*/ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); + val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | + CURSOR_COEFF_MASK); + val |= ddi_translations[level].dw4_scaling; + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); + } +} + +static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, + u32 level, + enum intel_output_type type) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; + int width = 0; + int rate = 0; + u32 val; + int ln = 0; + + if (type == INTEL_OUTPUT_HDMI) { + width = 4; + /* Rate is always < 6 GHz for HDMI */ + } else { + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + width = intel_dp->lane_count; + rate = intel_dp->link_rate; + } + + /* + * 1. If port type is eDP or DP, + * set PORT_PCS_DW1 cmnkeeper_enable to 1b, + * else clear to 0b. + */ + val = I915_READ(ICL_PORT_PCS_DW1_LN0(port)); + if (type == INTEL_OUTPUT_HDMI) + val &= ~COMMON_KEEPER_EN; + else + val |= COMMON_KEEPER_EN; + I915_WRITE(ICL_PORT_PCS_DW1_GRP(port), val); + + /* 2. Program loadgen select */ + /* + * Program PORT_TX_DW4_LN depending on Bit rate and used lanes + * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1) + * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0) + * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) + */ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); + val &= ~LOADGEN_SELECT; + + if ((rate <= 600000 && width == 4 && ln >= 1) || + (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) { + val |= LOADGEN_SELECT; + } + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); + } + + /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ + val = I915_READ(ICL_PORT_CL_DW5(port)); + val |= SUS_CLOCK_CONFIG; + I915_WRITE(ICL_PORT_CL_DW5(port), val); + + /* 4. Clear training enable to change swing values */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val &= ~TX_TRAINING_EN; + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* 5. Program swing and de-emphasis */ + icl_ddi_combo_vswing_program(dev_priv, level, port, type); + + /* 6.
Set training enable to trigger update */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val |= TX_TRAINING_EN; + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); +} + +static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level, + enum intel_output_type type) +{ + enum port port = encoder->port; + + if (port == PORT_A || port == PORT_B) + icl_combo_phy_ddi_vswing_sequence(encoder, level, type); + else + /* Not Implemented Yet */ + WARN_ON(1); +} + static uint32_t translate_signal_level(int signal_levels) { int i; @@ -2094,7 +2417,9 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_encoder *encoder = &dport->base; int level = intel_ddi_dp_level(intel_dp); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, encoder->type); else bxt_ddi_vswing_sequence(encoder, level, encoder->type); @@ -2115,6 +2440,69 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) return DDI_BUF_TRANS_SELECT(level); } +void icl_map_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct intel_shared_dpll *pll = crtc_state->shared_dpll; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_connector_state *conn_state; + struct drm_connector *conn; + int i; + + for_each_new_connector_in_state(old_state, conn, conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + enum port port = encoder->port; + uint32_t val; + + if (conn_state->crtc != crtc) + continue; + + mutex_lock(&dev_priv->dpll_lock); + + val = I915_READ(DPCLKA_CFGCR0_ICL); + WARN_ON((val & DPCLKA_CFGCR0_DDI_CLK_OFF(port)) == 0); + + if (port == PORT_A || port == PORT_B) { + val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); + val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + POSTING_READ(DPCLKA_CFGCR0_ICL); + } + + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + + mutex_unlock(&dev_priv->dpll_lock); + } +} + +void icl_unmap_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + enum port port = encoder->port; + + if (old_conn_state->crtc != crtc) + continue; + + mutex_lock(&dev_priv->dpll_lock); + I915_WRITE(DPCLKA_CFGCR0_ICL, + I915_READ(DPCLKA_CFGCR0_ICL) | + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); + mutex_unlock(&dev_priv->dpll_lock); + } +} + static void intel_ddi_clk_select(struct intel_encoder *encoder, const struct intel_shared_dpll *pll) { @@ -2127,11 +2515,15 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, mutex_lock(&dev_priv->dpll_lock); - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (port >= PORT_C) + I915_WRITE(DDI_CLK_SEL(port), + icl_pll_to_ddi_pll_sel(encoder, pll)); + } else if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. 
*/ val = I915_READ(DPCLKA_CFGCR0); val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); - val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->id, port); + val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); I915_WRITE(DPCLKA_CFGCR0, val); /* @@ -2148,7 +2540,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) | DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); - val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->id, port) | + val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->info->id, port) | DPLL_CTRL2_DDI_SEL_OVERRIDE(port)); I915_WRITE(DPLL_CTRL2, val); @@ -2165,14 +2557,18 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) { + if (port >= PORT_C) + I915_WRITE(DDI_CLK_SEL(port), DDI_CLK_SEL_NONE); + } else if (IS_CANNONLAKE(dev_priv)) { I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | DPCLKA_CFGCR0_DDI_CLK_OFF(port)); - else if (IS_GEN9_BC(dev_priv)) + } else if (IS_GEN9_BC(dev_priv)) { I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port)); - else if (INTEL_GEN(dev_priv) < 9) + } else if (INTEL_GEN(dev_priv) < 9) { I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); + } } static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, @@ -2197,7 +2593,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, encoder->type); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, encoder->type); @@ -2205,7 +2603,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_prepare_dp_ddi_buffers(encoder, crtc_state); intel_ddi_init_dp_buf_reg(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); @@ -2227,7 +2626,9 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); @@ -2303,12 +2704,15 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); struct intel_dp *intel_dp = &dig_port->dp; + bool is_mst = intel_crtc_has_type(old_crtc_state, + INTEL_OUTPUT_DP_MST); /* * Power down sink before disabling the port, otherwise we end * up getting interrupts from the sink on detecting link loss. 
*/ - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_disable_ddi_buf(encoder); @@ -2424,12 +2828,14 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct drm_connector *connector = conn_state->connector; enum port port = encoder->port; - intel_hdmi_handle_sink_scrambling(encoder, - conn_state->connector, - crtc_state->hdmi_high_tmds_clock_ratio, - crtc_state->hdmi_scrambling); + if (!intel_hdmi_handle_sink_scrambling(encoder, connector, + crtc_state->hdmi_high_tmds_clock_ratio, + crtc_state->hdmi_scrambling)) + DRM_ERROR("[CONNECTOR:%d:%s] Failed to configure sink scrambling/TMDS bit clock ratio\n", + connector->base.id, connector->name); /* Display WA #1143: skl,kbl,cfl */ if (IS_GEN9_BC(dev_priv)) { @@ -2520,13 +2926,16 @@ static void intel_disable_ddi_hdmi(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { + struct drm_connector *connector = old_conn_state->connector; + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); - intel_hdmi_handle_sink_scrambling(encoder, - old_conn_state->connector, - false, false); + if (!intel_hdmi_handle_sink_scrambling(encoder, connector, + false, false)) + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Failed to reset sink scrambling/TMDS bit clock ratio\n", + connector->base.id, connector->name); } static void intel_disable_ddi(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 3dd350f7b8e6..0fd13df424cf 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -83,11 +83,11 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) { int s; - drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); - drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); + drm_printf(p, "slice total: %u, mask=%04x\n", + hweight8(sseu->slice_mask), sseu->slice_mask); drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); - for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) { - drm_printf(p, "slice%d %u subslices mask=%04x\n", + for (s = 0; s < sseu->max_slices; s++) { + drm_printf(p, "slice%d: %u subslices, mask=%04x\n", s, hweight8(sseu->subslice_mask[s]), sseu->subslice_mask[s]); } @@ -158,6 +158,45 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } +static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; + u8 s_en; + u32 ss_en, ss_en_mask; + u8 eu_en; + int s; + + sseu->max_slices = 1; + sseu->max_subslices = 8; + sseu->max_eus_per_subslice = 8; + + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); + ss_en_mask = BIT(sseu->max_subslices) - 1; + eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); + + for (s = 0; s < sseu->max_slices; s++) { + if (s_en & BIT(s)) { + int ss_idx = sseu->max_subslices * s; + int ss; + + sseu->slice_mask |= BIT(s); + sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask; + for (ss = 0; ss < sseu->max_subslices; ss++) { + if (sseu->subslice_mask[s] & BIT(ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + } + } + sseu->eu_per_subslice = hweight8(eu_en); + 
sseu->eu_total = compute_eu_total(sseu); + + /* ICL has no power gating restrictions. */ + sseu->has_slice_pg = 1; + sseu->has_subslice_pg = 1; + sseu->has_eu_pg = 1; +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; @@ -557,6 +596,52 @@ static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) return base_freq + frac_freq; } +static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv, + u32 rpm_config_reg) +{ + u32 f19_2_mhz = 19200; + u32 f24_mhz = 24000; + u32 crystal_clock = (rpm_config_reg & + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (crystal_clock) { + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + default: + MISSING_CASE(crystal_clock); + return 0; + } +} + +static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv, + u32 rpm_config_reg) +{ + u32 f19_2_mhz = 19200; + u32 f24_mhz = 24000; + u32 f25_mhz = 25000; + u32 f38_4_mhz = 38400; + u32 crystal_clock = (rpm_config_reg & + GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (crystal_clock) { + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: + return f38_4_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: + return f25_mhz; + default: + MISSING_CASE(crystal_clock); + return 0; + } +} + static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) { u32 f12_5_mhz = 12500; @@ -597,10 +682,9 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) } return freq; - } else if (INTEL_GEN(dev_priv) <= 10) { + } else if (INTEL_GEN(dev_priv) <= 11) { u32 ctc_reg = I915_READ(CTC_MODE); u32 freq = 0; - u32 rpm_config_reg = 0; /* First figure out the reference frequency. 
There are 2 ways * we can compute the frequency, either through the @@ -610,20 +694,14 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { freq = read_reference_ts_freq(dev_priv); } else { - u32 crystal_clock; - - rpm_config_reg = I915_READ(RPM_CONFIG0); - crystal_clock = (rpm_config_reg & - GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; - switch (crystal_clock) { - case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: - freq = f19_2_mhz; - break; - case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: - freq = f24_mhz; - break; - } + u32 rpm_config_reg = I915_READ(RPM_CONFIG0); + + if (INTEL_GEN(dev_priv) <= 10) + freq = gen10_get_crystal_clock_freq(dev_priv, + rpm_config_reg); + else + freq = gen11_get_crystal_clock_freq(dev_priv, + rpm_config_reg); /* Now figure out how the command stream's timestamp * register increments from this frequency (it might @@ -768,8 +846,10 @@ void intel_device_info_runtime_init(struct intel_device_info *info) broadwell_sseu_info_init(dev_priv); else if (INTEL_GEN(dev_priv) == 9) gen9_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) >= 10) + else if (INTEL_GEN(dev_priv) == 10) gen10_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 11) + gen11_sseu_info_init(dev_priv); /* Initialize command stream timestamp frequency */ info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); @@ -780,3 +860,50 @@ void intel_driver_caps_print(const struct intel_driver_caps *caps, { drm_printf(p, "scheduler: %x\n", caps->scheduler); } + +/* + * Determine which engines are fused off in our particular hardware. Since the + * fuse register is in the blitter powerwell, we need forcewake to be ready at + * this point (but later we need to prune the forcewake domains for engines that + * are indeed fused off). 
+ */ +void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *info = mkwrite_device_info(dev_priv); + u8 vdbox_disable, vebox_disable; + u32 media_fuse; + int i; + + if (INTEL_GEN(dev_priv) < 11) + return; + + media_fuse = I915_READ(GEN11_GT_VEBOX_VDBOX_DISABLE); + + vdbox_disable = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; + vebox_disable = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> + GEN11_GT_VEBOX_DISABLE_SHIFT; + + DRM_DEBUG_DRIVER("vdbox disable: %04x\n", vdbox_disable); + for (i = 0; i < I915_MAX_VCS; i++) { + if (!HAS_ENGINE(dev_priv, _VCS(i))) + continue; + + if (!(BIT(i) & vdbox_disable)) + continue; + + info->ring_mask &= ~ENGINE_MASK(_VCS(i)); + DRM_DEBUG_DRIVER("vcs%u fused off\n", i); + } + + DRM_DEBUG_DRIVER("vebox disable: %04x\n", vebox_disable); + for (i = 0; i < I915_MAX_VECS; i++) { + if (!HAS_ENGINE(dev_priv, _VECS(i))) + continue; + + if (!(BIT(i) & vebox_disable)) + continue; + + info->ring_mask &= ~ENGINE_MASK(_VECS(i)); + DRM_DEBUG_DRIVER("vecs%u fused off\n", i); + } +} diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0835752c8b22..933e31669557 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -114,7 +114,7 @@ enum intel_platform { func(has_ipc); #define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (7) +#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ struct sseu_dev_info { u8 slice_mask; @@ -247,6 +247,8 @@ void intel_device_info_dump_runtime(const struct intel_device_info *info, void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, struct drm_printer *p); +void intel_device_info_init_mmio(struct drm_i915_private *dev_priv); + void intel_driver_caps_print(const struct intel_driver_caps *caps, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56004ffbd8bb..ad588d564198 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -88,6 +88,22 @@ static const uint32_t skl_primary_formats[] = { DRM_FORMAT_VYUY, }; +static const uint32_t skl_pri_planar_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_NV12, +}; + static const uint64_t skl_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, @@ -488,6 +504,33 @@ static const struct intel_limit intel_limits_bxt = { .p2 = { .p2_slow = 1, .p2_fast = 20 }, }; +static void +skl_wa_528(struct drm_i915_private *dev_priv, int pipe, bool enable) +{ + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return; + + if (enable) + I915_WRITE(CHICKEN_PIPESL_1(pipe), HSW_FBCQ_DIS); + else + I915_WRITE(CHICKEN_PIPESL_1(pipe), 0); +} + +static void +skl_wa_clkgate(struct drm_i915_private *dev_priv, int pipe, bool enable) +{ + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return; + + if (enable) + I915_WRITE(CLKGATE_DIS_PSL(pipe), + DUPS1_GATING_DIS | DUPS2_GATING_DIS); + else + I915_WRITE(CLKGATE_DIS_PSL(pipe), + I915_READ(CLKGATE_DIS_PSL(pipe)) & + ~(DUPS1_GATING_DIS | DUPS2_GATING_DIS)); +} + static bool needs_modeset(const struct drm_crtc_state *state) { @@ -2657,11 +2700,13 @@ static int i9xx_format_to_fourcc(int format) } } -static int skl_format_to_fourcc(int format, bool 
rgb_order, bool alpha) +int skl_format_to_fourcc(int format, bool rgb_order, bool alpha) { switch (format) { case PLANE_CTL_FORMAT_RGB_565: return DRM_FORMAT_RGB565; + case PLANE_CTL_FORMAT_NV12: + return DRM_FORMAT_NV12; default: case PLANE_CTL_FORMAT_XRGB_8888: if (rgb_order) { @@ -2824,7 +2869,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, continue; if (intel_plane_ggtt_offset(state) == plane_config->base) { - fb = c->primary->fb; + fb = state->base.fb; drm_framebuffer_get(fb); goto valid_fb; } @@ -2858,6 +2903,9 @@ valid_fb: return; } + obj = intel_fb_obj(fb); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); + plane_state->src_x = 0; plane_state->src_y = 0; plane_state->src_w = fb->width << 16; @@ -2871,7 +2919,6 @@ valid_fb: intel_state->base.src = drm_plane_state_src(plane_state); intel_state->base.dst = drm_plane_state_dest(plane_state); - obj = intel_fb_obj(fb); if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; @@ -3071,6 +3118,29 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, return 0; } +static int +skl_check_nv12_surface(const struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) +{ + /* Display WA #1106 */ + if (plane_state->base.rotation != + (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90) && + plane_state->base.rotation != DRM_MODE_ROTATE_270) + return 0; + + /* + * src coordinates are rotated here. + * We check height but report it as width + */ + if (((drm_rect_height(&plane_state->base.src) >> 16) % 4) != 0) { + DRM_DEBUG_KMS("src width must be multiple " + "of 4 for rotated NV12\n"); + return -EINVAL; + } + + return 0; +} + static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; @@ -3154,6 +3224,9 @@ int skl_check_plane_surface(const struct intel_crtc_state *crtc_state, * the main surface setup depends on it. 
*/ if (fb->format->format == DRM_FORMAT_NV12) { + ret = skl_check_nv12_surface(crtc_state, plane_state); + if (ret) + return ret; ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; @@ -3464,6 +3537,8 @@ static u32 skl_plane_ctl_format(uint32_t pixel_format) return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY; case DRM_FORMAT_VYUY: return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY; + case DRM_FORMAT_NV12: + return PLANE_CTL_FORMAT_NV12; default: MISSING_CASE(pixel_format); } @@ -3602,15 +3677,24 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; u32 plane_color_ctl = 0; - plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; - plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + if (INTEL_GEN(dev_priv) < 11) { + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + } plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format); if (intel_format_is_yuv(fb->format->format)) { + if (fb->format->format == DRM_FORMAT_NV12) { + plane_color_ctl |= + PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; + goto out; + } if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; else @@ -3619,7 +3703,7 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE; } - +out: return plane_color_ctl; } @@ -3675,7 +3759,6 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) struct drm_atomic_state *state; int ret; - /* reset doesn't touch the display */ if (!i915_modparams.force_reset_modeset_test && !gpu_reset_clobbers_display(dev_priv)) @@ -3729,19 +3812,17 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; - struct drm_atomic_state *state = dev_priv->modeset_restore_state; + struct drm_atomic_state *state; int ret; /* reset doesn't touch the display */ - if (!i915_modparams.force_reset_modeset_test && - !gpu_reset_clobbers_display(dev_priv)) + if (!test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags)) return; + state = fetch_and_zero(&dev_priv->modeset_restore_state); if (!state) goto unlock; - dev_priv->modeset_restore_state = NULL; - /* reset doesn't touch the display */ if (!gpu_reset_clobbers_display(dev_priv)) { /* for testing only restore the display */ @@ -4703,7 +4784,9 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe) static int skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, unsigned int scaler_user, int *scaler_id, - int src_w, int src_h, int dst_w, int dst_h) + int src_w, int src_h, int dst_w, int dst_h, + bool plane_scaler_check, + uint32_t pixel_format) { struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; @@ -4721,6 +4804,10 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, */ need_scaling = src_w != dst_w || src_h != dst_h; + if (plane_scaler_check) + if (pixel_format == DRM_FORMAT_NV12) + need_scaling = true; + if (crtc_state->ycbcr420 && scaler_user == SKL_CRTC_INDEX) need_scaling = true; @@ -4760,12 +4847,21 @@ 
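The skl_update_scaler() hunks above and below thread a pixel format into the scaler decision because NV12 planes must always be routed through a pipe scaler on these platforms (apparently so the scaler can perform the chroma upsampling), even at 1:1. The decision reduces to the following sketch, with names simplified from the patch:

    #include <stdbool.h>
    #include <stdint.h>

    #define DRM_FORMAT_NV12 0x3231564e    /* fourcc 'NV12' */

    static bool scaler_needed(int src_w, int src_h, int dst_w, int dst_h,
                              bool plane_scaler_check, uint32_t pixel_format)
    {
        bool need_scaling = src_w != dst_w || src_h != dst_h;

        /* planar NV12 always takes a scaler, even when unscaled */
        if (plane_scaler_check && pixel_format == DRM_FORMAT_NV12)
            need_scaling = true;

        return need_scaling;
    }

For the CRTC scaler the callers pass plane_scaler_check = false and a format of 0, which is why skl_update_scaler_crtc() is unaffected by the new checks.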
skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, return 0; } + if (plane_scaler_check && pixel_format == DRM_FORMAT_NV12 && + (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) { + DRM_DEBUG_KMS("NV12: src dimensions not met\n"); + return -EINVAL; + } + /* range checks */ if (src_w < SKL_MIN_SRC_W || src_h < SKL_MIN_SRC_H || - dst_w < SKL_MIN_DST_W || dst_h < SKL_MIN_DST_H || - - src_w > SKL_MAX_SRC_W || src_h > SKL_MAX_SRC_H || - dst_w > SKL_MAX_DST_W || dst_h > SKL_MAX_DST_H) { + dst_w < SKL_MIN_DST_W || dst_h < SKL_MIN_DST_H || + (IS_GEN11(dev_priv) && + (src_w > ICL_MAX_SRC_W || src_h > ICL_MAX_SRC_H || + dst_w > ICL_MAX_DST_W || dst_h > ICL_MAX_DST_H)) || + (!IS_GEN11(dev_priv) && + (src_w > SKL_MAX_SRC_W || src_h > SKL_MAX_SRC_H || + dst_w > SKL_MAX_DST_W || dst_h > SKL_MAX_DST_H))) { DRM_DEBUG_KMS("scaler_user index %u.%u: src %ux%u dst %ux%u " "size is out of scaler range\n", intel_crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h); @@ -4796,9 +4892,10 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode; return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, - state->pipe_src_w, state->pipe_src_h, - adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); + &state->scaler_state.scaler_id, + state->pipe_src_w, state->pipe_src_h, + adjusted_mode->crtc_hdisplay, + adjusted_mode->crtc_vdisplay, false, 0); } /** @@ -4827,7 +4924,8 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, drm_rect_width(&plane_state->base.src) >> 16, drm_rect_height(&plane_state->base.src) >> 16, drm_rect_width(&plane_state->base.dst), - drm_rect_height(&plane_state->base.dst)); + drm_rect_height(&plane_state->base.dst), + fb ? true : false, fb ? 
fb->format->format : 0); if (ret || plane_state->scaler_id < 0) return ret; @@ -4853,6 +4951,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: break; default: DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n", @@ -5096,16 +5195,34 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s return !old_crtc_state->ips_enabled; } +static bool needs_nv12_wa(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->nv12_planes) + return false; + + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + + if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || + IS_CANNONLAKE(dev_priv)) + return true; + + return false; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_atomic_state *old_state = old_crtc_state->base.state; struct intel_crtc_state *pipe_config = intel_atomic_get_new_crtc_state(to_intel_atomic_state(old_state), crtc); struct drm_plane *primary = crtc->base.primary; - struct drm_plane_state *old_pri_state = - drm_atomic_get_existing_plane_state(old_state, primary); + struct drm_plane_state *old_primary_state = + drm_atomic_get_old_plane_state(old_state, primary); intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); @@ -5115,20 +5232,24 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) if (hsw_post_update_enable_ips(old_crtc_state, pipe_config)) hsw_enable_ips(pipe_config); - if (old_pri_state) { - struct intel_plane_state *primary_state = - intel_atomic_get_new_plane_state(to_intel_atomic_state(old_state), - to_intel_plane(primary)); - struct intel_plane_state *old_primary_state = - to_intel_plane_state(old_pri_state); + if (old_primary_state) { + struct drm_plane_state *new_primary_state = + drm_atomic_get_new_plane_state(old_state, primary); intel_fbc_post_update(crtc); - if (primary_state->base.visible && + if (new_primary_state->visible && (needs_modeset(&pipe_config->base) || - !old_primary_state->base.visible)) + !old_primary_state->visible)) intel_post_enable_primary(&crtc->base, pipe_config); } + + /* Display WA 827 */ + if (needs_nv12_wa(dev_priv, old_crtc_state) && + !needs_nv12_wa(dev_priv, pipe_config)) { + skl_wa_clkgate(dev_priv, crtc->pipe, false); + skl_wa_528(dev_priv, crtc->pipe, false); + } } static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, @@ -5139,8 +5260,8 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_atomic_state *old_state = old_crtc_state->base.state; struct drm_plane *primary = crtc->base.primary; - struct drm_plane_state *old_pri_state = - drm_atomic_get_existing_plane_state(old_state, primary); + struct drm_plane_state *old_primary_state = + drm_atomic_get_old_plane_state(old_state, primary); bool modeset = needs_modeset(&pipe_config->base); struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); @@ -5148,23 +5269,28 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, if (hsw_pre_update_disable_ips(old_crtc_state, pipe_config)) hsw_disable_ips(old_crtc_state); - if (old_pri_state) { - struct intel_plane_state 
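Display WA 827 is handled as an edge detector on the CRTC state: the post-plane-update hunk above disarms it once NV12 planes are gone, and the matching pre-plane-update hunk below arms it before they appear. As a sketch of that shape (the hook name is a stand-in for the skl_wa_clkgate()/skl_wa_528() pair):

    #include <stdbool.h>

    struct crtc_state { bool nv12_planes; /* plus platform checks */ };

    void wa_827_set(int pipe, bool enable);    /* assumed workaround hook */

    static bool needs_wa(const struct crtc_state *s)
    {
        return s->nv12_planes;
    }

    static void pre_plane_update(int pipe, const struct crtc_state *old,
                                 const struct crtc_state *new)
    {
        if (!needs_wa(old) && needs_wa(new))
            wa_827_set(pipe, true);     /* rising edge: arm before planes change */
    }

    static void post_plane_update(int pipe, const struct crtc_state *old,
                                  const struct crtc_state *new)
    {
        if (needs_wa(old) && !needs_wa(new))
            wa_827_set(pipe, false);    /* falling edge: disarm after */
    }

Arming early and disarming late keeps the workaround in force for the whole window in which an NV12 plane can be active.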
*primary_state = + if (old_primary_state) { + struct intel_plane_state *new_primary_state = intel_atomic_get_new_plane_state(old_intel_state, to_intel_plane(primary)); - struct intel_plane_state *old_primary_state = - to_intel_plane_state(old_pri_state); - intel_fbc_pre_update(crtc, pipe_config, primary_state); + intel_fbc_pre_update(crtc, pipe_config, new_primary_state); /* * Gen2 reports pipe underruns whenever all planes are disabled. * So disable underrun reporting before all the planes get disabled. */ - if (IS_GEN2(dev_priv) && old_primary_state->base.visible && - (modeset || !primary_state->base.visible)) + if (IS_GEN2(dev_priv) && old_primary_state->visible && + (modeset || !new_primary_state->base.visible)) intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); } + /* Display WA 827 */ + if (!needs_nv12_wa(dev_priv, old_crtc_state) && + needs_nv12_wa(dev_priv, pipe_config)) { + skl_wa_clkgate(dev_priv, crtc->pipe, true); + skl_wa_528(dev_priv, crtc->pipe, true); + } + /* * Vblank time updates from the shadow to live plane control register * are blocked if the memory self-refresh mode is active at that @@ -5499,6 +5625,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (intel_crtc->config->shared_dpll) intel_enable_shared_dpll(intel_crtc); + if (INTEL_GEN(dev_priv) >= 11) + icl_map_plls_to_ports(crtc, pipe_config, old_state); + if (intel_crtc_has_dp_encoder(intel_crtc->config)) intel_dp_set_m_n(intel_crtc, M1_N1); @@ -5696,6 +5825,9 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); + + if (INTEL_GEN(dev_priv) >= 11) + icl_unmap_plls_to_ports(crtc, old_crtc_state, old_state); } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -8766,8 +8898,8 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, intel_get_shared_dpll_by_id(dev_priv, pll_id); pll = pipe_config->shared_dpll; - WARN_ON(!pll->funcs.get_hw_state(dev_priv, pll, - &pipe_config->dpll_hw_state)); + WARN_ON(!pll->info->funcs->get_hw_state(dev_priv, pll, + &pipe_config->dpll_hw_state)); tmp = pipe_config->dpll_hw_state.dpll; pipe_config->pixel_multiplier = @@ -9243,8 +9375,8 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, pll = pipe_config->shared_dpll; if (pll) { - WARN_ON(!pll->funcs.get_hw_state(dev_priv, pll, - &pipe_config->dpll_hw_state)); + WARN_ON(!pll->info->funcs->get_hw_state(dev_priv, pll, + &pipe_config->dpll_hw_state)); } /* @@ -9974,6 +10106,8 @@ found: ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector)); if (!ret) ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc)); + if (!ret) + ret = drm_atomic_add_affected_planes(restore_state, crtc); if (ret) { DRM_DEBUG_KMS("Failed to create a copy of old state to restore: %i\n", ret); goto fail; @@ -10773,7 +10907,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) struct drm_connector_state *connector_state; struct intel_encoder *encoder; - connector_state = drm_atomic_get_existing_connector_state(state, connector); + connector_state = drm_atomic_get_new_connector_state(state, connector); if (!connector_state) connector_state = connector->state; @@ -11085,39 +11219,42 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); -#define 
PIPE_CONF_CHECK_X(name) \ +#define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected 0x%08x, found 0x%08x)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_I(name) \ +#define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %i, found %i)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_BOOL(name) \ +#define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %s, found %s)\n", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ - } + } \ +} while (0) /* * Checks state where we only read out the enabling, but not the entire * state itself (like full infoframes or ELD for audio). These states * require a full modeset on bootup to fix up. */ -#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) \ +#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) do { \ if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ PIPE_CONF_CHECK_BOOL(name); \ } else { \ @@ -11126,18 +11263,20 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_P(name) \ +#define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %p, found %p)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_M_N(name) \ +#define PIPE_CONF_CHECK_M_N(name) do { \ if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name,\ adjust)) { \ @@ -11155,14 +11294,15 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, pipe_config->name.link_m, \ pipe_config->name.link_n); \ ret = false; \ - } + } \ +} while (0) /* This is required for BDW+ where there is only one set of registers for * switching between high and low RR. * This macro can be used whenever a comparison has to be made between one * hw state and multiple sw state variables. 
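Every PIPE_CONF_CHECK_* macro in this file is being rewritten from a bare if-block into a do { ... } while (0) body. That is the standard hygiene fix for statement-like macros: a bare block plus the caller's trailing semicolon mispairs with if/else, while the do/while form expands to exactly one statement. A minimal demonstration:

    #include <stdio.h>

    /* broken: the ; the caller writes after the macro orphans a following else */
    #define CHECK_BAD(x) \
        if (!(x)) { printf("failed: %s\n", #x); }

    /* fixed: one statement, safe anywhere a statement is expected */
    #define CHECK_GOOD(x) do { \
        if (!(x)) { printf("failed: %s\n", #x); } \
    } while (0)

    int main(void)
    {
        int v = 1;

        if (v)
            CHECK_GOOD(v == 2);
        else
            printf("unreachable\n");

        /* the same if/else written with CHECK_BAD would not even compile */
        return 0;
    }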
*/ -#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) \ +#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) do { \ if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name, adjust) && \ !intel_compare_link_m_n(¤t_config->alt_name, \ @@ -11187,9 +11327,10 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, pipe_config->name.link_m, \ pipe_config->name.link_n); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_FLAGS(name, mask) \ +#define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ pipe_config_err(adjust, __stringify(name), \ "(%x) (expected %i, found %i)\n", \ @@ -11197,16 +11338,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, current_config->name & (mask), \ pipe_config->name & (mask)); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) \ +#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %i, found %i)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) @@ -11315,6 +11458,16 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(dpll_hw_state.pll9); PIPE_CONF_CHECK_X(dpll_hw_state.pll10); PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_refclkin_ctl); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_coreclkctl1); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_hsclkctl); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div0); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div1); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_lf); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_frac_lock); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_ssc); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_bias); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_tdc_coldst_bias); PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -11378,6 +11531,11 @@ static void verify_wm_state(struct drm_crtc *crtc, skl_ddb_get_hw_state(dev_priv, &hw_ddb); sw_ddb = &dev_priv->wm.skl_hw.ddb; + if (INTEL_GEN(dev_priv) >= 11) + if (hw_ddb.enabled_slices != sw_ddb->enabled_slices) + DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n", + sw_ddb->enabled_slices, + hw_ddb.enabled_slices); /* planes */ for_each_universal_plane(dev_priv, pipe, plane) { hw_plane_wm = &hw_wm.planes[plane]; @@ -11643,11 +11801,11 @@ verify_single_dpll_state(struct drm_i915_private *dev_priv, memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); - DRM_DEBUG_KMS("%s\n", pll->name); + DRM_DEBUG_KMS("%s\n", pll->info->name); - active = pll->funcs.get_hw_state(dev_priv, pll, &dpll_hw_state); + active = pll->info->funcs->get_hw_state(dev_priv, pll, &dpll_hw_state); - if (!(pll->flags & INTEL_DPLL_ALWAYS_ON)) { + if (!(pll->info->flags & INTEL_DPLL_ALWAYS_ON)) { I915_STATE_WARN(!pll->on && pll->active_mask, "pll in active use but not on in sw tracking\n"); I915_STATE_WARN(pll->on && !pll->active_mask, @@ -12136,20 +12294,23 @@ static void intel_update_crtc(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_crtc_state *pipe_config = to_intel_crtc_state(new_crtc_state); bool modeset = needs_modeset(new_crtc_state); + struct intel_plane_state *new_plane_state = + intel_atomic_get_new_plane_state(to_intel_atomic_state(state), + to_intel_plane(crtc->primary)); if (modeset) { update_scanline_offset(intel_crtc); 
dev_priv->display.crtc_enable(pipe_config, state); + + /* vblanks work again, re-enable pipe CRC. */ + intel_crtc_enable_pipe_crc(intel_crtc); } else { intel_pre_plane_update(to_intel_crtc_state(old_crtc_state), pipe_config); } - if (drm_atomic_get_existing_plane_state(state, crtc->primary)) { - intel_fbc_enable( - intel_crtc, pipe_config, - to_intel_plane_state(crtc->primary->state)); - } + if (new_plane_state) + intel_fbc_enable(intel_crtc, pipe_config, new_plane_state); drm_atomic_helper_commit_planes_on_crtc(old_crtc_state); } @@ -12181,6 +12342,8 @@ static void skl_update_crtcs(struct drm_atomic_state *state) bool progress; enum pipe pipe; int i; + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; + u8 required_slices = intel_state->wm_results.ddb.enabled_slices; const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {}; @@ -12189,6 +12352,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) if (new_crtc_state->active) entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb; + /* If 2nd DBuf slice required, enable it here */ + if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices) + icl_dbuf_slices_update(dev_priv, required_slices); + /* * Whenever the number of active pipes changes, we need to make sure we * update the pipes in the right order so that their ddb allocations @@ -12239,6 +12406,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) progress = true; } } while (progress); + + /* If 2nd DBuf slice is no more required disable it */ + if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices) + icl_dbuf_slices_update(dev_priv, required_slices); } static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) @@ -12320,6 +12491,13 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (old_crtc_state->active) { intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask); + + /* + * We need to disable pipe CRC before disabling the pipe, + * or we race against vblank off. 
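The skl_update_crtcs() hunks above bracket the per-pipe update loop with the ICL DBuf slice management: a second slice that will be needed is powered up before any pipe is reprogrammed, and a slice that is no longer needed is powered down only after every pipe has been updated. Grow-before-use, shrink-after-use is the usual safe ordering for a shared resource; as a sketch with assumed helper names:

    void dbuf_slices_update(unsigned int slices);    /* assumed hw hook */
    void update_all_pipes(void);

    static void commit_with_dbuf(unsigned int hw_enabled, unsigned int required)
    {
        if (required > hw_enabled)
            dbuf_slices_update(required);    /* grow before pipes depend on it */

        update_all_pipes();

        if (required < hw_enabled)
            dbuf_slices_update(required);    /* shrink only once nothing uses it */
    }

The pipe CRC changes in the same region follow the mirror-image rule: CRC collection is stopped before a pipe goes down and restarted only after it is back up, avoiding the race against vblank off noted in the comment.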
+ */ + intel_crtc_disable_pipe_crc(intel_crtc); + dev_priv->display.crtc_disable(to_intel_crtc_state(old_crtc_state), state); intel_crtc->active = false; intel_fbc_disable(intel_crtc); @@ -12695,6 +12873,15 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_unpin_fb_vma(vma, old_plane_state->flags); } +static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) +{ + struct i915_sched_attr attr = { + .priority = I915_PRIORITY_DISPLAY, + }; + + i915_gem_object_wait_priority(obj, 0, &attr); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for @@ -12723,8 +12910,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (old_obj) { struct drm_crtc_state *crtc_state = - drm_atomic_get_existing_crtc_state(new_state->state, - plane->state->crtc); + drm_atomic_get_new_crtc_state(new_state->state, + plane->state->crtc); /* Big Hammer, we also need to ensure that any pending * MI_WAIT_FOR_EVENT inside a user batch buffer on the @@ -12771,13 +12958,15 @@ intel_prepare_plane_fb(struct drm_plane *plane, ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); + fb_obj_bump_render_priority(obj); mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); if (ret) return ret; + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); + if (!new_state->fence) { /* implicit fencing */ struct dma_fence *fence; @@ -12822,11 +13011,13 @@ intel_cleanup_plane_fb(struct drm_plane *plane, } int -skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) +skl_max_scale(struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state, + uint32_t pixel_format) { struct drm_i915_private *dev_priv; - int max_scale; - int crtc_clock, max_dotclk; + int max_scale, mult; + int crtc_clock, max_dotclk, tmpclk1, tmpclk2; if (!intel_crtc || !crtc_state->base.enable) return DRM_PLANE_HELPER_NO_SCALING; @@ -12848,8 +13039,10 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state * or * cdclk/crtc_clock */ - max_scale = min((1 << 16) * 3 - 1, - (1 << 8) * ((max_dotclk << 8) / crtc_clock)); + mult = pixel_format == DRM_FORMAT_NV12 ? 
2 : 3; + tmpclk1 = (1 << 16) * mult - 1; + tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock); + max_scale = min(tmpclk1, tmpclk2); return max_scale; } @@ -12865,12 +13058,16 @@ intel_check_primary_plane(struct intel_plane *plane, int max_scale = DRM_PLANE_HELPER_NO_SCALING; bool can_position = false; int ret; + uint32_t pixel_format = 0; if (INTEL_GEN(dev_priv) >= 9) { /* use scaler when colorkey is not required */ if (!state->ckey.flags) { min_scale = 1; - max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state); + if (state->base.fb) + pixel_format = state->base.fb->format->format; + max_scale = skl_max_scale(to_intel_crtc(crtc), + crtc_state, pixel_format); } can_position = true; } @@ -12943,10 +13140,25 @@ out: intel_cstate); } +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (!IS_GEN2(dev_priv)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); + + if (crtc_state->has_pch_encoder) { + enum pipe pch_transcoder = + intel_crtc_pch_transcoder(crtc); + + intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); + } +} + static void intel_finish_crtc_commit(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_crtc_state->state); @@ -12957,17 +13169,8 @@ static void intel_finish_crtc_commit(struct drm_crtc *crtc, if (new_crtc_state->update_pipe && !needs_modeset(&new_crtc_state->base) && - old_crtc_state->mode.private_flags & I915_MODE_FLAG_INHERITED) { - if (!IS_GEN2(dev_priv)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, intel_crtc->pipe, true); - - if (new_crtc_state->has_pch_encoder) { - enum pipe pch_transcoder = - intel_crtc_pch_transcoder(intel_crtc); - - intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); - } - } + old_crtc_state->mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(intel_crtc, new_crtc_state); } /** @@ -13031,6 +13234,7 @@ static bool skl_mod_supported(uint32_t format, uint64_t modifier) case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; /* fall through */ @@ -13165,8 +13369,9 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_unlock; - old_fb = old_plane_state->fb; + intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_FLIP); + old_fb = old_plane_state->fb; i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(fb), intel_plane->frontbuffer_bit); @@ -13237,6 +13442,30 @@ static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, return pipe == PIPE_A && plane_id == PLANE_PRIMARY; } +bool skl_plane_has_planar(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id) +{ + if (plane_id == PLANE_PRIMARY) { + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + else if ((INTEL_GEN(dev_priv) == 9 && pipe == PIPE_C) && + !IS_GEMINILAKE(dev_priv)) + return false; + } else if (plane_id >= PLANE_SPRITE0) { + if (plane_id == PLANE_CURSOR) + return false; + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) == 10) { + if (plane_id != PLANE_SPRITE0) + return false; + } else { + if (plane_id != PLANE_SPRITE0 || pipe == PIPE_C || + IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + } + } + return true; +} + static struct 
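skl_max_scale() above returns the downscale limit as a 16.16 fixed-point ratio of source to destination size: at most 3x per axis (2x for NV12), further capped by how far the CDCLK outruns the pixel clock, computed in 8.8 steps. Worked through in plain C, assuming clocks in kHz as in the driver:

    #include <stdio.h>

    /* 16.16 fixed point: (1 << 16) represents a 1.0x scale factor */
    static int max_scale(int max_dotclk, int crtc_clock, int is_nv12)
    {
        int mult = is_nv12 ? 2 : 3;              /* NV12 is capped at 2x */
        int hw_limit = (1 << 16) * mult - 1;     /* just under mult */
        int clk_limit = (1 << 8) * ((max_dotclk << 8) / crtc_clock);

        return hw_limit < clk_limit ? hw_limit : clk_limit;
    }

    int main(void)
    {
        /* 648 MHz cdclk vs a 148.5 MHz mode: the clock would allow ~4.36x,
         * so the 3x hardware cap wins here */
        printf("%.2fx\n", max_scale(648000, 148500, 0) / 65536.0);
        return 0;
    }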
intel_plane * intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -13297,8 +13526,13 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->check_plane = intel_check_primary_plane; if (INTEL_GEN(dev_priv) >= 9) { - intel_primary_formats = skl_primary_formats; - num_formats = ARRAY_SIZE(skl_primary_formats); + if (skl_plane_has_planar(dev_priv, pipe, PLANE_PRIMARY)) { + intel_primary_formats = skl_pri_planar_formats; + num_formats = ARRAY_SIZE(skl_pri_planar_formats); + } else { + intel_primary_formats = skl_primary_formats; + num_formats = ARRAY_SIZE(skl_primary_formats); + } if (skl_plane_has_ccs(dev_priv, pipe, PLANE_PRIMARY)) modifiers = skl_format_modifiers_ccs; @@ -13553,10 +13787,17 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); - BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || - dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] != NULL); - dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] = intel_crtc; - dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = intel_crtc; + BUG_ON(pipe >= ARRAY_SIZE(dev_priv->pipe_to_crtc_mapping) || + dev_priv->pipe_to_crtc_mapping[pipe] != NULL); + dev_priv->pipe_to_crtc_mapping[pipe] = intel_crtc; + + if (INTEL_GEN(dev_priv) < 9) { + enum i9xx_plane_id i9xx_plane = primary->i9xx_plane; + + BUG_ON(i9xx_plane >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || + dev_priv->plane_to_crtc_mapping[i9xx_plane] != NULL); + dev_priv->plane_to_crtc_mapping[i9xx_plane] = intel_crtc; + } drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); @@ -14112,6 +14353,20 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err; } break; + case DRM_FORMAT_NV12: + if (mode_cmd->modifier[0] == I915_FORMAT_MOD_Y_TILED_CCS || + mode_cmd->modifier[0] == I915_FORMAT_MOD_Yf_TILED_CCS) { + DRM_DEBUG_KMS("RC not to be enabled with NV12\n"); + goto err; + } + if (INTEL_GEN(dev_priv) < 9 || IS_SKYLAKE(dev_priv) || + IS_BROXTON(dev_priv)) { + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, + &format_name)); + goto err; + } + break; default: DRM_DEBUG_KMS("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); @@ -14124,6 +14379,14 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); + if (fb->format->format == DRM_FORMAT_NV12 && + (fb->width < SKL_MIN_YUV_420_SRC_W || + fb->height < SKL_MIN_YUV_420_SRC_H || + (fb->width % 4) != 0 || (fb->height % 4) != 0)) { + DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); + return -EINVAL; + } + for (i = 0; i < fb->format->num_planes; i++) { u32 stride_alignment; @@ -15101,8 +15364,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) for (i = 0; i < dev_priv->num_shared_dpll; i++) { struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i]; - pll->on = pll->funcs.get_hw_state(dev_priv, pll, - &pll->state.hw_state); + pll->on = pll->info->funcs->get_hw_state(dev_priv, pll, + &pll->state.hw_state); pll->state.crtc_mask = 0; for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = @@ -15115,7 +15378,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pll->active_mask = pll->state.crtc_mask; DRM_DEBUG_KMS("%s hw state readout: crtc_mask 0x%08x, on %i\n", - pll->name, pll->state.crtc_mask, pll->on); + 
pll->info->name, pll->state.crtc_mask, pll->on); } for_each_intel_encoder(dev, encoder) { @@ -15291,9 +15554,10 @@ intel_modeset_setup_hw_state(struct drm_device *dev, if (!pll->on || pll->active_mask) continue; - DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name); + DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", + pll->info->name); - pll->funcs.disable(dev_priv, pll); + pll->info->funcs->disable(dev_priv, pll); pll->on = false; } diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index 4e7418b345bc..2ef31617614a 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -218,6 +218,10 @@ struct intel_link_m_n { for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ for_each_if((__mask) & BIT(__p)) +#define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ + for ((__t) = 0; (__t) < I915_MAX_TRANSCODERS; (__t)++) \ + for_each_if ((__mask) & (1 << (__t))) + #define for_each_universal_plane(__dev_priv, __pipe, __p) \ for ((__p) = 0; \ (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index b7b4cfdeb974..dde92e4af5d3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -43,7 +43,6 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#define DP_LINK_CHECK_TIMEOUT (10 * 1000) #define DP_DPRX_ESI_LEN 14 /* Compliance test status bits */ @@ -92,8 +91,6 @@ static const struct dp_link_dpll chv_dpll[] = { { .p1 = 4, .p2 = 2, .n = 1, .m1 = 2, .m2 = 0x819999a } }, { 270000, /* m2_int = 27, m2_fraction = 0 */ { .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } }, - { 540000, /* m2_int = 27, m2_fraction = 0 */ - { .p1 = 2, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } } }; /** @@ -1650,9 +1647,17 @@ void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, } } +struct link_config_limits { + int min_clock, max_clock; + int min_lane_count, max_lane_count; + int min_bpp, max_bpp; +}; + static int intel_dp_compute_bpp(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + struct intel_connector *intel_connector = intel_dp->attached_connector; int bpp, bpc; bpp = pipe_config->pipe_bpp; @@ -1661,13 +1666,16 @@ static int intel_dp_compute_bpp(struct intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); - /* For DP Compliance we override the computed bpp for the pipe */ - if (intel_dp->compliance.test_data.bpc != 0) { - pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; - pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; - DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", - pipe_config->pipe_bpp); + if (intel_dp_is_edp(intel_dp)) { + /* Get bpp from vbt only for panels that dont have bpp in edid */ + if (intel_connector->base.display_info.bpc == 0 && + dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp) { + DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", + dev_priv->vbt.edp.bpp); + bpp = dev_priv->vbt.edp.bpp; + } } + return bpp; } @@ -1688,6 +1696,142 @@ static bool intel_edp_compare_alt_mode(struct drm_display_mode *m1, return bres; } +/* Adjust link config limits based on compliance test requests. 
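The DP rework beginning here replaces six loose min/max locals with struct link_config_limits, so any caller that must pin a parameter simply collapses its range to min == max and the search loops then visit exactly one value. Both the compliance-test path and the eDP path below use that idiom; as a sketch:

    struct link_limits {
        int min_clock, max_clock;
        int min_lane_count, max_lane_count;
        int min_bpp, max_bpp;
    };

    /* pin the lane count: the optimizer can no longer vary it */
    static void pin_lane_count(struct link_limits *limits, int lane_count)
    {
        limits->min_lane_count = lane_count;
        limits->max_lane_count = lane_count;
    }

Bundling the bounds into one struct also lets a different optimization policy reuse the same clamping hooks without touching the callers.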
*/ +static void +intel_dp_adjust_compliance_config(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + struct link_config_limits *limits) +{ + /* For DP Compliance we override the computed bpp for the pipe */ + if (intel_dp->compliance.test_data.bpc != 0) { + int bpp = 3 * intel_dp->compliance.test_data.bpc; + + limits->min_bpp = limits->max_bpp = bpp; + pipe_config->dither_force_disable = bpp == 6 * 3; + + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", bpp); + } + + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + int index; + + /* Validate the compliance test data since max values + * might have changed due to link train fallback. + */ + if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, + intel_dp->compliance.test_lane_count)) { + index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + intel_dp->compliance.test_link_rate); + if (index >= 0) + limits->min_clock = limits->max_clock = index; + limits->min_lane_count = limits->max_lane_count = + intel_dp->compliance.test_lane_count; + } + } +} + +/* Optimize link config in order: max bpp, min clock, min lanes */ +static bool +intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + const struct link_config_limits *limits) +{ + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int bpp, clock, lane_count; + int mode_rate, link_clock, link_avail; + + for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { + mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, + bpp); + + for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { + for (lane_count = limits->min_lane_count; + lane_count <= limits->max_lane_count; + lane_count <<= 1) { + link_clock = intel_dp->common_rates[clock]; + link_avail = intel_dp_max_data_rate(link_clock, + lane_count); + + if (mode_rate <= link_avail) { + pipe_config->lane_count = lane_count; + pipe_config->pipe_bpp = bpp; + pipe_config->port_clock = link_clock; + + return true; + } + } + } + } + + return false; +} + +static bool +intel_dp_compute_link_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) +{ + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct link_config_limits limits; + int common_len; + + common_len = intel_dp_common_len_rate_limit(intel_dp, + intel_dp->max_link_rate); + + /* No common link rates between source and sink */ + WARN_ON(common_len <= 0); + + limits.min_clock = 0; + limits.max_clock = common_len - 1; + + limits.min_lane_count = 1; + limits.max_lane_count = intel_dp_max_lane_count(intel_dp); + + limits.min_bpp = 6 * 3; + limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); + + if (intel_dp_is_edp(intel_dp)) { + /* + * Use the maximum clock and number of lanes the eDP panel + * advertizes being capable of. The panels are generally + * designed to support only a single clock and lane + * configuration, and typically these values correspond to the + * native resolution of the panel. 
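intel_dp_compute_link_config_wide() above encodes the "slow and wide" policy: try the highest bpp first, and for each bpp take the lowest link rate and fewest lanes that still carry the mode. A self-contained sketch of the search, using the usual DP arithmetic (bits packed into the byte clock, 8b/10b channel overhead) as an assumption rather than something read out of this patch:

    /* bandwidth the mode needs: pixel clock (kHz) times bits, in bytes */
    static int link_required(int pixel_clock_khz, int bpp)
    {
        return (pixel_clock_khz * bpp + 7) / 8;
    }

    /* link capacity: link clock (kHz) times lanes, minus 8b/10b overhead */
    static int link_available(int link_clock_khz, int lanes)
    {
        return link_clock_khz * lanes * 8 / 10;
    }

    static int pick_config(const int *rates, int num_rates, int max_lanes,
                           int min_bpp, int max_bpp, int pixel_clock_khz,
                           int *bpp, int *rate, int *lanes)
    {
        int b, i, l;

        for (b = max_bpp; b >= min_bpp; b -= 2 * 3) {    /* 2 bits/channel steps */
            int needed = link_required(pixel_clock_khz, b);

            for (i = 0; i < num_rates; i++)              /* slowest rate first */
                for (l = 1; l <= max_lanes; l <<= 1)     /* narrowest first */
                    if (needed <= link_available(rates[i], l)) {
                        *bpp = b; *rate = rates[i]; *lanes = l;
                        return 0;
                    }
        }
        return -1;
    }

Preferring a low link rate and few lanes at the cost of bpp tends to save power; the comment in intel_dp_compute_link_config() explicitly marks this as the spot where an alternative policy could slot in.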
+ */ + limits.min_lane_count = limits.max_lane_count; + limits.min_clock = limits.max_clock; + } + + intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits); + + DRM_DEBUG_KMS("DP link computation with max lane count %i " + "max rate %d max bpp %d pixel clock %iKHz\n", + limits.max_lane_count, + intel_dp->common_rates[limits.max_clock], + limits.max_bpp, adjusted_mode->crtc_clock); + + /* + * Optimize for slow and wide. This is the place to add alternative + * optimization policy. + */ + if (!intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits)) + return false; + + DRM_DEBUG_KMS("DP lane count %d clock %d bpp %d\n", + pipe_config->lane_count, pipe_config->port_clock, + pipe_config->pipe_bpp); + + DRM_DEBUG_KMS("DP link rate required %i available %i\n", + intel_dp_link_required(adjusted_mode->crtc_clock, + pipe_config->pipe_bpp), + intel_dp_max_data_rate(pipe_config->port_clock, + pipe_config->lane_count)); + + return true; +} + bool intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -1701,27 +1845,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct intel_connector *intel_connector = intel_dp->attached_connector; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); - int lane_count, clock; - int min_lane_count = 1; - int max_lane_count = intel_dp_max_lane_count(intel_dp); - /* Conveniently, the link BW constants become indices with a shift...*/ - int min_clock = 0; - int max_clock; - int bpp, mode_rate; - int link_avail, link_clock; - int common_len; - uint8_t link_bw, rate_select; bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_LIMITED_M_N); - common_len = intel_dp_common_len_rate_limit(intel_dp, - intel_dp->max_link_rate); - - /* No common link rates between source and sink */ - WARN_ON(common_len <= 0); - - max_clock = common_len - 1; - if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A) pipe_config->has_pch_encoder = true; @@ -1747,6 +1873,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) >= 9) { int ret; + ret = skl_update_scaler_crtc(pipe_config); if (ret) return ret; @@ -1767,75 +1894,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; - /* Use values requested by Compliance Test Request */ - if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { - int index; - - /* Validate the compliance test data since max values - * might have changed due to link train fallback. - */ - if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, - intel_dp->compliance.test_lane_count)) { - index = intel_dp_rate_index(intel_dp->common_rates, - intel_dp->num_common_rates, - intel_dp->compliance.test_link_rate); - if (index >= 0) - min_clock = max_clock = index; - min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; - } - } - DRM_DEBUG_KMS("DP link computation with max lane count %i " - "max bw %d pixel clock %iKHz\n", - max_lane_count, intel_dp->common_rates[max_clock], - adjusted_mode->crtc_clock); - - /* Walk through all bpp values. Luckily they're all nicely spaced with 2 - * bpc in between. 
*/ - bpp = intel_dp_compute_bpp(intel_dp, pipe_config); - if (intel_dp_is_edp(intel_dp)) { - - /* Get bpp from vbt only for panels that dont have bpp in edid */ - if (intel_connector->base.display_info.bpc == 0 && - (dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp)) { - DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", - dev_priv->vbt.edp.bpp); - bpp = dev_priv->vbt.edp.bpp; - } - - /* - * Use the maximum clock and number of lanes the eDP panel - * advertizes being capable of. The panels are generally - * designed to support only a single clock and lane - * configuration, and typically these values correspond to the - * native resolution of the panel. - */ - min_lane_count = max_lane_count; - min_clock = max_clock; - } - - for (; bpp >= 6*3; bpp -= 2*3) { - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - bpp); - - for (clock = min_clock; clock <= max_clock; clock++) { - for (lane_count = min_lane_count; - lane_count <= max_lane_count; - lane_count <<= 1) { - - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - goto found; - } - } - } - } - - return false; + if (!intel_dp_compute_link_config(encoder, pipe_config)) + return false; -found: if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* * See: @@ -1843,7 +1904,7 @@ found: * VESA DisplayPort Ver.1.2a - 5.1.1.1 Video Colorimetry */ pipe_config->limited_color_range = - bpp != 18 && + pipe_config->pipe_bpp != 18 && drm_default_rgb_quant_range(adjusted_mode) == HDMI_QUANTIZATION_RANGE_LIMITED; } else { @@ -1851,21 +1912,7 @@ found: intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; } - pipe_config->lane_count = lane_count; - - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = intel_dp->common_rates[clock]; - - intel_dp_compute_rate(intel_dp, pipe_config->port_clock, - &link_bw, &rate_select); - - DRM_DEBUG_KMS("DP link bw %02x rate select %02x lane count %d clock %d bpp %d\n", - link_bw, rate_select, pipe_config->lane_count, - pipe_config->port_clock, bpp); - DRM_DEBUG_KMS("DP link bw required %i available %i\n", - mode_rate, link_avail); - - intel_link_compute_m_n(bpp, lane_count, + intel_link_compute_m_n(pipe_config->pipe_bpp, pipe_config->lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, @@ -1874,11 +1921,12 @@ found: if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { pipe_config->has_drrs = true; - intel_link_compute_m_n(bpp, lane_count, - intel_connector->panel.downclock_mode->clock, - pipe_config->port_clock, - &pipe_config->dp_m2_n2, - reduce_m_n); + intel_link_compute_m_n(pipe_config->pipe_bpp, + pipe_config->lane_count, + intel_connector->panel.downclock_mode->clock, + pipe_config->port_clock, + &pipe_config->dp_m2_n2, + reduce_m_n); } if (!HAS_DDI(dev_priv)) @@ -2881,10 +2929,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, } } else { - if (IS_CHERRYVIEW(dev_priv)) - *DP &= ~DP_LINK_TRAIN_MASK_CHV; - else - *DP &= ~DP_LINK_TRAIN_MASK; + *DP &= ~DP_LINK_TRAIN_MASK; switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { case DP_TRAINING_PATTERN_DISABLE: @@ -2897,12 +2942,8 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, *DP |= DP_LINK_TRAIN_PAT_2; break; case DP_TRAINING_PATTERN_3: - if (IS_CHERRYVIEW(dev_priv)) { - *DP |= DP_LINK_TRAIN_PAT_3_CHV; - } else { - DRM_DEBUG_KMS("TPS3 not supported, using TPS2 instead\n"); - *DP |= DP_LINK_TRAIN_PAT_2; - } + DRM_DEBUG_KMS("TPS3 
not supported, using TPS2 instead\n"); + *DP |= DP_LINK_TRAIN_PAT_2; break; } } @@ -3641,10 +3682,7 @@ intel_dp_link_down(struct intel_encoder *encoder, DP &= ~DP_LINK_TRAIN_MASK_CPT; DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; } else { - if (IS_CHERRYVIEW(dev_priv)) - DP &= ~DP_LINK_TRAIN_MASK_CHV; - else - DP &= ~DP_LINK_TRAIN_MASK; + DP &= ~DP_LINK_TRAIN_MASK; DP |= DP_LINK_TRAIN_PAT_IDLE; } I915_WRITE(intel_dp->output_reg, DP); diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index f59b59bb0a21..3fcaa98b9055 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -139,6 +139,11 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) intel_dp_compute_rate(intel_dp, intel_dp->link_rate, &link_bw, &rate_select); + if (link_bw) + DRM_DEBUG_KMS("Using LINK_BW_SET value %02x\n", link_bw); + else + DRM_DEBUG_KMS("Using LINK_RATE_SET value %02x\n", rate_select); + /* Write the link configuration data */ link_config[0] = link_bw; link_config[1] = intel_dp->lane_count; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index c3de0918ee13..9e6956c08688 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -180,9 +180,11 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, intel_dp->active_mst_links--; intel_mst->connector = NULL; - if (intel_dp->active_mst_links == 0) + if (intel_dp->active_mst_links == 0) { + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_dig_port->base.post_disable(&intel_dig_port->base, old_crtc_state, NULL); + } DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); } @@ -223,7 +225,11 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); + if (intel_dp->active_mst_links == 0) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); + if (intel_dp->active_mst_links == 0) intel_dig_port->base.pre_enable(&intel_dig_port->base, pipe_config, NULL); diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index c8e9e44e5981..00b3ab656b06 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -380,13 +380,14 @@ static void _bxt_ddi_phy_init(struct drm_i915_private *dev_priv, * all 1s. Eventually they become accessible as they power up, then * the reserved bit will give the default 0. Poll on the reserved bit * becoming 0 to find when the PHY is accessible. - * HW team confirmed that the time to reach phypowergood status is - * anywhere between 50 us and 100us. + * The flag should get set in 100us according to the HW team, but + * use 1ms due to occasional timeouts observed with that. 
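The intel_dp_mst hunks above tie sink DPMS to the active-link count: the sink is woken only when the first MST stream comes up, and put back to sleep only after the last one is torn down, with the port-level pre_enable/post_disable hooks gated the same way. That is a reference-count edge pattern; roughly, with stand-in names:

    void sink_set_power(int on);    /* stands in for intel_dp_sink_dpms() */
    void port_enable(void);
    void port_disable(void);

    static int active_links;

    static void mst_stream_enable(void)
    {
        if (active_links == 0) {    /* first user powers the sink and port up */
            sink_set_power(1);
            port_enable();
        }
        active_links++;
    }

    static void mst_stream_disable(void)
    {
        active_links--;
        if (active_links == 0) {    /* last user powers them down */
            sink_set_power(0);
            port_disable();
        }
    }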
*/ - if (wait_for_us(((I915_READ(BXT_PORT_CL1CM_DW0(phy)) & - (PHY_RESERVED | PHY_POWER_GOOD)) == PHY_POWER_GOOD), 100)) { + if (intel_wait_for_register_fw(dev_priv, BXT_PORT_CL1CM_DW0(phy), + PHY_RESERVED | PHY_POWER_GOOD, + PHY_POWER_GOOD, + 1)) DRM_ERROR("timeout during PHY%d power on\n", phy); - } /* Program PLL Rcomp code offset */ val = I915_READ(BXT_PORT_CL1CM_DW9(phy)); diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 51c5ae4e9116..383fbc15113d 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -118,10 +118,10 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv, if (WARN(!pll, "asserting DPLL %s with no DPLL\n", onoff(state))) return; - cur_state = pll->funcs.get_hw_state(dev_priv, pll, &hw_state); + cur_state = pll->info->funcs->get_hw_state(dev_priv, pll, &hw_state); I915_STATE_WARN(cur_state != state, "%s assertion failure (expected %s, current %s)\n", - pll->name, onoff(state), onoff(cur_state)); + pll->info->name, onoff(state), onoff(cur_state)); } /** @@ -143,11 +143,11 @@ void intel_prepare_shared_dpll(struct intel_crtc *crtc) mutex_lock(&dev_priv->dpll_lock); WARN_ON(!pll->state.crtc_mask); if (!pll->active_mask) { - DRM_DEBUG_DRIVER("setting up %s\n", pll->name); + DRM_DEBUG_DRIVER("setting up %s\n", pll->info->name); WARN_ON(pll->on); assert_shared_dpll_disabled(dev_priv, pll); - pll->funcs.prepare(dev_priv, pll); + pll->info->funcs->prepare(dev_priv, pll); } mutex_unlock(&dev_priv->dpll_lock); } @@ -179,7 +179,7 @@ void intel_enable_shared_dpll(struct intel_crtc *crtc) pll->active_mask |= crtc_mask; DRM_DEBUG_KMS("enable %s (active %x, on? %d) for crtc %d\n", - pll->name, pll->active_mask, pll->on, + pll->info->name, pll->active_mask, pll->on, crtc->base.base.id); if (old_mask) { @@ -189,8 +189,8 @@ void intel_enable_shared_dpll(struct intel_crtc *crtc) } WARN_ON(pll->on); - DRM_DEBUG_KMS("enabling %s\n", pll->name); - pll->funcs.enable(dev_priv, pll); + DRM_DEBUG_KMS("enabling %s\n", pll->info->name); + pll->info->funcs->enable(dev_priv, pll); pll->on = true; out: @@ -221,7 +221,7 @@ void intel_disable_shared_dpll(struct intel_crtc *crtc) goto out; DRM_DEBUG_KMS("disable %s (active %x, on? 
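The BXT PHY power-good wait just above moves from an open-coded wait_for_us() to intel_wait_for_register_fw(reg, mask, value, timeout): poll until (read(reg) & mask) == value. Passing mask = PHY_RESERVED | PHY_POWER_GOOD with value = PHY_POWER_GOOD insists, in one comparison, that the reserved bit has dropped to 0 (the PHY is readable at all) and the power-good bit has risen to 1. A freestanding version of such a poll, assuming a monotonic millisecond clock and an MMIO accessor:

    #include <stdbool.h>
    #include <stdint.h>

    uint32_t mmio_read(uint32_t reg);    /* assumed register accessor */
    uint64_t now_ms(void);               /* assumed monotonic clock */

    /* true on success, false if the condition never held within timeout_ms */
    static bool wait_for_register(uint32_t reg, uint32_t mask, uint32_t value,
                                  unsigned int timeout_ms)
    {
        uint64_t deadline = now_ms() + timeout_ms;

        do {
            if ((mmio_read(reg) & mask) == value)
                return true;
        } while (now_ms() < deadline);

        /* final read, in case we were scheduled out past the deadline */
        return (mmio_read(reg) & mask) == value;
    }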
%d) for crtc %d\n", - pll->name, pll->active_mask, pll->on, + pll->info->name, pll->active_mask, pll->on, crtc->base.base.id); assert_shared_dpll_enabled(dev_priv, pll); @@ -231,8 +231,8 @@ void intel_disable_shared_dpll(struct intel_crtc *crtc) if (pll->active_mask) goto out; - DRM_DEBUG_KMS("disabling %s\n", pll->name); - pll->funcs.disable(dev_priv, pll); + DRM_DEBUG_KMS("disabling %s\n", pll->info->name); + pll->info->funcs->disable(dev_priv, pll); pll->on = false; out: @@ -263,7 +263,8 @@ intel_find_shared_dpll(struct intel_crtc *crtc, &shared_dpll[i].hw_state, sizeof(crtc_state->dpll_hw_state)) == 0) { DRM_DEBUG_KMS("[CRTC:%d:%s] sharing existing %s (crtc mask 0x%08x, active %x)\n", - crtc->base.base.id, crtc->base.name, pll->name, + crtc->base.base.id, crtc->base.name, + pll->info->name, shared_dpll[i].crtc_mask, pll->active_mask); return pll; @@ -275,7 +276,8 @@ intel_find_shared_dpll(struct intel_crtc *crtc, pll = &dev_priv->shared_dplls[i]; if (shared_dpll[i].crtc_mask == 0) { DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", - crtc->base.base.id, crtc->base.name, pll->name); + crtc->base.base.id, crtc->base.name, + pll->info->name); return pll; } } @@ -289,19 +291,19 @@ intel_reference_shared_dpll(struct intel_shared_dpll *pll, { struct intel_shared_dpll_state *shared_dpll; struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - enum intel_dpll_id i = pll->id; + const enum intel_dpll_id id = pll->info->id; shared_dpll = intel_atomic_get_shared_dpll_state(crtc_state->base.state); - if (shared_dpll[i].crtc_mask == 0) - shared_dpll[i].hw_state = + if (shared_dpll[id].crtc_mask == 0) + shared_dpll[id].hw_state = crtc_state->dpll_hw_state; crtc_state->shared_dpll = pll; - DRM_DEBUG_DRIVER("using %s for pipe %c\n", pll->name, + DRM_DEBUG_DRIVER("using %s for pipe %c\n", pll->info->name, pipe_name(crtc->pipe)); - shared_dpll[pll->id].crtc_mask |= 1 << crtc->pipe; + shared_dpll[id].crtc_mask |= 1 << crtc->pipe; } /** @@ -341,15 +343,16 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) return false; - val = I915_READ(PCH_DPLL(pll->id)); + val = I915_READ(PCH_DPLL(id)); hw_state->dpll = val; - hw_state->fp0 = I915_READ(PCH_FP0(pll->id)); - hw_state->fp1 = I915_READ(PCH_FP1(pll->id)); + hw_state->fp0 = I915_READ(PCH_FP0(id)); + hw_state->fp1 = I915_READ(PCH_FP1(id)); intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); @@ -359,8 +362,10 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, static void ibx_pch_dpll_prepare(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - I915_WRITE(PCH_FP0(pll->id), pll->state.hw_state.fp0); - I915_WRITE(PCH_FP1(pll->id), pll->state.hw_state.fp1); + const enum intel_dpll_id id = pll->info->id; + + I915_WRITE(PCH_FP0(id), pll->state.hw_state.fp0); + I915_WRITE(PCH_FP1(id), pll->state.hw_state.fp1); } static void ibx_assert_pch_refclk_enabled(struct drm_i915_private *dev_priv) @@ -379,13 +384,15 @@ static void ibx_assert_pch_refclk_enabled(struct drm_i915_private *dev_priv) static void ibx_pch_dpll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; + /* PCH refclock must be enabled first */ ibx_assert_pch_refclk_enabled(dev_priv); - I915_WRITE(PCH_DPLL(pll->id), pll->state.hw_state.dpll); + I915_WRITE(PCH_DPLL(id), 
pll->state.hw_state.dpll); /* Wait for the clocks to stabilize. */ - POSTING_READ(PCH_DPLL(pll->id)); + POSTING_READ(PCH_DPLL(id)); udelay(150); /* The pixel multiplier can only be updated once the @@ -393,14 +400,15 @@ static void ibx_pch_dpll_enable(struct drm_i915_private *dev_priv, * * So write it again. */ - I915_WRITE(PCH_DPLL(pll->id), pll->state.hw_state.dpll); - POSTING_READ(PCH_DPLL(pll->id)); + I915_WRITE(PCH_DPLL(id), pll->state.hw_state.dpll); + POSTING_READ(PCH_DPLL(id)); udelay(200); } static void ibx_pch_dpll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; struct drm_device *dev = &dev_priv->drm; struct intel_crtc *crtc; @@ -410,8 +418,8 @@ static void ibx_pch_dpll_disable(struct drm_i915_private *dev_priv, assert_pch_transcoder_disabled(dev_priv, crtc->pipe); } - I915_WRITE(PCH_DPLL(pll->id), 0); - POSTING_READ(PCH_DPLL(pll->id)); + I915_WRITE(PCH_DPLL(id), 0); + POSTING_READ(PCH_DPLL(id)); udelay(200); } @@ -429,7 +437,8 @@ ibx_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, pll = &dev_priv->shared_dplls[i]; DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n", - crtc->base.base.id, crtc->base.name, pll->name); + crtc->base.base.id, crtc->base.name, + pll->info->name); } else { pll = intel_find_shared_dpll(crtc, crtc_state, DPLL_ID_PCH_PLL_A, @@ -466,8 +475,10 @@ static const struct intel_shared_dpll_funcs ibx_pch_dpll_funcs = { static void hsw_ddi_wrpll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - I915_WRITE(WRPLL_CTL(pll->id), pll->state.hw_state.wrpll); - POSTING_READ(WRPLL_CTL(pll->id)); + const enum intel_dpll_id id = pll->info->id; + + I915_WRITE(WRPLL_CTL(id), pll->state.hw_state.wrpll); + POSTING_READ(WRPLL_CTL(id)); udelay(20); } @@ -482,11 +493,12 @@ static void hsw_ddi_spll_enable(struct drm_i915_private *dev_priv, static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; - val = I915_READ(WRPLL_CTL(pll->id)); - I915_WRITE(WRPLL_CTL(pll->id), val & ~WRPLL_PLL_ENABLE); - POSTING_READ(WRPLL_CTL(pll->id)); + val = I915_READ(WRPLL_CTL(id)); + I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE); + POSTING_READ(WRPLL_CTL(id)); } static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, @@ -503,12 +515,13 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) return false; - val = I915_READ(WRPLL_CTL(pll->id)); + val = I915_READ(WRPLL_CTL(id)); hw_state->wrpll = val; intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); @@ -914,13 +927,15 @@ static const struct skl_dpll_regs skl_dpll_regs[4] = { static void skl_ddi_pll_write_ctrl1(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; val = I915_READ(DPLL_CTRL1); - val &= ~(DPLL_CTRL1_HDMI_MODE(pll->id) | DPLL_CTRL1_SSC(pll->id) | - DPLL_CTRL1_LINK_RATE_MASK(pll->id)); - val |= pll->state.hw_state.ctrl1 << (pll->id * 6); + val &= ~(DPLL_CTRL1_HDMI_MODE(id) | + DPLL_CTRL1_SSC(id) | + DPLL_CTRL1_LINK_RATE_MASK(id)); + val |= pll->state.hw_state.ctrl1 << (id * 6); I915_WRITE(DPLL_CTRL1, val); POSTING_READ(DPLL_CTRL1); @@ -930,24 +945,25 @@ static void skl_ddi_pll_enable(struct 
drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const struct skl_dpll_regs *regs = skl_dpll_regs; + const enum intel_dpll_id id = pll->info->id; skl_ddi_pll_write_ctrl1(dev_priv, pll); - I915_WRITE(regs[pll->id].cfgcr1, pll->state.hw_state.cfgcr1); - I915_WRITE(regs[pll->id].cfgcr2, pll->state.hw_state.cfgcr2); - POSTING_READ(regs[pll->id].cfgcr1); - POSTING_READ(regs[pll->id].cfgcr2); + I915_WRITE(regs[id].cfgcr1, pll->state.hw_state.cfgcr1); + I915_WRITE(regs[id].cfgcr2, pll->state.hw_state.cfgcr2); + POSTING_READ(regs[id].cfgcr1); + POSTING_READ(regs[id].cfgcr2); /* the enable bit is always bit 31 */ - I915_WRITE(regs[pll->id].ctl, - I915_READ(regs[pll->id].ctl) | LCPLL_PLL_ENABLE); + I915_WRITE(regs[id].ctl, + I915_READ(regs[id].ctl) | LCPLL_PLL_ENABLE); if (intel_wait_for_register(dev_priv, DPLL_STATUS, - DPLL_LOCK(pll->id), - DPLL_LOCK(pll->id), + DPLL_LOCK(id), + DPLL_LOCK(id), 5)) - DRM_ERROR("DPLL %d not locked\n", pll->id); + DRM_ERROR("DPLL %d not locked\n", id); } static void skl_ddi_dpll0_enable(struct drm_i915_private *dev_priv, @@ -960,11 +976,12 @@ static void skl_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const struct skl_dpll_regs *regs = skl_dpll_regs; + const enum intel_dpll_id id = pll->info->id; /* the enable bit is always bit 31 */ - I915_WRITE(regs[pll->id].ctl, - I915_READ(regs[pll->id].ctl) & ~LCPLL_PLL_ENABLE); - POSTING_READ(regs[pll->id].ctl); + I915_WRITE(regs[id].ctl, + I915_READ(regs[id].ctl) & ~LCPLL_PLL_ENABLE); + POSTING_READ(regs[id].ctl); } static void skl_ddi_dpll0_disable(struct drm_i915_private *dev_priv, @@ -978,6 +995,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, { uint32_t val; const struct skl_dpll_regs *regs = skl_dpll_regs; + const enum intel_dpll_id id = pll->info->id; bool ret; if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) @@ -985,17 +1003,17 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = false; - val = I915_READ(regs[pll->id].ctl); + val = I915_READ(regs[id].ctl); if (!(val & LCPLL_PLL_ENABLE)) goto out; val = I915_READ(DPLL_CTRL1); - hw_state->ctrl1 = (val >> (pll->id * 6)) & 0x3f; + hw_state->ctrl1 = (val >> (id * 6)) & 0x3f; /* avoid reading back stale values if HDMI mode is not enabled */ - if (val & DPLL_CTRL1_HDMI_MODE(pll->id)) { - hw_state->cfgcr1 = I915_READ(regs[pll->id].cfgcr1); - hw_state->cfgcr2 = I915_READ(regs[pll->id].cfgcr2); + if (val & DPLL_CTRL1_HDMI_MODE(id)) { + hw_state->cfgcr1 = I915_READ(regs[id].cfgcr1); + hw_state->cfgcr2 = I915_READ(regs[id].cfgcr2); } ret = true; @@ -1011,6 +1029,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, { uint32_t val; const struct skl_dpll_regs *regs = skl_dpll_regs; + const enum intel_dpll_id id = pll->info->id; bool ret; if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) @@ -1019,12 +1038,12 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, ret = false; /* DPLL0 is always enabled since it drives CDCLK */ - val = I915_READ(regs[pll->id].ctl); + val = I915_READ(regs[id].ctl); if (WARN_ON(!(val & LCPLL_PLL_ENABLE))) goto out; val = I915_READ(DPLL_CTRL1); - hw_state->ctrl1 = (val >> (pll->id * 6)) & 0x3f; + hw_state->ctrl1 = (val >> (id * 6)) & 0x3f; ret = true; @@ -1424,7 +1443,7 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { uint32_t temp; - enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */ + enum port 
port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ enum dpio_phy phy; enum dpio_channel ch; @@ -1543,7 +1562,7 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, static void bxt_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */ + enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ uint32_t temp; temp = I915_READ(BXT_PORT_PLL_ENABLE(port)); @@ -1566,7 +1585,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */ + enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ uint32_t val; bool ret; enum dpio_phy phy; @@ -1824,7 +1843,7 @@ bxt_get_dpll(struct intel_crtc *crtc, pll = intel_get_shared_dpll_by_id(dev_priv, i); DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n", - crtc->base.base.id, crtc->base.name, pll->name); + crtc->base.base.id, crtc->base.name, pll->info->name); intel_reference_shared_dpll(pll, crtc_state); @@ -1877,13 +1896,6 @@ static void intel_ddi_pll_init(struct drm_device *dev) } } -struct dpll_info { - const char *name; - const int id; - const struct intel_shared_dpll_funcs *funcs; - uint32_t flags; -}; - struct intel_dpll_mgr { const struct dpll_info *dpll_info; @@ -1896,9 +1908,9 @@ struct intel_dpll_mgr { }; static const struct dpll_info pch_plls[] = { - { "PCH DPLL A", DPLL_ID_PCH_PLL_A, &ibx_pch_dpll_funcs, 0 }, - { "PCH DPLL B", DPLL_ID_PCH_PLL_B, &ibx_pch_dpll_funcs, 0 }, - { NULL, -1, NULL, 0 }, + { "PCH DPLL A", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_A, 0 }, + { "PCH DPLL B", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_B, 0 }, + { }, }; static const struct intel_dpll_mgr pch_pll_mgr = { @@ -1908,13 +1920,13 @@ static const struct intel_dpll_mgr pch_pll_mgr = { }; static const struct dpll_info hsw_plls[] = { - { "WRPLL 1", DPLL_ID_WRPLL1, &hsw_ddi_wrpll_funcs, 0 }, - { "WRPLL 2", DPLL_ID_WRPLL2, &hsw_ddi_wrpll_funcs, 0 }, - { "SPLL", DPLL_ID_SPLL, &hsw_ddi_spll_funcs, 0 }, - { "LCPLL 810", DPLL_ID_LCPLL_810, &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON }, - { "LCPLL 1350", DPLL_ID_LCPLL_1350, &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON }, - { "LCPLL 2700", DPLL_ID_LCPLL_2700, &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON }, - { NULL, -1, NULL, }, + { "WRPLL 1", &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL1, 0 }, + { "WRPLL 2", &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL2, 0 }, + { "SPLL", &hsw_ddi_spll_funcs, DPLL_ID_SPLL, 0 }, + { "LCPLL 810", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_810, INTEL_DPLL_ALWAYS_ON }, + { "LCPLL 1350", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_1350, INTEL_DPLL_ALWAYS_ON }, + { "LCPLL 2700", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_2700, INTEL_DPLL_ALWAYS_ON }, + { }, }; static const struct intel_dpll_mgr hsw_pll_mgr = { @@ -1924,11 +1936,11 @@ static const struct intel_dpll_mgr hsw_pll_mgr = { }; static const struct dpll_info skl_plls[] = { - { "DPLL 0", DPLL_ID_SKL_DPLL0, &skl_ddi_dpll0_funcs, INTEL_DPLL_ALWAYS_ON }, - { "DPLL 1", DPLL_ID_SKL_DPLL1, &skl_ddi_pll_funcs, 0 }, - { "DPLL 2", DPLL_ID_SKL_DPLL2, &skl_ddi_pll_funcs, 0 }, - { "DPLL 3", DPLL_ID_SKL_DPLL3, &skl_ddi_pll_funcs, 0 }, - { NULL, -1, NULL, }, + { "DPLL 0", &skl_ddi_dpll0_funcs, DPLL_ID_SKL_DPLL0, INTEL_DPLL_ALWAYS_ON }, + { "DPLL 1", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, + { "DPLL 2", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, + { "DPLL 3", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL3, 0 }, + { 
}, }; static const struct intel_dpll_mgr skl_pll_mgr = { @@ -1938,10 +1950,10 @@ static const struct intel_dpll_mgr skl_pll_mgr = { }; static const struct dpll_info bxt_plls[] = { - { "PORT PLL A", DPLL_ID_SKL_DPLL0, &bxt_ddi_pll_funcs, 0 }, - { "PORT PLL B", DPLL_ID_SKL_DPLL1, &bxt_ddi_pll_funcs, 0 }, - { "PORT PLL C", DPLL_ID_SKL_DPLL2, &bxt_ddi_pll_funcs, 0 }, - { NULL, -1, NULL, }, + { "PORT PLL A", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 }, + { "PORT PLL B", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, + { "PORT PLL C", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, + { }, }; static const struct intel_dpll_mgr bxt_pll_mgr = { @@ -1953,38 +1965,39 @@ static const struct intel_dpll_mgr bxt_pll_mgr = { static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; /* 1. Enable DPLL power in DPLL_ENABLE. */ - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); val |= PLL_POWER_ENABLE; - I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 2. Wait for DPLL power state enabled in DPLL_ENABLE. */ if (intel_wait_for_register(dev_priv, - CNL_DPLL_ENABLE(pll->id), + CNL_DPLL_ENABLE(id), PLL_POWER_STATE, PLL_POWER_STATE, 5)) - DRM_ERROR("PLL %d Power not enabled\n", pll->id); + DRM_ERROR("PLL %d Power not enabled\n", id); /* * 3. Configure DPLL_CFGCR0 to set SSC enable/disable, * select DP mode, and set DP link rate. */ val = pll->state.hw_state.cfgcr0; - I915_WRITE(CNL_DPLL_CFGCR0(pll->id), val); + I915_WRITE(CNL_DPLL_CFGCR0(id), val); /* 4. Read back to ensure writes completed */ - POSTING_READ(CNL_DPLL_CFGCR0(pll->id)); + POSTING_READ(CNL_DPLL_CFGCR0(id)); /* 3. Configure DPLL_CFGCR0 */ /* Avoid touching CFGCR1 if HDMI mode is not enabled */ if (pll->state.hw_state.cfgcr0 & DPLL_CFGCR0_HDMI_MODE) { val = pll->state.hw_state.cfgcr1; - I915_WRITE(CNL_DPLL_CFGCR1(pll->id), val); + I915_WRITE(CNL_DPLL_CFGCR1(id), val); /* 4. Read back to ensure writes completed */ - POSTING_READ(CNL_DPLL_CFGCR1(pll->id)); + POSTING_READ(CNL_DPLL_CFGCR1(id)); } /* @@ -1997,17 +2010,17 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, */ /* 6. Enable DPLL in DPLL_ENABLE. */ - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); val |= PLL_ENABLE; - I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 7. Wait for PLL lock status in DPLL_ENABLE. */ if (intel_wait_for_register(dev_priv, - CNL_DPLL_ENABLE(pll->id), + CNL_DPLL_ENABLE(id), PLL_LOCK, PLL_LOCK, 5)) - DRM_ERROR("PLL %d not locked\n", pll->id); + DRM_ERROR("PLL %d not locked\n", id); /* * 8. If the frequency will result in a change to the voltage @@ -2027,6 +2040,7 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; /* @@ -2044,17 +2058,17 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, */ /* 3. Disable DPLL through DPLL_ENABLE. */ - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); val &= ~PLL_ENABLE; - I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 4. Wait for PLL not locked status in DPLL_ENABLE. 
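(i.e. the register is polled until the PLL_LOCK bit reads back as zero)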
*/ if (intel_wait_for_register(dev_priv, - CNL_DPLL_ENABLE(pll->id), + CNL_DPLL_ENABLE(id), PLL_LOCK, 0, 5)) - DRM_ERROR("PLL %d locked\n", pll->id); + DRM_ERROR("PLL %d locked\n", id); /* * 5. If the frequency will result in a change to the voltage @@ -2066,23 +2080,24 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, */ /* 6. Disable DPLL power in DPLL_ENABLE. */ - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); val &= ~PLL_POWER_ENABLE; - I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 7. Wait for DPLL power state disabled in DPLL_ENABLE. */ if (intel_wait_for_register(dev_priv, - CNL_DPLL_ENABLE(pll->id), + CNL_DPLL_ENABLE(id), PLL_POWER_STATE, 0, 5)) - DRM_ERROR("PLL %d Power not disabled\n", pll->id); + DRM_ERROR("PLL %d Power not disabled\n", id); } static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; bool ret; @@ -2091,16 +2106,16 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = false; - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); if (!(val & PLL_ENABLE)) goto out; - val = I915_READ(CNL_DPLL_CFGCR0(pll->id)); + val = I915_READ(CNL_DPLL_CFGCR0(id)); hw_state->cfgcr0 = val; /* avoid reading back stale values if HDMI mode is not enabled */ if (val & DPLL_CFGCR0_HDMI_MODE) { - hw_state->cfgcr1 = I915_READ(CNL_DPLL_CFGCR1(pll->id)); + hw_state->cfgcr1 = I915_READ(CNL_DPLL_CFGCR1(id)); } ret = true; @@ -2203,6 +2218,7 @@ cnl_ddi_calculate_wrpll(int clock, struct skl_wrpll_params *wrpll_params) { u32 afe_clock = clock * 5; + uint32_t ref_clock; u32 dco_min = 7998000; u32 dco_max = 10000000; u32 dco_mid = (dco_min + dco_max) / 2; @@ -2235,8 +2251,17 @@ cnl_ddi_calculate_wrpll(int clock, cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); - cnl_wrpll_params_populate(wrpll_params, best_dco, - dev_priv->cdclk.hw.ref, pdiv, qdiv, kdiv); + ref_clock = dev_priv->cdclk.hw.ref; + + /* + * For ICL, the spec states: if reference frequency is 38.4, use 19.2 + * because the DPLL automatically divides that by 2. + */ + if (IS_ICELAKE(dev_priv) && ref_clock == 38400) + ref_clock = 19200; + + cnl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, pdiv, qdiv, + kdiv); return true; } @@ -2372,10 +2397,10 @@ static const struct intel_shared_dpll_funcs cnl_ddi_pll_funcs = { }; static const struct dpll_info cnl_plls[] = { - { "DPLL 0", DPLL_ID_SKL_DPLL0, &cnl_ddi_pll_funcs, 0 }, - { "DPLL 1", DPLL_ID_SKL_DPLL1, &cnl_ddi_pll_funcs, 0 }, - { "DPLL 2", DPLL_ID_SKL_DPLL2, &cnl_ddi_pll_funcs, 0 }, - { NULL, -1, NULL, }, + { "DPLL 0", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 }, + { "DPLL 1", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, + { "DPLL 2", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, + { }, }; static const struct intel_dpll_mgr cnl_pll_mgr = { @@ -2384,6 +2409,644 @@ static const struct intel_dpll_mgr cnl_pll_mgr = { .dump_hw_state = cnl_dump_hw_state, }; +/* + * These values are already adjusted: they're the bits we write to the + * registers, not the logical values. 
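+ * + * As a quick sanity check of the encoding (a sketch only, using the CNL WRPLL relations from earlier in this file: AFE clock = port clock * 5, DCO = AFE clock * pdiv * qdiv * kdiv), take entry [0] below, the 5.4 GHz link rate: with a 24 MHz reference, AFE = 540000 kHz * 5 = 2.7 GHz, and pdiv = 3, qdiv = kdiv = 1 give DCO = 8.1 GHz = 24 MHz * 337.5, i.e. dco_integer = 337 (0x151) and dco_fraction = 0.5 * 0x8000 = 0x4000.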
+ */ +static const struct skl_wrpll_params icl_dp_combo_pll_24MHz_values[] = { + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [0]: 5.4 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [1]: 2.7 */ + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [2]: 1.62 */ + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [3]: 3.24 */ + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [4]: 2.16 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [5]: 4.32 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x195, .dco_fraction = 0x0000, /* [6]: 6.48 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [7]: 8.1 */ + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, +}; + +/* Also used for 38.4 MHz values. */ +static const struct skl_wrpll_params icl_dp_combo_pll_19_2MHz_values[] = { + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [0]: 5.4 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [1]: 2.7 */ + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [2]: 1.62 */ + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [3]: 3.24 */ + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [4]: 2.16 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [5]: 4.32 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1FA, .dco_fraction = 0x2000, /* [6]: 6.48 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [7]: 8.1 */ + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, +}; + +static bool icl_calc_dp_combo_pll(struct drm_i915_private *dev_priv, int clock, + struct skl_wrpll_params *pll_params) +{ + const struct skl_wrpll_params *params; + + params = dev_priv->cdclk.hw.ref == 24000 ? 
+ icl_dp_combo_pll_24MHz_values : + icl_dp_combo_pll_19_2MHz_values; + + switch (clock) { + case 540000: + *pll_params = params[0]; + break; + case 270000: + *pll_params = params[1]; + break; + case 162000: + *pll_params = params[2]; + break; + case 324000: + *pll_params = params[3]; + break; + case 216000: + *pll_params = params[4]; + break; + case 432000: + *pll_params = params[5]; + break; + case 648000: + *pll_params = params[6]; + break; + case 810000: + *pll_params = params[7]; + break; + default: + MISSING_CASE(clock); + return false; + } + + return true; +} + +static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder, int clock, + struct intel_dpll_hw_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + uint32_t cfgcr0, cfgcr1; + struct skl_wrpll_params pll_params = { 0 }; + bool ret; + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + ret = cnl_ddi_calculate_wrpll(clock, dev_priv, &pll_params); + else + ret = icl_calc_dp_combo_pll(dev_priv, clock, &pll_params); + + if (!ret) + return false; + + cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params.dco_fraction) | + pll_params.dco_integer; + + cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params.qdiv_ratio) | + DPLL_CFGCR1_QDIV_MODE(pll_params.qdiv_mode) | + DPLL_CFGCR1_KDIV(pll_params.kdiv) | + DPLL_CFGCR1_PDIV(pll_params.pdiv) | + DPLL_CFGCR1_CENTRAL_FREQ_8400; + + pll_state->cfgcr0 = cfgcr0; + pll_state->cfgcr1 = cfgcr1; + return true; +} + +static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +{ + return id - DPLL_ID_ICL_MGPLL1 + PORT_C; +} + +static enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +{ + return port - PORT_C + DPLL_ID_ICL_MGPLL1; +} + +static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, + uint32_t *target_dco_khz, + struct intel_dpll_hw_state *state) +{ + uint32_t dco_min_freq, dco_max_freq; + int div1_vals[] = {7, 5, 3, 2}; + unsigned int i; + int div2; + + dco_min_freq = is_dp ? 8100000 : use_ssc ? 8000000 : 7992000; + dco_max_freq = is_dp ? 8100000 : 10000000; + + for (i = 0; i < ARRAY_SIZE(div1_vals); i++) { + int div1 = div1_vals[i]; + + for (div2 = 10; div2 > 0; div2--) { + int dco = div1 * div2 * clock_khz * 5; + int a_divratio, tlinedrv, inputsel, hsdiv; + + if (dco < dco_min_freq || dco > dco_max_freq) + continue; + + if (div2 >= 2) { + a_divratio = is_dp ? 10 : 5; + tlinedrv = 2; + } else { + a_divratio = 5; + tlinedrv = 0; + } + inputsel = is_dp ? 0 : 1; + + switch (div1) { + default: + MISSING_CASE(div1); + case 2: + hsdiv = 0; + break; + case 3: + hsdiv = 1; + break; + case 5: + hsdiv = 2; + break; + case 7: + hsdiv = 3; + break; + } + + *target_dco_khz = dco; + + state->mg_refclkin_ctl = MG_REFCLKIN_CTL_OD_2_MUX(1); + + state->mg_clktop2_coreclkctl1 = + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(a_divratio); + + state->mg_clktop2_hsclkctl = + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(tlinedrv) | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(inputsel) | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(hsdiv) | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(div2); + + return true; + } + } + + return false; +} + +/* + * The specification for this function uses real numbers, so the math had to be + * adapted to integer-only calculation, that's why it looks so different. 
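+ * + * A worked example of the integer math (illustrative values only): for a 270000 kHz DP link with a 24000 kHz reference, icl_mg_pll_find_divisors() must hit the 8100000 kHz DP DCO exactly, so div1 * div2 * 270000 * 5 = 8100000 gives div1 = 3 and div2 = 2; then, with m1div = 2, m2div_int = 8100000 / (24000 * 2) = 168, m2div_rem = 36000, and the 22-bit fraction is 36000 * 2^22 / 48000 = 3145728 (i.e. 0.75).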
+ */ +static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder, int clock, + struct intel_dpll_hw_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int refclk_khz = dev_priv->cdclk.hw.ref; + uint32_t dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; + uint32_t iref_ndiv, iref_trim, iref_pulse_w; + uint32_t prop_coeff, int_coeff; + uint32_t tdc_targetcnt, feedfwgain; + uint64_t ssc_stepsize, ssc_steplen, ssc_steplog; + uint64_t tmp; + bool use_ssc = false; + bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); + + if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz, + pll_state)) { + DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock); + return false; + } + + m1div = 2; + m2div_int = dco_khz / (refclk_khz * m1div); + if (m2div_int > 255) { + m1div = 4; + m2div_int = dco_khz / (refclk_khz * m1div); + if (m2div_int > 255) { + DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n", + clock); + return false; + } + } + m2div_rem = dco_khz % (refclk_khz * m1div); + + tmp = (uint64_t)m2div_rem * (1 << 22); + do_div(tmp, refclk_khz * m1div); + m2div_frac = tmp; + + switch (refclk_khz) { + case 19200: + iref_ndiv = 1; + iref_trim = 28; + iref_pulse_w = 1; + break; + case 24000: + iref_ndiv = 1; + iref_trim = 25; + iref_pulse_w = 2; + break; + case 38400: + iref_ndiv = 2; + iref_trim = 28; + iref_pulse_w = 1; + break; + default: + MISSING_CASE(refclk_khz); + return false; + } + + /* + * tdc_res = 0.000003 + * tdc_targetcnt = int(2 / (tdc_res * 8 * 50 * 1.1) / refclk_mhz + 0.5) + * + * The multiplication by 1000 is due to refclk MHz to KHz conversion. It + * was supposed to be a division, but we rearranged the operations of + * the formula to avoid early divisions so we don't multiply the + * rounding errors. + * + * 0.000003 * 8 * 50 * 1.1 = 0.00132, also known as 132 / 100000, which + * we also rearrange to work with integers. + * + * The 0.5 transformed to 5 results in a multiplication by 10 and the + * last division by 10. + */ + tdc_targetcnt = (2 * 1000 * 100000 * 10 / (132 * refclk_khz) + 5) / 10; + + /* + * Here we divide dco_khz by 10 in order to allow the dividend to fit in + * 32 bits. That's not a problem since we round the division down + * anyway. + */ + feedfwgain = (use_ssc || m2div_rem > 0) ? + m1div * 1000000 * 100 / (dco_khz * 3 / 10) : 0; + + if (dco_khz >= 9000000) { + prop_coeff = 5; + int_coeff = 10; + } else { + prop_coeff = 4; + int_coeff = 8; + } + + if (use_ssc) { + tmp = (uint64_t)dco_khz * 47 * 32; + do_div(tmp, refclk_khz * m1div * 10000); + ssc_stepsize = tmp; + + tmp = (uint64_t)dco_khz * 1000; + ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32); + } else { + ssc_stepsize = 0; + ssc_steplen = 0; + } + ssc_steplog = 4; + + pll_state->mg_pll_div0 = (m2div_rem > 0 ? 
MG_PLL_DIV0_FRACNEN_H : 0) | + MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | + MG_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | + MG_PLL_DIV1_DITHER_DIV_2 | + MG_PLL_DIV1_NDIVRATIO(1) | + MG_PLL_DIV1_FBPREDIV(m1div); + + pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | + MG_PLL_LF_AFCCNTSEL_512 | + MG_PLL_LF_GAINCTRL(1) | + MG_PLL_LF_INT_COEFF(int_coeff) | + MG_PLL_LF_PROP_COEFF(prop_coeff); + + pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | + MG_PLL_FRAC_LOCK_DCODITHEREN | + MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); + if (use_ssc || m2div_rem > 0) + pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; + + pll_state->mg_pll_ssc = (use_ssc ? MG_PLL_SSC_EN : 0) | + MG_PLL_SSC_TYPE(2) | + MG_PLL_SSC_STEPLENGTH(ssc_steplen) | + MG_PLL_SSC_STEPNUM(ssc_steplog) | + MG_PLL_SSC_FLLEN | + MG_PLL_SSC_STEPSIZE(ssc_stepsize); + + pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART; + + if (refclk_khz != 38400) { + pll_state->mg_pll_tdc_coldst_bias |= + MG_PLL_TDC_COLDST_IREFINT_EN | + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | + MG_PLL_TDC_COLDST_COLDSTART | + MG_PLL_TDC_TDCOVCCORR_EN | + MG_PLL_TDC_TDCSEL(3); + + pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | + MG_PLL_BIAS_BIAS_BONUS(10) | + MG_PLL_BIAS_BIASCAL_EN | + MG_PLL_BIAS_CTRIM(12) | + MG_PLL_BIAS_VREF_RDAC(4) | + MG_PLL_BIAS_IREFTRIM(iref_trim); + } + + return true; +} + +static struct intel_shared_dpll * +icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + struct intel_shared_dpll *pll; + struct intel_dpll_hw_state pll_state = {}; + enum port port = encoder->port; + enum intel_dpll_id min, max; + int clock = crtc_state->port_clock; + bool ret; + + switch (port) { + case PORT_A: + case PORT_B: + min = DPLL_ID_ICL_DPLL0; + max = DPLL_ID_ICL_DPLL1; + ret = icl_calc_dpll_state(crtc_state, encoder, clock, + &pll_state); + break; + case PORT_C: + case PORT_D: + case PORT_E: + case PORT_F: + min = icl_port_to_mg_pll_id(port); + max = min; + ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, + &pll_state); + break; + default: + MISSING_CASE(port); + return NULL; + } + + if (!ret) { + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); + return NULL; + } + + crtc_state->dpll_hw_state = pll_state; + + pll = intel_find_shared_dpll(crtc, crtc_state, min, max); + if (!pll) { + DRM_DEBUG_KMS("No PLL selected\n"); + return NULL; + } + + intel_reference_shared_dpll(pll, crtc_state); + + return pll; +} + +static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) +{ + switch (id) { + default: + MISSING_CASE(id); + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + return CNL_DPLL_ENABLE(id); + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + } +} + +static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + const enum intel_dpll_id id = pll->info->id; + uint32_t val; + enum port port; + bool ret = false; + + if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + return false; + + val = I915_READ(icl_pll_id_to_enable_reg(id)); + if (!(val & PLL_ENABLE)) + goto out; + + switch (id) { + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + 
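/* Combo PHY PLL state is fully captured by the two CFGCR dwords. */ + 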
hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); + break; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + port = icl_mg_pll_id_to_port(id); + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); + hw_state->mg_clktop2_coreclkctl1 = + I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + hw_state->mg_clktop2_hsclkctl = + I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); + hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); + hw_state->mg_pll_tdc_coldst_bias = + I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + break; + default: + MISSING_CASE(id); + } + + ret = true; +out: + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + return ret; +} + +static void icl_dpll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + const enum intel_dpll_id id = pll->info->id; + + I915_WRITE(ICL_DPLL_CFGCR0(id), hw_state->cfgcr0); + I915_WRITE(ICL_DPLL_CFGCR1(id), hw_state->cfgcr1); + POSTING_READ(ICL_DPLL_CFGCR1(id)); +} + +static void icl_mg_pll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + enum port port = icl_mg_pll_id_to_port(pll->info->id); + + I915_WRITE(MG_REFCLKIN_CTL(port), hw_state->mg_refclkin_ctl); + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), + hw_state->mg_clktop2_coreclkctl1); + I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), hw_state->mg_clktop2_hsclkctl); + I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); + I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); + I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); + I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); + I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); + I915_WRITE(MG_PLL_BIAS(port), hw_state->mg_pll_bias); + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), + hw_state->mg_pll_tdc_coldst_bias); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); +} + +static void icl_pll_enable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); + uint32_t val; + + val = I915_READ(enable_reg); + val |= PLL_POWER_ENABLE; + I915_WRITE(enable_reg, val); + + /* + * The spec says we need to "wait" but it also says it should be + * immediate. + */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, + PLL_POWER_STATE, 1)) + DRM_ERROR("PLL %d Power not enabled\n", id); + + switch (id) { + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + icl_dpll_write(dev_priv, pll); + break; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + icl_mg_pll_write(dev_priv, pll); + break; + default: + MISSING_CASE(id); + } + + /* + * DVFS pre sequence would be here, but in our driver the cdclk code + * paths should already be setting the appropriate voltage, hence we do + * nothing here. + */ + + val = I915_READ(enable_reg); + val |= PLL_ENABLE; + I915_WRITE(enable_reg, val); + + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, PLL_LOCK, + 1)) /* 600us actually. 
*/ + DRM_ERROR("PLL %d not locked\n", id); + + /* DVFS post sequence would be here. See the comment above. */ +} + +static void icl_pll_disable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); + uint32_t val; + + /* The first steps are done by intel_ddi_post_disable(). */ + + /* + * DVFS pre sequence would be here, but in our driver the cdclk code + * paths should already be setting the appropriate voltage, hence we do + * nothing here. + */ + + val = I915_READ(enable_reg); + val &= ~PLL_ENABLE; + I915_WRITE(enable_reg, val); + + /* Timeout is actually 1us. */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, 0, 1)) + DRM_ERROR("PLL %d locked\n", id); + + /* DVFS post sequence would be here. See the comment above. */ + + val = I915_READ(enable_reg); + val &= ~PLL_POWER_ENABLE; + I915_WRITE(enable_reg, val); + + /* + * The spec says we need to "wait" but it also says it should be + * immediate. + */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, 0, + 1)) + DRM_ERROR("PLL %d Power not disabled\n", id); +} + +static void icl_dump_hw_state(struct drm_i915_private *dev_priv, + struct intel_dpll_hw_state *hw_state) +{ + DRM_DEBUG_KMS("dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, " + "mg_refclkin_ctl: 0x%x, mg_clktop2_coreclkctl1: 0x%x, " + "mg_clktop2_hsclkctl: 0x%x, mg_pll_div0: 0x%x, " + "mg_pll_div1: 0x%x, mg_pll_lf: 0x%x, " + "mg_pll_frac_lock: 0x%x, mg_pll_ssc: 0x%x, " + "mg_pll_bias: 0x%x, mg_pll_tdc_coldst_bias: 0x%x\n", + hw_state->cfgcr0, hw_state->cfgcr1, + hw_state->mg_refclkin_ctl, + hw_state->mg_clktop2_coreclkctl1, + hw_state->mg_clktop2_hsclkctl, + hw_state->mg_pll_div0, + hw_state->mg_pll_div1, + hw_state->mg_pll_lf, + hw_state->mg_pll_frac_lock, + hw_state->mg_pll_ssc, + hw_state->mg_pll_bias, + hw_state->mg_pll_tdc_coldst_bias); +} + +static const struct intel_shared_dpll_funcs icl_pll_funcs = { + .enable = icl_pll_enable, + .disable = icl_pll_disable, + .get_hw_state = icl_pll_get_hw_state, +}; + +static const struct dpll_info icl_plls[] = { + { "DPLL 0", &icl_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, + { "DPLL 1", &icl_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "MG PLL 1", &icl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, + { "MG PLL 2", &icl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, + { "MG PLL 3", &icl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, + { "MG PLL 4", &icl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, + { }, +}; + +static const struct intel_dpll_mgr icl_pll_mgr = { + .dpll_info = icl_plls, + .get_dpll = icl_get_dpll, + .dump_hw_state = icl_dump_hw_state, +}; + /** * intel_shared_dpll_init - Initialize shared DPLLs * @dev: drm device @@ -2397,7 +3060,9 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dpll_mgr = &icl_pll_mgr; + else if (IS_CANNONLAKE(dev_priv)) dpll_mgr = &cnl_pll_mgr; else if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; @@ -2415,13 +3080,9 @@ void intel_shared_dpll_init(struct drm_device *dev) dpll_info = dpll_mgr->dpll_info; - for (i = 0; dpll_info[i].id >= 0; i++) { + for (i = 0; dpll_info[i].name; i++) { WARN_ON(i != dpll_info[i].id); - - dev_priv->shared_dplls[i].id = dpll_info[i].id; - dev_priv->shared_dplls[i].name = dpll_info[i].name; - dev_priv->shared_dplls[i].funcs = *dpll_info[i].funcs; - dev_priv->shared_dplls[i].flags = dpll_info[i].flags; + dev_priv->shared_dplls[i].info = &dpll_info[i]; } dev_priv->dpll_mgr = 
dpll_mgr; @@ -2481,7 +3142,7 @@ void intel_release_shared_dpll(struct intel_shared_dpll *dpll, struct intel_shared_dpll_state *shared_dpll_state; shared_dpll_state = intel_atomic_get_shared_dpll_state(state); - shared_dpll_state[dpll->id].crtc_mask &= ~(1 << crtc->pipe); + shared_dpll_state[dpll->info->id].crtc_mask &= ~(1 << crtc->pipe); } /** diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index f24ccf443d25..7a0cd564a9ee 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -103,6 +103,32 @@ enum intel_dpll_id { * @DPLL_ID_SKL_DPLL3: SKL and later DPLL3 */ DPLL_ID_SKL_DPLL3 = 3, + + + /** + * @DPLL_ID_ICL_DPLL0: ICL combo PHY DPLL0 + */ + DPLL_ID_ICL_DPLL0 = 0, + /** + * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1 + */ + DPLL_ID_ICL_DPLL1 = 1, + /** + * @DPLL_ID_ICL_MGPLL1: ICL MG PLL 1 port 1 (C) + */ + DPLL_ID_ICL_MGPLL1 = 2, + /** + * @DPLL_ID_ICL_MGPLL2: ICL MG PLL 2 port 2 (D) + */ + DPLL_ID_ICL_MGPLL2 = 3, + /** + * @DPLL_ID_ICL_MGPLL3: ICL MG PLL 3 port 3 (E) + */ + DPLL_ID_ICL_MGPLL3 = 4, + /** + * @DPLL_ID_ICL_MGPLL4: ICL MG PLL 4 port 4 (F) + */ + DPLL_ID_ICL_MGPLL4 = 5, }; #define I915_NUM_PLLS 6 @@ -135,6 +161,21 @@ struct intel_dpll_hw_state { /* bxt */ uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; + + /* + * ICL uses the following, already defined: + * uint32_t cfgcr0, cfgcr1; + */ + uint32_t mg_refclkin_ctl; + uint32_t mg_clktop2_coreclkctl1; + uint32_t mg_clktop2_hsclkctl; + uint32_t mg_pll_div0; + uint32_t mg_pll_div1; + uint32_t mg_pll_lf; + uint32_t mg_pll_frac_lock; + uint32_t mg_pll_ssc; + uint32_t mg_pll_bias; + uint32_t mg_pll_tdc_coldst_bias; }; /** @@ -206,6 +247,37 @@ struct intel_shared_dpll_funcs { }; /** + * struct dpll_info - display PLL platform specific info + */ +struct dpll_info { + /** + * @name: DPLL name; used for logging + */ + const char *name; + + /** + * @funcs: platform specific hooks + */ + const struct intel_shared_dpll_funcs *funcs; + + /** + * @id: unique identifier for this DPLL; should match the index in the + * dev_priv->shared_dplls array + */ + enum intel_dpll_id id; + +#define INTEL_DPLL_ALWAYS_ON (1 << 0) + /** + * @flags: + * + * INTEL_DPLL_ALWAYS_ON + * Inform the state checker that the DPLL is kept enabled even if + * not in use by any CRTC. + */ + uint32_t flags; +}; + +/** * struct intel_shared_dpll - display PLL with tracked state and users */ struct intel_shared_dpll { @@ -228,30 +300,9 @@ struct intel_shared_dpll { bool on; /** - * @name: DPLL name; used for logging + * @info: platform specific info */ - const char *name; - - /** - * @id: unique indentifier for this DPLL; should match the index in the - * dev_priv->shared_dplls array - */ - enum intel_dpll_id id; - - /** - * @funcs: platform specific hooks - */ - struct intel_shared_dpll_funcs funcs; - -#define INTEL_DPLL_ALWAYS_ON (1 << 0) - /** - * @flags: - * - * INTEL_DPLL_ALWAYS_ON - * Inform the state checker that the DPLL is kept enabled even if - * not in use by any CRTC. 
- */ - uint32_t flags; + const struct dpll_info *info; }; #define SKL_DPLL0 0 diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a80fbad9be0f..d7dbca1aabff 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -56,6 +56,8 @@ for (;;) { \ const bool expired__ = ktime_after(ktime_get_raw(), end__); \ OP; \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ if (COND) { \ ret__ = 0; \ break; \ @@ -96,6 +98,8 @@ u64 now = local_clock(); \ if (!(ATOMIC)) \ preempt_enable(); \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ if (COND) { \ ret = 0; \ break; \ @@ -140,6 +144,10 @@ #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) +#define KBps(x) (1000 * (x)) +#define MBps(x) KBps(1000 * (x)) +#define GBps(x) ((u64)1000 * MBps((x))) + /* * Display related stuff */ @@ -482,7 +490,7 @@ struct intel_atomic_state { bool skip_intermediate_wm; /* Gen9+ only */ - struct skl_wm_values wm_results; + struct skl_ddb_values wm_results; struct i915_sw_fence commit_ready; @@ -548,6 +556,12 @@ struct intel_initial_plane_config { #define SKL_MAX_DST_W 4096 #define SKL_MIN_DST_H 8 #define SKL_MAX_DST_H 4096 +#define ICL_MAX_SRC_W 5120 +#define ICL_MAX_SRC_H 4096 +#define ICL_MAX_DST_W 5120 +#define ICL_MAX_DST_H 4096 +#define SKL_MIN_YUV_420_SRC_W 16 +#define SKL_MIN_YUV_420_SRC_H 16 struct intel_scaler { int in_use; @@ -598,7 +612,9 @@ struct intel_pipe_wm { struct skl_plane_wm { struct skl_wm_level wm[8]; + struct skl_wm_level uv_wm[8]; struct skl_wm_level trans_wm; + bool is_planar; }; struct skl_pipe_wm { @@ -874,6 +890,7 @@ struct intel_crtc_state { /* bitmask of visible planes (enum plane_id) */ u8 active_planes; + u8 nv12_planes; /* HDMI scrambling status */ bool hdmi_scrambling; @@ -1321,10 +1338,14 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv); void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); /* i915_irq.c */ +bool gen11_reset_one_iir(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int bit); void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); @@ -1389,6 +1410,12 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, bool enable); +void icl_map_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state); +void icl_unmap_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state); unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, int plane, unsigned int height); @@ -1571,8 +1598,6 @@ void bxt_enable_dc9(struct drm_i915_private *dev_priv); void bxt_disable_dc9(struct drm_i915_private *dev_priv); void gen9_enable_dc5(struct drm_i915_private *dev_priv); unsigned int skl_cdclk_get_vco(unsigned int freq); -void skl_enable_dc6(struct drm_i915_private *dev_priv); -void 
skl_disable_dc6(struct drm_i915_private *dev_priv); void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n); @@ -1588,9 +1613,12 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); -int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); +int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, + uint32_t pixel_format); static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) { @@ -1607,6 +1635,7 @@ u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, int skl_check_plane_surface(const struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); int i9xx_check_plane_surface(struct intel_plane_state *plane_state); +int skl_format_to_fourcc(int format, bool rgb_order, bool alpha); /* intel_csr.c */ void intel_csr_ucode_init(struct drm_i915_private *); @@ -1773,6 +1802,7 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv, unsigned int frontbuffer_bits, enum fb_op_origin origin); void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv); void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *dev_priv); +int intel_fbc_reset_underrun(struct drm_i915_private *dev_priv); /* intel_hdmi.c */ void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, @@ -1783,7 +1813,7 @@ struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); -void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder, +bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, struct drm_connector *connector, bool high_tmds_clock_ratio, bool scrambling); @@ -1877,7 +1907,8 @@ void intel_psr_enable(struct intel_dp *intel_dp, void intel_psr_disable(struct intel_dp *intel_dp, const struct intel_crtc_state *old_crtc_state); void intel_psr_invalidate(struct drm_i915_private *dev_priv, - unsigned frontbuffer_bits); + unsigned frontbuffer_bits, + enum fb_op_origin origin); void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin); @@ -1886,6 +1917,8 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); +void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug); +void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); @@ -1909,6 +1942,8 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices); static inline void assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) @@ -2046,6 +2081,9 @@ void skl_disable_plane(struct 
intel_plane *plane, struct intel_crtc *crtc); bool skl_plane_get_hw_state(struct intel_plane *plane); bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id); +bool intel_format_is_yuv(uint32_t format); +bool skl_plane_has_planar(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); @@ -2082,31 +2120,6 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, return to_intel_crtc_state(crtc_state); } -static inline struct intel_crtc_state * -intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state, - struct intel_crtc *crtc) -{ - struct drm_crtc_state *crtc_state; - - crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base); - - if (crtc_state) - return to_intel_crtc_state(crtc_state); - else - return NULL; -} - -static inline struct intel_plane_state * -intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, - struct intel_plane *plane) -{ - struct drm_plane_state *plane_state; - - plane_state = drm_atomic_get_existing_plane_state(state, &plane->base); - - return to_intel_plane_state(plane_state); -} - int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state); @@ -2138,8 +2151,17 @@ int intel_pipe_crc_create(struct drm_minor *minor); #ifdef CONFIG_DEBUG_FS int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, size_t *values_cnt); +void intel_crtc_disable_pipe_crc(struct intel_crtc *crtc); +void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc); #else #define intel_crtc_set_crc_source NULL +static inline void intel_crtc_disable_pipe_crc(struct intel_crtc *crtc) +{ +} + +static inline void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc) +{ +} #endif extern const struct file_operations i915_display_crc_ctl_fops; #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 91c07b0c8db9..4d6ffa7b3e7b 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -647,6 +647,11 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) /* prepare count */ prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); + if (prepare_cnt > PREPARE_CNT_MAX) { + DRM_DEBUG_KMS("prepare count too high %u\n", prepare_cnt); + prepare_cnt = PREPARE_CNT_MAX; + } + /* exit zero count */ exit_zero_cnt = DIV_ROUND_UP( (ths_prepare_hszero - ths_prepare_ns) * ui_den, @@ -662,32 +667,29 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) if (exit_zero_cnt < (55 * ui_den / ui_num) && (55 * ui_den) % ui_num) exit_zero_cnt += 1; + if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) { + DRM_DEBUG_KMS("exit zero count too high %u\n", exit_zero_cnt); + exit_zero_cnt = EXIT_ZERO_CNT_MAX; + } + /* clk zero count */ clk_zero_cnt = DIV_ROUND_UP( (tclk_prepare_clkzero - ths_prepare_ns) * ui_den, ui_num * mul); + if (clk_zero_cnt > CLK_ZERO_CNT_MAX) { + DRM_DEBUG_KMS("clock zero count too high %u\n", clk_zero_cnt); + clk_zero_cnt = CLK_ZERO_CNT_MAX; + } + /* trail count */ tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); - if (prepare_cnt > PREPARE_CNT_MAX || - exit_zero_cnt > EXIT_ZERO_CNT_MAX || - clk_zero_cnt > CLK_ZERO_CNT_MAX || - trail_cnt > TRAIL_CNT_MAX) - DRM_DEBUG_DRIVER("Values crossing maximum limits, restricting to max values\n"); - 
- if (prepare_cnt > PREPARE_CNT_MAX) - prepare_cnt = PREPARE_CNT_MAX; - - if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) - exit_zero_cnt = EXIT_ZERO_CNT_MAX; - - if (clk_zero_cnt > CLK_ZERO_CNT_MAX) - clk_zero_cnt = CLK_ZERO_CNT_MAX; - - if (trail_cnt > TRAIL_CNT_MAX) + if (trail_cnt > TRAIL_CNT_MAX) { + DRM_DEBUG_KMS("trail count too high %u\n", trail_cnt); trail_cnt = TRAIL_CNT_MAX; + } /* B080 */ intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 | diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f7c25828d3bb..6bfd7e3ed152 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -81,13 +81,17 @@ static const struct engine_class_info intel_engine_classes[] = { }, }; +#define MAX_MMIO_BASES 3 struct engine_info { unsigned int hw_id; unsigned int uabi_id; u8 class; u8 instance; - u32 mmio_base; - unsigned irq_shift; + /* mmio bases table *must* be sorted in reverse gen order */ + struct engine_mmio_base { + u32 gen : 8; + u32 base : 24; + } mmio_bases[MAX_MMIO_BASES]; }; static const struct engine_info intel_engines[] = { @@ -96,64 +100,76 @@ static const struct engine_info intel_engines[] = { .uabi_id = I915_EXEC_RENDER, .class = RENDER_CLASS, .instance = 0, - .mmio_base = RENDER_RING_BASE, - .irq_shift = GEN8_RCS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 1, .base = RENDER_RING_BASE } + }, }, [BCS] = { .hw_id = BCS_HW, .uabi_id = I915_EXEC_BLT, .class = COPY_ENGINE_CLASS, .instance = 0, - .mmio_base = BLT_RING_BASE, - .irq_shift = GEN8_BCS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 6, .base = BLT_RING_BASE } + }, }, [VCS] = { .hw_id = VCS_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 0, - .mmio_base = GEN6_BSD_RING_BASE, - .irq_shift = GEN8_VCS1_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD_RING_BASE }, + { .gen = 6, .base = GEN6_BSD_RING_BASE }, + { .gen = 4, .base = BSD_RING_BASE } + }, }, [VCS2] = { .hw_id = VCS2_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 1, - .mmio_base = GEN8_BSD2_RING_BASE, - .irq_shift = GEN8_VCS2_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD2_RING_BASE }, + { .gen = 8, .base = GEN8_BSD2_RING_BASE } + }, }, [VCS3] = { .hw_id = VCS3_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 2, - .mmio_base = GEN11_BSD3_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD3_RING_BASE } + }, }, [VCS4] = { .hw_id = VCS4_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 3, - .mmio_base = GEN11_BSD4_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD4_RING_BASE } + }, }, [VECS] = { .hw_id = VECS_HW, .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, - .mmio_base = VEBOX_RING_BASE, - .irq_shift = GEN8_VECS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_VEBOX_RING_BASE }, + { .gen = 7, .base = VEBOX_RING_BASE } + }, }, [VECS2] = { .hw_id = VECS2_HW, .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, - .mmio_base = GEN11_VEBOX2_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_VEBOX2_RING_BASE } + }, }, }; @@ -223,16 +239,36 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) } } +static u32 __engine_mmio_base(struct drm_i915_private *i915, + const struct engine_mmio_base *bases) +{ + int i; + + for (i = 0; i < MAX_MMIO_BASES; i++) + if (INTEL_GEN(i915) >= 
bases[i].gen) + break; + + GEM_BUG_ON(i == MAX_MMIO_BASES); + GEM_BUG_ON(!bases[i].base); + + return bases[i].base; +} + +static void __sprint_engine_name(char *name, const struct engine_info *info) +{ + WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u", + intel_engine_classes[info->class].name, + info->instance) >= INTEL_ENGINE_CS_MAX_NAME); +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; - const struct engine_class_info *class_info; struct intel_engine_cs *engine; GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); - class_info = &intel_engine_classes[info->class]; BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); @@ -253,35 +289,14 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->id = id; engine->i915 = dev_priv; - WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", - class_info->name, info->instance) >= - sizeof(engine->name)); + __sprint_engine_name(engine->name, info); engine->hw_id = engine->guc_id = info->hw_id; - if (INTEL_GEN(dev_priv) >= 11) { - switch (engine->id) { - case VCS: - engine->mmio_base = GEN11_BSD_RING_BASE; - break; - case VCS2: - engine->mmio_base = GEN11_BSD2_RING_BASE; - break; - case VECS: - engine->mmio_base = GEN11_VEBOX_RING_BASE; - break; - default: - /* take the original value for all other engines */ - engine->mmio_base = info->mmio_base; - break; - } - } else { - engine->mmio_base = info->mmio_base; - } - engine->irq_shift = info->irq_shift; + engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases); engine->class = info->class; engine->instance = info->instance; engine->uabi_id = info->uabi_id; - engine->uabi_class = class_info->uabi_class; + engine->uabi_class = intel_engine_classes[info->class].uabi_class; engine->context_size = __intel_engine_context_size(dev_priv, engine->class); @@ -291,7 +306,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; - spin_lock_init(&engine->stats.lock); + seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -436,21 +451,13 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } -static void intel_engine_init_timeline(struct intel_engine_cs *engine) +static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { - engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; + i915_gem_batch_pool_init(&engine->batch_pool, engine); } static bool csb_force_mmio(struct drm_i915_private *i915) { - /* - * IOMMU adds unpredictable latency causing the CSB write (from the - * GPU into the HWSP) to only be visible some time after the interrupt - * (missed breadcrumb syndrome). 
- */ - if (intel_vtd_active()) - return true; - /* Older GVT emulation depends upon intercepting CSB mmio */ if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) return true; @@ -484,12 +491,11 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { - intel_engine_init_execlist(engine); + i915_timeline_init(engine->i915, &engine->timeline, engine->name); - intel_engine_init_timeline(engine); + intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(engine, &engine->batch_pool); - + intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); } @@ -520,8 +526,6 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) goto err_unref; engine->scratch = vma; - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unref: @@ -615,9 +619,6 @@ static int init_status_page(struct intel_engine_cs *engine) engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); - - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unpin: @@ -669,7 +670,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ring = engine->context_pin(engine, engine->i915->kernel_context); + ring = intel_context_pin(engine->i915->kernel_context, engine); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -678,8 +679,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * we can interrupt the engine at any time. */ if (engine->i915->preempt_context) { - ring = engine->context_pin(engine, - engine->i915->preempt_context); + ring = intel_context_pin(engine->i915->preempt_context, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto err_unpin_kernel; @@ -703,9 +703,9 @@ err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); + intel_context_unpin(engine->i915->preempt_context, engine); err_unpin_kernel: - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->kernel_context, engine); return ret; } @@ -733,8 +733,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) i915_gem_object_put(engine->default_state); if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->preempt_context, engine); + intel_context_unpin(engine->i915->kernel_context, engine); + + i915_timeline_fini(&engine->timeline); } u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) @@ -782,10 +784,24 @@ static inline uint32_t read_subslice_reg(struct drm_i915_private *dev_priv, int slice, int subslice, i915_reg_t reg) { + uint32_t mcr_slice_subslice_mask; + uint32_t mcr_slice_subslice_select; uint32_t mcr; uint32_t ret; enum forcewake_domains fw_domains; + if (INTEL_GEN(dev_priv) >= 11) { + mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | + GEN11_MCR_SUBSLICE_MASK; + mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) | + GEN11_MCR_SUBSLICE(subslice); + } else { + mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | + GEN8_MCR_SUBSLICE_MASK; + mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) | + 
GEN8_MCR_SUBSLICE(subslice); + } + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, @@ -800,14 +816,14 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice, * The HW expects the slice and subslice selectors to be reset to 0 * after reading out the registers. */ - WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK)); - mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK); - mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + WARN_ON_ONCE(mcr & mcr_slice_subslice_mask); + mcr &= ~mcr_slice_subslice_mask; + mcr |= mcr_slice_subslice_select; I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); ret = I915_READ_FW(reg); - mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK); + mcr &= ~mcr_slice_subslice_mask; I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); intel_uncore_forcewake_put__locked(dev_priv, fw_domains); @@ -871,644 +887,6 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, } } -static int wa_add(struct drm_i915_private *dev_priv, - i915_reg_t addr, - const u32 mask, const u32 val) -{ - const u32 idx = dev_priv->workarounds.count; - - if (WARN_ON(idx >= I915_MAX_WA_REGS)) - return -ENOSPC; - - dev_priv->workarounds.reg[idx].addr = addr; - dev_priv->workarounds.reg[idx].value = val; - dev_priv->workarounds.reg[idx].mask = mask; - - dev_priv->workarounds.count++; - - return 0; -} - -#define WA_REG(addr, mask, val) do { \ - const int r = wa_add(dev_priv, (addr), (mask), (val)); \ - if (r) \ - return r; \ - } while (0) - -#define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, mask, _MASKED_FIELD(mask, value)) - -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const uint32_t index = wa->hw_whitelist_count[engine->id]; - - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; - - return 0; -} - -static int gen8_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. 
- */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - - return 0; -} - -static int bdw_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw - * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() - * to disable EUTC clock gating. - */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; -} - -static int gen9_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaDisableKillLogic:bxt,skl,kbl */ - if (!IS_COFFEELAKE(dev_priv)) - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - ECOCHK_DIS_TLB); - - if (HAS_LLC(dev_priv)) { - /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl - * - * Must match Display Engine. See - * WaCompressedResourceDisplayNewHashMode. 
- */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN9_PBE_COMPRESSED_HASH_SELECTION); - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); - - I915_WRITE(MMCD_MISC_CTRL, - I915_READ(MMCD_MISC_CTRL) | - MMCD_PCLA | - MMCD_HOTSPOT_EN); - } - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(dev_priv)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_YV12_BUGFIX | - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. - */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ - if (IS_GEN9_LP(dev_priv)) { - u32 val = I915_READ(GEN8_L3SQCREG1); - - val &= ~L3_PRIO_CREDITS_MASK; - val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); - I915_WRITE(GEN8_L3SQCREG1, val); - } - - /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES)); - - /* - * Supporting preemption with fine-granularity requires changes in the - * batch buffer programming. Since we can't break old userspace, we - * need to set our default preemption level to safe value. Userspace is - * still able to use more fine-grained preemption levels, since in - * WaEnablePreemptionGranularityControlByUMD we're whitelisting the - * per-ctx register. 
As such, WaDisable{3D,GPGPU}MidCmdPreemption are - * not real HW workarounds, but merely a way to start using preemption - * while maintaining old contract with userspace. - */ - - /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ - if (IS_GEN9_LP(dev_priv)) - WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); - - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; -} - -static int skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; - - /* Tune IZ hashing. 
See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); - - return 0; -} - -static int skl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:skl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableGafsUnitClkGating:skl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return skl_tune_iz_hashing(engine); -} - -static int bxt_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaDisablePooledEuLoadBalancingFix:bxt */ - I915_WRITE(FF_SLICE_CS_CHICKEN2, - _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaInPlaceDecompressionHang:bxt */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - return 0; -} - -static int cnl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) - I915_WRITE(GAMT_CHKN_BIT_REG, - (I915_READ(GAMT_CHKN_BIT_REG) | - GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT)); - - /* WaForceContextSaveRestoreNonCoherent:cnl */ - WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - - /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); - - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); - - /* WaInPlaceDecompressionHang:cnl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaPushConstantDereferenceHoldDisable:cnl */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); - - /* FtrEnableFastAnisoL1BankingFix: cnl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); - - /* WaDisable3DMidCmdPreemption:cnl */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:cnl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, - 
GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaDisableEarlyEOT:cnl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); - - return 0; -} - -static int kbl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:kbl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) - I915_WRITE(GAMT_CHKN_BIT_REG, - (I915_READ(GAMT_CHKN_BIT_REG) | - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING)); - - /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:kbl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; -} - -static int glk_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs.
*/ - ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); - if (ret) - return ret; - - /* WaToEnableHwFixForPushConstHWBug:glk */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - return 0; -} - -static int cfl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:cfl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaToEnableHwFixForPushConstHWBug:cfl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:cfl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaDisableSbeCacheDispatchPortSharing:cfl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:cfl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - return 0; -} - -int init_workarounds_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int err; - - if (GEM_WARN_ON(engine->id != RCS)) - return -EINVAL; - - dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; - - if (IS_BROADWELL(dev_priv)) - err = bdw_init_workarounds(engine); - else if (IS_CHERRYVIEW(dev_priv)) - err = chv_init_workarounds(engine); - else if (IS_SKYLAKE(dev_priv)) - err = skl_init_workarounds(engine); - else if (IS_BROXTON(dev_priv)) - err = bxt_init_workarounds(engine); - else if (IS_KABYLAKE(dev_priv)) - err = kbl_init_workarounds(engine); - else if (IS_GEMINILAKE(dev_priv)) - err = glk_init_workarounds(engine); - else if (IS_COFFEELAKE(dev_priv)) - err = cfl_init_workarounds(engine); - else if (IS_CANNONLAKE(dev_priv)) - err = cnl_init_workarounds(engine); - else - err = 0; - if (err) - return err; - - DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n", - engine->name, dev_priv->workarounds.count); - return 0; -} - -int intel_ring_workarounds_emit(struct i915_request *rq) -{ - struct i915_workarounds *w = &rq->i915->workarounds; - u32 *cs; - int ret, i; - - if (w->count == 0) - return 0; - - ret = rq->engine->emit_flush(rq, EMIT_BARRIER); - if (ret) - return ret; - - cs = intel_ring_begin(rq, w->count * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(w->count); - for (i = 0; i < w->count; i++) { - *cs++ = i915_mmio_reg_offset(w->reg[i].addr); - *cs++ = w->reg[i].value; - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - ret = rq->engine->emit_flush(rq, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - static bool ring_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1611,7 +989,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) * the last request that remains in the timeline. When idle, it is * the last executed context as tracked by retirement. */ - rq = __i915_gem_active_peek(&engine->timeline->last_request); + rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) return rq->ctx == kernel_context; else @@ -1659,6 +1037,9 @@ void intel_engines_park(struct drm_i915_private *i915) intel_engine_dump(engine, &p, NULL); } + /* Must be reset upon idling, or we may miss the busy wakeup. 
*/ + GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); + if (engine->park) engine->park(engine); @@ -1681,6 +1062,8 @@ void intel_engines_unpark(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { if (engine->unpark) engine->unpark(engine); + + intel_engine_init_hangcheck(engine); } } @@ -1713,17 +1096,37 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) return which; } +static int print_sched_attr(struct drm_i915_private *i915, + const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + static void print_request(struct drm_printer *m, struct i915_request *rq, const char *prefix) { - drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, + const char *name = rq->fence.ops->get_timeline_name(&rq->fence); + char buf[80]; + int x = 0; + + x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n", + prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : "", rq->fence.context, rq->fence.seqno, - rq->priotree.priority, + buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), - rq->timeline->common->name); + name); } static void hexdump(struct drm_printer *m, const void *buf, size_t len) @@ -1829,12 +1232,15 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); read = GEN8_CSB_READ_PTR(ptr); write = GEN8_CSB_WRITE_PTR(ptr); - drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n", read, execlists->csb_head, write, intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted))); + &engine->irq_posted)), + yesno(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state)), + enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); if (read >= GEN8_CSB_ENTRIES) read = 0; if (write >= GEN8_CSB_ENTRIES) @@ -1861,8 +1267,9 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, rq: ", - idx, count); + "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + idx, count, + i915_ggtt_offset(rq->ring->vma)); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1884,11 +1291,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) 
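/*
 * Dump the software state of @engine to @m. To keep the output usable
 * on a busy engine, the executing and queued request lists printed
 * below are capped at MAX_REQUESTS_TO_SHOW entries each; longer lists
 * are elided with a "...skipping N requests..." marker followed by the
 * final request.
 */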
{ + const int MAX_REQUESTS_TO_SHOW = 8; struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct i915_request *rq; + struct i915_request *rq, *last; struct rb_node *rb; + int count; if (header) { va_list ap; @@ -1901,12 +1310,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1915,14 +1323,14 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline->requests, + rq = list_first_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tfirst "); - rq = list_last_entry(&engine->timeline->requests, + rq = list_last_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tlast "); rq = i915_gem_find_active_request(engine); @@ -1933,12 +1341,16 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->head, rq->postfix, rq->tail, rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, rq->batch ? 
lower_32_bits(rq->batch->node.start) : ~0u); - drm_printf(m, "\t\tring->start: 0x%08x\n", + drm_printf(m, "\t\tring->start: 0x%08x\n", i915_ggtt_offset(rq->ring->vma)); - drm_printf(m, "\t\tring->head: 0x%08x\n", + drm_printf(m, "\t\tring->head: 0x%08x\n", rq->ring->head); - drm_printf(m, "\t\tring->tail: 0x%08x\n", + drm_printf(m, "\t\tring->tail: 0x%08x\n", rq->ring->tail); + drm_printf(m, "\t\tring->emit: 0x%08x\n", + rq->ring->emit); + drm_printf(m, "\t\tring->space: 0x%08x\n", + rq->ring->space); } rcu_read_unlock(); @@ -1950,18 +1362,49 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - spin_lock_irq(&engine->timeline->lock); - list_for_each_entry(rq, &engine->timeline->requests, link) - print_request(m, rq, "\t\tE "); + spin_lock_irq(&engine->timeline.lock); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tE "); + else + last = rq; + } + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tE "); + } + + last = NULL; + count = 0; drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); for (rb = execlists->first; rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - list_for_each_entry(rq, &p->requests, priotree.link) - print_request(m, rq, "\t\tQ "); + list_for_each_entry(rq, &p->requests, sched.link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tQ "); + else + last = rq; + } + } + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tQ "); } - spin_unlock_irq(&engine->timeline->lock); + + spin_unlock_irq(&engine->timeline.lock); spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -2026,7 +1469,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) return -ENODEV; tasklet_disable(&execlists->tasklet); - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -2050,7 +1493,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); tasklet_enable(&execlists->tasklet); return err; @@ -2079,12 +1522,13 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) */ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) { + unsigned int seq; ktime_t total; - unsigned long flags; - spin_lock_irqsave(&engine->stats.lock, flags); - total = __intel_engine_get_busy_time(engine); - spin_unlock_irqrestore(&engine->stats.lock, flags); + do { + seq = read_seqbegin(&engine->stats.lock); + total = __intel_engine_get_busy_time(engine); + } while (read_seqretry(&engine->stats.lock, seq)); return total; } @@ -2102,15 +1546,16 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); WARN_ON_ONCE(engine->stats.enabled == 0); if (--engine->stats.enabled == 0) { engine->stats.total = __intel_engine_get_busy_time(engine); 
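/*
 * Writer side of the new stats seqlock: fold the final busy period
 * into the accumulated total and mark the engine idle. Readers in
 * intel_engine_get_busy_time() no longer block on a spinlock; they
 * simply retry via read_seqbegin()/read_seqretry() if this update
 * races with them.
 */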
engine->stats.active = 0; } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" +#include "selftests/intel_engine_cs.c" #endif diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 707d49c12638..b431b6733cc1 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1272,6 +1272,34 @@ out: mutex_unlock(&fbc->lock); } +/* + * intel_fbc_reset_underrun - reset FBC fifo underrun status. + * @dev_priv: i915 device instance + * + * See intel_fbc_handle_fifo_underrun_irq(). For automated testing we + * want to re-enable FBC after an underrun to increase test coverage. + */ +int intel_fbc_reset_underrun(struct drm_i915_private *dev_priv) +{ + int ret; + + cancel_work_sync(&dev_priv->fbc.underrun_work); + + ret = mutex_lock_interruptible(&dev_priv->fbc.lock); + if (ret) + return ret; + + if (dev_priv->fbc.underrun_detected) { + DRM_DEBUG_KMS("Re-allowing FBC after fifo underrun\n"); + dev_priv->fbc.no_fbc_reason = "FIFO underrun cleared"; + } + + dev_priv->fbc.underrun_detected = false; + mutex_unlock(&dev_priv->fbc.lock); + + return 0; +} + /** * intel_fbc_handle_fifo_underrun_irq - disable FBC when we get a FIFO underrun * @dev_priv: i915 device instance diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 6467a5cc2ca3..e9e02b58b7be 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -221,6 +221,9 @@ static int intelfb_create(struct drm_fb_helper *helper, goto out_unlock; } + fb = &ifbdev->fb->base; + intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_DIRTYFB); + info = drm_fb_helper_alloc_fbi(helper); if (IS_ERR(info)) { DRM_ERROR("Failed to allocate fb_info\n"); @@ -230,8 +233,6 @@ static int intelfb_create(struct drm_fb_helper *helper, info->par = helper; - fb = &ifbdev->fb->base; - ifbdev->helper.fb = fb; strcpy(info->fix.id, "inteldrmfb"); @@ -640,7 +641,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, if (!crtc->state->active) continue; - WARN(!crtc->primary->fb, + WARN(!crtc->primary->state->fb, "re-used BIOS config but lost an fb on crtc %d\n", crtc->base.id); } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 3a8d3d06c26a..7fff0a0eceb4 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -80,7 +80,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, } might_sleep(); - intel_psr_invalidate(dev_priv, frontbuffer_bits); + intel_psr_invalidate(dev_priv, frontbuffer_bits, origin); intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); } diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h new file mode 100644 index 000000000000..105e2a9e874a --- /dev/null +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -0,0 +1,274 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2003-2018 Intel Corporation + */ + +#ifndef _INTEL_GPU_COMMANDS_H_ +#define _INTEL_GPU_COMMANDS_H_ + +/* + * Instruction field definitions used by the command parser + */ +#define INSTR_CLIENT_SHIFT 29 +#define INSTR_MI_CLIENT 0x0 +#define INSTR_BC_CLIENT 0x2 +#define INSTR_RC_CLIENT 0x3 +#define INSTR_SUBCLIENT_SHIFT 27 +#define INSTR_SUBCLIENT_MASK 0x18000000 +#define INSTR_MEDIA_SUBCLIENT 0x2 +#define
INSTR_26_TO_24_MASK 0x7000000 +#define INSTR_26_TO_24_SHIFT 24 + +/* + * Memory interface instructions used by the kernel + */ +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) +/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */ +#define MI_GLOBAL_GTT (1<<22) + +#define MI_NOOP MI_INSTR(0, 0) +#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) +#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) +#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) +#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) +#define MI_FLUSH MI_INSTR(0x04, 0) +#define MI_READ_FLUSH (1 << 0) +#define MI_EXE_FLUSH (1 << 1) +#define MI_NO_WRITE_FLUSH (1 << 2) +#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */ +#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ +#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */ +#define MI_REPORT_HEAD MI_INSTR(0x07, 0) +#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) +#define MI_ARB_ENABLE (1<<0) +#define MI_ARB_DISABLE (0<<0) +#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) +#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) +#define MI_SUSPEND_FLUSH_EN (1<<0) +#define MI_SET_APPID MI_INSTR(0x0e, 0) +#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0) +#define MI_OVERLAY_CONTINUE (0x0<<21) +#define MI_OVERLAY_ON (0x1<<21) +#define MI_OVERLAY_OFF (0x2<<21) +#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0) +#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) +#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) +#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) +/* IVB has funny definitions for which plane to flip. */ +#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) +/* SKL ones */ +#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8) +#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */ +#define MI_SEMAPHORE_GLOBAL_GTT (1<<22) +#define MI_SEMAPHORE_UPDATE (1<<21) +#define MI_SEMAPHORE_COMPARE (1<<20) +#define MI_SEMAPHORE_REGISTER (1<<18) +#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */ +#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */ +#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */ +#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */ +#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */ +#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */ +#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */ +#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */ +#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */ +#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */ +#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */ +#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */ +#define MI_SEMAPHORE_SYNC_INVALID (3<<16) +#define MI_SEMAPHORE_SYNC_MASK (3<<16) +#define 
MI_SET_CONTEXT MI_INSTR(0x18, 0) +#define MI_MM_SPACE_GTT (1<<8) +#define MI_MM_SPACE_PHYSICAL (0<<8) +#define MI_SAVE_EXT_STATE_EN (1<<3) +#define MI_RESTORE_EXT_STATE_EN (1<<2) +#define MI_FORCE_RESTORE (1<<1) +#define MI_RESTORE_INHIBIT (1<<0) +#define HSW_MI_RS_SAVE_STATE_EN (1<<3) +#define HSW_MI_RS_RESTORE_STATE_EN (1<<2) +#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ +#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) +#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_POLL (1<<15) +#define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) +#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) +#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) +#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ +#define MI_USE_GGTT (1 << 22) /* g4x+ */ +#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) +#define MI_STORE_DWORD_INDEX_SHIFT 2 +/* + * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: + * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw + * simply ignores the register load under certain conditions. + * - One can actually load arbitrarily many arbitrary registers: Simply issue x + * address/value pairs. Don't overdo it, though, x <= 2^4 must hold! + */ +#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +#define MI_LRI_FORCE_POSTED (1<<12) +#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) +#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) +#define MI_SRM_LRM_GLOBAL_GTT (1<<22) +#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ +#define MI_FLUSH_DW_STORE_INDEX (1<<21) +#define MI_INVALIDATE_TLB (1<<18) +#define MI_FLUSH_DW_OP_STOREDW (1<<14) +#define MI_FLUSH_DW_OP_MASK (3<<14) +#define MI_FLUSH_DW_NOTIFY (1<<8) +#define MI_INVALIDATE_BSD (1<<7) +#define MI_FLUSH_DW_USE_GTT (1<<2) +#define MI_FLUSH_DW_USE_PPGTT (0<<2) +#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) +#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) +#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) +#define MI_BATCH_NON_SECURE (1) +/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled.
*/ +#define MI_BATCH_NON_SECURE_I965 (1<<8) +#define MI_BATCH_PPGTT_HSW (1<<8) +#define MI_BATCH_NON_SECURE_HSW (1<<13) +#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) +#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) + +/* + * 3D instructions used by the kernel + */ +#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags)) + +#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4) +#define GEN9_MEDIA_POOL_ENABLE (1 << 31) +#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24)) +#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19)) +#define SC_UPDATE_SCISSOR (0x1<<1) +#define SC_ENABLE_MASK (0x1<<0) +#define SC_ENABLE (0x1<<0) +#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16)) +#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1)) +#define SCI_YMIN_MASK (0xffff<<16) +#define SCI_XMIN_MASK (0xffff<<0) +#define SCI_YMAX_MASK (0xffff<<16) +#define SCI_XMAX_MASK (0xffff<<0) +#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19)) +#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1) +#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0) +#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4) +#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0) +#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) +#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) +#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) + +#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) +#define BLT_WRITE_A (2<<20) +#define BLT_WRITE_RGB (1<<20) +#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) +#define BLT_DEPTH_8 (0<<24) +#define BLT_DEPTH_16_565 (1<<24) +#define BLT_DEPTH_16_1555 (2<<24) +#define BLT_DEPTH_32 (3<<24) +#define BLT_ROP_SRC_COPY (0xcc<<16) +#define BLT_ROP_COLOR_COPY (0xf0<<16) +#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */ +#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */ +#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) +#define ASYNC_FLIP (1<<22) +#define DISPLAY_PLANE_A (0<<20) +#define DISPLAY_PLANE_B (1<<20) +#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL_FLUSH_L3 (1<<27) +#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ +#define PIPE_CONTROL_MMIO_WRITE (1<<23) +#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) +#define PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) +#define PIPE_CONTROL_QW_WRITE (1<<14) +#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) +#define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_WRITE_FLUSH (1<<12) +#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ +#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ +#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ +#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_NOTIFY (1<<8) +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) +#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) +#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) +#define 
PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) +#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ + +/* + * Commands used only by the command parser + */ +#define MI_SET_PREDICATE MI_INSTR(0x01, 0) +#define MI_ARB_CHECK MI_INSTR(0x05, 0) +#define MI_RS_CONTROL MI_INSTR(0x06, 0) +#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0) +#define MI_PREDICATE MI_INSTR(0x0C, 0) +#define MI_RS_CONTEXT MI_INSTR(0x0F, 0) +#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0) +#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0) +#define MI_URB_CLEAR MI_INSTR(0x19, 0) +#define MI_UPDATE_GTT MI_INSTR(0x23, 0) +#define MI_CLFLUSH MI_INSTR(0x27, 0) +#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) +#define MI_REPORT_PERF_COUNT_GGTT (1<<0) +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) +#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) +#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) +#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) +#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) + +#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) +#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) +#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) +#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) +#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) +#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) +#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16)) +#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16)) +#define GFX_OP_3DSTATE_SO_DECL_LIST \ + ((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16)) + +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16)) + +#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16)) + +#define COLOR_BLT ((0x2<<29)|(0x40<<22)) +#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) + +#endif /* _INTEL_GPU_COMMANDS_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index ff08ea0ebf49..116f4ccf1bbd 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -64,10 +64,12 @@ void intel_guc_init_early(struct intel_guc *guc) { intel_guc_fw_init_early(guc); intel_guc_ct_init_early(&guc->ct); - intel_guc_log_init_early(guc); + intel_guc_log_init_early(&guc->log); mutex_init(&guc->send_mutex); + spin_lock_init(&guc->irq_lock); guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; guc->notify = gen8_guc_raise_irq; } @@ -86,9 +88,10 @@ int intel_guc_init_wq(struct intel_guc *guc) * or scheduled later on resume. This way the handling of work * item can be kept same between system suspend & rpm suspend. 
*/ - guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (!guc->log.runtime.flush_wq) { + guc->log.relay.flush_wq = + alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); + if (!guc->log.relay.flush_wq) { DRM_ERROR("Couldn't allocate workqueue for GuC log\n"); return -ENOMEM; } @@ -111,7 +114,7 @@ int intel_guc_init_wq(struct intel_guc *guc) guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", WQ_HIGHPRI); if (!guc->preempt_wq) { - destroy_workqueue(guc->log.runtime.flush_wq); + destroy_workqueue(guc->log.relay.flush_wq); DRM_ERROR("Couldn't allocate workqueue for GuC " "preemption\n"); return -ENOMEM; @@ -129,7 +132,7 @@ void intel_guc_fini_wq(struct intel_guc *guc) USES_GUC_SUBMISSION(dev_priv)) destroy_workqueue(guc->preempt_wq); - destroy_workqueue(guc->log.runtime.flush_wq); + destroy_workqueue(guc->log.relay.flush_wq); } static int guc_shared_data_create(struct intel_guc *guc) @@ -169,7 +172,7 @@ int intel_guc_init(struct intel_guc *guc) return ret; GEM_BUG_ON(!guc->shared_data); - ret = intel_guc_log_create(guc); + ret = intel_guc_log_create(&guc->log); if (ret) goto err_shared; @@ -184,7 +187,7 @@ int intel_guc_init(struct intel_guc *guc) return 0; err_log: - intel_guc_log_destroy(guc); + intel_guc_log_destroy(&guc->log); err_shared: guc_shared_data_destroy(guc); return ret; @@ -196,41 +199,27 @@ void intel_guc_fini(struct intel_guc *guc) i915_ggtt_disable_guc(dev_priv); intel_guc_ads_destroy(guc); - intel_guc_log_destroy(guc); + intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); } -static u32 get_gt_type(struct drm_i915_private *dev_priv) +static u32 get_log_control_flags(void) { - /* XXX: GT type based on PCI device ID? field seems unused by fw */ - return 0; -} - -static u32 get_core_family(struct drm_i915_private *dev_priv) -{ - u32 gen = INTEL_GEN(dev_priv); + u32 level = i915_modparams.guc_log_level; + u32 flags = 0; - switch (gen) { - case 9: - return GUC_CORE_FAMILY_GEN9; - - default: - MISSING_CASE(gen); - return GUC_CORE_FAMILY_UNKNOWN; - } -} + GEM_BUG_ON(level < 0); -static u32 get_log_verbosity_flags(void) -{ - if (i915_modparams.guc_log_level > 0) { - u32 verbosity = i915_modparams.guc_log_level - 1; + if (!GUC_LOG_LEVEL_IS_ENABLED(level)) + flags |= GUC_LOG_DEFAULT_DISABLED; - GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX); - return verbosity << GUC_LOG_VERBOSITY_SHIFT; - } + if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) + flags |= GUC_LOG_DISABLED; + else + flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << + GUC_LOG_VERBOSITY_SHIFT; - GEM_BUG_ON(i915_modparams.enable_guc < 0); - return GUC_LOG_DISABLED; + return flags; } /* @@ -246,10 +235,6 @@ void intel_guc_init_params(struct intel_guc *guc) memset(params, 0, sizeof(params)); - params[GUC_CTL_DEVICE_INFO] |= - (get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) | - (get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT); - /* * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one * second. 
This ARAR is calculated by: @@ -265,12 +250,13 @@ void intel_guc_init_params(struct intel_guc *guc) params[GUC_CTL_LOG_PARAMS] = guc->log.flags; - params[GUC_CTL_DEBUG] = get_log_verbosity_flags(); + params[GUC_CTL_DEBUG] = get_log_control_flags(); /* If GuC submission is enabled, set up additional parameters here */ if (USES_GUC_SUBMISSION(dev_priv)) { - u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; - u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); + u32 ads = intel_guc_ggtt_offset(guc, + guc->ads_vma) >> PAGE_SHIFT; + u32 pgs = intel_guc_ggtt_offset(guc, guc->stage_desc_pool); u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; @@ -301,16 +287,23 @@ void intel_guc_init_params(struct intel_guc *guc) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); } -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) { WARN(1, "Unexpected send: action=%#x\n", *action); return -ENODEV; } +void intel_guc_to_host_event_handler_nop(struct intel_guc *guc) +{ + WARN(1, "Unexpected event: no suitable handler\n"); +} + /* * This function implements the MMIO based host to GuC interface. */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) { struct drm_i915_private *dev_priv = guc_to_i915(guc); u32 status; @@ -320,6 +313,9 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) GEM_BUG_ON(!len); GEM_BUG_ON(len > guc->send_regs.count); + /* We expect only action code */ + GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK); + /* If CT is available, we expect to use MMIO only during init/fini */ GEM_BUG_ON(HAS_GUC_CT(dev_priv) && *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && @@ -341,29 +337,74 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) */ ret = __intel_wait_for_register_fw(dev_priv, guc_send_reg(guc, 0), - INTEL_GUC_RECV_MASK, - INTEL_GUC_RECV_MASK, + INTEL_GUC_MSG_TYPE_MASK, + INTEL_GUC_MSG_TYPE_RESPONSE << + INTEL_GUC_MSG_TYPE_SHIFT, 10, 10, &status); - if (status != INTEL_GUC_STATUS_SUCCESS) { - /* - * Either the GuC explicitly returned an error (which - * we convert to -EIO here) or no response at all was - * received within the timeout limit (-ETIMEDOUT) - */ - if (ret != -ETIMEDOUT) - ret = -EIO; - - DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" - " ret=%d status=0x%08X response=0x%08X\n", - action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); + /* If GuC explicitly returned an error, convert it to -EIO */ + if (!ret && !INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(status)) + ret = -EIO; + + if (ret) { + DRM_DEBUG_DRIVER("INTEL_GUC_SEND: Action 0x%X failed;" + " ret=%d status=0x%08X response=0x%08X\n", + action[0], ret, status, + I915_READ(SOFT_SCRATCH(15))); + goto out; } + if (response_buf) { + int count = min(response_buf_size, guc->send_regs.count - 1); + + for (i = 0; i < count; i++) + response_buf[i] = I915_READ(guc_send_reg(guc, i + 1)); + } + + /* Use data from the GuC response as our return value */ + ret = INTEL_GUC_MSG_TO_DATA(status); + +out: intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); mutex_unlock(&guc->send_mutex); return ret; } +void intel_guc_to_host_event_handler_mmio(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 
msg, val; + + /* + * Sample the log buffer flush related bits & clear them out now + * itself from the message identity register to minimize the + * probability of losing a flush interrupt, when there are back + * to back flush interrupts. + * There can be a new flush interrupt, for different log buffer + * type (like for ISR), whilst Host is handling one (for DPC). + * Since same bit is used in message register for ISR & DPC, it + * could happen that GuC sets the bit for 2nd interrupt but Host + * clears out the bit on handling the 1st interrupt. + */ + spin_lock(&guc->irq_lock); + val = I915_READ(SOFT_SCRATCH(15)); + msg = val & guc->msg_enabled_mask; + I915_WRITE(SOFT_SCRATCH(15), val & ~msg); + spin_unlock(&guc->irq_lock); + + intel_guc_to_host_process_recv_msg(guc, msg); +} + +void intel_guc_to_host_process_recv_msg(struct intel_guc *guc, u32 msg) +{ + /* Make sure to handle only enabled messages */ + msg &= guc->msg_enabled_mask; + + if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED)) + intel_guc_log_handle_flush_event(&guc->log); +} + int intel_guc_sample_forcewake(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -410,7 +451,7 @@ int intel_guc_suspend(struct intel_guc *guc) u32 data[] = { INTEL_GUC_ACTION_ENTER_S_STATE, GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ - guc_ggtt_offset(guc->shared_data) + intel_guc_ggtt_offset(guc, guc->shared_data) }; return intel_guc_send(guc, data, ARRAY_SIZE(data)); @@ -434,7 +475,7 @@ int intel_guc_reset_engine(struct intel_guc *guc, data[3] = 0; data[4] = 0; data[5] = guc->execbuf_client->stage_id; - data[6] = guc_ggtt_offset(guc->shared_data); + data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); return intel_guc_send(guc, data, ARRAY_SIZE(data)); } @@ -448,13 +489,66 @@ int intel_guc_resume(struct intel_guc *guc) u32 data[] = { INTEL_GUC_ACTION_EXIT_S_STATE, GUC_POWER_D0, - guc_ggtt_offset(guc->shared_data) + intel_guc_ggtt_offset(guc, guc->shared_data) }; return intel_guc_send(guc, data, ARRAY_SIZE(data)); } /** + * DOC: GuC Address Space + * + * The layout of GuC address space is shown below: + * + * :: + * + * +==============> +====================+ <== GUC_GGTT_TOP + * ^ | | + * | | | + * | | DRAM | + * | | Memory | + * | | | + * GuC | | + * Address +========> +====================+ <== WOPCM Top + * Space ^ | HW contexts RSVD | + * | | | WOPCM | + * | | +==> +--------------------+ <== GuC WOPCM Top + * | GuC ^ | | + * | GGTT | | | + * | Pin GuC | GuC | + * | Bias WOPCM | WOPCM | + * | | Size | | + * | | | | | + * v v v | | + * +=====+=====+==> +====================+ <== GuC WOPCM Base + * | Non-GuC WOPCM | + * | (HuC/Reserved) | + * +====================+ <== WOPCM Base + * + * The lower part of GuC Address Space [0, ggtt_pin_bias) is mapped to WOPCM + * while upper part of GuC Address Space [ggtt_pin_bias, GUC_GGTT_TOP) is mapped + * to DRAM. The value of the GuC ggtt_pin_bias is determined by WOPCM size and + * actual GuC WOPCM size. + */ + +/** + * intel_guc_init_ggtt_pin_bias() - Initialize the GuC ggtt_pin_bias value. + * @guc: intel_guc structure. + * + * This function will calculate and initialize the ggtt_pin_bias value based on + * overall WOPCM size and GuC WOPCM size. 
+ */ +void intel_guc_init_ggtt_pin_bias(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_i915(guc); + + GEM_BUG_ON(!i915->wopcm.size); + GEM_BUG_ON(i915->wopcm.size < i915->wopcm.guc.base); + + guc->ggtt_pin_bias = i915->wopcm.size - i915->wopcm.guc.base; +} + +/** * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage * @guc: the guc * @size: size of area to allocate (both virtual space and memory) @@ -462,7 +556,7 @@ int intel_guc_resume(struct intel_guc *guc) * This is a wrapper to create an object for use with the GuC. In order to * use it inside the GuC, an object needs to be pinned lifetime, so we allocate * both some backing storage and a range inside the Global GTT. We must pin - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that + * it in the GGTT somewhere other than than [0, GUC ggtt_pin_bias) because that * range is reserved inside GuC. * * Return: A i915_vma if successful, otherwise an ERR_PTR. @@ -483,7 +577,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) goto err; ret = i915_vma_pin(vma, 0, PAGE_SIZE, - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + PIN_GLOBAL | PIN_OFFSET_BIAS | guc->ggtt_pin_bias); if (ret) { vma = ERR_PTR(ret); goto err; @@ -495,14 +589,3 @@ err: i915_gem_object_put(obj); return vma; } - -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv) -{ - u32 wopcm_size = GUC_WOPCM_TOP; - - /* On BXT, the top of WOPCM is reserved for RC6 context */ - if (IS_GEN9_LP(dev_priv)) - wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED; - - return wopcm_size; -} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index b9424ac644ac..f1265e122d30 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -49,11 +49,16 @@ struct intel_guc { struct intel_guc_log log; struct intel_guc_ct ct; + /* Offset where Non-WOPCM memory starts. */ + u32 ggtt_pin_bias; + /* Log snapshot if GuC errors during load */ struct drm_i915_gem_object *load_err_log; /* intel_guc_recv interrupt related state */ + spinlock_t irq_lock; bool interrupts_enabled; + unsigned int msg_enabled_mask; struct i915_vma *ads_vma; struct i915_vma *stage_desc_pool; @@ -83,7 +88,11 @@ struct intel_guc { struct mutex send_mutex; /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len); + int (*send)(struct intel_guc *guc, const u32 *data, u32 len, + u32 *response_buf, u32 response_buf_size); + + /* GuC's FW specific event handler function */ + void (*handler)(struct intel_guc *guc); /* GuC's FW specific notify function */ void (*notify)(struct intel_guc *guc); @@ -92,7 +101,14 @@ struct intel_guc { static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { - return guc->send(guc, action, len); + return guc->send(guc, action, len, NULL, 0); +} + +static inline int +intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) +{ + return guc->send(guc, action, len, response_buf, response_buf_size); } static inline void intel_guc_notify(struct intel_guc *guc) @@ -100,17 +116,33 @@ static inline void intel_guc_notify(struct intel_guc *guc) guc->notify(guc); } -/* - * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP), - * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is - * 512K. 
In order to exclude 0-512K address space from GGTT, all gfx objects - * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM. +static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) +{ + guc->handler(guc); +} + +/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ +#define GUC_GGTT_TOP 0xFEE00000 + +/** + * intel_guc_ggtt_offset() - Get and validate the GGTT offset of @vma + * @guc: intel_guc structure. + * @vma: i915 graphics virtual memory area. + * + * GuC does not allow any gfx GGTT address that falls into range + * [0, GuC ggtt_pin_bias), which is reserved for Boot ROM, SRAM and WOPCM. + * Currently, in order to exclude [0, GuC ggtt_pin_bias) address space from + * GGTT, all gfx objects used by GuC are allocated with intel_guc_allocate_vma() + * and pinned with PIN_OFFSET_BIAS along with the value of GuC ggtt_pin_bias. + * + * Return: GGTT offset of the @vma. */ -static inline u32 guc_ggtt_offset(struct i915_vma *vma) +static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc, + struct i915_vma *vma) { u32 offset = i915_ggtt_offset(vma); - GEM_BUG_ON(offset < GUC_WOPCM_TOP); + GEM_BUG_ON(offset < guc->ggtt_pin_bias); GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); return offset; @@ -119,17 +151,43 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) void intel_guc_init_early(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_init_params(struct intel_guc *guc); +void intel_guc_init_ggtt_pin_bias(struct intel_guc *guc); int intel_guc_init_wq(struct intel_guc *guc); void intel_guc_fini_wq(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); void intel_guc_fini(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size); +void intel_guc_to_host_event_handler(struct intel_guc *guc); +void intel_guc_to_host_event_handler_nop(struct intel_guc *guc); +void intel_guc_to_host_event_handler_mmio(struct intel_guc *guc); +void intel_guc_to_host_process_recv_msg(struct intel_guc *guc, u32 msg); int intel_guc_sample_forcewake(struct intel_guc *guc); int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); + +static inline int intel_guc_sanitize(struct intel_guc *guc) +{ + intel_uc_fw_sanitize(&guc->fw); + return 0; +} + +static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask) +{ + spin_lock_irq(&guc->irq_lock); + guc->msg_enabled_mask |= mask; + spin_unlock_irq(&guc->irq_lock); +} + +static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) +{ + spin_lock_irq(&guc->irq_lock); + guc->msg_enabled_mask &= ~mask; + spin_unlock_irq(&guc->irq_lock); +} #endif diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index ac627534667d..dcaa3fb71765 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -75,7 +75,7 @@ static void guc_policies_init(struct guc_policies *policies) int 
intel_guc_ads_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct i915_vma *vma; + struct i915_vma *vma, *kernel_ctx_vma; struct page *page; /* The ads obj includes the struct itself and buffers passed to GuC */ struct { @@ -121,9 +121,10 @@ int intel_guc_ads_create(struct intel_guc *guc) * to find it. Note that we have to skip our header (1 page), * because our GuC shared data is there. */ + kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, + dev_priv->engine[RCS])->state; blob->ads.golden_context_lrca = - guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + - skipped_offset; + intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; /* * The GuC expects us to exclude the portion of the context image that @@ -135,7 +136,7 @@ int intel_guc_ads_create(struct intel_guc *guc) blob->ads.eng_state_size[engine->guc_id] = engine->context_size - skipped_size; - base = guc_ggtt_offset(vma); + base = intel_guc_ggtt_offset(guc, vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c index 24ad55752396..371b6005954a 100644 --- a/drivers/gpu/drm/i915/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/intel_guc_ct.c @@ -24,14 +24,49 @@ #include "i915_drv.h" #include "intel_guc_ct.h" +#ifdef CONFIG_DRM_I915_DEBUG_GUC +#define CT_DEBUG_DRIVER(...) DRM_DEBUG_DRIVER(__VA_ARGS__) +#else +#define CT_DEBUG_DRIVER(...) do { } while (0) +#endif + +struct ct_request { + struct list_head link; + u32 fence; + u32 status; + u32 response_len; + u32 *response_buf; +}; + +struct ct_incoming_request { + struct list_head link; + u32 msg[]; +}; + enum { CTB_SEND = 0, CTB_RECV = 1 }; enum { CTB_OWNER_HOST = 0 }; +static void ct_incoming_request_worker_func(struct work_struct *w); + +/** + * intel_guc_ct_init_early - Initialize CT state without requiring device access + * @ct: pointer to CT struct + */ void intel_guc_ct_init_early(struct intel_guc_ct *ct) { /* we're using static channel owners */ ct->host_channel.owner = CTB_OWNER_HOST; + + spin_lock_init(&ct->lock); + INIT_LIST_HEAD(&ct->pending_requests); + INIT_LIST_HEAD(&ct->incoming_requests); + INIT_WORK(&ct->worker, ct_incoming_request_worker_func); +} + +static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) +{ + return container_of(ct, struct intel_guc, ct); } static inline const char *guc_ct_buffer_type_to_str(u32 type) @@ -49,8 +84,8 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type) static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, u32 cmds_addr, u32 size, u32 owner) { - DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", - desc, cmds_addr, size, owner); + CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", + desc, cmds_addr, size, owner); memset(desc, 0, sizeof(*desc)); desc->addr = cmds_addr; desc->size = size; @@ -59,8 +94,8 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) { - DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n", - desc, desc->head, desc->tail); + CT_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n", + desc, desc->head, desc->tail); desc->head = 0; desc->tail = 0; desc->is_in_error = 0; @@ -79,7 +114,7 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, int err; /* Can't use 
generic send(), CT registration must go over MMIO */ - err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (err) DRM_ERROR("CT: register %s buffer failed; err=%d\n", guc_ct_buffer_type_to_str(type), err); @@ -98,7 +133,7 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, int err; /* Can't use generic send(), CT deregistration must go over MMIO */ - err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (err) DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n", guc_ct_buffer_type_to_str(type), owner, err); @@ -156,7 +191,8 @@ static int ctch_init(struct intel_guc *guc, err = PTR_ERR(blob); goto err_vma; } - DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma)); + CT_DEBUG_DRIVER("CT: vma base=%#x\n", + intel_guc_ggtt_offset(guc, ctch->vma)); /* store pointers to desc and cmds */ for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { @@ -170,8 +206,8 @@ static int ctch_init(struct intel_guc *guc, err_vma: i915_vma_unpin_and_release(&ctch->vma); err_out: - DRM_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", - ctch->owner, err); + CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", + ctch->owner, err); return err; } @@ -191,8 +227,8 @@ static int ctch_open(struct intel_guc *guc, int err; int i; - DRM_DEBUG_DRIVER("CT: channel %d reopen=%s\n", - ctch->owner, yesno(ctch_is_open(ctch))); + CT_DEBUG_DRIVER("CT: channel %d reopen=%s\n", + ctch->owner, yesno(ctch_is_open(ctch))); if (!ctch->vma) { err = ctch_init(guc, ctch); @@ -202,7 +238,7 @@ static int ctch_open(struct intel_guc *guc, } /* vma should be already allocated and map'ed */ - base = guc_ggtt_offset(ctch->vma); + base = intel_guc_ggtt_offset(guc, ctch->vma); /* (re)initialize descriptors * cmds buffers are in the second half of the blob page @@ -263,10 +299,29 @@ static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch) return ++ctch->next_fence; } +/** + * DOC: CTB Host to GuC request + * + * Format of the CTB Host to GuC request message is as follows:: + * + * +------------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | ... | [n-1] | + * +------------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+ + * | | 0 | 1 | ... | n | + * +============+=========+=========+=========+=========+ + * | len >= 1 | FENCE | request specific data | + * +------+-----+---------+---------+---------+---------+ + * + * ^-----------------len-------------------^ + */ + static int ctb_write(struct intel_guc_ct_buffer *ctb, const u32 *action, u32 len /* in dwords */, - u32 fence) + u32 fence, + bool want_response) { struct guc_ct_buffer_desc *desc = ctb->desc; u32 head = desc->head / 4; /* in dwords */ @@ -295,15 +350,21 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb, if (unlikely(used + len + 1 >= size)) return -ENOSPC; - /* Write the message. The format is the following: + /* + * Write the message. The format is the following: * DW0: header (including action code) * DW1: fence * DW2+: action data */ header = (len << GUC_CT_MSG_LEN_SHIFT) | (GUC_CT_MSG_WRITE_FENCE_TO_DESC) | + (want_response ? 
GUC_CT_MSG_SEND_STATUS : 0) | (action[0] << GUC_CT_MSG_ACTION_SHIFT); + CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n", + 4, &header, 4, &fence, + 4 * (len - 1), &action[1]); + cmds[tail] = header; tail = (tail + 1) % size; @@ -322,16 +383,25 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb, return 0; } -/* Wait for the response from the GuC. +/** + * wait_for_ctb_desc_update - Wait for the CT buffer descriptor update. + * @desc: buffer descriptor * @fence: response fence * @status: placeholder for status - * return: 0 response received (status is valid) - * -ETIMEDOUT no response within hardcoded timeout - * -EPROTO no response, ct buffer was in error + * + * GuC will update the CT buffer descriptor with a new fence and status + * after processing the command identified by the fence. Wait for the + * specified fence and then read the status of the command from the + * descriptor. + * + * Return: + * * 0 response received (status is valid) + * * -ETIMEDOUT no response within hardcoded timeout + * * -EPROTO no response, CT buffer is in error */ -static int wait_for_response(struct guc_ct_buffer_desc *desc, - u32 fence, - u32 *status) +static int wait_for_ctb_desc_update(struct guc_ct_buffer_desc *desc, + u32 fence, + u32 *status) { int err; @@ -363,71 +433,440 @@ static int wait_for_ctb_desc_update(struct guc_ct_buffer_desc *desc, return err; } -static int ctch_send(struct intel_guc *guc, +/** + * wait_for_ct_request_update - Wait for CT request state update. + * @req: pointer to pending request + * @status: placeholder for status + * + * For each sent request, GuC shall send back a CT response message. + * Our message handler will update the status of the tracked request + * once a response message with the given fence is received. Wait here + * and check for a valid response status value. + * + * Return: + * * 0 response received (status is valid) + * * -ETIMEDOUT no response within hardcoded timeout + */ +static int wait_for_ct_request_update(struct ct_request *req, u32 *status) +{ + int err; + + /* + * Fast commands should complete in less than 10us, so sample quickly + * up to that length of time, then switch to a slower sleep-wait loop. + * No GuC command should ever take longer than 10ms.
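/*
 * A minimal userspace-flavoured sketch of the two-phase wait described
 * above, with a caller-supplied done() predicate standing in for
 * INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status)); the driver itself
 * uses its wait_for_us()/wait_for() helpers rather than this code.
 */
#include <errno.h>
#include <stdbool.h>
#include <time.h>

static long long ns_elapsed(const struct timespec *t0)
{
	struct timespec t1;

	clock_gettime(CLOCK_MONOTONIC, &t1);
	return (t1.tv_sec - t0->tv_sec) * 1000000000LL +
	       (t1.tv_nsec - t0->tv_nsec);
}

/* Busy-poll for ~10us, then sleep-poll up to the 10ms worst case. */
static int wait_two_phase(bool (*done)(void))
{
	struct timespec t0, nap = { .tv_nsec = 100 * 1000 };

	clock_gettime(CLOCK_MONOTONIC, &t0);

	while (ns_elapsed(&t0) < 10 * 1000)
		if (done())
			return 0;

	while (ns_elapsed(&t0) < 10 * 1000 * 1000) {
		if (done())
			return 0;
		nanosleep(&nap, NULL);
	}

	return done() ? 0 : -ETIMEDOUT;
}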
+ */ +#define done INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status)) + err = wait_for_us(done, 10); + if (err) + err = wait_for(done, 10); +#undef done + + if (unlikely(err)) + DRM_ERROR("CT: fence %u err %d\n", req->fence, err); + + *status = req->status; + return err; +} + +static int ctch_send(struct intel_guc_ct *ct, struct intel_guc_ct_channel *ctch, const u32 *action, u32 len, + u32 *response_buf, + u32 response_buf_size, u32 *status) { struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND]; struct guc_ct_buffer_desc *desc = ctb->desc; + struct ct_request request; + unsigned long flags; u32 fence; int err; GEM_BUG_ON(!ctch_is_open(ctch)); GEM_BUG_ON(!len); GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); + GEM_BUG_ON(!response_buf && response_buf_size); fence = ctch_get_next_fence(ctch); - err = ctb_write(ctb, action, len, fence); + request.fence = fence; + request.status = 0; + request.response_len = response_buf_size; + request.response_buf = response_buf; + + spin_lock_irqsave(&ct->lock, flags); + list_add_tail(&request.link, &ct->pending_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + err = ctb_write(ctb, action, len, fence, !!response_buf); if (unlikely(err)) - return err; + goto unlink; - intel_guc_notify(guc); + intel_guc_notify(ct_to_guc(ct)); - err = wait_for_response(desc, fence, status); + if (response_buf) + err = wait_for_ct_request_update(&request, status); + else + err = wait_for_ctb_desc_update(desc, fence, status); if (unlikely(err)) - return err; - if (*status != INTEL_GUC_STATUS_SUCCESS) - return -EIO; - return 0; + goto unlink; + + if (!INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(*status)) { + err = -EIO; + goto unlink; + } + + if (response_buf) { + /* There shall be no data in the status */ + WARN_ON(INTEL_GUC_MSG_TO_DATA(request.status)); + /* Return actual response len */ + err = request.response_len; + } else { + /* There shall be no response payload */ + WARN_ON(request.response_len); + /* Return data decoded from the status dword */ + err = INTEL_GUC_MSG_TO_DATA(*status); + } + +unlink: + spin_lock_irqsave(&ct->lock, flags); + list_del(&request.link); + spin_unlock_irqrestore(&ct->lock, flags); + + return err; } /* * Command Transport (CT) buffer based GuC send function. 
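/*
 * A standalone sketch of the head/tail arithmetic shared by ctb_write()
 * above and ctb_read() below. head == tail means "empty", so a write is
 * rejected unless it leaves the ring strictly short of full; all
 * quantities are in dwords, as in the driver.
 */
#include <stdint.h>

static uint32_t ctb_used(uint32_t head, uint32_t tail, uint32_t size)
{
	int32_t used = (int32_t)tail - (int32_t)head;

	if (used < 0)	/* the queued data wraps past the buffer end */
		used += size;
	return (uint32_t)used;
}

/*
 * Mirrors the -ENOSPC check in ctb_write(): a message of 'len' action
 * dwords costs len + 1 slots (header + fence + len - 1 data dwords,
 * with action[0] folded into the header).
 */
static int ctb_msg_fits(uint32_t head, uint32_t tail, uint32_t size,
			uint32_t len)
{
	return ctb_used(head, tail, size) + len + 1 < size;
}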
*/ -static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len) +static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) { - struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + struct intel_guc_ct *ct = &guc->ct; + struct intel_guc_ct_channel *ctch = &ct->host_channel; u32 status = ~0; /* undefined */ - int err; + int ret; mutex_lock(&guc->send_mutex); - err = ctch_send(guc, ctch, action, len, &status); - if (unlikely(err)) { + ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size, + &status); + if (unlikely(ret < 0)) { DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", - action[0], err, status); + action[0], ret, status); + } else if (unlikely(ret)) { + CT_DEBUG_DRIVER("CT: send action %#x returned %d (%#x)\n", + action[0], ret, ret); } mutex_unlock(&guc->send_mutex); - return err; + return ret; +} + +static inline unsigned int ct_header_get_len(u32 header) +{ + return (header >> GUC_CT_MSG_LEN_SHIFT) & GUC_CT_MSG_LEN_MASK; +} + +static inline unsigned int ct_header_get_action(u32 header) +{ + return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK; +} + +static inline bool ct_header_is_response(u32 header) +{ + return ct_header_get_action(header) == INTEL_GUC_ACTION_DEFAULT; +} + +static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data) +{ + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head = desc->head / 4; /* in dwords */ + u32 tail = desc->tail / 4; /* in dwords */ + u32 size = desc->size / 4; /* in dwords */ + u32 *cmds = ctb->cmds; + s32 available; /* in dwords */ + unsigned int len; + unsigned int i; + + GEM_BUG_ON(desc->size % 4); + GEM_BUG_ON(desc->head % 4); + GEM_BUG_ON(desc->tail % 4); + GEM_BUG_ON(tail >= size); + GEM_BUG_ON(head >= size); + + /* tail == head condition indicates empty */ + available = tail - head; + if (unlikely(available == 0)) + return -ENODATA; + + /* beware of buffer wrap case */ + if (unlikely(available < 0)) + available += size; + CT_DEBUG_DRIVER("CT: available %d (%u:%u)\n", available, head, tail); + GEM_BUG_ON(available < 0); + + data[0] = cmds[head]; + head = (head + 1) % size; + + /* message len with header */ + len = ct_header_get_len(data[0]) + 1; + if (unlikely(len > (u32)available)) { + DRM_ERROR("CT: incomplete message %*ph %*ph %*ph\n", + 4, data, + 4 * (head + available - 1 > size ? + size - head : available - 1), &cmds[head], + 4 * (head + available - 1 > size ? + available - 1 - size + head : 0), &cmds[0]); + return -EPROTO; + } + + for (i = 1; i < len; i++) { + data[i] = cmds[head]; + head = (head + 1) % size; + } + CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data); + + desc->head = head * 4; + return 0; } /** - * Enable buffer based command transport + * DOC: CTB GuC to Host response + * + * Format of the CTB GuC to Host response message is as follows:: + * + * +------------+---------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | [3] | ... | [n-1] | + * +------------+---------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+---------+ + * | | 0 | 1 | 2 | ... 
| n | + * +============+=========+=========+=========+=========+=========+ + * | len >= 2 | FENCE | STATUS | response specific data | + * +------+-----+---------+---------+---------+---------+---------+ + * + * ^-----------------------len-----------------------^ + */ + +static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) +{ + u32 header = msg[0]; + u32 len = ct_header_get_len(header); + u32 msglen = len + 1; /* total message length including header */ + u32 fence; + u32 status; + u32 datalen; + struct ct_request *req; + bool found = false; + + GEM_BUG_ON(!ct_header_is_response(header)); + GEM_BUG_ON(!in_irq()); + + /* Response payload shall at least include fence and status */ + if (unlikely(len < 2)) { + DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg); + return -EPROTO; + } + + fence = msg[1]; + status = msg[2]; + datalen = len - 2; + + /* Format of the status follows RESPONSE message */ + if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) { + DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg); + return -EPROTO; + } + + CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status); + + spin_lock(&ct->lock); + list_for_each_entry(req, &ct->pending_requests, link) { + if (unlikely(fence != req->fence)) { + CT_DEBUG_DRIVER("CT: request %u awaits response\n", + req->fence); + continue; + } + if (unlikely(datalen > req->response_len)) { + DRM_ERROR("CT: response %u too long %*ph\n", + req->fence, 4 * msglen, msg); + datalen = 0; + } + if (datalen) + memcpy(req->response_buf, msg + 3, 4 * datalen); + req->response_len = datalen; + WRITE_ONCE(req->status, status); + found = true; + break; + } + spin_unlock(&ct->lock); + + if (!found) + DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg); + return 0; +} + +static void ct_process_request(struct intel_guc_ct *ct, + u32 action, u32 len, const u32 *payload) +{ + struct intel_guc *guc = ct_to_guc(ct); + + CT_DEBUG_DRIVER("CT: request %x %*ph\n", action, 4 * len, payload); + + switch (action) { + case INTEL_GUC_ACTION_DEFAULT: + if (unlikely(len < 1)) + goto fail_unexpected; + intel_guc_to_host_process_recv_msg(guc, *payload); + break; + + default: +fail_unexpected: + DRM_ERROR("CT: unexpected request %x %*ph\n", + action, 4 * len, payload); + break; + } +} + +static bool ct_process_incoming_requests(struct intel_guc_ct *ct) +{ + unsigned long flags; + struct ct_incoming_request *request; + u32 header; + u32 *payload; + bool done; + + spin_lock_irqsave(&ct->lock, flags); + request = list_first_entry_or_null(&ct->incoming_requests, + struct ct_incoming_request, link); + if (request) + list_del(&request->link); + done = !!list_empty(&ct->incoming_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + if (!request) + return true; + + header = request->msg[0]; + payload = &request->msg[1]; + ct_process_request(ct, + ct_header_get_action(header), + ct_header_get_len(header), + payload); + + kfree(request); + return done; +} + +static void ct_incoming_request_worker_func(struct work_struct *w) +{ + struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker); + bool done; + + done = ct_process_incoming_requests(ct); + if (!done) + queue_work(system_unbound_wq, &ct->worker); +} + +/** + * DOC: CTB GuC to Host request + * + * Format of the CTB GuC to Host request message is as follows:: + * + * +------------+---------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | [3] | ... 
| [n-1] | + * +------------+---------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+---------+ + * | | 0 | 1 | 2 | ... | n | + * +============+=========+=========+=========+=========+=========+ + * | len | request specific data | + * +------+-----+---------+---------+---------+---------+---------+ + * + * ^-----------------------len-----------------------^ + */ + +static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) +{ + u32 header = msg[0]; + u32 len = ct_header_get_len(header); + u32 msglen = len + 1; /* total message length including header */ + struct ct_incoming_request *request; + unsigned long flags; + + GEM_BUG_ON(ct_header_is_response(header)); + + request = kmalloc(sizeof(*request) + 4 * msglen, GFP_ATOMIC); + if (unlikely(!request)) { + DRM_ERROR("CT: dropping request %*ph\n", 4 * msglen, msg); + return 0; /* XXX: -ENOMEM ? */ + } + memcpy(request->msg, msg, 4 * msglen); + + spin_lock_irqsave(&ct->lock, flags); + list_add_tail(&request->link, &ct->incoming_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + queue_work(system_unbound_wq, &ct->worker); + return 0; +} + +static void ct_process_host_channel(struct intel_guc_ct *ct) +{ + struct intel_guc_ct_channel *ctch = &ct->host_channel; + struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV]; + u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */ + int err = 0; + + if (!ctch_is_open(ctch)) + return; + + do { + err = ctb_read(ctb, msg); + if (err) + break; + + if (ct_header_is_response(msg[0])) + err = ct_handle_response(ct, msg); + else + err = ct_handle_request(ct, msg); + } while (!err); + + if (GEM_WARN_ON(err == -EPROTO)) { + DRM_ERROR("CT: corrupted message detected!\n"); + ctb->desc->is_in_error = 1; + } +} + +/* + * When we're communicating with the GuC over CT, GuC uses events + * to notify us about new messages being posted on the RECV buffer. + */ +static void intel_guc_to_host_event_handler_ct(struct intel_guc *guc) +{ + struct intel_guc_ct *ct = &guc->ct; + + ct_process_host_channel(ct); +} + +/** + * intel_guc_ct_enable - Enable buffer based command transport. + * @ct: pointer to CT struct + * * Shall only be called for platforms with HAS_GUC_CT. - * @guc: the guc - * return: 0 on success - * non-zero on failure + * + * Return: 0 on success, a negative errno code on failure. */ -int intel_guc_enable_ct(struct intel_guc *guc) +int intel_guc_ct_enable(struct intel_guc_ct *ct) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + struct intel_guc *guc = ct_to_guc(ct); + struct drm_i915_private *i915 = guc_to_i915(guc); + struct intel_guc_ct_channel *ctch = &ct->host_channel; int err; - GEM_BUG_ON(!HAS_GUC_CT(dev_priv)); + GEM_BUG_ON(!HAS_GUC_CT(i915)); err = ctch_open(guc, ctch); if (unlikely(err)) @@ -435,21 +874,24 @@ int intel_guc_enable_ct(struct intel_guc *guc) /* Switch into cmd transport buffer based send() */ guc->send = intel_guc_send_ct; + guc->handler = intel_guc_to_host_event_handler_ct; DRM_INFO("CT: %s\n", enableddisabled(true)); return 0; } /** - * Disable buffer based command transport. + * intel_guc_ct_disable - Disable buffer based command transport. + * @ct: pointer to CT struct + * * Shall only be called for platforms with HAS_GUC_CT. 
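/*
 * The deferred-processing shape used by ct_handle_request() and the
 * worker above, reduced to a standalone sketch: the interrupt path only
 * copies the message and queues it; a worker pops one request per
 * invocation and asks to be re-queued while more remain. Steps the
 * driver performs under ct->lock are marked; locking itself is omitted
 * for brevity.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct pending {
	struct pending *next;
	uint32_t msg[];		/* flexible array, like ct_incoming_request */
};

static struct pending *head, **tail = &head;

/* "IRQ" side: copy and defer (kmalloc(..., GFP_ATOMIC) in the driver). */
static bool defer_request(const uint32_t *msg, size_t dwords)
{
	struct pending *p = malloc(sizeof(*p) + 4 * dwords);

	if (!p)
		return false;
	memcpy(p->msg, msg, 4 * dwords);
	p->next = NULL;
	*tail = p;		/* append: under ct->lock in the driver */
	tail = &p->next;
	return true;
}

/* Worker side: handle one request; returns true when the list drained. */
static bool process_one(void (*handle)(const uint32_t *msg))
{
	struct pending *p = head;	/* unlink: under ct->lock */
	bool done;

	if (!p)
		return true;
	head = p->next;
	if (!head)
		tail = &head;
	done = !head;		/* sampled before processing, as above */

	handle(p->msg);
	free(p);
	return done;		/* caller re-queues the worker if false */
}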
- * @guc: the guc */ -void intel_guc_disable_ct(struct intel_guc *guc) +void intel_guc_ct_disable(struct intel_guc_ct *ct) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + struct intel_guc *guc = ct_to_guc(ct); + struct drm_i915_private *i915 = guc_to_i915(guc); + struct intel_guc_ct_channel *ctch = &ct->host_channel; - GEM_BUG_ON(!HAS_GUC_CT(dev_priv)); + GEM_BUG_ON(!HAS_GUC_CT(i915)); if (!ctch_is_open(ctch)) return; @@ -458,5 +900,6 @@ void intel_guc_disable_ct(struct intel_guc *guc) /* Disable send */ guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; DRM_INFO("CT: %s\n", enableddisabled(false)); } diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h index 6d97f36fcc62..d774895ab143 100644 --- a/drivers/gpu/drm/i915/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/intel_guc_ct.h @@ -75,12 +75,22 @@ struct intel_guc_ct_channel { struct intel_guc_ct { struct intel_guc_ct_channel host_channel; /* other channels are tbd */ + + /** @lock: protects pending requests list */ + spinlock_t lock; + + /** @pending_requests: list of requests waiting for response */ + struct list_head pending_requests; + + /** @incoming_requests: list of incoming requests */ + struct list_head incoming_requests; + + /** @worker: worker for handling incoming requests */ + struct work_struct worker; }; void intel_guc_ct_init_early(struct intel_guc_ct *ct); - -/* XXX: move to intel_uc.h ? don't fit there either */ -int intel_guc_enable_ct(struct intel_guc *guc); -void intel_guc_disable_ct(struct intel_guc *guc); +int intel_guc_ct_enable(struct intel_guc_ct *ct); +void intel_guc_ct_disable(struct intel_guc_ct *ct); #endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index d07f2b985f1c..a9e6fcce467c 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -165,7 +165,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); /* Set the source address for the new blob */ - offset = guc_ggtt_offset(vma) + guc_fw->header_offset; + offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -275,9 +275,8 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) * Called from intel_uc_init_hw() during driver load, resume from sleep and * after a GPU reset. * - * The firmware image should have already been fetched into memory by the - * earlier call to intel_uc_init_fw(), so here we need to only check that - * fetch succeeded, and then transfer the image to the h/w. + * The firmware image should have already been fetched into memory, so only + * check that fetch succeeded, and then transfer the image to the h/w. 
* * Return: non-zero code on error */ diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 6a10aa6f04d3..0867ba76d445 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -23,9 +23,6 @@ #ifndef _INTEL_GUC_FWIF_H #define _INTEL_GUC_FWIF_H -#define GUC_CORE_FAMILY_GEN9 12 -#define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff - #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 @@ -82,8 +79,6 @@ #define GUC_CTL_ARAT_LOW 2 #define GUC_CTL_DEVICE_INFO 3 -#define GUC_CTL_GT_TYPE_SHIFT 0 -#define GUC_CTL_CORE_FAMILY_SHIFT 7 #define GUC_CTL_LOG_PARAMS 4 #define GUC_LOG_VALID (1 << 0) @@ -127,7 +122,7 @@ #define GUC_PROFILE_ENABLED (1 << 7) #define GUC_WQ_TRACK_ENABLED (1 << 8) #define GUC_ADS_ENABLED (1 << 9) -#define GUC_DEBUG_RESERVED (1 << 10) +#define GUC_LOG_DEFAULT_DISABLED (1 << 10) #define GUC_ADS_ADDR_SHIFT 11 #define GUC_ADS_ADDR_MASK 0xfffff800 @@ -327,6 +322,58 @@ struct guc_stage_desc { u64 desc_private; } __packed; +/** + * DOC: CTB based communication + * + * The CTB (command transport buffer) communication between Host and GuC + * is based on a u32 data stream written to the shared buffer. One buffer can + * be used to transmit data only in one direction (one-directional channel). + * + * The current status of each buffer is stored in the buffer descriptor. + * The buffer descriptor holds tail and head fields that represent the active + * data stream. The tail field is updated by the data producer (sender), and + * the head field is updated by the data consumer (receiver):: + * + * +------------+ + * | DESCRIPTOR | +=================+============+========+ + * +============+ | | MESSAGE(s) | | + * | address |--------->+=================+============+========+ + * +------------+ + * | head | ^-----head--------^ + * +------------+ + * | tail | ^---------tail-----------------^ + * +------------+ + * | size | ^---------------size--------------------^ + * +------------+ + * + * Each message in the data stream starts with a single u32 treated as a + * header, followed by an optional set of u32 data that makes up the + * message-specific payload:: + * + * +------------+---------+---------+---------+ + * | MESSAGE | + * +------------+---------+---------+---------+ + * | msg[0] | [1] | ... | [n-1] | + * +------------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+ + * | | 0 | ... | n | + * +======+=====+=========+=========+=========+ + * | 31:16| code| | | | + * +------+-----+ | | | + * | 15:5|flags| | | | + * +------+-----+ | | | + * | 4:0| len| | | | + * +------+-----+---------+---------+---------+ + * + * ^-------------len-------------^ + * + * The message header consists of: + * + * - **len**, indicates the length of the message payload (in u32) + * - **code**, indicates the message code + * - **flags**, holds various bits to control message handling + */ + /* * Describes single command transport buffer. * Used by both guc-master and clients.
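/*
 * A standalone restatement of the CT message header layout documented
 * above (code in bits 31:16, flags in 15:5, len in 4:0). The macro names
 * here are illustrative; the driver composes the same word from
 * GUC_CT_MSG_LEN_SHIFT/GUC_CT_MSG_ACTION_SHIFT and flag bits such as
 * GUC_CT_MSG_WRITE_FENCE_TO_DESC.
 */
#include <stdint.h>

#define CT_HDR_LEN(h)	((h) & 0x1f)		/* bits 4:0 */
#define CT_HDR_FLAGS(h)	(((h) >> 5) & 0x7ff)	/* bits 15:5 */
#define CT_HDR_CODE(h)	(((h) >> 16) & 0xffff)	/* bits 31:16 */

static uint32_t ct_make_header(uint16_t code, uint16_t flags, uint8_t len)
{
	return ((uint32_t)code << 16) |
	       ((uint32_t)(flags & 0x7ff) << 5) |
	       (len & 0x1f);
}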
@@ -534,16 +581,6 @@ struct guc_log_buffer_state { u32 version; } __packed; -union guc_log_control { - struct { - u32 logging_enabled:1; - u32 reserved1:3; - u32 verbosity:4; - u32 reserved2:24; - }; - u32 value; -} __packed; - struct guc_ctx_report { u32 report_return_status; u32 reserved1[64]; @@ -570,7 +607,68 @@ struct guc_shared_ctx_data { struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; } __packed; -/* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */ +/** + * DOC: MMIO based communication + * + * The MMIO based communication between Host and GuC uses software scratch + * registers, where the first register holds data treated as the message + * header, and the other registers are used to hold the message payload. + * + * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8. + * + * +-----------+---------+---------+---------+ + * | MMIO[0] | MMIO[1] | ... | MMIO[n] | + * +-----------+---------+---------+---------+ + * | header | optional payload | + * +======+====+=========+=========+=========+ + * | 31:28|type| | | | + * +------+----+ | | | + * | 27:16|data| | | | + * +------+----+ | | | + * | 15:0|code| | | | + * +------+----+---------+---------+---------+ + * + * The message header consists of: + * + * - **type**, indicates the message type + * - **code**, indicates the message code, specific to **type** + * - **data**, indicates optional message data, depending on **code** + * + * The following message **types** are supported: + * + * - **REQUEST**, indicates a Host-to-GuC request; the requested GuC action + * code must be provided in the **code** field. Optional action specific + * parameters can be provided in the remaining payload registers or the + * **data** field. + * + * - **RESPONSE**, indicates a GuC-to-Host response to an earlier request; + * the action response status will be provided in the **code** field. + * Optional response data can be returned in the remaining payload + * registers or the **data** field. + */ + +#define INTEL_GUC_MSG_TYPE_SHIFT 28 +#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) +#define INTEL_GUC_MSG_DATA_SHIFT 16 +#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) +#define INTEL_GUC_MSG_CODE_SHIFT 0 +#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) + +#define __INTEL_GUC_MSG_GET(T, m) \ + (((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT) +#define INTEL_GUC_MSG_TO_TYPE(m) __INTEL_GUC_MSG_GET(TYPE, m) +#define INTEL_GUC_MSG_TO_DATA(m) __INTEL_GUC_MSG_GET(DATA, m) +#define INTEL_GUC_MSG_TO_CODE(m) __INTEL_GUC_MSG_GET(CODE, m) + +enum intel_guc_msg_type { + INTEL_GUC_MSG_TYPE_REQUEST = 0x0, + INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, +}; + +#define __INTEL_GUC_MSG_TYPE_IS(T, m) \ + (INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T) +#define INTEL_GUC_MSG_IS_REQUEST(m) __INTEL_GUC_MSG_TYPE_IS(REQUEST, m) +#define INTEL_GUC_MSG_IS_RESPONSE(m) __INTEL_GUC_MSG_TYPE_IS(RESPONSE, m) + enum intel_guc_action { INTEL_GUC_ACTION_DEFAULT = 0x0, INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, @@ -602,24 +700,22 @@ enum intel_guc_report_status { INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4, }; -/* - * The GuC sends its response to a command by overwriting the - * command in SS0. The response is distinguishable from a command - * by the fact that all the MASK bits are set. The remaining bits - * give more detail.
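/*
 * Exercising the MMIO header layout and success test defined above,
 * with the INTEL_GUC_MSG_* names shortened for a standalone build; the
 * data bits are deliberately ignored by the success check, exactly as
 * INTEL_GUC_MSG_IS_RESPONSE_SUCCESS() ignores them.
 */
#include <assert.h>
#include <stdint.h>

#define MSG_TYPE_SHIFT	28
#define MSG_TYPE_MASK	(0xFu << MSG_TYPE_SHIFT)
#define MSG_DATA_SHIFT	16
#define MSG_CODE_MASK	0xFFFFu

#define TYPE_REQUEST	0x0u	/* INTEL_GUC_MSG_TYPE_REQUEST */
#define TYPE_RESPONSE	0xFu	/* INTEL_GUC_MSG_TYPE_RESPONSE */
#define STATUS_SUCCESS	0x0u	/* INTEL_GUC_RESPONSE_STATUS_SUCCESS */

int main(void)
{
	/* Host-to-GuC request: action code in code, optional data field. */
	uint32_t req = (TYPE_REQUEST << MSG_TYPE_SHIFT) |
		       (0x123u << MSG_DATA_SHIFT) | 0x0030u;

	/* GuC-to-Host reply reporting success, with arbitrary data bits. */
	uint32_t rsp = (TYPE_RESPONSE << MSG_TYPE_SHIFT) |
		       (0x0aau << MSG_DATA_SHIFT) | STATUS_SUCCESS;

	assert((req & MSG_TYPE_MASK) == (TYPE_REQUEST << MSG_TYPE_SHIFT));

	/* Success iff type is RESPONSE and code is SUCCESS; data is free. */
	assert((rsp & (MSG_TYPE_MASK | MSG_CODE_MASK)) ==
	       ((TYPE_RESPONSE << MSG_TYPE_SHIFT) | STATUS_SUCCESS));
	return 0;
}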
- */ -#define INTEL_GUC_RECV_MASK ((u32)0xF0000000) -#define INTEL_GUC_RECV_IS_RESPONSE(x) ((u32)(x) >= INTEL_GUC_RECV_MASK) -#define INTEL_GUC_RECV_STATUS(x) (INTEL_GUC_RECV_MASK | (x)) - -/* GUC will return status back to SOFT_SCRATCH_O_REG */ -enum intel_guc_status { - INTEL_GUC_STATUS_SUCCESS = INTEL_GUC_RECV_STATUS(0x0), - INTEL_GUC_STATUS_ALLOCATE_DOORBELL_FAIL = INTEL_GUC_RECV_STATUS(0x10), - INTEL_GUC_STATUS_DEALLOCATE_DOORBELL_FAIL = INTEL_GUC_RECV_STATUS(0x20), - INTEL_GUC_STATUS_GENERIC_FAIL = INTEL_GUC_RECV_STATUS(0x0000F000) +#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0) +#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4 +#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) +#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) + +enum intel_guc_response_status { + INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, }; +#define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \ + (typecheck(u32, (m)) && \ + ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \ + ((INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT) | \ + (INTEL_GUC_RESPONSE_STATUS_SUCCESS << INTEL_GUC_MSG_CODE_SHIFT))) + /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ enum intel_guc_recv_message { INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index c0c2e7d1c7d7..401e1704d61e 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -23,12 +23,11 @@ */ #include <linux/debugfs.h> -#include <linux/relay.h> #include "intel_guc_log.h" #include "i915_drv.h" -static void guc_log_capture_logs(struct intel_guc *guc); +static void guc_log_capture_logs(struct intel_guc_log *log); /** * DOC: GuC firmware log @@ -39,7 +38,7 @@ static void guc_log_capture_logs(struct intel_guc *guc); * registers value. */ -static int guc_log_flush_complete(struct intel_guc *guc) +static int guc_action_flush_log_complete(struct intel_guc *guc) { u32 action[] = { INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE @@ -48,7 +47,7 @@ static int guc_log_flush_complete(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static int guc_log_flush(struct intel_guc *guc) +static int guc_action_flush_log(struct intel_guc *guc) { u32 action[] = { INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH, @@ -58,22 +57,40 @@ static int guc_log_flush(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static int guc_log_control(struct intel_guc *guc, bool enable, u32 verbosity) +static int guc_action_control_log(struct intel_guc *guc, bool enable, + bool default_logging, u32 verbosity) { - union guc_log_control control_val = { - { - .logging_enabled = enable, - .verbosity = verbosity, - }, - }; u32 action[] = { INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, - control_val.value + (enable ? GUC_LOG_CONTROL_LOGGING_ENABLED : 0) | + (verbosity << GUC_LOG_CONTROL_VERBOSITY_SHIFT) | + (default_logging ? 
GUC_LOG_CONTROL_DEFAULT_LOGGING : 0) }; + GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX); + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } +static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) +{ + return container_of(log, struct intel_guc, log); +} + +static void guc_log_enable_flush_events(struct intel_guc_log *log) +{ + intel_guc_enable_msg(log_to_guc(log), + INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); +} + +static void guc_log_disable_flush_events(struct intel_guc_log *log) +{ + intel_guc_disable_msg(log_to_guc(log), + INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); +} + /* * Sub buffer switch callback. Called whenever relay has to switch to a new * sub buffer, relay stays on the same sub buffer if 0 is returned. @@ -121,14 +138,7 @@ static struct dentry *create_buf_file_callback(const char *filename, if (!parent) return NULL; - /* - * Not using the channel filename passed as an argument, since for each - * channel relay appends the corresponding CPU number to the filename - * passed in relay_open(). This should be fine as relay just needs a - * dentry of the file associated with the channel buffer and that file's - * name need not be same as the filename passed as an argument. - */ - buf_file = debugfs_create_file("guc_log", mode, + buf_file = debugfs_create_file(filename, mode, parent, buf, &relay_file_operations); return buf_file; } @@ -149,59 +159,7 @@ static struct rchan_callbacks relay_callbacks = { .remove_buf_file = remove_buf_file_callback, }; -static int guc_log_relay_file_create(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct dentry *log_dir; - int ret; - - if (!i915_modparams.guc_log_level) - return 0; - - mutex_lock(&guc->log.runtime.relay_lock); - - /* For now create the log file in /sys/kernel/debug/dri/0 dir */ - log_dir = dev_priv->drm.primary->debugfs_root; - - /* - * If /sys/kernel/debug/dri/0 location do not exist, then debugfs is - * not mounted and so can't create the relay file. - * The relay API seems to fit well with debugfs only, for availing relay - * there are 3 requirements which can be met for debugfs file only in a - * straightforward/clean manner :- - * i) Need the associated dentry pointer of the file, while opening the - * relay channel. - * ii) Should be able to use 'relay_file_operations' fops for the file. - * iii) Set the 'i_private' field of file's inode to the pointer of - * relay channel buffer. - */ - if (!log_dir) { - DRM_ERROR("Debugfs dir not available yet for GuC log file\n"); - ret = -ENODEV; - goto out_unlock; - } - - ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir); - if (ret < 0 && ret != -EEXIST) { - DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); - goto out_unlock; - } - - ret = 0; - -out_unlock: - mutex_unlock(&guc->log.runtime.relay_lock); - return ret; -} - -static bool guc_log_has_relay(struct intel_guc *guc) -{ - lockdep_assert_held(&guc->log.runtime.relay_lock); - - return guc->log.runtime.relay_chan != NULL; -} - -static void guc_move_to_next_buf(struct intel_guc *guc) +static void guc_move_to_next_buf(struct intel_guc_log *log) { /* * Make sure the updates made in the sub buffer are visible when @@ -209,21 +167,15 @@ static void guc_move_to_next_buf(struct intel_guc *guc) */ smp_wmb(); - if (!guc_log_has_relay(guc)) - return; - /* All data has been written, so now move the offset of sub buffer. 
*/ - relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size); + relay_reserve(log->relay.channel, log->vma->obj->base.size); /* Switch to the next sub buffer */ - relay_flush(guc->log.runtime.relay_chan); + relay_flush(log->relay.channel); } -static void *guc_get_write_buffer(struct intel_guc *guc) +static void *guc_get_write_buffer(struct intel_guc_log *log) { - if (!guc_log_has_relay(guc)) - return NULL; - /* * Just get the base address of a new sub buffer and copy data into it * ourselves. NULL will be returned in no-overwrite mode, if all sub @@ -233,25 +185,25 @@ static void *guc_get_write_buffer(struct intel_guc *guc) * done without using relay_reserve() along with relay_write(). So its * better to use relay_reserve() alone. */ - return relay_reserve(guc->log.runtime.relay_chan, 0); + return relay_reserve(log->relay.channel, 0); } -static bool guc_check_log_buf_overflow(struct intel_guc *guc, +static bool guc_check_log_buf_overflow(struct intel_guc_log *log, enum guc_log_buffer_type type, unsigned int full_cnt) { - unsigned int prev_full_cnt = guc->log.prev_overflow_count[type]; + unsigned int prev_full_cnt = log->stats[type].sampled_overflow; bool overflow = false; if (full_cnt != prev_full_cnt) { overflow = true; - guc->log.prev_overflow_count[type] = full_cnt; - guc->log.total_overflow_count[type] += full_cnt - prev_full_cnt; + log->stats[type].overflow = full_cnt; + log->stats[type].sampled_overflow += full_cnt - prev_full_cnt; if (full_cnt < prev_full_cnt) { /* buffer_full_cnt is a 4 bit counter */ - guc->log.total_overflow_count[type] += 16; + log->stats[type].sampled_overflow += 16; } DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); } @@ -275,7 +227,7 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) return 0; } -static void guc_read_update_log_buffer(struct intel_guc *guc) +static void guc_read_update_log_buffer(struct intel_guc_log *log) { unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; @@ -284,16 +236,16 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) void *src_data, *dst_data; bool new_overflow; - if (WARN_ON(!guc->log.runtime.buf_addr)) - return; + mutex_lock(&log->relay.lock); - /* Get the pointer to shared GuC log buffer */ - log_buf_state = src_data = guc->log.runtime.buf_addr; + if (WARN_ON(!intel_guc_log_relay_enabled(log))) + goto out_unlock; - mutex_lock(&guc->log.runtime.relay_lock); + /* Get the pointer to shared GuC log buffer */ + log_buf_state = src_data = log->relay.buf_addr; /* Get the pointer to local buffer to store the logs */ - log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); + log_buf_snapshot_state = dst_data = guc_get_write_buffer(log); if (unlikely(!log_buf_snapshot_state)) { /* @@ -301,10 +253,9 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) * getting consumed by User at a slow rate. 
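/*
 * The counter-folding trick from guc_check_log_buf_overflow() above in
 * isolation: buffer_full_cnt is only 4 bits wide, so the running total
 * relies on unsigned wrap-around plus a +16 correction when the hardware
 * counter has wrapped.
 */
#include <stdint.h>

static void fold_4bit_count(uint32_t *total, uint32_t *prev, uint32_t cur)
{
	if (cur == *prev)
		return;			/* nothing new sampled */

	*total += cur - *prev;		/* may wrap around ... */
	if (cur < *prev)
		*total += 16;		/* ... fixed up for the 4-bit wrap */
	*prev = cur;
}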
*/ DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); - guc->log.capture_miss_count++; - mutex_unlock(&guc->log.runtime.relay_lock); + log->relay.full_count++; - return; + goto out_unlock; } /* Actual logs are present from the 2nd page */ @@ -325,8 +276,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) full_cnt = log_buf_state_local.buffer_full_cnt; /* Bookkeeping stuff */ - guc->log.flush_count[type] += log_buf_state_local.flush_to_file; - new_overflow = guc_check_log_buf_overflow(guc, type, full_cnt); + log->stats[type].flush += log_buf_state_local.flush_to_file; + new_overflow = guc_check_log_buf_overflow(log, type, full_cnt); /* Update the state of shared log buffer */ log_buf_state->read_ptr = write_offset; @@ -373,38 +324,35 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) dst_data += buffer_size; } - guc_move_to_next_buf(guc); + guc_move_to_next_buf(log); - mutex_unlock(&guc->log.runtime.relay_lock); +out_unlock: + mutex_unlock(&log->relay.lock); } static void capture_logs_work(struct work_struct *work) { - struct intel_guc *guc = - container_of(work, struct intel_guc, log.runtime.flush_work); - - guc_log_capture_logs(guc); -} + struct intel_guc_log *log = + container_of(work, struct intel_guc_log, relay.flush_work); -static bool guc_log_has_runtime(struct intel_guc *guc) -{ - return guc->log.runtime.buf_addr != NULL; + guc_log_capture_logs(log); } -static int guc_log_runtime_create(struct intel_guc *guc) +static int guc_log_map(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); void *vaddr; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&log->relay.lock); - if (!guc->log.vma) + if (!log->vma) return -ENODEV; - GEM_BUG_ON(guc_log_has_runtime(guc)); - - ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true); + mutex_lock(&dev_priv->drm.struct_mutex); + ret = i915_gem_object_set_to_wc_domain(log->vma->obj, true); + mutex_unlock(&dev_priv->drm.struct_mutex); if (ret) return ret; @@ -413,49 +361,40 @@ static int guc_log_runtime_create(struct intel_guc *guc) * buffer pages, so that we can directly get the data * (up-to-date) from memory. */ - vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map(log->vma->obj, I915_MAP_WC); if (IS_ERR(vaddr)) { DRM_ERROR("Couldn't map log buffer pages %d\n", ret); return PTR_ERR(vaddr); } - guc->log.runtime.buf_addr = vaddr; + log->relay.buf_addr = vaddr; return 0; } -static void guc_log_runtime_destroy(struct intel_guc *guc) +static void guc_log_unmap(struct intel_guc_log *log) { - /* - * It's possible that the runtime stuff was never allocated because - * GuC log was disabled at the boot time. 
- */ - if (!guc_log_has_runtime(guc)) - return; + lockdep_assert_held(&log->relay.lock); - i915_gem_object_unpin_map(guc->log.vma->obj); - guc->log.runtime.buf_addr = NULL; + i915_gem_object_unpin_map(log->vma->obj); + log->relay.buf_addr = NULL; } -void intel_guc_log_init_early(struct intel_guc *guc) +void intel_guc_log_init_early(struct intel_guc_log *log) { - mutex_init(&guc->log.runtime.relay_lock); - INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); + mutex_init(&log->relay.lock); + INIT_WORK(&log->relay.flush_work, capture_logs_work); } -int intel_guc_log_relay_create(struct intel_guc *guc) +static int guc_log_relay_create(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; int ret; - if (!i915_modparams.guc_log_level) - return 0; - - mutex_lock(&guc->log.runtime.relay_lock); - - GEM_BUG_ON(guc_log_has_relay(guc)); + lockdep_assert_held(&log->relay.lock); /* Keep the size of sub buffers same as shared log buffer */ subbuf_size = GUC_LOG_SIZE; @@ -468,157 +407,56 @@ int intel_guc_log_relay_create(struct intel_guc *guc) */ n_subbufs = 8; - /* - * Create a relay channel, so that we have buffers for storing - * the GuC firmware logs, the channel will be linked with a file - * later on when debugfs is registered. - */ - guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, - n_subbufs, &relay_callbacks, dev_priv); + guc_log_relay_chan = relay_open("guc_log", + dev_priv->drm.primary->debugfs_root, + subbuf_size, n_subbufs, + &relay_callbacks, dev_priv); if (!guc_log_relay_chan) { DRM_ERROR("Couldn't create relay chan for GuC logging\n"); ret = -ENOMEM; - goto err; + return ret; } GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); - guc->log.runtime.relay_chan = guc_log_relay_chan; - - mutex_unlock(&guc->log.runtime.relay_lock); + log->relay.channel = guc_log_relay_chan; return 0; - -err: - mutex_unlock(&guc->log.runtime.relay_lock); - /* logging will be off */ - i915_modparams.guc_log_level = 0; - return ret; -} - -void intel_guc_log_relay_destroy(struct intel_guc *guc) -{ - mutex_lock(&guc->log.runtime.relay_lock); - - /* - * It's possible that the relay was never allocated because - * GuC log was disabled at the boot time. - */ - if (!guc_log_has_relay(guc)) - goto out_unlock; - - relay_close(guc->log.runtime.relay_chan); - guc->log.runtime.relay_chan = NULL; - -out_unlock: - mutex_unlock(&guc->log.runtime.relay_lock); } -static int guc_log_late_setup(struct intel_guc *guc) +static void guc_log_relay_destroy(struct intel_guc_log *log) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - int ret; - - if (!guc_log_has_runtime(guc)) { - /* - * If log was disabled at boot time, then setup needed to handle - * log buffer flush interrupts would not have been done yet, so - * do that now. 
- */ - ret = intel_guc_log_relay_create(guc); - if (ret) - goto err; - - mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); - ret = guc_log_runtime_create(guc); - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - - if (ret) - goto err_relay; - } - - ret = guc_log_relay_file_create(guc); - if (ret) - goto err_runtime; - - return 0; + lockdep_assert_held(&log->relay.lock); -err_runtime: - mutex_lock(&dev_priv->drm.struct_mutex); - guc_log_runtime_destroy(guc); - mutex_unlock(&dev_priv->drm.struct_mutex); -err_relay: - intel_guc_log_relay_destroy(guc); -err: - /* logging will remain off */ - i915_modparams.guc_log_level = 0; - return ret; + relay_close(log->relay.channel); + log->relay.channel = NULL; } -static void guc_log_capture_logs(struct intel_guc *guc) +static void guc_log_capture_logs(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); - guc_read_update_log_buffer(guc); + guc_read_update_log_buffer(log); /* * Generally device is expected to be active only at this * time, so get/put should be really quick. */ intel_runtime_pm_get(dev_priv); - guc_log_flush_complete(guc); - intel_runtime_pm_put(dev_priv); -} - -static void guc_flush_logs(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level) - return; - - /* First disable the interrupts, will be renabled afterwards */ - mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); - gen9_disable_guc_interrupts(dev_priv); - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - - /* - * Before initiating the forceful flush, wait for any pending/ongoing - * flush to complete otherwise forceful flush may not actually happen. - */ - flush_work(&guc->log.runtime.flush_work); - - /* Ask GuC to update the log buffer state */ - intel_runtime_pm_get(dev_priv); - guc_log_flush(guc); + guc_action_flush_log_complete(guc); intel_runtime_pm_put(dev_priv); - - /* GuC would have updated log buffer by now, so capture it */ - guc_log_capture_logs(guc); } -int intel_guc_log_create(struct intel_guc *guc) +int intel_guc_log_create(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); struct i915_vma *vma; unsigned long offset; u32 flags; int ret; - GEM_BUG_ON(guc->log.vma); - - /* - * We require SSE 4.1 for fast reads from the GuC log buffer and - * it should be present on the chipsets supporting GuC based - * submisssions. 
- */ - if (WARN_ON(!i915_has_memcpy_from_wc())) { - ret = -EINVAL; - goto err; - } + GEM_BUG_ON(log->vma); vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE); if (IS_ERR(vma)) { @@ -626,13 +464,7 @@ int intel_guc_log_create(struct intel_guc *guc) goto err; } - guc->log.vma = vma; - - if (i915_modparams.guc_log_level) { - ret = guc_log_runtime_create(guc); - if (ret < 0) - goto err_vma; - } + log->vma = vma; /* each allocated unit is a page */ flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | @@ -640,117 +472,159 @@ (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); - offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ - guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; + offset = intel_guc_ggtt_offset(guc, vma) >> PAGE_SHIFT; + log->flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; return 0; -err_vma: - i915_vma_unpin_and_release(&guc->log.vma); err: /* logging will be off */ i915_modparams.guc_log_level = 0; return ret; } -void intel_guc_log_destroy(struct intel_guc *guc) +void intel_guc_log_destroy(struct intel_guc_log *log) +{ + i915_vma_unpin_and_release(&log->vma); +} + +int intel_guc_log_level_get(struct intel_guc_log *log) { - guc_log_runtime_destroy(guc); - i915_vma_unpin_and_release(&guc->log.vma); + GEM_BUG_ON(!log->vma); + GEM_BUG_ON(i915_modparams.guc_log_level < 0); + + return i915_modparams.guc_log_level; } -int intel_guc_log_control(struct intel_guc *guc, u64 control_val) +int intel_guc_log_level_set(struct intel_guc_log *log, u64 val) { + struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); - bool enable_logging = control_val > 0; - u32 verbosity; int ret; - if (!guc->log.vma) - return -ENODEV; + BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0); + GEM_BUG_ON(!log->vma); + GEM_BUG_ON(i915_modparams.guc_log_level < 0); - BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN); - if (control_val > 1 + GUC_LOG_VERBOSITY_MAX) + /* + * GuC recognizes log levels starting from 0 up to max; we're using 0 + * as an indication that logging should be disabled. + */ + if (val < GUC_LOG_LEVEL_DISABLED || val > GUC_LOG_LEVEL_MAX) return -EINVAL; - /* This combination doesn't make sense & won't have any effect */ - if (!enable_logging && !i915_modparams.guc_log_level) - return 0; + mutex_lock(&dev_priv->drm.struct_mutex); - verbosity = enable_logging ? control_val - 1 : 0; + if (i915_modparams.guc_log_level == val) { + ret = 0; + goto out_unlock; + } - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); - ret = guc_log_control(guc, enable_logging, verbosity); + ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(val), + GUC_LOG_LEVEL_IS_ENABLED(val), + GUC_LOG_LEVEL_TO_VERBOSITY(val)); intel_runtime_pm_put(dev_priv); + if (ret) { + DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret); + goto out_unlock; + } + + i915_modparams.guc_log_level = val; + +out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); - if (ret < 0) { - DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret); - return ret; - } + return ret; +} - if (enable_logging) { - i915_modparams.guc_log_level = 1 + verbosity; +bool intel_guc_log_relay_enabled(const struct intel_guc_log *log) +{ + return log->relay.buf_addr; +} - /* - * If log was disabled at boot time, then the relay channel file - * wouldn't have been created by now and interrupts also would - * not have been enabled. Try again now, just in case.
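/*
 * A sketch of how intel_guc_log_level_set() above decomposes the single
 * modparam level into the three GUC_LOG_CONTROL_* fields, following the
 * GUC_LOG_LEVEL_* macros from intel_guc_log.h further below. Note the
 * deliberate asymmetry: the "default logging" bit tracks whether logging
 * is enabled at all, while the "logging enabled" bit only gates the
 * verbose path.
 */
#include <stdbool.h>
#include <stdint.h>

struct guc_log_ctl {
	bool enabled;		/* GUC_LOG_CONTROL_LOGGING_ENABLED */
	bool default_logging;	/* GUC_LOG_CONTROL_DEFAULT_LOGGING */
	uint32_t verbosity;	/* GUC_LOG_CONTROL_VERBOSITY field */
};

static struct guc_log_ctl level_to_control(uint64_t level)
{
	struct guc_log_ctl c = {
		.enabled = level > 1,		/* GUC_LOG_LEVEL_IS_VERBOSE */
		.default_logging = level > 0,	/* GUC_LOG_LEVEL_IS_ENABLED */
		.verbosity = level > 1 ? level - 2 : 0,
	};

	/* level 0: off, 1: non-verbose, 2..N: verbosity 0..N-2 */
	return c;
}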
- */ - ret = guc_log_late_setup(guc); - if (ret < 0) { - DRM_DEBUG_DRIVER("GuC log late setup failed %d\n", ret); - return ret; - } +int intel_guc_log_relay_open(struct intel_guc_log *log) +{ + int ret; - /* GuC logging is currently the only user of Guc2Host interrupts */ - mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); - gen9_enable_guc_interrupts(dev_priv); - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - } else { - /* - * Once logging is disabled, GuC won't generate logs & send an - * interrupt. But there could be some data in the log buffer - * which is yet to be captured. So request GuC to update the log - * buffer state and then collect the left over logs. - */ - guc_flush_logs(guc); + mutex_lock(&log->relay.lock); - /* As logging is disabled, update log level to reflect that */ - i915_modparams.guc_log_level = 0; + if (intel_guc_log_relay_enabled(log)) { + ret = -EEXIST; + goto out_unlock; } - return ret; -} + /* + * We require SSE 4.1 for fast reads from the GuC log buffer and + * it should be present on the chipsets supporting GuC based + * submissions. + */ + if (!i915_has_memcpy_from_wc()) { + ret = -ENXIO; + goto out_unlock; + } -void i915_guc_log_register(struct drm_i915_private *dev_priv) -{ - if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level) - return; + ret = guc_log_relay_create(log); + if (ret) + goto out_unlock; + + ret = guc_log_map(log); + if (ret) + goto out_relay; - guc_log_late_setup(&dev_priv->guc); + mutex_unlock(&log->relay.lock); + + guc_log_enable_flush_events(log); + + /* + * When GuC is logging without us relaying to userspace, we're ignoring + * the flush notification. This means that we need to unconditionally + * flush on relay enabling, since GuC only notifies us once. + */ + queue_work(log->relay.flush_wq, &log->relay.flush_work); + + return 0; + +out_relay: + guc_log_relay_destroy(log); +out_unlock: + mutex_unlock(&log->relay.lock); + + return ret; } -void i915_guc_log_unregister(struct drm_i915_private *dev_priv) +void intel_guc_log_relay_flush(struct intel_guc_log *log) { - struct intel_guc *guc = &dev_priv->guc; + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_i915(guc); + + /* + * Before initiating the forceful flush, wait for any pending/ongoing + * flush to complete, otherwise the forceful flush may not actually happen.
+ */ + flush_work(&log->relay.flush_work); - if (!USES_GUC_SUBMISSION(dev_priv)) - return; + intel_runtime_pm_get(i915); + guc_action_flush_log(guc); + intel_runtime_pm_put(i915); - mutex_lock(&dev_priv->drm.struct_mutex); - /* GuC logging is currently the only user of Guc2Host interrupts */ - intel_runtime_pm_get(dev_priv); - gen9_disable_guc_interrupts(dev_priv); - intel_runtime_pm_put(dev_priv); + /* GuC would have updated log buffer by now, so capture it */ + guc_log_capture_logs(log); +} - guc_log_runtime_destroy(guc); - mutex_unlock(&dev_priv->drm.struct_mutex); +void intel_guc_log_relay_close(struct intel_guc_log *log) +{ + guc_log_disable_flush_events(log); + flush_work(&log->relay.flush_work); + + mutex_lock(&log->relay.lock); + GEM_BUG_ON(!intel_guc_log_relay_enabled(log)); + guc_log_unmap(log); + guc_log_relay_destroy(log); + mutex_unlock(&log->relay.lock); +} - intel_guc_log_relay_destroy(guc); +void intel_guc_log_handle_flush_event(struct intel_guc_log *log) +{ + queue_work(log->relay.flush_wq, &log->relay.flush_work); } diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h index dab0e949567a..fa80535a6f9d 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.h +++ b/drivers/gpu/drm/i915/intel_guc_log.h @@ -25,11 +25,12 @@ #ifndef _INTEL_GUC_LOG_H_ #define _INTEL_GUC_LOG_H_ +#include <linux/mutex.h> +#include <linux/relay.h> #include <linux/workqueue.h> #include "intel_guc_fwif.h" -struct drm_i915_private; struct intel_guc; /* @@ -39,33 +40,53 @@ struct intel_guc; #define GUC_LOG_SIZE ((1 + GUC_LOG_DPC_PAGES + 1 + GUC_LOG_ISR_PAGES + \ 1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT) +/* + * While we're using plain log level in i915, GuC controls are much more... + * "elaborate"? We have a couple of bits for verbosity, separate bit for actual + * log enabling, and separate bit for default logging - which "conveniently" + * ignores the enable bit. + */ +#define GUC_LOG_LEVEL_DISABLED 0 +#define GUC_LOG_LEVEL_NON_VERBOSE 1 +#define GUC_LOG_LEVEL_IS_ENABLED(x) ((x) > GUC_LOG_LEVEL_DISABLED) +#define GUC_LOG_LEVEL_IS_VERBOSE(x) ((x) > GUC_LOG_LEVEL_NON_VERBOSE) +#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \ + typeof(x) _x = (x); \ + GUC_LOG_LEVEL_IS_VERBOSE(_x) ? 
_x - 2 : 0; \ +}) +#define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) +#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) + struct intel_guc_log { u32 flags; struct i915_vma *vma; - /* The runtime stuff gets created only when GuC logging gets enabled */ struct { void *buf_addr; struct workqueue_struct *flush_wq; struct work_struct flush_work; - struct rchan *relay_chan; - /* To serialize the access to relay_chan */ - struct mutex relay_lock; - } runtime; + struct rchan *channel; + struct mutex lock; + u32 full_count; + } relay; /* logging related stats */ - u32 capture_miss_count; - u32 flush_interrupt_count; - u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 flush_count[GUC_MAX_LOG_BUFFER]; + struct { + u32 sampled_overflow; + u32 overflow; + u32 flush; + } stats[GUC_MAX_LOG_BUFFER]; }; -int intel_guc_log_create(struct intel_guc *guc); -void intel_guc_log_destroy(struct intel_guc *guc); -void intel_guc_log_init_early(struct intel_guc *guc); -int intel_guc_log_relay_create(struct intel_guc *guc); -void intel_guc_log_relay_destroy(struct intel_guc *guc); -int intel_guc_log_control(struct intel_guc *guc, u64 control_val); -void i915_guc_log_register(struct drm_i915_private *dev_priv); -void i915_guc_log_unregister(struct drm_i915_private *dev_priv); +void intel_guc_log_init_early(struct intel_guc_log *log); +int intel_guc_log_create(struct intel_guc_log *log); +void intel_guc_log_destroy(struct intel_guc_log *log); + +int intel_guc_log_level_get(struct intel_guc_log *log); +int intel_guc_log_level_set(struct intel_guc_log *log, u64 control_val); +bool intel_guc_log_relay_enabled(const struct intel_guc_log *log); +int intel_guc_log_relay_open(struct intel_guc_log *log); +void intel_guc_log_relay_flush(struct intel_guc_log *log); +void intel_guc_log_relay_close(struct intel_guc_log *log); + +void intel_guc_log_handle_flush_event(struct intel_guc_log *log); #endif diff --git a/drivers/gpu/drm/i915/intel_guc_reg.h b/drivers/gpu/drm/i915/intel_guc_reg.h index 19a9247c5664..d86084742a4a 100644 --- a/drivers/gpu/drm/i915/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/intel_guc_reg.h @@ -66,22 +66,20 @@ #define UOS_MOVE (1<<4) #define START_DMA (1<<0) #define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define GUC_WOPCM_OFFSET_VALID (1<<0) #define HUC_LOADING_AGENT_VCR (0<<1) #define HUC_LOADING_AGENT_GUC (1<<1) -#define GUC_WOPCM_OFFSET_VALUE 0x80000 /* 512KB */ +#define GUC_WOPCM_OFFSET_SHIFT 14 +#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) #define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) #define HUC_STATUS2 _MMIO(0xD3B0) #define HUC_FW_VERIFIED (1<<7) -/* Defines WOPCM space available to GuC firmware */ #define GUC_WOPCM_SIZE _MMIO(0xc050) -/* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */ -#define GUC_WOPCM_TOP (0x80 << 12) /* 512KB */ -#define BXT_GUC_WOPCM_RC6_RESERVED (0x10 << 12) /* 64KB */ - -/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ -#define GUC_GGTT_TOP 0xFEE00000 +#define GUC_WOPCM_SIZE_LOCKED (1<<0) +#define GUC_WOPCM_SIZE_SHIFT 12 +#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) #define GEN8_GT_PM_CONFIG _MMIO(0x138140) #define GEN9LP_GT_PM_CONFIG _MMIO(0x138140) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 8a8ad2fe158d..2feb65096966 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -124,9 +124,17 @@ static int reserve_doorbell(struct 
intel_guc_client *client) return 0; } +static bool has_doorbell(struct intel_guc_client *client) +{ + if (client->doorbell_id == GUC_DOORBELL_INVALID) + return false; + + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); +} + static void unreserve_doorbell(struct intel_guc_client *client) { - GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); + GEM_BUG_ON(!has_doorbell(client)); __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); client->doorbell_id = GUC_DOORBELL_INVALID; @@ -184,14 +192,6 @@ static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) return client->vaddr + client->doorbell_offset; } -static bool has_doorbell(struct intel_guc_client *client) -{ - if (client->doorbell_id == GUC_DOORBELL_INVALID) - return false; - - return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); -} - static void __create_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *doorbell; @@ -207,7 +207,6 @@ static void __destroy_doorbell(struct intel_guc_client *client) struct guc_doorbell_info *doorbell; u16 db_id = client->doorbell_id; - doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_DISABLED; doorbell->cookie = 0; @@ -224,6 +223,9 @@ static int create_doorbell(struct intel_guc_client *client) { int ret; + if (WARN_ON(!has_doorbell(client))) + return -ENODEV; /* internal setup error, should never happen */ + __update_doorbell_desc(client, client->doorbell_id); __create_doorbell(client); @@ -231,8 +233,8 @@ static int create_doorbell(struct intel_guc_client *client) if (ret) { __destroy_doorbell(client); __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - DRM_ERROR("Couldn't create client %u doorbell: %d\n", - client->stage_id, ret); + DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n", + client->stage_id, ret); return ret; } @@ -362,7 +364,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; @@ -386,8 +388,8 @@ static void guc_stage_desc_init(struct intel_guc *guc, lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - lrc->ring_lrca = - guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; + lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) + + LRC_STATE_PN * PAGE_SIZE; /* XXX: In direct submission, the GuC wants the HW context id * here. 
In proxy submission, it wants the stage id @@ -395,7 +397,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); - lrc->ring_begin = guc_ggtt_offset(ce->ring->vma); + lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma); lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; @@ -411,7 +413,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, * The doorbell, process descriptor, and workqueue are all parts * of the client object, which the GuC will reference via the GGTT */ - gfx_addr = guc_ggtt_offset(client->vma); + gfx_addr = intel_guc_ggtt_offset(guc, client->vma); desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); @@ -584,7 +586,7 @@ static void inject_preempt_context(struct work_struct *work) data[3] = engine->guc_id; data[4] = guc->execbuf_client->priority; data[5] = guc->execbuf_client->stage_id; - data[6] = guc_ggtt_offset(guc->shared_data); + data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { execlists_clear_active(&engine->execlists, @@ -657,7 +659,17 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) port_set(port, i915_request_get(rq)); } -static void guc_dequeue(struct intel_engine_cs *engine) +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->sched.attr.priority; +} + +static inline int port_prio(const struct execlist_port *port) +{ + return rq_prio(port_request(port)); +} + +static bool __guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -667,28 +679,29 @@ static void guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - spin_lock_irq(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); + rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); if (port_isset(port)) { - if (engine->i915->preempt_context) { + if (intel_engine_has_preemption(engine)) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; + int prio = execlists->queue_priority; - if (execlists->queue_priority > - max(port_request(port)->priotree.priority, 0)) { + if (__execlists_need_preempt(prio, port_prio(port))) { execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); queue_work(engine->i915->guc.preempt_wq, &preempt_work->work); - goto unlock; + return false; } } port++; if (port_isset(port)) - goto unlock; + return false; } GEM_BUG_ON(port_isset(port)); @@ -696,11 +709,11 @@ static void guc_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { if (last && rq->ctx != last->ctx) { if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -709,7 +722,7 @@ static void guc_dequeue(struct intel_engine_cs *engine) port++; } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); @@ -726,19 +739,34 @@ static void guc_dequeue(struct 
intel_engine_cs *engine) done: execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; - if (submit) { + if (submit) port_assign(port, last); - execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); - guc_submit(engine); - } + if (last) + execlists_user_begin(execlists, execlists->port); /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); -unlock: - spin_unlock_irq(&engine->timeline->lock); + return submit; +} + +static void guc_dequeue(struct intel_engine_cs *engine) +{ + unsigned long flags; + bool submit; + + local_irq_save(flags); + + spin_lock(&engine->timeline.lock); + submit = __guc_dequeue(engine); + spin_unlock(&engine->timeline.lock); + + if (submit) + guc_submit(engine); + + local_irq_restore(flags); } static void guc_submission_tasklet(unsigned long data) @@ -748,17 +776,20 @@ static void guc_submission_tasklet(unsigned long data) struct execlist_port *port = execlists->port; struct i915_request *rq; - rq = port_request(&port[0]); while (rq && i915_request_completed(rq)) { trace_i915_request_out(rq); i915_request_put(rq); - execlists_port_complete(execlists, port); - - rq = port_request(&port[0]); + port = execlists_port_complete(execlists, port); + if (port_isset(port)) { + execlists_user_begin(execlists, port); + rq = port_request(port); + } else { + execlists_user_end(execlists); + rq = NULL; + } } - if (!rq) - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == @@ -977,7 +1008,8 @@ static void guc_fill_preempt_context(struct intel_guc *guc) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &client->owner->engine[id]; + struct intel_context *ce = + to_intel_context(client->owner, engine); u32 addr = intel_hws_preempt_done_address(engine); u32 *cs; diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index c8ea510629fa..d47e346bd49e 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = I915_READ_CTL(engine); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, BIT(engine->id), - "Kicking stuck wait on %s", - engine->name); + i915_handle_error(dev_priv, BIT(engine->id), 0, + "stuck wait on %s", engine->name); I915_WRITE_CTL(engine, tmp); return ENGINE_WAIT_KICK; } @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) default: return ENGINE_DEAD; case 1: - i915_handle_error(dev_priv, ALL_ENGINES, - "Kicking stuck semaphore on %s", + i915_handle_error(dev_priv, ALL_ENGINES, 0, + "stuck semaphore on %s", engine->name); I915_WRITE_CTL(engine, tmp); return ENGINE_WAIT_KICK; @@ -357,7 +356,7 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, break; case ENGINE_DEAD: - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer("hangcheck"); intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -386,13 +385,13 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, if (stuck != hung) hung &= ~stuck; len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ?
"No progress" : "Hang"); + "%s on ", stuck == hung ? "no progress" : "hang"); for_each_engine_masked(engine, i915, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; - return i915_handle_error(i915, hung, "%s", msg); + return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg); } /* @@ -453,6 +452,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); + engine->hangcheck.action_timestamp = jiffies; } void intel_hangcheck_init(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c index 14ca5d3057a7..2db5da550a1c 100644 --- a/drivers/gpu/drm/i915/intel_hdcp.c +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -37,6 +37,43 @@ static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *intel_dig_port, return 0; } +static bool hdcp_key_loadable(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + enum i915_power_well_id id; + bool enabled = false; + + /* + * On HSW and BDW, Display HW loads the Key as soon as Display resumes. + * On all BXT+, SW can load the keys only when the PW#1 is turned on. + */ + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + id = HSW_DISP_PW_GLOBAL; + else + id = SKL_DISP_PW_1; + + mutex_lock(&power_domains->lock); + + /* PG1 (power well #1) needs to be enabled */ + for_each_power_well(dev_priv, power_well) { + if (power_well->id == id) { + enabled = power_well->ops->is_enabled(dev_priv, + power_well); + break; + } + } + mutex_unlock(&power_domains->lock); + + /* + * Another requirement for HDCP key loadability is the enabled state of + * the PLL for cdclk. Without an active CRTC we won't land here, so we + * assume that cdclk is already on. + */ + + return enabled; +} + static void intel_hdcp_clear_keys(struct drm_i915_private *dev_priv) { I915_WRITE(HDCP_KEY_CONF, HDCP_CLEAR_KEYS_TRIGGER); @@ -142,53 +179,17 @@ bool intel_hdcp_is_ksv_valid(u8 *ksv) return true; } -/* Implements Part 2 of the HDCP authorization procedure */ static -int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, - const struct intel_hdcp_shim *shim) +int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim, + u8 *ksv_fifo, u8 num_downstream, u8 *bstatus) { struct drm_i915_private *dev_priv; u32 vprime, sha_text, sha_leftovers, rep_ctl; - u8 bstatus[2], num_downstream, *ksv_fifo; int ret, i, j, sha_idx; dev_priv = intel_dig_port->base.base.dev->dev_private; - ret = intel_hdcp_poll_ksv_fifo(intel_dig_port, shim); - if (ret) { - DRM_ERROR("KSV list failed to become ready (%d)\n", ret); - return ret; - } - - ret = shim->read_bstatus(intel_dig_port, bstatus); - if (ret) - return ret; - - if (DRM_HDCP_MAX_DEVICE_EXCEEDED(bstatus[0]) || - DRM_HDCP_MAX_CASCADE_EXCEEDED(bstatus[1])) { - DRM_ERROR("Max Topology Limit Exceeded\n"); - return -EPERM; - } - - /* - * When repeater reports 0 device count, HDCP1.4 spec allows disabling - * the HDCP encryption. That implies that repeater can't have its own - * display. As there is no consumption of encrypted content in the - * repeater with 0 downstream devices, we are failing the - * authentication. - */
- num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]); - if (num_downstream == 0) - return -EINVAL; - - ksv_fifo = kzalloc(num_downstream * DRM_HDCP_KSV_LEN, GFP_KERNEL); - if (!ksv_fifo) - return -ENOMEM; - - ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo); - if (ret) - return ret; - /* Process V' values from the receiver */ for (i = 0; i < DRM_HDCP_V_PRIME_NUM_PARTS; i++) { ret = shim->read_v_prime_part(intel_dig_port, i, &vprime); @@ -353,7 +354,8 @@ int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, return ret; sha_idx += sizeof(sha_text); } else { - DRM_ERROR("Invalid number of leftovers %d\n", sha_leftovers); + DRM_DEBUG_KMS("Invalid number of leftovers %d\n", + sha_leftovers); return -EINVAL; } @@ -381,17 +383,83 @@ int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, if (intel_wait_for_register(dev_priv, HDCP_REP_CTL, HDCP_SHA1_COMPLETE, HDCP_SHA1_COMPLETE, 1)) { - DRM_ERROR("Timed out waiting for SHA1 complete\n"); + DRM_DEBUG_KMS("Timed out waiting for SHA1 complete\n"); return -ETIMEDOUT; } if (!(I915_READ(HDCP_REP_CTL) & HDCP_SHA1_V_MATCH)) { - DRM_ERROR("SHA-1 mismatch, HDCP failed\n"); + DRM_DEBUG_KMS("SHA-1 mismatch, HDCP failed\n"); return -ENXIO; } + return 0; +} + +/* Implements Part 2 of the HDCP authorization procedure */ +static +int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim) +{ + u8 bstatus[2], num_downstream, *ksv_fifo; + int ret, i, tries = 3; + + ret = intel_hdcp_poll_ksv_fifo(intel_dig_port, shim); + if (ret) { + DRM_ERROR("KSV list failed to become ready (%d)\n", ret); + return ret; + } + + ret = shim->read_bstatus(intel_dig_port, bstatus); + if (ret) + return ret; + + if (DRM_HDCP_MAX_DEVICE_EXCEEDED(bstatus[0]) || + DRM_HDCP_MAX_CASCADE_EXCEEDED(bstatus[1])) { + DRM_ERROR("Max Topology Limit Exceeded\n"); + return -EPERM; + } + + /* + * When repeater reports 0 device count, HDCP1.4 spec allows disabling + * the HDCP encryption. That implies that repeater can't have its own + * display. As there is no consumption of encrypted content in the + * repeater with 0 downstream devices, we are failing the + * authentication. + */ + num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]); + if (num_downstream == 0) + return -EINVAL; + + ksv_fifo = kzalloc(num_downstream * DRM_HDCP_KSV_LEN, GFP_KERNEL); + if (!ksv_fifo) + return -ENOMEM; + + ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo); + if (ret) + goto err;
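Everything up to this point in the re-added intel_hdcp_auth_downstream() is topology screening; only KSVs from a sane repeater tree reach the V' check. A condensed sketch of that screening (the helper is hypothetical; the DRM_HDCP_* macros are the real ones from the DRM HDCP header):

static int hdcp_validate_topology(const u8 *bstatus, u8 *num_downstream)
{
	/* HDCP 1.4 caps a repeater tree at 127 devices and 7 levels. */
	if (DRM_HDCP_MAX_DEVICE_EXCEEDED(bstatus[0]) ||
	    DRM_HDCP_MAX_CASCADE_EXCEEDED(bstatus[1]))
		return -EPERM;

	/*
	 * A repeater with zero downstream devices cannot consume the
	 * content itself, so authentication is failed outright instead
	 * of encryption being disabled.
	 */
	*num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]);
	if (*num_downstream == 0)
		return -EINVAL;

	return 0;	/* caller sizes ksv_fifo as num_downstream * DRM_HDCP_KSV_LEN */
}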
+ + /* + * When V prime mismatches, the DP spec mandates a re-read of + * V prime at least twice. + */ + for (i = 0; i < tries; i++) { + ret = intel_hdcp_validate_v_prime(intel_dig_port, shim, + ksv_fifo, num_downstream, + bstatus); + if (!ret) + break; + } + + if (i == tries) { + DRM_ERROR("V Prime validation failed (%d)\n", ret); + goto err; + } + DRM_DEBUG_KMS("HDCP is enabled (%d downstream devices)\n", num_downstream); - return 0; + ret = 0; +err: + kfree(ksv_fifo); + return ret; } /* Implements Part 1 of the HDCP authorization procedure */ @@ -506,15 +574,26 @@ static int intel_hdcp_auth(struct intel_digital_port *intel_dig_port, */ wait_remaining_ms_from_jiffies(r0_prime_gen_start, 300); - ri.reg = 0; - ret = shim->read_ri_prime(intel_dig_port, ri.shim); - if (ret) - return ret; - I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + tries = 3; - /* Wait for Ri prime match */ - if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & - (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { + /* + * The DP HDCP spec mandates two more attempts to read R0, in case + * of an R0 mismatch. + */ + for (i = 0; i < tries; i++) { + ri.reg = 0; + ret = shim->read_ri_prime(intel_dig_port, ri.shim); + if (ret) + return ret; + I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + + /* Wait for Ri prime match */ + if (!wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) + break; + } + + if (i == tries) { DRM_ERROR("Timed out waiting for Ri prime match (%x)\n", I915_READ(PORT_HDCP_STATUS(port))); return -ETIMEDOUT; @@ -580,8 +659,8 @@ static int _intel_hdcp_enable(struct intel_connector *connector) DRM_DEBUG_KMS("[%s:%d] HDCP is being enabled...\n", connector->base.name, connector->base.base.id); - if (!(I915_READ(SKL_FUSE_STATUS) & SKL_FUSE_PG_DIST_STATUS(1))) { - DRM_ERROR("PG1 is disabled, cannot load keys\n"); + if (!hdcp_key_loadable(dev_priv)) { + DRM_ERROR("HDCP key load is not possible\n"); return -ENXIO; }
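The intel_hdcp.c changes above apply one idiom twice: a bounded retry around a flaky sink read (three tries for V' validation, three for R0'), reporting the last error only once the loop index hits the bound. The shape of that pattern, pulled out into a hypothetical helper:

static int retry_op(int (*op)(void *ctx), void *ctx, int tries)
{
	int i, ret = -ETIMEDOUT;	/* returned if tries == 0 */

	for (i = 0; i < tries; i++) {
		ret = op(ctx);
		if (!ret)
			break;	/* success, stop retrying */
	}

	return i == tries ? ret : 0;
}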
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 1baef4ac7ecb..ee929f31f7db 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -2082,41 +2082,33 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c * it enables scrambling. This should be called before enabling the HDMI * 2.0 port, as the sink can choose to disable the scrambling if it doesn't * detect a scrambled clock within 100 ms. + * + * Returns: + * True on success, false on failure. */ -void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, +bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, struct drm_connector *connector, bool high_tmds_clock_ratio, bool scrambling) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - struct drm_i915_private *dev_priv = connector->dev->dev_private; struct drm_scrambling *sink_scrambling = - &connector->display_info.hdmi.scdc.scrambling; - struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv, - intel_hdmi->ddc_bus); - bool ret; + &connector->display_info.hdmi.scdc.scrambling; + struct i2c_adapter *adapter = + intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); if (!sink_scrambling->supported) - return; - - DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n", - encoder->base.name, connector->name); + return true; - /* Set TMDS bit clock ratio to 1/40 or 1/10 */ - ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio); - if (!ret) { - DRM_ERROR("Set TMDS ratio failed\n"); - return; - } - - /* Enable/disable sink scrambling */ - ret = drm_scdc_set_scrambling(adptr, scrambling); - if (!ret) { - DRM_ERROR("Set sink scrambling failed\n"); - return; - } + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] scrambling=%s, TMDS bit clock ratio=1/%d\n", + connector->base.id, connector->name, + yesno(scrambling), high_tmds_clock_ratio ? 40 : 10); - DRM_DEBUG_KMS("sink scrambling handled\n"); + /* Set TMDS bit clock ratio to 1/40 or 1/10, and enable/disable scrambling */ + return drm_scdc_set_high_tmds_clock_ratio(adapter, + high_tmds_clock_ratio) && + drm_scdc_set_scrambling(adapter, scrambling); } static u8 chv_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 0e3d3e89d66a..43aa92beff2a 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -100,6 +100,8 @@ enum port intel_hpd_pin_to_port(struct drm_i915_private *dev_priv, if (IS_CNL_WITH_PORT_F(dev_priv)) return PORT_F; return PORT_E; + case HPD_PORT_F: + return PORT_F; default: return PORT_NONE; /* no port for this pin */ } @@ -132,6 +134,7 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, case PORT_F: if (IS_CNL_WITH_PORT_F(dev_priv)) return HPD_PORT_E; + return HPD_PORT_F; default: MISSING_CASE(port); return HPD_NONE; diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 65e2afb9b955..291285277403 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -55,7 +55,7 @@ int intel_huc_auth(struct intel_huc *huc) return -ENOEXEC; vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + PIN_OFFSET_BIAS | guc->ggtt_pin_bias); if (IS_ERR(vma)) { ret = PTR_ERR(vma); DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); @@ -63,7 +63,8 @@ int intel_huc_auth(struct intel_huc *huc) } ret = intel_guc_auth_huc(guc, - guc_ggtt_offset(vma) + huc->fw.rsa_offset); + intel_guc_ggtt_offset(guc, vma) + + huc->fw.rsa_offset); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); goto fail_unpin; @@ -91,3 +92,28 @@ fail: DRM_ERROR("HuC: Authentication failed %d\n", ret); return ret; } + +/** + * intel_huc_check_status() - check HuC status + * @huc: intel_huc structure + * + * This function reads the status register to verify whether HuC + * firmware was successfully loaded.
+ * + * Returns: positive value if HuC firmware is loaded and verified, 0 if it is + * loaded but not verified, and -ENODEV if HuC is not present. + */ +int intel_huc_check_status(struct intel_huc *huc) +{ + struct drm_i915_private *dev_priv = huc_to_i915(huc); + u32 status; + + if (!HAS_HUC(dev_priv)) + return -ENODEV; + + intel_runtime_pm_get(dev_priv); + status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; + intel_runtime_pm_put(dev_priv); + + return status; +} diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index 5d6e804f9771..aa854907abac 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -37,5 +37,12 @@ struct intel_huc { void intel_huc_init_early(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); +int intel_huc_check_status(struct intel_huc *huc); + +static inline int intel_huc_sanitize(struct intel_huc *huc) +{ + intel_uc_fw_sanitize(&huc->fw); + return 0; +} #endif diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c index c66afa9b989a..f93d2384d482 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -118,7 +118,8 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* Set the source address for the uCode */ - offset = guc_ggtt_offset(vma) + huc_fw->header_offset; + offset = intel_guc_ggtt_offset(&dev_priv->guc, vma) + + huc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -154,9 +155,8 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) * Called from intel_uc_init_hw() during driver load, resume from sleep and * after a GPU reset. Note that HuC must be loaded before GuC. * - * The firmware image should have already been fetched into memory by the - * earlier call to intel_uc_init_fw(), so here we need to only check that - * fetch succeeded, and then transfer the image to the h/w. + * The firmware image should have already been fetched into memory, so only + * check that fetch succeeded, and then transfer the image to the h/w.
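In the huc_fw_xfer() hunk above, the 64-bit GGTT source offset is programmed as a split pair, with the upper write masked to 16 bits since the DMA engine decodes only part of the upper dword. A freestanding model of that split (DMA_ADDR_0_LOW/HIGH are the real register names; the callback and the 16-bit reading of the mask are taken from the code above, nothing more):

#include <stdint.h>

/* mmio_write stands in for I915_WRITE(). */
static void set_dma_source(uint64_t offset,
			   void (*mmio_write)(const char *reg, uint32_t val))
{
	mmio_write("DMA_ADDR_0_LOW", (uint32_t)offset);	/* lower_32_bits() */
	mmio_write("DMA_ADDR_0_HIGH", (uint32_t)(offset >> 32) & 0xFFFF);
}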
* * Return: non-zero code on error */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8704f7f8d072..15434cad5430 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -139,6 +139,7 @@ #include "i915_gem_render_state.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" +#include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) @@ -176,14 +177,16 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->priotree.priority; + return rq->sched.attr.priority; } static inline bool need_preempt(const struct intel_engine_cs *engine, const struct i915_request *last, int prio) { - return engine->i915->preempt_context && prio > max(rq_prio(last), 0); + return (intel_engine_has_preemption(engine) && + __execlists_need_preempt(prio, rq_prio(last)) && + !i915_request_completed(last)); } /** @@ -221,7 +224,7 @@ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc; BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); @@ -255,9 +258,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, } static struct i915_priolist * -lookup_priolist(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +lookup_priolist(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_priolist *p; @@ -328,10 +329,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline->requests, + &engine->timeline.requests, link) { if (i915_request_completed(rq)) return; @@ -342,10 +343,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != last_prio) { last_prio = rq_prio(rq); - p = lookup_priolist(engine, &rq->priotree, last_prio); + p = lookup_priolist(engine, last_prio); } - list_add(&rq->priotree.link, &p->requests); + GEM_BUG_ON(p->priority != rq_prio(rq)); + list_add(&rq->sched.link, &p->requests); } } @@ -354,10 +356,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) { struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock_irq(&engine->timeline->lock); __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->timeline->lock); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static inline void @@ -374,6 +379,19 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } +inline void +execlists_user_begin(struct intel_engine_execlists *execlists, + const struct execlist_port *port) +{ + execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); +} + +inline void +execlists_user_end(struct intel_engine_execlists *execlists) +{ + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); +} + static inline void execlists_context_schedule_in(struct i915_request *rq) { @@ -382,10 +400,11 @@ 
execlists_context_schedule_in(struct i915_request *rq) } static inline void -execlists_context_schedule_out(struct i915_request *rq) +execlists_context_schedule_out(struct i915_request *rq, unsigned long status) { intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + execlists_context_status_change(rq, status); + trace_i915_request_out(rq); } static void @@ -399,7 +418,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; + struct intel_context *ce = to_intel_context(rq->ctx, rq->engine); struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; @@ -454,10 +473,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, + rq->fence.context, rq->fence.seqno, + intel_engine_get_seqno(engine), rq_prio(rq)); } else { GEM_BUG_ON(!n); @@ -506,7 +527,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = - &engine->i915->preempt_context->engine[engine->id]; + to_intel_context(engine->i915->preempt_context, engine); unsigned int n; GEM_BUG_ON(execlists->preempt_complete_status != @@ -535,7 +556,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } -static void execlists_dequeue(struct intel_engine_cs *engine) +static bool __execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -545,6 +566,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; + lockdep_assert_held(&engine->timeline.lock); + /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -566,7 +589,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); @@ -581,7 +603,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(!port_count(&port[0])); if (port_count(&port[0]) > 1) - goto unlock; + return false; /* * If we write to ELSP a second time before the HW has had @@ -591,11 +613,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the HW to indicate that it has had a chance to respond. */ if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - goto unlock; + return false; if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - goto unlock; + return false; } /* @@ -620,7 +642,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * priorities of the ports haven't been switch. 
*/ if (port_count(&port[1])) - goto unlock; + return false; /* * WaIdleLiteRestore:bdw,skl @@ -637,7 +659,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not @@ -657,7 +679,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -671,7 +693,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (ctx_single_port_submission(last->ctx) || ctx_single_port_submission(rq->ctx)) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -684,7 +706,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(port_isset(port)); } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); last = rq; @@ -697,8 +719,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); } + done: - execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; + /* + * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. + * + * We choose queue_priority such that if we add a request of greater + * priority than this, we kick the submission tasklet to decide on + * the right order of submitting the requests to hardware. We must + * also be prepared to reorder requests as they are in-flight on the + * HW. We derive the queue_priority then as the first "hole" in + * the HW submission ports and if there are no available slots, + * the priority of the lowest executing request, i.e. last. + * + * When we do receive a higher priority request ready to run from the + * user, see queue_request(), the queue_priority is bumped to that + * request triggering preemption on the next dequeue (or subsequent + * interrupt for secondary ports). + */ + execlists->queue_priority = + port != execlists->port ? 
rq_prio(last) : INT_MIN; + execlists->first = rb; if (submit) port_assign(port, last); @@ -706,13 +747,25 @@ done: /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); -unlock: - spin_unlock_irq(&engine->timeline->lock); + /* Re-evaluate the executing context setup after each preemptive kick */ + if (last) + execlists_user_begin(execlists, execlists->port); - if (submit) { - execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); + return submit; +} + +static void execlists_dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + unsigned long flags; + bool submit; + + spin_lock_irqsave(&engine->timeline.lock, flags); + submit = __execlists_dequeue(engine); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + if (submit) execlists_submit_ports(engine); - } GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); @@ -727,13 +780,18 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_BUG_ON(!execlists->active); - intel_engine_context_out(rq->engine); + GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n", + rq->engine->name, + (unsigned int)(port - execlists->port), + rq->global_seqno, + rq->fence.context, rq->fence.seqno, + intel_engine_get_seqno(rq->engine)); - execlists_context_status_change(rq, - i915_request_completed(rq) ? - INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); + GEM_BUG_ON(!execlists->active); + execlists_context_schedule_out(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_request_put(rq); @@ -741,7 +799,82 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) port++; } - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); + execlists_user_end(execlists); +} + +static void clear_gtiir(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int i; + + /* + * Clear any pending interrupt state. + * + * We do it twice out of paranoia that some of the IIR are + * double buffered, and so if we only reset it once there may + * still be an interrupt pending. 
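Restating the queue_priority rule established in the execlists_dequeue() hunk above: with no request in flight the queue sits at INT_MIN, so the first runnable request always kicks the tasklet, and once the ports are occupied it tracks the priority of the last request accepted, so only a strictly higher-priority arrival forces a fresh dequeue. As a toy model (names illustrative):

#include <limits.h>
#include <stdbool.h>

/* Mirrors "port != execlists->port ? rq_prio(last) : INT_MIN". */
static int queue_priority(bool filled_any_port, int last_prio)
{
	return filled_any_port ? last_prio : INT_MIN;
}

/* Mirrors submit_queue(): only a strictly higher priority reschedules. */
static bool should_kick(int new_prio, int queue_prio)
{
	return new_prio > queue_prio;
}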
+ */ + if (INTEL_GEN(dev_priv) >= 11) { + static const struct { + u8 bank; + u8 bit; + } gen11_gtiir[] = { + [RCS] = {0, GEN11_RCS0}, + [BCS] = {0, GEN11_BCS}, + [_VCS(0)] = {1, GEN11_VCS(0)}, + [_VCS(1)] = {1, GEN11_VCS(1)}, + [_VCS(2)] = {1, GEN11_VCS(2)}, + [_VCS(3)] = {1, GEN11_VCS(3)}, + [_VECS(0)] = {1, GEN11_VECS(0)}, + [_VECS(1)] = {1, GEN11_VECS(1)}, + }; + unsigned long irqflags; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir)); + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + for (i = 0; i < 2; i++) { + gen11_reset_one_iir(dev_priv, + gen11_gtiir[engine->id].bank, + gen11_gtiir[engine->id].bit); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + } else { + static const u8 gtiir[] = { + [RCS] = 0, + [BCS] = 0, + [VCS] = 1, + [VCS2] = 1, + [VECS] = 3, + }; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + for (i = 0; i < 2; i++) { + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + engine->irq_keep_mask); + POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); + } + GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & + engine->irq_keep_mask); + } +} + +static void reset_irq(struct intel_engine_cs *engine) +{ + /* Mark all CS interrupts as complete */ + smp_store_mb(engine->execlists.active, 0); + synchronize_hardirq(engine->i915->drm.irq); + + clear_gtiir(engine); + + /* + * The port is checked prior to scheduling a tasklet, but + * just in case we have suspended the tasklet to do the + * wedging make sure that when it wakes, it decides there + * is no work to do by clearing the irq_posted bit. + */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); } static void execlists_cancel_requests(struct intel_engine_cs *engine) @@ -751,7 +884,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + GEM_TRACE("%s current %d\n", + engine->name, intel_engine_get_seqno(engine)); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -771,11 +905,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); + reset_irq(engine); - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline->requests, link) { + list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -786,8 +921,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) while (rb) { struct i915_priolist *p = to_priolist(rb); - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { - INIT_LIST_HEAD(&rq->priotree.link); + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { + INIT_LIST_HEAD(&rq->sched.link); dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); @@ -807,18 +942,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); - spin_unlock(&engine->timeline->lock); - - /* - * The port is checked prior to scheduling a tasklet, but - * just in case we have suspended the tasklet to do the - * wedging make sure that when it wakes, it decides there - * is no work to do by clearing the irq_posted bit. 
- */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - - /* Mark all CS interrupts as complete */ - execlists->active = 0; + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); } @@ -831,7 +955,7 @@ static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port * const port = execlists->port; + struct execlist_port *port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; bool fw = false; @@ -959,10 +1083,13 @@ static void execlists_submission_tasklet(unsigned long data) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, + rq ? rq->fence.context : 0, + rq ? rq->fence.seqno : 0, + intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ @@ -970,28 +1097,43 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(count == 0); if (--count == 0) { + /* + * On the final event corresponding to the + * submission of this context, we expect either + * an element-switch event or a completion + * event (and on completion, the active-idle + * marker). No more preemptions, lite-restore + * or otherwise. + */ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); GEM_BUG_ON(port_isset(&port[1]) && !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); + GEM_BUG_ON(!port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + + /* + * We rely on the hardware being strongly + * ordered, that the breadcrumb write is + * coherent (visible from the CPU) before the + * user interrupt and CSB is processed. 
+ */ GEM_BUG_ON(!i915_request_completed(rq)); - execlists_context_schedule_out(rq); - trace_i915_request_out(rq); + + execlists_context_schedule_out(rq, + INTEL_CONTEXT_SCHEDULE_OUT); i915_request_put(rq); GEM_TRACE("%s completed ctx=%d\n", engine->name, port->context_id); - execlists_port_complete(execlists, port); + port = execlists_port_complete(execlists, port); + if (port_isset(port)) + execlists_user_begin(execlists, port); + else + execlists_user_end(execlists); } else { port_set(port, port_pack(rq, count)); } - - /* After the final element, the hw should be idle */ - GEM_BUG_ON(port_count(port) == 0 && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - if (port_count(port) == 0) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_USER); } if (head != execlists->csb_head) { @@ -1014,18 +1156,23 @@ static void execlists_submission_tasklet(unsigned long data) } static void queue_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, + struct i915_sched_node *node, int prio) { - list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); + list_add_tail(&node->link, + &lookup_priolist(engine, prio)->requests); +} + +static void __submit_queue(struct intel_engine_cs *engine, int prio) +{ + engine->execlists.queue_priority = prio; + tasklet_hi_schedule(&engine->execlists.tasklet); } static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) { - engine->execlists.queue_priority = prio; - tasklet_hi_schedule(&engine->execlists.tasklet); - } + if (prio > engine->execlists.queue_priority) + __submit_queue(engine, prio); } static void execlists_submit_request(struct i915_request *request) @@ -1034,42 +1181,45 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. 
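The tasklet rework above depends on the (request, count) encoding inside each ELSP port: every submission of the same context (lite-restores included) bumps count, and the request is only retired once that many CSB events have drained (--count == 0). A user-space model of the pointer/count packing (the driver's own port_pack()/port_unpack() differ in detail):

#include <assert.h>
#include <stdint.h>

#define COUNT_BITS	2	/* low pointer bits reused for the count */
#define COUNT_MASK	((UINT64_C(1) << COUNT_BITS) - 1)

static uint64_t pack_port(const void *rq, unsigned int count)
{
	assert(((uintptr_t)rq & COUNT_MASK) == 0);	/* needs aligned pointers */
	return (uint64_t)(uintptr_t)rq | (count & COUNT_MASK);
}

static const void *unpack_port(uint64_t port, unsigned int *count)
{
	*count = port & COUNT_MASK;
	return (const void *)(uintptr_t)(port & ~COUNT_MASK);
}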
*/ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); - queue_request(engine, &request->priotree, rq_prio(request)); + queue_request(engine, &request->sched, rq_prio(request)); submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); - GEM_BUG_ON(list_empty(&request->priotree.link)); + GEM_BUG_ON(list_empty(&request->sched.link)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *sched_to_request(struct i915_sched_node *node) { - return container_of(pt, struct i915_request, priotree); + return container_of(node, struct i915_request, sched); } static struct intel_engine_cs * -pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) +sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = pt_to_request(pt)->engine; + struct intel_engine_cs *engine = sched_to_request(node)->engine; GEM_BUG_ON(!locked); if (engine != locked) { - spin_unlock(&locked->timeline->lock); - spin_lock(&engine->timeline->lock); + spin_unlock(&locked->timeline.lock); + spin_lock(&engine->timeline.lock); } return engine; } -static void execlists_schedule(struct i915_request *request, int prio) +static void execlists_schedule(struct i915_request *request, + const struct i915_sched_attr *attr) { - struct intel_engine_cs *engine; + struct i915_priolist *uninitialized_var(pl); + struct intel_engine_cs *engine, *last; struct i915_dependency *dep, *p; struct i915_dependency stack; + const int prio = attr->priority; LIST_HEAD(dfs); GEM_BUG_ON(prio == I915_PRIORITY_INVALID); @@ -1077,23 +1227,23 @@ static void execlists_schedule(struct i915_request *request, int prio) if (i915_request_completed(request)) return; - if (prio <= READ_ONCE(request->priotree.priority)) + if (prio <= READ_ONCE(request->sched.attr.priority)) return; /* Need BKL in order to use the temporary link inside i915_dependency */ lockdep_assert_held(&request->i915->drm.struct_mutex); - stack.signaler = &request->priotree; + stack.signaler = &request->sched; list_add(&stack.dfs_link, &dfs); /* * Recursively bump all dependent priorities to match the new request. * * A naive approach would be to use recursion: - * static void update_priorities(struct i915_priotree *pt, prio) { - * list_for_each_entry(dep, &pt->signalers_list, signal_link) + * static void update_priorities(struct i915_sched_node *node, prio) { + * list_for_each_entry(dep, &node->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * queue_request(pt); + * queue_request(node); * } * but that may have unlimited recursion depth and so runs a very * real risk of overunning the kernel stack. Instead, we build @@ -1105,7 +1255,7 @@ static void execlists_schedule(struct i915_request *request, int prio) * last element in the list is the request we must execute first. */ list_for_each_entry(dep, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; /* * Within an engine, there can be no cycle, but we may @@ -1113,14 +1263,14 @@ static void execlists_schedule(struct i915_request *request, int prio) * (redundant dependencies are not eliminated) and across * engines. 
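The comment above is the whole argument for the dfs list in execlists_schedule(): the naive recursion it sketches descends one stack frame per dependency edge, which an untrusted workload can make arbitrarily deep. For contrast, here is that naive form over a hypothetical node type; the driver instead appends discovered signalers to the flat list and fixes priorities while walking it in reverse:

struct node_model {
	int prio;
	struct node_model **signalers;	/* requests this one waits on */
	unsigned int nr_signalers;
};

/* Unbounded kernel-stack use: one frame per edge in the chain. */
static void bump_priority_recursive(struct node_model *node, int prio)
{
	unsigned int i;

	if (node->prio >= prio)
		return;	/* already at least this urgent */

	node->prio = prio;
	for (i = 0; i < node->nr_signalers; i++)
		bump_priority_recursive(node->signalers[i], prio);
}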
*/ - list_for_each_entry(p, &pt->signalers_list, signal_link) { + list_for_each_entry(p, &node->signalers_list, signal_link) { GEM_BUG_ON(p == dep); /* no cycles! */ - if (i915_priotree_signaled(p->signaler)) + if (i915_sched_node_signaled(p->signaler)) continue; - GEM_BUG_ON(p->signaler->priority < pt->priority); - if (prio > READ_ONCE(p->signaler->priority)) + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); + if (prio > READ_ONCE(p->signaler->attr.priority)) list_move_tail(&p->dfs_link, &dfs); } } @@ -1131,37 +1281,45 @@ static void execlists_schedule(struct i915_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&request->priotree.link)); - request->priotree.priority = prio; + if (request->sched.attr.priority == I915_PRIORITY_INVALID) { + GEM_BUG_ON(!list_empty(&request->sched.link)); + request->sched.attr = *attr; if (stack.dfs_link.next == stack.dfs_link.prev) return; __list_del_entry(&stack.dfs_link); } + last = NULL; engine = request->engine; - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; INIT_LIST_HEAD(&dep->dfs_link); - engine = pt_lock_engine(pt, engine); + engine = sched_lock_engine(node, engine); - if (prio <= pt->priority) + if (prio <= node->attr.priority) continue; - pt->priority = prio; - if (!list_empty(&pt->link)) { - __list_del_entry(&pt->link); - queue_request(engine, pt, prio); + node->attr.priority = prio; + if (!list_empty(&node->link)) { + if (last != engine) { + pl = lookup_priolist(engine, prio); + last = engine; + } + GEM_BUG_ON(pl->priority != prio); + list_move_tail(&node->link, &pl->requests); } - submit_queue(engine, prio); + + if (prio > engine->execlists.queue_priority && + i915_sw_fence_done(&sched_to_request(node)->submit)) + __submit_queue(engine, prio); } - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) @@ -1191,7 +1349,7 @@ static struct intel_ring * execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); void *vaddr; int ret; @@ -1225,6 +1383,7 @@ execlists_context_pin(struct intel_engine_cs *engine, ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma); + ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head; ce->state->obj->pin_global++; i915_gem_context_get(ctx); @@ -1243,7 +1402,7 @@ err: static void execlists_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1262,8 +1421,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, static int execlists_request_alloc(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = + 
to_intel_context(request->ctx, request->engine); int ret; GEM_BUG_ON(!ce->pin_count); @@ -1523,6 +1682,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return -EINVAL; switch (INTEL_GEN(engine->i915)) { + case 11: + return 0; case 10: wa_bb_fn[0] = gen10_init_indirectctx_bb; wa_bb_fn[1] = NULL; @@ -1575,14 +1736,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } -static u8 gtiir[] = { - [RCS] = 0, - [BCS] = 0, - [VCS] = 1, - [VCS2] = 1, - [VECS] = 3, -}; - static void enable_execlists(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1642,6 +1795,8 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; + intel_whitelist_workarounds_apply(engine); + /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. @@ -1652,7 +1807,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return init_workarounds_ring(engine); + return 0; } static int gen9_init_render_ring(struct intel_engine_cs *engine) @@ -1663,49 +1818,25 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - return init_workarounds_ring(engine); -} - -static void reset_irq(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int i; - - GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); - - /* - * Clear any pending interrupt state. - * - * We do it twice out of paranoia that some of the IIR are double - * buffered, and if we only reset it once there may still be - * an interrupt pending. - */ - for (i = 0; i < 2; i++) { - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); - } - GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & - (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift)); + intel_whitelist_workarounds_apply(engine); - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + return 0; } static void reset_common_ring(struct intel_engine_cs *engine, struct i915_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct intel_context *ce; unsigned long flags; + u32 *regs; - GEM_TRACE("%s seqno=%x\n", - engine->name, request ? request->global_seqno : 0); + GEM_TRACE("%s request global=%x, current=%d\n", + engine->name, request ? request->global_seqno : 0, + intel_engine_get_seqno(engine)); /* See execlists_cancel_requests() for the irq/spinlock split. */ local_irq_save(flags); - reset_irq(engine); - /* * Catch up with any missed context-switch interrupts. * @@ -1716,14 +1847,12 @@ static void reset_common_ring(struct intel_engine_cs *engine, * requests were completed. */ execlists_cancel_port_requests(execlists); + reset_irq(engine); /* Push back any incomplete requests for replay after the reset. */ - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); __unwind_incomplete_requests(engine); - spin_unlock(&engine->timeline->lock); - - /* Mark all CS interrupts as complete */ - execlists->active = 0; + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); @@ -1749,14 +1878,24 @@ static void reset_common_ring(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. 
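The scrub described here is implemented by the code that follows: the hung context's register state is overlaid with the engine's default image before the ring registers are repointed at the breadcrumb. A condensed sketch of the overlay (the helper and its parameters are hypothetical; skipping the first page corresponds to preserving the per-context PPHWSP):

#include <string.h>
#include <stddef.h>

static void scrub_context_image(void *regs, const void *default_state,
				size_t context_size, size_t page_size,
				unsigned int lrc_state_pn)
{
	/* Copy pristine register state, keeping the PPHWSP page. */
	memcpy(regs,
	       (const char *)default_state + lrc_state_pn * page_size,
	       context_size - page_size);
}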
*/ - ce = &request->ctx->engine[engine->id]; - execlists_init_reg_state(ce->lrc_reg_state, - request->ctx, engine, ce->ring); + regs = to_intel_context(request->ctx, engine)->lrc_reg_state; + if (engine->default_state) { + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR(defaults)) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + defaults + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + i915_gem_object_unpin_map(engine->default_state); + } + } + execlists_init_reg_state(regs, request->ctx, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = - i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); + regs[CTX_RING_HEAD + 1] = request->postfix; request->ring->head = request->postfix; intel_ring_update_space(request->ring); @@ -1817,7 +1956,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(rq, 4); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1846,6 +1985,9 @@ static int gen8_emit_bb_start(struct i915_request *rq, (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; intel_ring_advance(rq, cs); return 0; @@ -1988,7 +2130,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2004,7 +2146,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2016,7 +2158,7 @@ static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(rq); + ret = intel_ctx_workarounds_emit(rq); if (ret) return ret; @@ -2076,11 +2218,13 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; + if (engine->i915->preempt_context) + engine->flags |= I915_ENGINE_HAS_PREEMPTION; engine->i915->caps.scheduler = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY; - if (engine->i915->preempt_context) + if (intel_engine_has_preemption(engine)) engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION; } @@ -2119,7 +2263,20 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) static inline void logical_ring_default_irqs(struct intel_engine_cs *engine) { - unsigned shift = engine->irq_shift; + unsigned int shift = 0; + + if (INTEL_GEN(engine->i915) < 11) { + const u8 irq_shifts[] = { + [RCS] = GEN8_RCS_IRQ_SHIFT, + [BCS] = GEN8_BCS_IRQ_SHIFT, + [VCS] = GEN8_VCS1_IRQ_SHIFT, + [VCS2] = GEN8_VCS2_IRQ_SHIFT, + [VECS] = GEN8_VECS_IRQ_SHIFT, + }; + + shift = irq_shifts[engine->id]; + } + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT 
<< shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } @@ -2175,9 +2332,13 @@ static int logical_ring_init(struct intel_engine_cs *engine) } engine->execlists.preempt_complete_status = ~0u; - if (engine->i915->preempt_context) + if (engine->i915->preempt_context) { + struct intel_context *ce = + to_intel_context(engine->i915->preempt_context, engine); + engine->execlists.preempt_complete_status = - upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc); + upper_32_bits(ce->lrc_desc); + } return 0; @@ -2431,8 +2592,10 @@ populate_lr_context(struct i915_gem_context *ctx, defaults = i915_gem_object_pin_map(engine->default_state, I915_MAP_WB); - if (IS_ERR(defaults)) - return PTR_ERR(defaults); + if (IS_ERR(defaults)) { + ret = PTR_ERR(defaults); + goto err_unpin_ctx; + } memcpy(vaddr + start, defaults + start, engine->context_size); i915_gem_object_unpin_map(engine->default_state); @@ -2450,19 +2613,20 @@ populate_lr_context(struct i915_gem_context *ctx, _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); +err_unpin_ctx: i915_gem_object_unpin_map(ctx_obj); - - return 0; + return ret; } static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; + struct i915_timeline *timeline; int ret; if (ce->state) @@ -2478,8 +2642,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ctx_obj = i915_gem_object_create(ctx->i915, context_size); if (IS_ERR(ctx_obj)) { - DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); - return PTR_ERR(ctx_obj); + ret = PTR_ERR(ctx_obj); + goto error_deref_obj; } vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); @@ -2488,7 +2652,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - ring = intel_engine_create_ring(engine, ctx->ring_size); + timeline = i915_timeline_create(ctx->i915, ctx->name); + if (IS_ERR(timeline)) { + ret = PTR_ERR(timeline); + goto error_deref_obj; + } + + ring = intel_engine_create_ring(engine, timeline, ctx->ring_size); + i915_timeline_put(timeline); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; @@ -2530,7 +2701,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); u32 *reg; if (!ce->state) @@ -2552,3 +2724,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) } } } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_lrc.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 59d7b86012e9..4ec7d8dd13c8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -108,7 +108,7 @@ static inline uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - return ctx->engine[engine->id].lrc_desc; + return to_intel_context(ctx, engine)->lrc_desc; } #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c0b34b7943b9..9f0bd6a4cb79 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c 
+++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,8 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || + IS_ICELAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; @@ -217,6 +218,8 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) return GEN9_VEBOX_MOCS(index); case VCS2: return GEN9_MFX1_MOCS(index); + case VCS3: + return GEN11_MFX2_MOCS(index); default: MISSING_CASE(engine_id); return INVALID_MMIO_REG; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 36671a937fa4..c2f10d899329 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -807,6 +807,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, ret = PTR_ERR(vma); goto out_pin_section; } + intel_fb_obj_flush(new_bo, ORIGIN_DIRTYFB); ret = i915_vma_put_fence(vma); if (ret) diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 1f5cd572a7ff..39a4e4edda07 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -569,7 +569,8 @@ unlock: static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, - uint32_t *val) + uint32_t *val, + bool set_wa) { if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) *source = INTEL_PIPE_CRC_SOURCE_PF; @@ -582,7 +583,7 @@ static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_SPRITE_IVB; break; case INTEL_PIPE_CRC_SOURCE_PF: - if ((IS_HASWELL(dev_priv) || + if (set_wa && (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && pipe == PIPE_A) hsw_pipe_A_crc_wa(dev_priv, true); @@ -600,7 +601,8 @@ static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, static int get_new_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, - enum intel_pipe_crc_source *source, u32 *val) + enum intel_pipe_crc_source *source, u32 *val, + bool set_wa) { if (IS_GEN2(dev_priv)) return i8xx_pipe_crc_ctl_reg(source, val); @@ -611,7 +613,7 @@ static int get_new_crc_ctl_reg(struct drm_i915_private *dev_priv, else if (IS_GEN5(dev_priv) || IS_GEN6(dev_priv)) return ilk_pipe_crc_ctl_reg(source, val); else - return ivb_pipe_crc_ctl_reg(dev_priv, pipe, source, val); + return ivb_pipe_crc_ctl_reg(dev_priv, pipe, source, val, set_wa); } static int pipe_crc_set_source(struct drm_i915_private *dev_priv, @@ -636,7 +638,7 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, return -EIO; } - ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val); + ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val, true); if (ret != 0) goto out; @@ -764,13 +766,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) { int i; - for (i = 0; i < ARRAY_SIZE(pipe_crc_objects); i++) - if (!strcmp(buf, pipe_crc_objects[i])) { - *o = i; - return 0; - } + i = match_string(pipe_crc_objects, ARRAY_SIZE(pipe_crc_objects), buf); + if (i < 0) + return i; - return -EINVAL; + *o = i; + return 0; } static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, @@ -796,13 +797,12 @@ display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s) return 0; } - for (i = 0; i < ARRAY_SIZE(pipe_crc_sources); i++) - if (!strcmp(buf, pipe_crc_sources[i])) { - *s = i; - 
return 0; - } + i = match_string(pipe_crc_sources, ARRAY_SIZE(pipe_crc_sources), buf); + if (i < 0) + return i; - return -EINVAL; + *s = i; + return 0; } static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, @@ -916,7 +916,7 @@ int intel_pipe_crc_create(struct drm_minor *minor) int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, size_t *values_cnt) { - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; enum intel_display_power_domain power_domain; enum intel_pipe_crc_source source; @@ -934,10 +934,11 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, return -EIO; } - ret = get_new_crc_ctl_reg(dev_priv, crtc->index, &source, &val); + ret = get_new_crc_ctl_reg(dev_priv, crtc->index, &source, &val, true); if (ret != 0) goto out; + pipe_crc->source = source; I915_WRITE(PIPE_CRC_CTL(crtc->index), val); POSTING_READ(PIPE_CRC_CTL(crtc->index)); @@ -959,3 +960,39 @@ out: return ret; } + +void intel_crtc_enable_pipe_crc(struct intel_crtc *intel_crtc) +{ + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; + u32 val = 0; + + if (!crtc->crc.opened) + return; + + if (get_new_crc_ctl_reg(dev_priv, crtc->index, &pipe_crc->source, &val, false) < 0) + return; + + /* Don't need pipe_crc->lock here, IRQs are not generated. */ + pipe_crc->skipped = 0; + + I915_WRITE(PIPE_CRC_CTL(crtc->index), val); + POSTING_READ(PIPE_CRC_CTL(crtc->index)); +} + +void intel_crtc_disable_pipe_crc(struct intel_crtc *intel_crtc) +{ + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; + + /* Swallow crc's until we stop generating them. */ + spin_lock_irq(&pipe_crc->lock); + pipe_crc->skipped = INT_MIN; + spin_unlock_irq(&pipe_crc->lock); + + I915_WRITE(PIPE_CRC_CTL(crtc->index), 0); + POSTING_READ(PIPE_CRC_CTL(crtc->index)); + synchronize_irq(dev_priv->drm.irq); +} diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b8da4dcdd584..b85229e153c4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3567,6 +3567,23 @@ bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } +static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) +{ + u8 enabled_slices; + + /* Slice 1 will always be enabled */ + enabled_slices = 1; + + /* Gen prior to GEN11 have only one DBuf slice */ + if (INTEL_GEN(dev_priv) < 11) + return enabled_slices; + + if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE) + enabled_slices++; + + return enabled_slices; +} + /* * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. 
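The hunks below teach the DDB sizing code about gen11's second DBuf slice: slice 1 is always powered, and the doubled allocation is used only when the pipes' total data bandwidth reaches 12 GB/s or more than one pipe is active; otherwise a single slice is enabled and only half the DDB is handed out. A condensed, hypothetical restatement of that decision follows; the function name and the raw bytes/s constant are ours, not the driver's:

static u8 demo_enabled_dbuf_slices(u64 total_data_bw, int num_active)
{
        /* 12 GB/s is the maximum bandwidth a single DBuf slice can carry */
        if (total_data_bw >= 12ULL * 1000 * 1000 * 1000 || num_active > 1)
                return 2;

        return 1; /* single slice: callers halve the DDB size */
}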
@@ -3754,9 +3771,42 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) return true; } +static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *cstate, + const unsigned int total_data_rate, + const int num_active, + struct skl_ddb_allocation *ddb) +{ + const struct drm_display_mode *adjusted_mode; + u64 total_data_bw; + u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size; + + WARN_ON(ddb_size == 0); + + if (INTEL_GEN(dev_priv) < 11) + return ddb_size - 4; /* 4 blocks for bypass path allocation */ + + adjusted_mode = &cstate->base.adjusted_mode; + total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode); + + /* + * 12GB/s is maximum BW supported by single DBuf slice. + */ + if (total_data_bw >= GBps(12) || num_active > 1) { + ddb->enabled_slices = 2; + } else { + ddb->enabled_slices = 1; + ddb_size /= 2; + } + + return ddb_size; +} + static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, const struct intel_crtc_state *cstate, + const unsigned int total_data_rate, + struct skl_ddb_allocation *ddb, struct skl_ddb_entry *alloc, /* out */ int *num_active /* out */) { @@ -3779,11 +3829,8 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, else *num_active = hweight32(dev_priv->active_crtcs); - ddb_size = INTEL_INFO(dev_priv)->ddb_size; - WARN_ON(ddb_size == 0); - - if (INTEL_GEN(dev_priv) < 11) - ddb_size -= 4; /* 4 blocks for bypass path allocation */ + ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate, + *num_active, ddb); /* * If the state doesn't change the active CRTC's, then there's @@ -3817,14 +3864,64 @@ static unsigned int skl_cursor_allocation(int num_active) return 8; } -static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) +static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv, + struct skl_ddb_entry *entry, u32 reg) { - entry->start = reg & 0x3ff; - entry->end = (reg >> 16) & 0x3ff; + u16 mask; + + if (INTEL_GEN(dev_priv) >= 11) + mask = ICL_DDB_ENTRY_MASK; + else + mask = SKL_DDB_ENTRY_MASK; + entry->start = reg & mask; + entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask; + if (entry->end) entry->end += 1; } +static void +skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, + const enum pipe pipe, + const enum plane_id plane_id, + struct skl_ddb_allocation *ddb /* out */) +{ + u32 val, val2 = 0; + int fourcc, pixel_format; + + /* Cursor doesn't support NV12/planar, so no extra calculation needed */ + if (plane_id == PLANE_CURSOR) { + val = I915_READ(CUR_BUF_CFG(pipe)); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val); + return; + } + + val = I915_READ(PLANE_CTL(pipe, plane_id)); + + /* No DDB allocated for disabled planes */ + if (!(val & PLANE_CTL_ENABLE)) + return; + + pixel_format = val & PLANE_CTL_FORMAT_MASK; + fourcc = skl_format_to_fourcc(pixel_format, + val & PLANE_CTL_ORDER_RGBX, + val & PLANE_CTL_ALPHA_MASK); + + val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); + val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id)); + + if (fourcc == DRM_FORMAT_NV12) { + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val2); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->uv_plane[pipe][plane_id], val); + } else { + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val); + } +} + void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */) { @@ -3832,6 +3929,8 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, 
memset(ddb, 0, sizeof(*ddb)); + ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv); + for_each_intel_crtc(&dev_priv->drm, crtc) { enum intel_display_power_domain power_domain; enum plane_id plane_id; @@ -3841,16 +3940,9 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; - for_each_plane_id_on_crtc(crtc, plane_id) { - u32 val; - - if (plane_id != PLANE_CURSOR) - val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); - else - val = I915_READ(CUR_BUF_CFG(pipe)); - - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); - } + for_each_plane_id_on_crtc(crtc, plane_id) + skl_ddb_get_hw_plane_state(dev_priv, pipe, + plane_id, ddb); intel_display_power_put(dev_priv, power_domain); } @@ -4009,9 +4101,9 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, static unsigned int skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate, - int y) + const int plane) { - struct intel_plane *plane = to_intel_plane(pstate->plane); + struct intel_plane *intel_plane = to_intel_plane(pstate->plane); struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); uint32_t data_rate; uint32_t width = 0, height = 0; @@ -4025,9 +4117,9 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, fb = pstate->fb; format = fb->format->format; - if (plane->id == PLANE_CURSOR) + if (intel_plane->id == PLANE_CURSOR) return 0; - if (y && format != DRM_FORMAT_NV12) + if (plane == 1 && format != DRM_FORMAT_NV12) return 0; /* @@ -4038,19 +4130,14 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, width = drm_rect_width(&intel_pstate->base.src) >> 16; height = drm_rect_height(&intel_pstate->base.src) >> 16; - /* for planar format */ - if (format == DRM_FORMAT_NV12) { - if (y) /* y-plane data rate */ - data_rate = width * height * - fb->format->cpp[0]; - else /* uv-plane data rate */ - data_rate = (width / 2) * (height / 2) * - fb->format->cpp[1]; - } else { - /* for packed formats */ - data_rate = width * height * fb->format->cpp[0]; + /* UV plane does 1/2 pixel sub-sampling */ + if (plane == 1 && format == DRM_FORMAT_NV12) { + width /= 2; + height /= 2; } + data_rate = width * height * fb->format->cpp[plane]; + down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); return mul_round_up_u32_fixed16(data_rate, down_scale_amount); @@ -4063,8 +4150,8 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, */ static unsigned int skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, - unsigned *plane_data_rate, - unsigned *plane_y_data_rate) + unsigned int *plane_data_rate, + unsigned int *uv_plane_data_rate) { struct drm_crtc_state *cstate = &intel_cstate->base; struct drm_atomic_state *state = cstate->state; @@ -4080,17 +4167,17 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, enum plane_id plane_id = to_intel_plane(plane)->id; unsigned int rate; - /* packed/uv */ + /* packed/y */ rate = skl_plane_relative_data_rate(intel_cstate, pstate, 0); plane_data_rate[plane_id] = rate; total_data_rate += rate; - /* y-plane */ + /* uv-plane */ rate = skl_plane_relative_data_rate(intel_cstate, pstate, 1); - plane_y_data_rate[plane_id] = rate; + uv_plane_data_rate[plane_id] = rate; total_data_rate += rate; } @@ -4099,8 +4186,7 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, } static uint16_t -skl_ddb_min_alloc(const struct drm_plane_state *pstate, 
- const int y) +skl_ddb_min_alloc(const struct drm_plane_state *pstate, const int plane) { struct drm_framebuffer *fb = pstate->fb; struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); @@ -4111,8 +4197,8 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, if (WARN_ON(!fb)) return 0; - /* For packed formats, no y-plane, return 0 */ - if (y && fb->format->format != DRM_FORMAT_NV12) + /* For packed formats, and uv-plane, return 0 */ + if (plane == 1 && fb->format->format != DRM_FORMAT_NV12) return 0; /* For Non Y-tile return 8-blocks */ @@ -4131,15 +4217,12 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, src_h = drm_rect_height(&intel_pstate->base.src) >> 16; /* Halve UV plane width and height for NV12 */ - if (fb->format->format == DRM_FORMAT_NV12 && !y) { + if (plane == 1) { src_w /= 2; src_h /= 2; } - if (fb->format->format == DRM_FORMAT_NV12 && !y) - plane_bpp = fb->format->cpp[1]; - else - plane_bpp = fb->format->cpp[0]; + plane_bpp = fb->format->cpp[plane]; if (drm_rotation_90_or_270(pstate->rotation)) { switch (plane_bpp) { @@ -4167,7 +4250,7 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, static void skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, - uint16_t *minimum, uint16_t *y_minimum) + uint16_t *minimum, uint16_t *uv_minimum) { const struct drm_plane_state *pstate; struct drm_plane *plane; @@ -4182,7 +4265,7 @@ skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, continue; minimum[plane_id] = skl_ddb_min_alloc(pstate, 0); - y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1); + uv_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1); } minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active); @@ -4200,17 +4283,17 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; uint16_t alloc_size, start; uint16_t minimum[I915_MAX_PLANES] = {}; - uint16_t y_minimum[I915_MAX_PLANES] = {}; + uint16_t uv_minimum[I915_MAX_PLANES] = {}; unsigned int total_data_rate; enum plane_id plane_id; int num_active; - unsigned plane_data_rate[I915_MAX_PLANES] = {}; - unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; + unsigned int plane_data_rate[I915_MAX_PLANES] = {}; + unsigned int uv_plane_data_rate[I915_MAX_PLANES] = {}; uint16_t total_min_blocks = 0; /* Clear the partitioning for disabled planes. */ memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); - memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe])); + memset(ddb->uv_plane[pipe], 0, sizeof(ddb->uv_plane[pipe])); if (WARN_ON(!state)) return 0; @@ -4220,12 +4303,16 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, return 0; } - skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); + total_data_rate = skl_get_total_relative_data_rate(cstate, + plane_data_rate, + uv_plane_data_rate); + skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb, + alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); if (alloc_size == 0) return 0; - skl_ddb_calc_min(cstate, num_active, minimum, y_minimum); + skl_ddb_calc_min(cstate, num_active, minimum, uv_minimum); /* * 1. 
Allocate the mininum required blocks for each active plane @@ -4235,7 +4322,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, for_each_plane_id_on_crtc(intel_crtc, plane_id) { total_min_blocks += minimum[plane_id]; - total_min_blocks += y_minimum[plane_id]; + total_min_blocks += uv_minimum[plane_id]; } if (total_min_blocks > alloc_size) { @@ -4255,16 +4342,13 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * * FIXME: we may not allocate every single block here. */ - total_data_rate = skl_get_total_relative_data_rate(cstate, - plane_data_rate, - plane_y_data_rate); if (total_data_rate == 0) return 0; start = alloc->start; for_each_plane_id_on_crtc(intel_crtc, plane_id) { - unsigned int data_rate, y_data_rate; - uint16_t plane_blocks, y_plane_blocks = 0; + unsigned int data_rate, uv_data_rate; + uint16_t plane_blocks, uv_plane_blocks; if (plane_id == PLANE_CURSOR) continue; @@ -4288,21 +4372,20 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, start += plane_blocks; - /* - * allocation for y_plane part of planar format: - */ - y_data_rate = plane_y_data_rate[plane_id]; + /* Allocate DDB for UV plane for planar format/NV12 */ + uv_data_rate = uv_plane_data_rate[plane_id]; - y_plane_blocks = y_minimum[plane_id]; - y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, - total_data_rate); + uv_plane_blocks = uv_minimum[plane_id]; + uv_plane_blocks += div_u64((uint64_t)alloc_size * uv_data_rate, + total_data_rate); - if (y_data_rate) { - ddb->y_plane[pipe][plane_id].start = start; - ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks; + if (uv_data_rate) { + ddb->uv_plane[pipe][plane_id].start = start; + ddb->uv_plane[pipe][plane_id].end = + start + uv_plane_blocks; } - start += y_plane_blocks; + start += uv_plane_blocks; } return 0; @@ -4398,7 +4481,7 @@ static int skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, - struct skl_wm_params *wp) + struct skl_wm_params *wp, int plane_id) { struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); const struct drm_plane_state *pstate = &intel_pstate->base; @@ -4411,6 +4494,12 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, if (!intel_wm_plane_visible(cstate, intel_pstate)) return 0; + /* only NV12 format has two planes */ + if (plane_id == 1 && fb->format->format != DRM_FORMAT_NV12) { + DRM_DEBUG_KMS("Non NV12 format have single plane\n"); + return -EINVAL; + } + wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || fb->modifier == I915_FORMAT_MOD_Yf_TILED || fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || @@ -4418,6 +4507,7 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; + wp->is_planar = fb->format->format == DRM_FORMAT_NV12; if (plane->id == PLANE_CURSOR) { wp->width = intel_pstate->base.crtc_w; @@ -4430,8 +4520,10 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, wp->width = drm_rect_width(&intel_pstate->base.src) >> 16; } - wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? 
fb->format->cpp[1] : - fb->format->cpp[0]; + if (plane_id == 1 && wp->is_planar) + wp->width /= 2; + + wp->cpp = fb->format->cpp[plane_id]; wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); @@ -4499,9 +4591,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint16_t ddb_allocation, int level, const struct skl_wm_params *wp, - uint16_t *out_blocks, /* out */ - uint8_t *out_lines, /* out */ - bool *enabled /* out */) + const struct skl_wm_level *result_prev, + struct skl_wm_level *result /* out */) { const struct drm_plane_state *pstate = &intel_pstate->base; uint32_t latency = dev_priv->wm.skl_latency[level]; @@ -4515,7 +4606,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (latency == 0 || !intel_wm_plane_visible(cstate, intel_pstate)) { - *enabled = false; + result->plane_en = false; return 0; } @@ -4568,6 +4659,15 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } else { res_blocks++; } + + /* + * Make sure result blocks for higher latency levels are at least + * as high as the level below the current level. + * Assumption in DDB algorithm optimization for special cases. + * Also covers Display WA #1125 for RC. + */ + if (result_prev->plane_res_b > res_blocks) + res_blocks = result_prev->plane_res_b; } if (INTEL_GEN(dev_priv) >= 11) { @@ -4596,7 +4696,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if ((level > 0 && res_lines > 31) || res_blocks >= ddb_allocation || min_disp_buf_needed >= ddb_allocation) { - *enabled = false; + result->plane_en = false; /* * If there are no valid level 0 watermarks, then we can't @@ -4615,10 +4715,21 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } } + /* + * Display WA #826 (SKL:ALL, BXT:ALL) & #1059 (CNL:A) + * disable wm level 1-7 on NV12 planes + */ + if (wp->is_planar && level >= 1 && + (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || + IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))) { + result->plane_en = false; + return 0; + } + /* The number of lines are ignored for the level 0 watermark. */ - *out_lines = level ? res_lines : 0; - *out_blocks = res_blocks; - *enabled = true; + result->plane_res_b = res_blocks; + result->plane_res_l = res_lines; + result->plane_en = true; return 0; } @@ -4629,7 +4740,8 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, const struct skl_wm_params *wm_params, - struct skl_plane_wm *wm) + struct skl_plane_wm *wm, + int plane_id) { struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); struct drm_plane *plane = intel_pstate->base.plane; @@ -4637,15 +4749,26 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, uint16_t ddb_blocks; enum pipe pipe = intel_crtc->pipe; int level, max_level = ilk_wm_max_level(dev_priv); + enum plane_id intel_plane_id = intel_plane->id; int ret; if (WARN_ON(!intel_pstate->base.fb)) return -EINVAL; - ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); + ddb_blocks = plane_id ? + skl_ddb_entry_size(&ddb->uv_plane[pipe][intel_plane_id]) : + skl_ddb_entry_size(&ddb->plane[pipe][intel_plane_id]); for (level = 0; level <= max_level; level++) { - struct skl_wm_level *result = &wm->wm[level]; + struct skl_wm_level *result = plane_id ? &wm->uv_wm[level] : + &wm->wm[level]; + struct skl_wm_level *result_prev; + + if (level) + result_prev = plane_id ? 
&wm->uv_wm[level - 1] : + &wm->wm[level - 1]; + else + result_prev = plane_id ? &wm->uv_wm[0] : &wm->wm[0]; ret = skl_compute_plane_wm(dev_priv, cstate, @@ -4653,13 +4776,15 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, ddb_blocks, level, wm_params, - &result->plane_res_b, - &result->plane_res_l, - &result->plane_en); + result_prev, + result); if (ret) return ret; } + if (intel_pstate->base.fb->format->format == DRM_FORMAT_NV12) + wm->is_planar = true; + return 0; } @@ -4769,20 +4894,39 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, wm = &pipe_wm->planes[plane_id]; ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]); - memset(&wm_params, 0, sizeof(struct skl_wm_params)); ret = skl_compute_plane_wm_params(dev_priv, cstate, - intel_pstate, &wm_params); + intel_pstate, &wm_params, 0); if (ret) return ret; ret = skl_compute_wm_levels(dev_priv, ddb, cstate, - intel_pstate, &wm_params, wm); + intel_pstate, &wm_params, wm, 0); if (ret) return ret; + skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0], ddb_blocks, &wm->trans_wm); + + /* uv plane watermarks must also be validated for NV12/Planar */ + if (wm_params.is_planar) { + memset(&wm_params, 0, sizeof(struct skl_wm_params)); + wm->is_planar = true; + + ret = skl_compute_plane_wm_params(dev_priv, cstate, + intel_pstate, + &wm_params, 1); + if (ret) + return ret; + + ret = skl_compute_wm_levels(dev_priv, ddb, cstate, + intel_pstate, &wm_params, + wm, 1); + if (ret) + return ret; + } } + pipe_wm->linetime = skl_compute_linetime_wm(cstate); return 0; @@ -4833,10 +4977,21 @@ static void skl_write_plane_wm(struct intel_crtc *intel_crtc, skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), &ddb->plane[pipe][plane_id]); - if (INTEL_GEN(dev_priv) < 11) + if (INTEL_GEN(dev_priv) >= 11) + return skl_ddb_entry_write(dev_priv, + PLANE_BUF_CFG(pipe, plane_id), + &ddb->plane[pipe][plane_id]); + if (wm->is_planar) { + skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), + &ddb->uv_plane[pipe][plane_id]); skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane_id), - &ddb->y_plane[pipe][plane_id]); + &ddb->plane[pipe][plane_id]); + } else { + skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), + &ddb->plane[pipe][plane_id]); + I915_WRITE(PLANE_NV12_BUF_CFG(pipe, plane_id), 0x0); + } } static void skl_write_cursor_wm(struct intel_crtc *intel_crtc, @@ -4944,15 +5099,13 @@ skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) struct drm_plane *plane; enum pipe pipe = intel_crtc->pipe; - WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); - drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { enum plane_id plane_id = to_intel_plane(plane)->id; if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id], &new_ddb->plane[pipe][plane_id]) && - skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id], - &new_ddb->y_plane[pipe][plane_id])) + skl_ddb_entry_equal(&cur_ddb->uv_plane[pipe][plane_id], + &new_ddb->uv_plane[pipe][plane_id])) continue; plane_state = drm_atomic_get_plane_state(state, plane); @@ -4966,69 +5119,16 @@ skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) static int skl_compute_ddb(struct drm_atomic_state *state) { - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + const struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct intel_crtc *intel_crtc; struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb; - uint32_t 
realloc_pipes = pipes_modified(state); - int ret; - - /* - * If this is our first atomic update following hardware readout, - * we can't trust the DDB that the BIOS programmed for us. Let's - * pretend that all pipes switched active status so that we'll - * ensure a full DDB recompute. - */ - if (dev_priv->wm.distrust_bios_wm) { - ret = drm_modeset_lock(&dev->mode_config.connection_mutex, - state->acquire_ctx); - if (ret) - return ret; - - intel_state->active_pipe_changes = ~0; - - /* - * We usually only initialize intel_state->active_crtcs if we - * we're doing a modeset; make sure this field is always - * initialized during the sanitization process that happens - * on the first commit too. - */ - if (!intel_state->modeset) - intel_state->active_crtcs = dev_priv->active_crtcs; - } - - /* - * If the modeset changes which CRTC's are active, we need to - * recompute the DDB allocation for *all* active pipes, even - * those that weren't otherwise being modified in any way by this - * atomic commit. Due to the shrinking of the per-pipe allocations - * when new active CRTC's are added, it's possible for a pipe that - * we were already using and aren't changing at all here to suddenly - * become invalid if its DDB needs exceeds its new allocation. - * - * Note that if we wind up doing a full DDB recompute, we can't let - * any other display updates race with this transaction, so we need - * to grab the lock on *all* CRTC's. - */ - if (intel_state->active_pipe_changes) { - realloc_pipes = ~0; - intel_state->wm_results.dirty_pipes = ~0; - } + struct intel_crtc *crtc; + struct intel_crtc_state *cstate; + int ret, i; - /* - * We're not recomputing for the pipes not included in the commit, so - * make sure we start with the current state. - */ memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb)); - for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { - struct intel_crtc_state *cstate; - - cstate = intel_atomic_get_crtc_state(state, intel_crtc); - if (IS_ERR(cstate)) - return PTR_ERR(cstate); - + for_each_new_intel_crtc_in_state(intel_state, crtc, cstate, i) { ret = skl_allocate_pipe_ddb(cstate, ddb); if (ret) return ret; @@ -5042,14 +5142,15 @@ skl_compute_ddb(struct drm_atomic_state *state) } static void -skl_copy_wm_for_pipe(struct skl_wm_values *dst, - struct skl_wm_values *src, - enum pipe pipe) +skl_copy_ddb_for_pipe(struct skl_ddb_values *dst, + struct skl_ddb_values *src, + enum pipe pipe) { - memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], - sizeof(dst->ddb.y_plane[pipe])); + memcpy(dst->ddb.uv_plane[pipe], src->ddb.uv_plane[pipe], + sizeof(dst->ddb.uv_plane[pipe])); memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], sizeof(dst->ddb.plane[pipe])); + dst->ddb.enabled_slices = src->ddb.enabled_slices; } static void @@ -5090,23 +5191,23 @@ skl_print_wm_changes(const struct drm_atomic_state *state) } static int -skl_compute_wm(struct drm_atomic_state *state) +skl_ddb_add_affected_pipes(struct drm_atomic_state *state, bool *changed) { - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct skl_wm_values *results = &intel_state->wm_results; struct drm_device *dev = state->dev; - struct skl_pipe_wm *pipe_wm; - bool changed = false; + const struct drm_i915_private *dev_priv = to_i915(dev); + const struct drm_crtc *crtc; + const struct drm_crtc_state *cstate; + struct intel_crtc *intel_crtc; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + uint32_t realloc_pipes = 
pipes_modified(state); int ret, i; /* * When we distrust bios wm we always need to recompute to set the * expected DDB allocations for each CRTC. */ - if (to_i915(dev)->wm.distrust_bios_wm) - changed = true; + if (dev_priv->wm.distrust_bios_wm) + (*changed) = true; /* * If this transaction isn't actually touching any CRTC's, don't @@ -5117,14 +5218,86 @@ skl_compute_wm(struct drm_atomic_state *state) * hold _all_ CRTC state mutexes. */ for_each_new_crtc_in_state(state, crtc, cstate, i) - changed = true; + (*changed) = true; - if (!changed) + if (!*changed) return 0; + /* + * If this is our first atomic update following hardware readout, + * we can't trust the DDB that the BIOS programmed for us. Let's + * pretend that all pipes switched active status so that we'll + * ensure a full DDB recompute. + */ + if (dev_priv->wm.distrust_bios_wm) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + state->acquire_ctx); + if (ret) + return ret; + + intel_state->active_pipe_changes = ~0; + + /* + * We usually only initialize intel_state->active_crtcs if we + * we're doing a modeset; make sure this field is always + * initialized during the sanitization process that happens + * on the first commit too. + */ + if (!intel_state->modeset) + intel_state->active_crtcs = dev_priv->active_crtcs; + } + + /* + * If the modeset changes which CRTC's are active, we need to + * recompute the DDB allocation for *all* active pipes, even + * those that weren't otherwise being modified in any way by this + * atomic commit. Due to the shrinking of the per-pipe allocations + * when new active CRTC's are added, it's possible for a pipe that + * we were already using and aren't changing at all here to suddenly + * become invalid if its DDB needs exceeds its new allocation. + * + * Note that if we wind up doing a full DDB recompute, we can't let + * any other display updates race with this transaction, so we need + * to grab the lock on *all* CRTC's. + */ + if (intel_state->active_pipe_changes) { + realloc_pipes = ~0; + intel_state->wm_results.dirty_pipes = ~0; + } + + /* + * We're not recomputing for the pipes not included in the commit, so + * make sure we start with the current state. 
+ */ + for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { + struct intel_crtc_state *cstate; + + cstate = intel_atomic_get_crtc_state(state, intel_crtc); + if (IS_ERR(cstate)) + return PTR_ERR(cstate); + } + + return 0; +} + +static int +skl_compute_wm(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct skl_ddb_values *results = &intel_state->wm_results; + struct skl_pipe_wm *pipe_wm; + bool changed = false; + int ret, i; + /* Clear all dirty flags */ results->dirty_pipes = 0; + ret = skl_ddb_add_affected_pipes(state, &changed); + if (ret || !changed) + return ret; + ret = skl_compute_ddb(state); if (ret) return ret; @@ -5197,8 +5370,8 @@ static void skl_initial_wm(struct intel_atomic_state *state, struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct skl_wm_values *results = &state->wm_results; - struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; + struct skl_ddb_values *results = &state->wm_results; + struct skl_ddb_values *hw_vals = &dev_priv->wm.skl_hw; enum pipe pipe = intel_crtc->pipe; if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) @@ -5209,7 +5382,7 @@ static void skl_initial_wm(struct intel_atomic_state *state, if (cstate->base.active_changed) skl_atomic_update_crtc_wm(state, cstate); - skl_copy_wm_for_pipe(hw_vals, results, pipe); + skl_copy_ddb_for_pipe(hw_vals, results, pipe); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -5341,7 +5514,7 @@ void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, void skl_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct skl_wm_values *hw = &dev_priv->wm.skl_hw; + struct skl_ddb_values *hw = &dev_priv->wm.skl_hw; struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; struct drm_crtc *crtc; struct intel_crtc *intel_crtc; @@ -5362,8 +5535,12 @@ void skl_wm_get_hw_state(struct drm_device *dev) /* Fully recompute DDB on first atomic commit */ dev_priv->wm.distrust_bios_wm = true; } else { - /* Easy/common case; just sanitize DDB now if everything off */ - memset(ddb, 0, sizeof(*ddb)); + /* + * Easy/common case; just sanitize DDB now if everything off + * Keep dbuf slice info intact + */ + memset(ddb->plane, 0, sizeof(ddb->plane)); + memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane)); } } @@ -6572,7 +6749,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -6585,7 +6762,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) rps->max_freq); } - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -6890,15 +7067,18 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - int min_freq = 15; + const int min_freq = 15; + const int scaling_factor = 180; unsigned int gpu_freq; unsigned int max_ia_freq, min_ring_freq; unsigned int max_gpu_freq, min_gpu_freq; 
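/*
 * A hedged aside on the conversion in the next hunk: on IS_GEN9_BC and
 * gen10+ parts the RPS frequencies above are stored in 16.66 MHz units,
 * and dividing by GEN9_FREQ_SCALER (== 3) turns them into the 50 MHz
 * units the PCU expects, since 3 * 16.66 MHz ~= 50 MHz. For example, a
 * stored value of 66 (~1100 MHz) becomes 66 / 3 = 22 in 50 MHz units:
 *
 *	min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER;
 *	max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER;
 */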
- int scaling_factor = 180; struct cpufreq_policy *policy; WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + if (rps->max_freq <= rps->min_freq) + return; + policy = cpufreq_cpu_get(0); if (policy) { max_ia_freq = policy->cpuinfo.max_freq; @@ -6918,13 +7098,12 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER; - max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER; - } else { - min_gpu_freq = rps->min_freq; - max_gpu_freq = rps->max_freq; + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; } /* @@ -6933,10 +7112,10 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) * the PCU should use as a reference to determine the ring frequency. */ for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { - int diff = max_gpu_freq - gpu_freq; + const int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -8026,10 +8205,10 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); - if (INTEL_GEN(dev_priv) < 11) - gen6_reset_rps_interrupts(dev_priv); + if (INTEL_GEN(dev_priv) >= 11) + gen11_reset_rps_interrupts(dev_priv); else - WARN_ON_ONCE(1); + gen6_reset_rps_interrupts(dev_priv); } static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) @@ -8142,8 +8321,6 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv) cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); - } else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) { - /* TODO */ } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rps(dev_priv); } else if (IS_BROADWELL(dev_priv)) { @@ -8487,6 +8664,13 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, I915_WRITE(GEN7_MISCCPCTL, misccpctl); } +static void icl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + /* This is not a Wa. 
Enable to reduce Sampler power */ + I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN, + I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -9013,7 +9197,9 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv) */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dev_priv->display.init_clock_gating = icl_init_clock_gating; + else if (IS_CANNONLAKE(dev_priv)) dev_priv->display.init_clock_gating = cnl_init_clock_gating; else if (IS_COFFEELAKE(dev_priv)) dev_priv->display.init_clock_gating = cfl_init_clock_gating; diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 23175c5c4a50..db27f2faa1de 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -93,7 +93,115 @@ static void psr_aux_io_power_put(struct intel_dp *intel_dp) intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); } -static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp) +void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug) +{ + u32 debug_mask, mask; + + /* No PSR interrupts on VLV/CHV */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return; + + mask = EDP_PSR_ERROR(TRANSCODER_EDP); + debug_mask = EDP_PSR_POST_EXIT(TRANSCODER_EDP) | + EDP_PSR_PRE_ENTRY(TRANSCODER_EDP); + + if (INTEL_GEN(dev_priv) >= 8) { + mask |= EDP_PSR_ERROR(TRANSCODER_A) | + EDP_PSR_ERROR(TRANSCODER_B) | + EDP_PSR_ERROR(TRANSCODER_C); + + debug_mask |= EDP_PSR_POST_EXIT(TRANSCODER_A) | + EDP_PSR_PRE_ENTRY(TRANSCODER_A) | + EDP_PSR_POST_EXIT(TRANSCODER_B) | + EDP_PSR_PRE_ENTRY(TRANSCODER_B) | + EDP_PSR_POST_EXIT(TRANSCODER_C) | + EDP_PSR_PRE_ENTRY(TRANSCODER_C); + } + + if (debug) + mask |= debug_mask; + + WRITE_ONCE(dev_priv->psr.debug, debug); + I915_WRITE(EDP_PSR_IMR, ~mask); +} + +static void psr_event_print(u32 val, bool psr2_enabled) +{ + DRM_DEBUG_KMS("PSR exit events: 0x%x\n", val); + if (val & PSR_EVENT_PSR2_WD_TIMER_EXPIRE) + DRM_DEBUG_KMS("\tPSR2 watchdog timer expired\n"); + if ((val & PSR_EVENT_PSR2_DISABLED) && psr2_enabled) + DRM_DEBUG_KMS("\tPSR2 disabled\n"); + if (val & PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN) + DRM_DEBUG_KMS("\tSU dirty FIFO underrun\n"); + if (val & PSR_EVENT_SU_CRC_FIFO_UNDERRUN) + DRM_DEBUG_KMS("\tSU CRC FIFO underrun\n"); + if (val & PSR_EVENT_GRAPHICS_RESET) + DRM_DEBUG_KMS("\tGraphics reset\n"); + if (val & PSR_EVENT_PCH_INTERRUPT) + DRM_DEBUG_KMS("\tPCH interrupt\n"); + if (val & PSR_EVENT_MEMORY_UP) + DRM_DEBUG_KMS("\tMemory up\n"); + if (val & PSR_EVENT_FRONT_BUFFER_MODIFY) + DRM_DEBUG_KMS("\tFront buffer modification\n"); + if (val & PSR_EVENT_WD_TIMER_EXPIRE) + DRM_DEBUG_KMS("\tPSR watchdog timer expired\n"); + if (val & PSR_EVENT_PIPE_REGISTERS_UPDATE) + DRM_DEBUG_KMS("\tPIPE registers updated\n"); + if (val & PSR_EVENT_REGISTER_UPDATE) + DRM_DEBUG_KMS("\tRegister updated\n"); + if (val & PSR_EVENT_HDCP_ENABLE) + DRM_DEBUG_KMS("\tHDCP enabled\n"); + if (val & PSR_EVENT_KVMR_SESSION_ENABLE) + DRM_DEBUG_KMS("\tKVMR session enabled\n"); + if (val & PSR_EVENT_VBI_ENABLE) + DRM_DEBUG_KMS("\tVBI enabled\n"); + if (val & PSR_EVENT_LPSP_MODE_EXIT) + DRM_DEBUG_KMS("\tLPSP mode exited\n"); + if ((val & PSR_EVENT_PSR_DISABLE) && !psr2_enabled) + DRM_DEBUG_KMS("\tPSR disabled\n"); +} + +void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) +{ + u32 transcoders = BIT(TRANSCODER_EDP); + enum 
transcoder cpu_transcoder; + ktime_t time_ns = ktime_get(); + + if (INTEL_GEN(dev_priv) >= 8) + transcoders |= BIT(TRANSCODER_A) | + BIT(TRANSCODER_B) | + BIT(TRANSCODER_C); + + for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { + /* FIXME: Exit PSR and link train manually when this happens. */ + if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) + DRM_DEBUG_KMS("[transcoder %s] PSR aux error\n", + transcoder_name(cpu_transcoder)); + + if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) { + dev_priv->psr.last_entry_attempt = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", + transcoder_name(cpu_transcoder)); + } + + if (psr_iir & EDP_PSR_POST_EXIT(cpu_transcoder)) { + dev_priv->psr.last_exit = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", + transcoder_name(cpu_transcoder)); + + if (INTEL_GEN(dev_priv) >= 9) { + u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); + bool psr2_enabled = dev_priv->psr.psr2_enabled; + + I915_WRITE(PSR_EVENT(cpu_transcoder), val); + psr_event_print(val, psr2_enabled); + } + } + } +} + +static bool intel_dp_get_y_coord_required(struct intel_dp *intel_dp) { uint8_t psr_caps = 0; @@ -122,6 +230,18 @@ static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) return alpm_caps & DP_ALPM_CAP; } +static u8 intel_dp_get_sink_sync_latency(struct intel_dp *intel_dp) +{ + u8 val = 0; + + if (drm_dp_dpcd_readb(&intel_dp->aux, + DP_SYNCHRONIZATION_LATENCY_IN_SINK, &val) == 1) + val &= DP_MAX_RESYNC_FRAME_COUNT_MASK; + else + DRM_ERROR("Unable to get sink synchronization latency\n"); + return val; +} + void intel_psr_init_dpcd(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = @@ -130,33 +250,36 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, sizeof(intel_dp->psr_dpcd)); - if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { + if (intel_dp->psr_dpcd[0]) { dev_priv->psr.sink_support = true; DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); } if (INTEL_GEN(dev_priv) >= 9 && - (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { - uint8_t frame_sync_cap; + (intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_IS_SUPPORTED)) { + /* + * All panels that support PSR version 03h (PSR2 + + * Y-coordinate) can handle Y-coordinates in VSC but we are + * only sure that it is going to be used when required by the + * panel. This way the panel is capable of doing selective updates + * without an aux frame sync. + * + * To support PSR version 02h and PSR version 03h panels without + * the Y-coordinate requirement we would need to enable + * GTC first. + */ + dev_priv->psr.sink_psr2_support = + intel_dp_get_y_coord_required(intel_dp); + DRM_DEBUG_KMS("PSR2 %s on sink", dev_priv->psr.sink_psr2_support + ? "supported" : "not supported"); - dev_priv->psr.sink_support = true; - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap) != 1) - frame_sync_cap = 0; - dev_priv->psr.aux_frame_sync = frame_sync_cap & DP_AUX_FRAME_SYNC_CAP; - /* PSR2 needs frame sync as well */ - dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; - DRM_DEBUG_KMS("PSR2 %s on sink", - dev_priv->psr.psr2_support ? 
"supported" : "not supported"); - - if (dev_priv->psr.psr2_support) { - dev_priv->psr.y_cord_support = - intel_dp_get_y_cord_status(intel_dp); + if (dev_priv->psr.sink_psr2_support) { dev_priv->psr.colorimetry_support = intel_dp_get_colorimetry_status(intel_dp); dev_priv->psr.alpm = intel_dp_get_alpm_status(intel_dp); + dev_priv->psr.sink_sync_latency = + intel_dp_get_sink_sync_latency(intel_dp); } } } @@ -193,21 +316,17 @@ static void hsw_psr_setup_vsc(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); struct edp_vsc_psr psr_vsc; - if (dev_priv->psr.psr2_support) { + if (dev_priv->psr.psr2_enabled) { /* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */ memset(&psr_vsc, 0, sizeof(psr_vsc)); psr_vsc.sdp_header.HB0 = 0; psr_vsc.sdp_header.HB1 = 0x7; - if (dev_priv->psr.colorimetry_support && - dev_priv->psr.y_cord_support) { + if (dev_priv->psr.colorimetry_support) { psr_vsc.sdp_header.HB2 = 0x5; psr_vsc.sdp_header.HB3 = 0x13; - } else if (dev_priv->psr.y_cord_support) { + } else { psr_vsc.sdp_header.HB2 = 0x4; psr_vsc.sdp_header.HB3 = 0xe; - } else { - psr_vsc.sdp_header.HB2 = 0x3; - psr_vsc.sdp_header.HB3 = 0xc; } } else { /* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */ @@ -228,31 +347,12 @@ static void vlv_psr_enable_sink(struct intel_dp *intel_dp) DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE); } -static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DP_AUX_CH_CTL(port); - else - return EDP_PSR_AUX_CTL; -} - -static i915_reg_t psr_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DP_AUX_CH_DATA(port, index); - else - return EDP_PSR_AUX_DATA(index); -} - -static void hsw_psr_enable_sink(struct intel_dp *intel_dp) +static void hsw_psr_setup_aux(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t aux_clock_divider; - i915_reg_t aux_ctl_reg; + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + u32 aux_clock_divider, aux_ctl; + int i; static const uint8_t aux_msg[] = { [0] = DP_AUX_NATIVE_WRITE << 4, [1] = DP_SET_POWER >> 8, @@ -260,41 +360,47 @@ static void hsw_psr_enable_sink(struct intel_dp *intel_dp) [3] = 1 - 1, [4] = DP_SET_POWER_D0, }; - enum port port = dig_port->base.port; - u32 aux_ctl; - int i; + u32 psr_aux_mask = EDP_PSR_AUX_CTL_TIME_OUT_MASK | + EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK | + EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK | + EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK; BUILD_BUG_ON(sizeof(aux_msg) > 20); + for (i = 0; i < sizeof(aux_msg); i += 4) + I915_WRITE(EDP_PSR_AUX_DATA(i >> 2), + intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i)); aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0); - /* Enable AUX frame sync at sink */ - if (dev_priv->psr.aux_frame_sync) - drm_dp_dpcd_writeb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, - DP_AUX_FRAME_SYNC_ENABLE); + /* Start with bits set for DDI_AUX_CTL register */ + aux_ctl = intel_dp->get_aux_send_ctl(intel_dp, 0, sizeof(aux_msg), + aux_clock_divider); + + /* Select only valid bits for SRD_AUX_CTL */ + aux_ctl &= psr_aux_mask; + I915_WRITE(EDP_PSR_AUX_CTL, aux_ctl); +} + +static void hsw_psr_enable_sink(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_device *dev = 
dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + u8 dpcd_val = DP_PSR_ENABLE; + /* Enable ALPM at sink for psr2 */ - if (dev_priv->psr.psr2_support && dev_priv->psr.alpm) + if (dev_priv->psr.psr2_enabled && dev_priv->psr.alpm) drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, DP_ALPM_ENABLE); - if (dev_priv->psr.link_standby) - drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, - DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE); - else - drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, - DP_PSR_ENABLE); - - aux_ctl_reg = psr_aux_ctl_reg(dev_priv, port); - /* Setup AUX registers */ - for (i = 0; i < sizeof(aux_msg); i += 4) - I915_WRITE(psr_aux_data_reg(dev_priv, port, i >> 2), - intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i)); + if (dev_priv->psr.psr2_enabled) + dpcd_val |= DP_PSR_ENABLE_PSR2; + if (dev_priv->psr.link_standby) + dpcd_val |= DP_PSR_MAIN_LINK_ACTIVE; + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, dpcd_val); - aux_ctl = intel_dp->get_aux_send_ctl(intel_dp, 0, sizeof(aux_msg), - aux_clock_divider); - I915_WRITE(aux_ctl_reg, aux_ctl); + drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D0); } static void vlv_psr_enable_source(struct intel_dp *intel_dp, @@ -396,25 +502,16 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * with the 5 or 6 idle patterns. */ uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames); - uint32_t val; - uint8_t sink_latency; - - val = idle_frames << EDP_PSR_IDLE_FRAME_SHIFT; + u32 val = idle_frames << EDP_PSR2_IDLE_FRAME_SHIFT; /* FIXME: selective update is probably totally broken because it doesn't * mesh at all with our frontbuffer tracking. And the hw alone isn't * good enough. */ - val |= EDP_PSR2_ENABLE | - EDP_SU_TRACK_ENABLE; + val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + val |= EDP_Y_COORDINATE_ENABLE; - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_SYNCHRONIZATION_LATENCY_IN_SINK, - &sink_latency) == 1) { - sink_latency &= DP_MAX_RESYNC_FRAME_COUNT_MASK; - } else { - sink_latency = 0; - } - val |= EDP_PSR2_FRAME_BEFORE_SU(sink_latency + 1); + val |= EDP_PSR2_FRAME_BEFORE_SU(dev_priv->psr.sink_sync_latency + 1); if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 5) val |= EDP_PSR2_TP2_TIME_2500; @@ -440,7 +537,7 @@ static void hsw_psr_activate(struct intel_dp *intel_dp) */ /* psr1 and psr2 are mutually exclusive.*/ - if (dev_priv->psr.psr2_support) + if (dev_priv->psr.psr2_enabled) hsw_activate_psr2(intel_dp); else hsw_activate_psr1(intel_dp); @@ -460,7 +557,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, * dynamically during PSR enable, and extracted from sink * caps during eDP detection. */ - if (!dev_priv->psr.psr2_support) + if (!dev_priv->psr.sink_psr2_support) return false; if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { @@ -478,15 +575,6 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - /* - * FIXME:enable psr2 only for y-cordinate psr2 panels - * After gtc implementation , remove this restriction. 
- */ - if (!dev_priv->psr.y_cord_support) { - DRM_DEBUG_KMS("PSR2 not enabled, panel does not support Y coordinate\n"); - return false; - } - return true; } @@ -568,7 +656,7 @@ static void intel_psr_activate(struct intel_dp *intel_dp) struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (dev_priv->psr.psr2_support) + if (dev_priv->psr.psr2_enabled) WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); else WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); @@ -586,14 +674,24 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, struct drm_device *dev = dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - u32 chicken; psr_aux_io_power_get(intel_dp); - if (dev_priv->psr.psr2_support) { - chicken = PSR2_VSC_ENABLE_PROG_HEADER; - if (dev_priv->psr.y_cord_support) - chicken |= PSR2_ADD_VERTICAL_LINE_COUNT; + /* Only HSW and BDW have PSR AUX registers that need to be set up. SKL+ + * use hardcoded values for PSR AUX transactions + */ + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + hsw_psr_setup_aux(intel_dp); + + if (dev_priv->psr.psr2_enabled) { + u32 chicken = I915_READ(CHICKEN_TRANS(cpu_transcoder)); + + if (INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) + chicken |= (PSR2_VSC_ENABLE_PROG_HEADER + | PSR2_ADD_VERTICAL_LINE_COUNT); + + else + chicken &= ~VSC_DATA_SEL_SOFTWARE_CONTROL; I915_WRITE(CHICKEN_TRANS(cpu_transcoder), chicken); I915_WRITE(EDP_PSR_DEBUG, @@ -613,7 +711,8 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, I915_WRITE(EDP_PSR_DEBUG, EDP_PSR_DEBUG_MASK_MEMUP | EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP); + EDP_PSR_DEBUG_MASK_LPSP | + EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); } } @@ -644,7 +743,7 @@ void intel_psr_enable(struct intel_dp *intel_dp, goto unlock; } - dev_priv->psr.psr2_support = crtc_state->has_psr2; + dev_priv->psr.psr2_enabled = crtc_state->has_psr2; dev_priv->psr.busy_frontbuffer_bits = 0; dev_priv->psr.setup_vsc(intel_dp, crtc_state); @@ -714,12 +813,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, i915_reg_t psr_status; u32 psr_status_mask; - if (dev_priv->psr.aux_frame_sync) - drm_dp_dpcd_writeb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, - 0); - - if (dev_priv->psr.psr2_support) { + if (dev_priv->psr.psr2_enabled) { psr_status = EDP_PSR2_STATUS; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; @@ -743,7 +837,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, dev_priv->psr.active = false; } else { - if (dev_priv->psr.psr2_support) + if (dev_priv->psr.psr2_enabled) WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); else WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); @@ -789,53 +883,59 @@ void intel_psr_disable(struct intel_dp *intel_dp, cancel_delayed_work_sync(&dev_priv->psr.work); } -static void intel_psr_work(struct work_struct *work) +static bool psr_wait_for_idle(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), psr.work.work); - struct intel_dp *intel_dp = dev_priv->psr.enabled; - struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc; - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct intel_dp *intel_dp; + i915_reg_t reg; + u32 mask; + int err; + + intel_dp = dev_priv->psr.enabled; + if (!intel_dp) + return false; - /* We have to make sure PSR is ready for re-enable * otherwise it keeps disabled until next full enable/disable cycle. 
- * PSR might take some time to get fully disabled - * and be ready for re-enable. - */ if (HAS_DDI(dev_priv)) { - if (dev_priv->psr.psr2_support) { - if (intel_wait_for_register(dev_priv, - EDP_PSR2_STATUS, - EDP_PSR2_STATUS_STATE_MASK, - 0, - 50)) { - DRM_ERROR("Timed out waiting for PSR2 Idle for re-enable\n"); - return; - } + if (dev_priv->psr.psr2_enabled) { + reg = EDP_PSR2_STATUS; + mask = EDP_PSR2_STATUS_STATE_MASK; } else { - if (intel_wait_for_register(dev_priv, - EDP_PSR_STATUS, - EDP_PSR_STATUS_STATE_MASK, - 0, - 50)) { - DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); - return; - } + reg = EDP_PSR_STATUS; + mask = EDP_PSR_STATUS_STATE_MASK; } } else { - if (intel_wait_for_register(dev_priv, - VLV_PSRSTAT(pipe), - VLV_EDP_PSR_IN_TRANS, - 0, - 1)) { - DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); - return; - } + struct drm_crtc *crtc = + dp_to_dig_port(intel_dp)->base.base.crtc; + enum pipe pipe = to_intel_crtc(crtc)->pipe; + + reg = VLV_PSRSTAT(pipe); + mask = VLV_EDP_PSR_IN_TRANS; } + + mutex_unlock(&dev_priv->psr.lock); + + err = intel_wait_for_register(dev_priv, reg, mask, 0, 50); + if (err) + DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); + + /* After the unlocked wait, verify that PSR is still wanted! */ mutex_lock(&dev_priv->psr.lock); - intel_dp = dev_priv->psr.enabled; + return err == 0 && dev_priv->psr.enabled; +} - if (!intel_dp) +static void intel_psr_work(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), psr.work.work); + + mutex_lock(&dev_priv->psr.lock); + + /* + * We have to make sure PSR is ready for re-enable + * otherwise it keeps disabled until next full enable/disable cycle. + * PSR might take some time to get fully disabled + * and be ready for re-enable. + */ + if (!psr_wait_for_idle(dev_priv)) goto unlock; /* @@ -846,7 +946,7 @@ static void intel_psr_work(struct work_struct *work) if (dev_priv->psr.busy_frontbuffer_bits) goto unlock; - intel_psr_activate(intel_dp); + intel_psr_activate(dev_priv->psr.enabled); unlock: mutex_unlock(&dev_priv->psr.lock); } @@ -862,11 +962,7 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) return; if (HAS_DDI(dev_priv)) { - if (dev_priv->psr.aux_frame_sync) - drm_dp_dpcd_writeb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, - 0); - if (dev_priv->psr.psr2_support) { + if (dev_priv->psr.psr2_enabled) { val = I915_READ(EDP_PSR2_CTL); WARN_ON(!(val & EDP_PSR2_ENABLE)); I915_WRITE(EDP_PSR2_CTL, val & ~EDP_PSR2_ENABLE); @@ -957,6 +1053,7 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, * intel_psr_invalidate - Invalidade PSR * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits + * @origin: which operation caused the invalidate * * Since the hardware frontbuffer tracking has gaps we need to integrate * with the software frontbuffer tracking. This function gets called every @@ -966,7 +1063,7 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits." 
*/ void intel_psr_invalidate(struct drm_i915_private *dev_priv, - unsigned frontbuffer_bits) + unsigned frontbuffer_bits, enum fb_op_origin origin) { struct drm_crtc *crtc; enum pipe pipe; @@ -974,6 +1071,9 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv, if (!CAN_PSR(dev_priv)) return; + if (dev_priv->psr.has_hw_tracking && origin == ORIGIN_FLIP) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); @@ -1014,6 +1114,9 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, if (!CAN_PSR(dev_priv)) return; + if (dev_priv->psr.has_hw_tracking && origin == ORIGIN_FLIP) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); @@ -1027,8 +1130,23 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, dev_priv->psr.busy_frontbuffer_bits &= ~frontbuffer_bits; /* By definition flush = invalidate + flush */ - if (frontbuffer_bits) - intel_psr_exit(dev_priv); + if (frontbuffer_bits) { + if (dev_priv->psr.psr2_enabled || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + intel_psr_exit(dev_priv); + } else { + /* + * Display WA #0884: all + * This documented WA for bxt can be safely applied + * broadly so we can force HW tracking to exit PSR + * instead of disabling and re-enabling. + * Workaround tells us to write 0 to CUR_SURFLIVE_A, + * but it makes more sense to write to the current active + * pipe. + */ + I915_WRITE(CURSURFLIVE(pipe), 0); + } + } if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits) if (!work_busy(&dev_priv->psr.work.work)) @@ -1055,9 +1173,12 @@ void intel_psr_init(struct drm_i915_private *dev_priv) if (!dev_priv->psr.sink_support) return; - /* Per platform default: all disabled. */ - if (i915_modparams.enable_psr == -1) + if (i915_modparams.enable_psr == -1) { + i915_modparams.enable_psr = dev_priv->vbt.psr.enable; + + /* Per platform default: all disabled. */ i915_modparams.enable_psr = 0; + } /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) @@ -1090,6 +1211,7 @@ void intel_psr_init(struct drm_i915_private *dev_priv) dev_priv->psr.activate = vlv_psr_activate; dev_priv->psr.setup_vsc = vlv_psr_setup_vsc; } else { + dev_priv->psr.has_hw_tracking = true; dev_priv->psr.enable_source = hsw_psr_enable_source; dev_priv->psr.disable_source = hsw_psr_disable; dev_priv->psr.enable_sink = hsw_psr_enable_sink; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1d599524a759..8f19349a6055 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -36,6 +36,7 @@ #include "i915_gem_render_state.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_workarounds.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch.
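An editorial aside before the ringbuffer changes: the PSR hooks above now take an origin argument. A minimal, hypothetical caller sketch (the function name example_frontbuffer_cpu_write and the surrounding flow are illustrative, not from the patch) of how the invalidate/flush pair is expected to bracket CPU frontbuffer writes:

static void example_frontbuffer_cpu_write(struct drm_i915_private *i915,
					  unsigned frontbuffer_bits)
{
	/* PSR must exit before the CPU scribbles on the frontbuffer... */
	intel_psr_invalidate(i915, frontbuffer_bits, ORIGIN_CPU);

	/* ...CPU writes to the frontbuffer happen here... */

	/* ...and PSR may be re-armed once the writes have landed. */
	intel_psr_flush(i915, frontbuffer_bits, ORIGIN_CPU);
}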
@@ -557,7 +558,8 @@ static void reset_ring_common(struct intel_engine_cs *engine, */ if (request) { struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(request->ctx, + engine); struct i915_hw_ppgtt *ppgtt; if (ce->state) { @@ -599,7 +601,7 @@ static int intel_rcs_ctx_init(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(rq); + ret = intel_ctx_workarounds_emit(rq); if (ret != 0) return ret; @@ -617,6 +619,8 @@ static int init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; + intel_whitelist_workarounds_apply(engine); + /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (IS_GEN(dev_priv, 4, 6)) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); @@ -658,7 +662,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 6) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); - return init_workarounds_ring(engine); + return 0; } static u32 *gen6_signal(struct i915_request *rq, u32 *cs) @@ -693,17 +697,17 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline->requests, link) { + list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); if (!i915_request_completed(request)) dma_fence_set_error(&request->fence, -EIO); } /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -1062,7 +1066,6 @@ err: void intel_ring_reset(struct intel_ring *ring, u32 tail) { - GEM_BUG_ON(!list_empty(&ring->request_list)); ring->tail = tail; ring->head = tail; ring->emit = tail; @@ -1114,19 +1117,24 @@ err: } struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size) +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size) { struct intel_ring *ring; struct i915_vma *vma; GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + GEM_BUG_ON(timeline == &engine->timeline); + lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ring->request_list); + ring->timeline = i915_timeline_get(timeline); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1157,12 +1165,13 @@ intel_ring_free(struct intel_ring *ring) i915_vma_close(ring->vma); __i915_gem_object_release_unless_active(obj); + i915_timeline_put(ring->timeline); kfree(ring); } -static int context_pin(struct i915_gem_context *ctx) +static int context_pin(struct intel_context *ce) { - struct i915_vma *vma = ctx->engine[RCS].state; + struct i915_vma *vma = ce->state; int ret; /* @@ -1253,7 +1262,7 @@ static struct intel_ring * intel_ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -1275,7 +1284,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine, } if 
(ce->state) { - ret = context_pin(ctx); + ret = context_pin(ce); if (ret) goto err; @@ -1296,7 +1305,7 @@ err: static void intel_ring_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1315,6 +1324,7 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct intel_ring *ring; + struct i915_timeline *timeline; int err; intel_engine_setup_common(engine); @@ -1323,7 +1333,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + timeline = i915_timeline_create(engine->i915, engine->name); + if (IS_ERR(timeline)) { + err = PTR_ERR(timeline); + goto err; + } + + ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); + i915_timeline_put(timeline); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err; @@ -1424,7 +1441,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags; + *cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1515,7 +1532,7 @@ static int switch_context(struct i915_request *rq) hw_flags = MI_FORCE_RESTORE; } - if (to_ctx->engine[engine->id].state && + if (to_intel_context(to_ctx, engine)->state && (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { GEM_BUG_ON(engine->id != RCS); @@ -1563,7 +1580,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); + GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just @@ -1593,6 +1610,7 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) if (intel_ring_update_space(ring) >= bytes) return 0; + GEM_BUG_ON(list_empty(&ring->request_list)); list_for_each_entry(target, &ring->request_list, ring_link) { /* Would completion of this request free enough space? 
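 * (Editorial note, not part of the patch: completing a request frees the ring
 * space up to that request's postfix, which is why the loop below compares
 * the space reclaimable at target->postfix against the number of bytes
 * needed.)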
*/ if (bytes <= __intel_ring_space(target->postfix, @@ -1692,17 +1710,18 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) need_wrap &= ~1; GEM_BUG_ON(need_wrap > ring->space); GEM_BUG_ON(ring->emit + need_wrap > ring->size); + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); /* Fill the tail with MI_NOOP */ - memset(ring->vaddr + ring->emit, 0, need_wrap); - ring->emit = 0; + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); ring->space -= need_wrap; + ring->emit = 0; } GEM_BUG_ON(ring->emit > ring->size - bytes); GEM_BUG_ON(ring->space < bytes); cs = ring->vaddr + ring->emit; - GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes)); + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); ring->emit += bytes; ring->space -= bytes; @@ -1712,22 +1731,24 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct i915_request *rq) { - int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - u32 *cs; + int num_dwords; + void *cs; + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); if (num_dwords == 0) return 0; - num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords; + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + cs = intel_ring_begin(rq, num_dwords); if (IS_ERR(cs)) return PTR_ERR(cs); - while (num_dwords--) - *cs++ = MI_NOOP; - + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); intel_ring_advance(rq, cs); + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); return 0; } @@ -1943,8 +1964,6 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, static void intel_ring_init_irq(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; - if (INTEL_GEN(dev_priv) >= 6) { engine->irq_enable = gen6_irq_enable; engine->irq_disable = gen6_irq_disable; @@ -2029,6 +2048,8 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; + if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; @@ -2079,7 +2100,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; } else { - engine->mmio_base = BSD_RING_BASE; engine->emit_flush = bsd_ring_flush; if (IS_GEN5(dev_priv)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0320c2c4cfba..010750e8ee44 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,15 +3,19 @@ #define _INTEL_RINGBUFFER_H_ #include <linux/hashtable.h> +#include <linux/seqlock.h> #include "i915_gem_batch_pool.h" -#include "i915_gem_timeline.h" +#include "i915_reg.h" #include "i915_pmu.h" #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" +#include "intel_gpu_commands.h" struct drm_printer; +struct i915_sched_attr; #define I915_CMD_HASH_ORDER 9 @@ -84,7 +88,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) } #define I915_MAX_SLICES 3 -#define I915_MAX_SUBSLICES 3 +#define I915_MAX_SUBSLICES 8 #define instdone_slice_mask(dev_priv__) \ (INTEL_GEN(dev_priv__) 
== 7 ? \ @@ -125,7 +129,9 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; + struct i915_timeline *timeline; struct list_head request_list; + struct list_head active_link; u32 head; u32 tail; @@ -330,10 +336,10 @@ struct intel_engine_cs { u8 instance; u32 context_size; u32 mmio_base; - unsigned int irq_shift; struct intel_ring *buffer; - struct intel_timeline *timeline; + + struct i915_timeline timeline; struct drm_i915_gem_object *default_state; @@ -459,7 +465,8 @@ struct intel_engine_cs { * * Called under the struct_mutex. */ - void (*schedule)(struct i915_request *request, int priority); + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); /* * Cancel all requests on the hardware, or queued for execution. @@ -561,6 +568,7 @@ struct intel_engine_cs { #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) +#define I915_ENGINE_HAS_PREEMPTION BIT(2) unsigned int flags; /* @@ -591,7 +599,7 @@ struct intel_engine_cs { /** * @lock: Lock protecting the below fields. */ - spinlock_t lock; + seqlock_t lock; /** * @enabled: Reference count indicating number of listeners. */ @@ -620,16 +628,29 @@ struct intel_engine_cs { } stats; }; -static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) +static inline bool +intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) { return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; } -static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine) +static inline bool +intel_engine_supports_stats(const struct intel_engine_cs *engine) { return engine->flags & I915_ENGINE_SUPPORTS_STATS; } +static inline bool +intel_engine_has_preemption(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_PREEMPTION; +} + +static inline bool __execlists_need_preempt(int prio, int last) +{ + return prio > max(0, last); +} + static inline void execlists_set_active(struct intel_engine_execlists *execlists, unsigned int bit) @@ -637,6 +658,13 @@ execlists_set_active(struct intel_engine_execlists *execlists, __set_bit(bit, (unsigned long *)&execlists->active); } +static inline bool +execlists_set_active_once(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return !__test_and_set_bit(bit, (unsigned long *)&execlists->active); +} + static inline void execlists_clear_active(struct intel_engine_execlists *execlists, unsigned int bit) @@ -651,6 +679,10 @@ execlists_is_active(const struct intel_engine_execlists *execlists, return test_bit(bit, (unsigned long *)&execlists->active); } +void execlists_user_begin(struct intel_engine_execlists *execlists, + const struct execlist_port *port); +void execlists_user_end(struct intel_engine_execlists *execlists); + void execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); @@ -663,7 +695,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) return execlists->port_mask + 1; } -static inline void +static inline struct execlist_port * execlists_port_complete(struct intel_engine_execlists * const execlists, struct execlist_port * const port) { @@ -674,6 +706,8 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, memmove(port, port + 1, m * sizeof(struct execlist_port)); memset(port + m, 0, sizeof(struct execlist_port)); + + return port; } static inline unsigned int @@ -736,7 +770,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define CNL_HWS_CSB_WRITE_INDEX 0x2f struct 
intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size); int intel_ring_pin(struct intel_ring *ring, struct drm_i915_private *i915, unsigned int offset_bias); @@ -854,12 +890,9 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * with serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->seqno); + return READ_ONCE(engine->timeline.seqno); } -int init_workarounds_ring(struct intel_engine_cs *engine); -int intel_ring_workarounds_emit(struct i915_request *rq); - void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); @@ -939,7 +972,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); -void intel_engine_enable_signaling(struct i915_request *request, bool wakeup); +bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); void intel_engine_cancel_signaling(struct i915_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) @@ -1037,7 +1070,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) if (READ_ONCE(engine->stats.enabled) == 0) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled > 0) { if (engine->stats.active++ == 0) @@ -1045,7 +1078,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) GEM_BUG_ON(engine->stats.active == 0); } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } static inline void intel_engine_context_out(struct intel_engine_cs *engine) @@ -1055,7 +1088,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) if (READ_ONCE(engine->stats.enabled) == 0) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled > 0) { ktime_t last; @@ -1082,7 +1115,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) } } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } int intel_enable_engine_stats(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 66de4b2dc8b7..53a6eaa9671a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -542,6 +542,29 @@ void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) dev_priv->csr.dc_state = val; } +/** + * gen9_set_dc_state - set target display C power state + * @dev_priv: i915 device instance + * @state: target DC power state + * - DC_STATE_DISABLE + * - DC_STATE_EN_UPTO_DC5 + * - DC_STATE_EN_UPTO_DC6 + * - DC_STATE_EN_DC9 + * + * Signal to DMC firmware/HW the target DC power state passed in @state. + * DMC/HW can turn off individual display clocks and power rails when entering + * a deeper DC power state (higher in number) and turns these back on when exiting + * that state to a shallower power state (lower in number). The HW will decide + * when to actually enter a given state on an on-demand basis, for instance + * depending on the active state of display pipes.
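 *
 * (Editorial sketch, not part of the patch: a typical caller pairs the two
 * directions as
 *
 *	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
 *	... the HW may or may not actually be in DC6 at any given time ...
 *	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 *
 * and, as described below, only the disabling call is guaranteed to have
 * fully taken effect by the time it returns.)
 *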
The state of display + * registers backed by affected power rails is saved/restored as needed. + * + * Based on the above, enabling a deeper DC power state is asynchronous wrt. + * the HW actually entering it. Disabling a deeper power state is synchronous: for instance + * setting %DC_STATE_DISABLE won't complete until all HW resources are turned + * back on and register state is restored. This is guaranteed by the MMIO write + * to DC_STATE_EN blocking until the state is restored. + */ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state) { uint32_t val; @@ -635,7 +658,7 @@ static void assert_can_enable_dc6(struct drm_i915_private *dev_priv) assert_csr_loaded(dev_priv); } -void skl_enable_dc6(struct drm_i915_private *dev_priv) +static void skl_enable_dc6(struct drm_i915_private *dev_priv) { assert_can_enable_dc6(dev_priv); @@ -649,13 +672,6 @@ void skl_enable_dc6(struct drm_i915_private *dev_priv) gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); } -void skl_disable_dc6(struct drm_i915_private *dev_priv) -{ - DRM_DEBUG_KMS("Disabling DC6\n"); - - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); -} - static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -2626,32 +2642,69 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } -static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +static inline +bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, + i915_reg_t reg, bool enable) { - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST); - POSTING_READ(DBUF_CTL); + u32 val, status; + val = I915_READ(reg); + val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST); + I915_WRITE(reg, val); + POSTING_READ(reg); udelay(10); - if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE)) - DRM_ERROR("DBuf power enable timeout\n"); + status = I915_READ(reg) & DBUF_POWER_STATE; + if ((enable && !status) || (!enable && status)) { + DRM_ERROR("DBuf power %s timeout!\n", + enable ? "enable" : "disable"); + return false; + } + return true; +} + +static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +{ + intel_dbuf_slice_set(dev_priv, DBUF_CTL, true); } static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) { - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST); - POSTING_READ(DBUF_CTL); + intel_dbuf_slice_set(dev_priv, DBUF_CTL, false); +} - udelay(10); +static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) < 11) + return 1; + return 2; +} - if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE) - DRM_ERROR("DBuf power disable timeout!\n"); +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices) +{ + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; + u32 val; + bool ret; + + if (req_slices > intel_dbuf_max_slices(dev_priv)) { + DRM_ERROR("Invalid number of dbuf slices requested\n"); + return; + } + + if (req_slices == hw_enabled_slices || req_slices == 0) + return; + + val = I915_READ(DBUF_CTL_S2); + if (req_slices > hw_enabled_slices) + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true); + else + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false); + + if (ret) + dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices; } -/* - * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when - * needed and keep it disabled as much as possible.
- */ static void icl_dbuf_enable(struct drm_i915_private *dev_priv) { I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST); @@ -2663,6 +2716,8 @@ static void icl_dbuf_enable(struct drm_i915_private *dev_priv) if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) DRM_ERROR("DBuf power enable timeout\n"); + else + dev_priv->wm.skl_hw.ddb.enabled_slices = 2; } static void icl_dbuf_disable(struct drm_i915_private *dev_priv) @@ -2676,6 +2731,8 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv) if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) DRM_ERROR("DBuf power disable timeout!\n"); + else + dev_priv->wm.skl_hw.ddb.enabled_slices = 0; } static void icl_mbus_init(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 96e213ec202d..25005023c243 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2779,9 +2779,8 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, return false; for (i = 0; i < intel_sdvo_connector->format_supported_num; i++) - drm_property_add_enum( - intel_sdvo_connector->tv_format, i, - i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); + drm_property_add_enum(intel_sdvo_connector->tv_format, i, + tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; drm_object_attach_property(&intel_sdvo_connector->base.base.base, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index dbdcf85032df..ee23613f9fd4 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -48,6 +48,7 @@ bool intel_format_is_yuv(u32 format) case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: case DRM_FORMAT_YVYU: + case DRM_FORMAT_NV12: return true; default: return false; @@ -130,7 +131,7 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) if (scanline < min || scanline > max) break; - if (timeout <= 0) { + if (!timeout) { DRM_ERROR("Potential atomic update failure on pipe %c\n", pipe_name(crtc->pipe)); break; @@ -935,20 +936,11 @@ intel_check_sprite_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = state->base.fb; - int crtc_x, crtc_y; - unsigned int crtc_w, crtc_h; - uint32_t src_x, src_y, src_w, src_h; - struct drm_rect *src = &state->base.src; - struct drm_rect *dst = &state->base.dst; - struct drm_rect clip = {}; int max_stride = INTEL_GEN(dev_priv) >= 9 ? 
32768 : 16384; - int hscale, vscale; int max_scale, min_scale; bool can_scale; int ret; - - *src = drm_plane_state_src(&state->base); - *dst = drm_plane_state_dest(&state->base); + uint32_t pixel_format = 0; if (!fb) { state->base.visible = false; @@ -969,11 +961,14 @@ intel_check_sprite_plane(struct intel_plane *plane, /* setup can_scale, min_scale, max_scale */ if (INTEL_GEN(dev_priv) >= 9) { + if (state->base.fb) + pixel_format = state->base.fb->format->format; /* use scaler when colorkey is not required */ if (!state->ckey.flags) { can_scale = 1; min_scale = 1; - max_scale = skl_max_scale(crtc, crtc_state); + max_scale = + skl_max_scale(crtc, crtc_state, pixel_format); } else { can_scale = 0; min_scale = DRM_PLANE_HELPER_NO_SCALING; @@ -985,64 +980,19 @@ intel_check_sprite_plane(struct intel_plane *plane, min_scale = plane->can_scale ? 1 : (1 << 16); } - /* - * FIXME the following code does a bunch of fuzzy adjustments to the - * coordinates and sizes. We probably need some way to decide whether - * more strict checking should be done instead. - */ - drm_rect_rotate(src, fb->width << 16, fb->height << 16, - state->base.rotation); - - hscale = drm_rect_calc_hscale_relaxed(src, dst, min_scale, max_scale); - BUG_ON(hscale < 0); - - vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale); - BUG_ON(vscale < 0); - - if (crtc_state->base.enable) - drm_mode_get_hv_timing(&crtc_state->base.mode, - &clip.x2, &clip.y2); - - state->base.visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale); - - crtc_x = dst->x1; - crtc_y = dst->y1; - crtc_w = drm_rect_width(dst); - crtc_h = drm_rect_height(dst); + ret = drm_atomic_helper_check_plane_state(&state->base, + &crtc_state->base, + min_scale, max_scale, + true, true); + if (ret) + return ret; if (state->base.visible) { - /* check again in case clipping clamped the results */ - hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); - if (hscale < 0) { - DRM_DEBUG_KMS("Horizontal scaling factor out of limits\n"); - drm_rect_debug_print("src: ", src, true); - drm_rect_debug_print("dst: ", dst, false); - - return hscale; - } - - vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale); - if (vscale < 0) { - DRM_DEBUG_KMS("Vertical scaling factor out of limits\n"); - drm_rect_debug_print("src: ", src, true); - drm_rect_debug_print("dst: ", dst, false); - - return vscale; - } - - /* Make the source viewport size an exact multiple of the scaling factors. */ - drm_rect_adjust_size(src, - drm_rect_width(dst) * hscale - drm_rect_width(src), - drm_rect_height(dst) * vscale - drm_rect_height(src)); - - drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, - state->base.rotation); - - /* sanity check to make sure the src viewport wasn't enlarged */ - WARN_ON(src->x1 < (int) state->base.src_x || - src->y1 < (int) state->base.src_y || - src->x2 > (int) state->base.src_x + state->base.src_w || - src->y2 > (int) state->base.src_y + state->base.src_h); + struct drm_rect *src = &state->base.src; + struct drm_rect *dst = &state->base.dst; + unsigned int crtc_w = drm_rect_width(dst); + unsigned int crtc_h = drm_rect_height(dst); + uint32_t src_x, src_y, src_w, src_h; /* * Hardware doesn't handle subpixel coordinates. @@ -1055,58 +1005,40 @@ intel_check_sprite_plane(struct intel_plane *plane, src_y = src->y1 >> 16; src_h = drm_rect_height(src) >> 16; - if (intel_format_is_yuv(fb->format->format)) { - src_x &= ~1; - src_w &= ~1; - - /* - * Must keep src and dst the - * same if we can't scale. 
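An editorial note on the conversion above, not part of the patch: the min_scale/max_scale arguments to drm_atomic_helper_check_plane_state() are 16.16 fixed-point source-to-destination ratios. DRM_PLANE_HELPER_NO_SCALING is 1 << 16, i.e. exactly 1:1, so min_scale = 1 effectively permits unbounded upscaling. For example, a 1920-wide source scanned out onto a 960-wide destination needs a horizontal scaling factor of 2 << 16, which must fall within [min_scale, max_scale].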
- */ - if (!can_scale) - crtc_w &= ~1; + src->x1 = src_x << 16; + src->x2 = (src_x + src_w) << 16; + src->y1 = src_y << 16; + src->y2 = (src_y + src_h) << 16; - if (crtc_w == 0) - state->base.visible = false; + if (intel_format_is_yuv(fb->format->format) && + fb->format->format != DRM_FORMAT_NV12 && + (src_x % 2 || src_w % 2)) { + DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n", + src_x, src_w); + return -EINVAL; } - } - - /* Check size restrictions when scaling */ - if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) { - unsigned int width_bytes; - int cpp = fb->format->cpp[0]; - WARN_ON(!can_scale); + /* Check size restrictions when scaling */ + if (src_w != crtc_w || src_h != crtc_h) { + unsigned int width_bytes; + int cpp = fb->format->cpp[0]; - /* FIXME interlacing min height is 6 */ + WARN_ON(!can_scale); - if (crtc_w < 3 || crtc_h < 3) - state->base.visible = false; + width_bytes = ((src_x * cpp) & 63) + src_w * cpp; - if (src_w < 3 || src_h < 3) - state->base.visible = false; - - width_bytes = ((src_x * cpp) & 63) + src_w * cpp; - - if (INTEL_GEN(dev_priv) < 9 && (src_w > 2048 || src_h > 2048 || - width_bytes > 4096 || fb->pitches[0] > 4096)) { - DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n"); - return -EINVAL; + /* FIXME interlacing min height is 6 */ + if (INTEL_GEN(dev_priv) < 9 && ( + src_w < 3 || src_h < 3 || + src_w > 2048 || src_h > 2048 || + crtc_w < 3 || crtc_h < 3 || + width_bytes > 4096 || fb->pitches[0] > 4096)) { + DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n"); + return -EINVAL; + } } } - if (state->base.visible) { - src->x1 = src_x << 16; - src->x2 = (src_x + src_w) << 16; - src->y1 = src_y << 16; - src->y2 = (src_y + src_h) << 16; - } - - dst->x1 = crtc_x; - dst->x2 = crtc_x + crtc_w; - dst->y1 = crtc_y; - dst->y2 = crtc_y + crtc_h; - if (INTEL_GEN(dev_priv) >= 9) { ret = skl_check_plane_surface(crtc_state, state); if (ret) @@ -1248,6 +1180,19 @@ static uint32_t skl_plane_formats[] = { DRM_FORMAT_VYUY, }; +static uint32_t skl_planar_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_NV12, +}; + static const uint64_t skl_plane_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, @@ -1342,6 +1287,7 @@ static bool skl_mod_supported(uint32_t format, uint64_t modifier) case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; /* fall through */ @@ -1441,8 +1387,14 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->disable_plane = skl_disable_plane; intel_plane->get_hw_state = skl_plane_get_hw_state; - plane_formats = skl_plane_formats; - num_plane_formats = ARRAY_SIZE(skl_plane_formats); + if (skl_plane_has_planar(dev_priv, pipe, + PLANE_SPRITE0 + plane)) { + plane_formats = skl_planar_formats; + num_plane_formats = ARRAY_SIZE(skl_planar_formats); + } else { + plane_formats = skl_plane_formats; + num_plane_formats = ARRAY_SIZE(skl_plane_formats); + } if (skl_plane_has_ccs(dev_priv, pipe, PLANE_SPRITE0 + plane)) modifiers = skl_plane_format_modifiers_ccs; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index e5bf0d37bf43..1cffaf7b5dbe 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -69,13 +69,15 @@ static int __get_platform_enable_guc(struct 
drm_i915_private *dev_priv) static int __get_default_guc_log_level(struct drm_i915_private *dev_priv) { - int guc_log_level = 0; /* disabled */ + int guc_log_level; - /* Enable if we're running on platform with GuC and debug config */ - if (HAS_GUC(dev_priv) && intel_uc_is_using_guc() && - (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || - IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))) - guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX; + if (!HAS_GUC(dev_priv) || !intel_uc_is_using_guc()) + guc_log_level = GUC_LOG_LEVEL_DISABLED; + else if (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || + IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + guc_log_level = GUC_LOG_LEVEL_MAX; + else + guc_log_level = GUC_LOG_LEVEL_NON_VERBOSE; /* Any platform specific fine-tuning can be done here */ @@ -83,7 +85,7 @@ static int __get_default_guc_log_level(struct drm_i915_private *dev_priv) } /** - * intel_uc_sanitize_options - sanitize uC related modparam options + * sanitize_options_early - sanitize uC related modparam options * @dev_priv: device private * * In case of "enable_guc" option this function will attempt to modify @@ -99,7 +101,7 @@ static int __get_default_guc_log_level(struct drm_i915_private *dev_priv) * unless GuC is enabled on given platform and the driver is compiled with * debug config when this modparam will default to "enable(1..4)". */ -void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) +static void sanitize_options_early(struct drm_i915_private *dev_priv) { struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; @@ -142,51 +144,53 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) i915_modparams.guc_log_level = 0; } - if (i915_modparams.guc_log_level > 1 + GUC_LOG_VERBOSITY_MAX) { + if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) { DRM_WARN("Incompatible option detected: %s=%d, %s!\n", "guc_log_level", i915_modparams.guc_log_level, "verbosity too high"); - i915_modparams.guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX; + i915_modparams.guc_log_level = GUC_LOG_LEVEL_MAX; } - DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s verbosity:%d)\n", + DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s, verbose:%s, verbosity:%d)\n", i915_modparams.guc_log_level, yesno(i915_modparams.guc_log_level), - i915_modparams.guc_log_level - 1); + yesno(GUC_LOG_LEVEL_IS_VERBOSE(i915_modparams.guc_log_level)), + GUC_LOG_LEVEL_TO_VERBOSITY(i915_modparams.guc_log_level)); /* Make sure that sanitization was done */ GEM_BUG_ON(i915_modparams.enable_guc < 0); GEM_BUG_ON(i915_modparams.guc_log_level < 0); } -void intel_uc_init_early(struct drm_i915_private *dev_priv) +void intel_uc_init_early(struct drm_i915_private *i915) { - intel_guc_init_early(&dev_priv->guc); - intel_huc_init_early(&dev_priv->huc); -} + struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; -void intel_uc_init_fw(struct drm_i915_private *dev_priv) -{ - if (!USES_GUC(dev_priv)) - return; + intel_guc_init_early(guc); + intel_huc_init_early(huc); - if (USES_HUC(dev_priv)) - intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + sanitize_options_early(i915); - intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); + if (USES_GUC(i915)) + intel_uc_fw_fetch(i915, &guc->fw); + + if (USES_HUC(i915)) + intel_uc_fw_fetch(i915, &huc->fw); } -void intel_uc_fini_fw(struct drm_i915_private *dev_priv) +void intel_uc_cleanup_early(struct drm_i915_private *i915) { - if (!USES_GUC(dev_priv)) - return; + struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; - intel_uc_fw_fini(&dev_priv->guc.fw); + if 
(USES_HUC(i915)) + intel_uc_fw_fini(&huc->fw); - if (USES_HUC(dev_priv)) - intel_uc_fw_fini(&dev_priv->huc.fw); + if (USES_GUC(i915)) + intel_uc_fw_fini(&guc->fw); - guc_free_load_err_log(&dev_priv->guc); + guc_free_load_err_log(guc); } /** @@ -223,10 +227,13 @@ static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); + gen9_enable_guc_interrupts(dev_priv); + if (HAS_GUC_CT(dev_priv)) - return intel_guc_enable_ct(guc); + return intel_guc_ct_enable(&guc->ct); guc->send = intel_guc_send_mmio; + guc->handler = intel_guc_to_host_event_handler_mmio; return 0; } @@ -235,9 +242,12 @@ static void guc_disable_communication(struct intel_guc *guc) struct drm_i915_private *dev_priv = guc_to_i915(guc); if (HAS_GUC_CT(dev_priv)) - intel_guc_disable_ct(guc); + intel_guc_ct_disable(&guc->ct); + + gen9_disable_guc_interrupts(dev_priv); guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; } int intel_uc_init_misc(struct drm_i915_private *dev_priv) @@ -248,24 +258,13 @@ int intel_uc_init_misc(struct drm_i915_private *dev_priv) if (!USES_GUC(dev_priv)) return 0; - ret = intel_guc_init_wq(guc); - if (ret) { - DRM_ERROR("Couldn't allocate workqueues for GuC\n"); - goto err; - } + intel_guc_init_ggtt_pin_bias(guc); - ret = intel_guc_log_relay_create(guc); - if (ret) { - DRM_ERROR("Couldn't allocate relay for GuC log\n"); - goto err_relay; - } + ret = intel_guc_init_wq(guc); + if (ret) + return ret; return 0; - -err_relay: - intel_guc_fini_wq(guc); -err: - return ret; } void intel_uc_fini_misc(struct drm_i915_private *dev_priv) @@ -276,8 +275,6 @@ void intel_uc_fini_misc(struct drm_i915_private *dev_priv) return; intel_guc_fini_wq(guc); - - intel_guc_log_relay_destroy(guc); } int intel_uc_init(struct drm_i915_private *dev_priv) @@ -325,6 +322,24 @@ void intel_uc_fini(struct drm_i915_private *dev_priv) intel_guc_fini(guc); } +void intel_uc_sanitize(struct drm_i915_private *i915) +{ + struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; + + if (!USES_GUC(i915)) + return; + + GEM_BUG_ON(!HAS_GUC(i915)); + + guc_disable_communication(guc); + + intel_huc_sanitize(huc); + intel_guc_sanitize(guc); + + __intel_uc_reset_hw(i915); +} + int intel_uc_init_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -336,14 +351,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) GEM_BUG_ON(!HAS_GUC(dev_priv)); - guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); - /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); - I915_WRITE(DMA_GUC_WOPCM_OFFSET, - GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC); - /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ if (IS_GEN9(dev_priv)) @@ -390,12 +399,9 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) } if (USES_GUC_SUBMISSION(dev_priv)) { - if (i915_modparams.guc_log_level) - gen9_enable_guc_interrupts(dev_priv); - ret = intel_guc_submission_enable(guc); if (ret) - goto err_interrupts; + goto err_communication; } dev_info(dev_priv->drm.dev, "GuC firmware version %u.%u\n", @@ -410,8 +416,6 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* * We've failed to load the firmware :( */ -err_interrupts: - gen9_disable_guc_interrupts(dev_priv); err_communication: guc_disable_communication(guc); err_log_capture: @@ -441,9 +445,6 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) intel_guc_submission_disable(guc); 
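	/*
	 * Editorial note, not part of the patch: the explicit
	 * gen9_disable_guc_interrupts() call removed just below is no longer
	 * needed because guc_disable_communication() now disables the GuC
	 * interrupts itself, mirroring the enable path.
	 */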
guc_disable_communication(guc); - - if (USES_GUC_SUBMISSION(dev_priv)) - gen9_disable_guc_interrupts(dev_priv); } int intel_uc_suspend(struct drm_i915_private *i915) @@ -479,8 +480,7 @@ int intel_uc_resume(struct drm_i915_private *i915) if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) return 0; - if (i915_modparams.guc_log_level) - gen9_enable_guc_interrupts(i915); + gen9_enable_guc_interrupts(i915); err = intel_guc_resume(guc); if (err) { diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index f76d51d1ce70..25d73ada74ae 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -28,13 +28,12 @@ #include "intel_huc.h" #include "i915_params.h" -void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); +void intel_uc_cleanup_early(struct drm_i915_private *dev_priv); void intel_uc_init_mmio(struct drm_i915_private *dev_priv); -void intel_uc_init_fw(struct drm_i915_private *dev_priv); -void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_misc(struct drm_i915_private *dev_priv); void intel_uc_fini_misc(struct drm_i915_private *dev_priv); +void intel_uc_sanitize(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_uc_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index 3ec0ce505b76..6e8e0b546743 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -95,15 +95,6 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); - /* Header and uCode will be loaded to WOPCM */ - size = uc_fw->header_size + uc_fw->ucode_size; - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_WARN("%s: Firmware is too large to fit in WOPCM\n", - intel_uc_fw_type_repr(uc_fw->type)); - err = -E2BIG; - goto fail; - } - /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_COUNT) { DRM_WARN("%s: Mismatched firmware RSA key size (%u)\n", @@ -209,6 +200,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct i915_vma *vma)) { struct i915_vma *vma; + u32 ggtt_pin_bias; int err; DRM_DEBUG_DRIVER("%s fw load %s\n", @@ -230,8 +222,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, goto fail; } + ggtt_pin_bias = to_i915(uc_fw->obj->base.dev)->guc.ggtt_pin_bias; vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + PIN_OFFSET_BIAS | ggtt_pin_bias); if (IS_ERR(vma)) { err = PTR_ERR(vma); DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n", diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index d5fd4609c785..87910aa83267 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -30,7 +30,7 @@ struct drm_i915_private; struct i915_vma; /* Home of GuC, HuC and DMC firmwares */ -#define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" +#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" enum intel_uc_fw_status { INTEL_UC_FIRMWARE_FAIL = -1, @@ -115,6 +115,28 @@ static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw) return uc_fw->path != NULL; } +static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw) +{ + if 
(uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS) + uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; +} + +/** + * intel_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. + * @uc_fw: uC firmware. + * + * Get the size of the firmware and header that will be uploaded to WOPCM. + * + * Return: Upload firmware size, or zero on firmware fetch failure. + */ +static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) +{ + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + return uc_fw->header_size + uc_fw->ucode_size; +} + void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 4df7c2ef8576..448293eb638d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -62,6 +62,11 @@ static inline void fw_domain_reset(struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { + /* + * We don't really know if the powerwell for the forcewake domain we are + * trying to reset here does exist at this point (engines could be fused + * off in ICL+), so no waiting for acks + */ __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_reset); } @@ -134,7 +139,9 @@ fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915, * in the hope that the original ack will be delivered along with * the fallback ack. * - * This workaround is described in HSDES #1604254524 + * This workaround is described in HSDES #1604254524 and it's known as: + * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl + * although the name is a bit misleading. */ pass = 1; @@ -1353,6 +1360,23 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, fw_domain_reset(dev_priv, d); } +static void fw_domain_fini(struct drm_i915_private *dev_priv, + enum forcewake_domain_id domain_id) +{ + struct intel_uncore_forcewake_domain *d; + + if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT)) + return; + + d = &dev_priv->uncore.fw_domain[domain_id]; + + WARN_ON(d->wake_count); + WARN_ON(hrtimer_cancel(&d->timer)); + memset(d, 0, sizeof(*d)); + + dev_priv->uncore.fw_domains &= ~BIT(domain_id); +} + static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) <= 5 || intel_vgpu_active(dev_priv)) @@ -1372,7 +1396,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 11) { int i; - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_get = + fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9, @@ -1565,6 +1590,40 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) &dev_priv->uncore.pmic_bus_access_nb); } +/* + * We might have detected that some engines are fused off after we initialized + * the forcewake domains. Prune them, to make sure they only reference existing + * engines. 
+ */ +void intel_uncore_prune(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 11) { + enum forcewake_domains fw_domains = dev_priv->uncore.fw_domains; + enum forcewake_domain_id domain_id; + int i; + + for (i = 0; i < I915_MAX_VCS; i++) { + domain_id = FW_DOMAIN_ID_MEDIA_VDBOX0 + i; + + if (HAS_ENGINE(dev_priv, _VCS(i))) + continue; + + if (fw_domains & BIT(domain_id)) + fw_domain_fini(dev_priv, domain_id); + } + + for (i = 0; i < I915_MAX_VECS; i++) { + domain_id = FW_DOMAIN_ID_MEDIA_VEBOX0 + i; + + if (HAS_ENGINE(dev_priv, _VECS(i))) + continue; + + if (fw_domains & BIT(domain_id)) + fw_domain_fini(dev_priv, domain_id); + } + } +} + void intel_uncore_fini(struct drm_i915_private *dev_priv) { /* Paranoia: make sure we have disabled everything before we exit. */ @@ -1646,11 +1705,10 @@ static void gen3_stop_engine(struct intel_engine_cs *engine) const i915_reg_t mode = RING_MI_MODE(base); I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register_fw(dev_priv, - mode, - MODE_IDLE, - MODE_IDLE, - 500)) + if (__intel_wait_for_register_fw(dev_priv, + mode, MODE_IDLE, MODE_IDLE, + 500, 0, + NULL)) DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name); @@ -1804,9 +1862,10 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); /* Wait for the device to ack the reset requests */ - err = intel_wait_for_register_fw(dev_priv, - GEN6_GDRST, hw_domain_mask, 0, - 500); + err = __intel_wait_for_register_fw(dev_priv, + GEN6_GDRST, hw_domain_mask, 0, + 500, 0, + NULL); if (err) DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", hw_domain_mask); @@ -1854,6 +1913,50 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, } /** + * gen11_reset_engines - reset individual engines + * @dev_priv: i915 device + * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset + * + * This function will reset the individual engines that are set in engine_mask. + * If you provide ALL_ENGINES as mask, full global domain reset will be issued. + * + * Note: It is the responsibility of the caller to handle the difference between + * asking for a full domain reset versus a reset of all available individual engines. + * + * Returns 0 on success, nonzero on error.
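 *
 * (Editorial worked example, not part of the patch: engine_mask =
 * BIT(VCS) | BIT(VCS3) maps to hw_mask = GEN11_GRDOM_MEDIA |
 * GEN11_GRDOM_MEDIA3, which gen6_hw_domain_reset() then writes into
 * GEN6_GDRST.)
 *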
+ */ +static int gen11_reset_engines(struct drm_i915_private *dev_priv, + unsigned engine_mask) +{ + struct intel_engine_cs *engine; + const u32 hw_engine_mask[I915_NUM_ENGINES] = { + [RCS] = GEN11_GRDOM_RENDER, + [BCS] = GEN11_GRDOM_BLT, + [VCS] = GEN11_GRDOM_MEDIA, + [VCS2] = GEN11_GRDOM_MEDIA2, + [VCS3] = GEN11_GRDOM_MEDIA3, + [VCS4] = GEN11_GRDOM_MEDIA4, + [VECS] = GEN11_GRDOM_VECS, + [VECS2] = GEN11_GRDOM_VECS2, + }; + u32 hw_mask; + + BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES); + + if (engine_mask == ALL_ENGINES) { + hw_mask = GEN11_GRDOM_FULL; + } else { + unsigned int tmp; + + hw_mask = 0; + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) + hw_mask |= hw_engine_mask[engine->id]; + } + + return gen6_hw_domain_reset(dev_priv, hw_mask); +} + +/** * __intel_wait_for_register_fw - wait until register matches expected state * @dev_priv: the i915 device * @reg: the register to read @@ -1940,7 +2043,7 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv, u32 reg_value; int ret; - might_sleep(); + might_sleep_if(slow_timeout_ms); spin_lock_irq(&dev_priv->uncore.lock); intel_uncore_forcewake_get__locked(dev_priv, fw); @@ -1952,7 +2055,7 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv, intel_uncore_forcewake_put__locked(dev_priv, fw); spin_unlock_irq(&dev_priv->uncore.lock); - if (ret) + if (ret && slow_timeout_ms) ret = __wait_for(reg_value = I915_READ_NOTRACE(reg), (reg_value & mask) == value, slow_timeout_ms * 1000, 10, 1000); @@ -1971,11 +2074,12 @@ static int gen8_reset_engine_start(struct intel_engine_cs *engine) I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)); - ret = intel_wait_for_register_fw(dev_priv, - RING_RESET_CTL(engine->mmio_base), - RESET_CTL_READY_TO_RESET, - RESET_CTL_READY_TO_RESET, - 700); + ret = __intel_wait_for_register_fw(dev_priv, + RING_RESET_CTL(engine->mmio_base), + RESET_CTL_READY_TO_RESET, + RESET_CTL_READY_TO_RESET, + 700, 0, + NULL); if (ret) DRM_ERROR("%s: reset request timeout\n", engine->name); @@ -2000,7 +2104,10 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv, if (gen8_reset_engine_start(engine)) goto not_ready; - return gen6_reset_engines(dev_priv, engine_mask); + if (INTEL_GEN(dev_priv) >= 11) + return gen11_reset_engines(dev_priv, engine_mask); + else + return gen6_reset_engines(dev_priv, engine_mask); not_ready: for_each_engine_masked(engine, dev_priv, engine_mask, tmp) @@ -2038,15 +2145,31 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) int retry; int ret; - might_sleep(); + /* + * We want to perform per-engine reset from atomic context (e.g. + * softirq), which imposes the constraint that we cannot sleep. + * However, experience suggests that spending a bit of time waiting + * for a reset helps in various cases, so for a full-device reset + * we apply the opposite rule and wait if we want to. As we should + * always follow up a failed per-engine reset with a full device reset, + * being a little faster, stricter and more error prone for the + * atomic case seems an acceptable compromise. + * + * Unfortunately this leads to a bimodal routine, when the goal was + * to have a single reset function that worked for resetting any + * number of engines simultaneously. + */ + might_sleep_if(engine_mask == ALL_ENGINES); - /* If the power well sleeps during the reset, the reset + /* + * If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). 
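 *
 * (Editorial sketch, not part of the patch: callers therefore fall into two
 * classes,
 *
 *	intel_gpu_reset(dev_priv, BIT(engine->id));	- atomic-safe
 *	intel_gpu_reset(dev_priv, ALL_ENGINES);		- may sleep
 *
 * which is exactly what the might_sleep_if() above encodes.)
 *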
*/ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); for (retry = 0; retry < 3; retry++) { - /* We stop engines, otherwise we might get failed reset and a + /* + * We stop engines, otherwise we might get failed reset and a * dead gpu (on elk). Also as modern gpu as kbl can suffer * from system hang if batchbuffer is progressing when * the reset is issued, regardless of READY_TO_RESET ack. @@ -2060,9 +2183,11 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) i915_stop_engines(dev_priv, engine_mask); ret = -ENODEV; - if (reset) + if (reset) { + GEM_TRACE("engine_mask=%x\n", engine_mask); ret = reset(dev_priv, engine_mask); - if (ret != -ETIMEDOUT) + } + if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) break; cond_resched(); @@ -2085,12 +2210,14 @@ bool intel_has_reset_engine(struct drm_i915_private *dev_priv) int intel_reset_guc(struct drm_i915_private *dev_priv) { + u32 guc_domain = INTEL_GEN(dev_priv) >= 11 ? GEN11_GRDOM_GUC : + GEN9_GRDOM_GUC; int ret; GEM_BUG_ON(!HAS_GUC(dev_priv)); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); + ret = gen6_hw_domain_reset(dev_priv, guc_domain); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index dfdf444e4bcc..47478d609630 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -140,6 +140,7 @@ struct intel_uncore { void intel_uncore_sanitize(struct drm_i915_private *dev_priv); void intel_uncore_init(struct drm_i915_private *dev_priv); +void intel_uncore_prune(struct drm_i915_private *dev_priv); bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv); bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv); void intel_uncore_fini(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c new file mode 100644 index 000000000000..74bf76f3fddc --- /dev/null +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -0,0 +1,275 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include "intel_wopcm.h" +#include "i915_drv.h" + +/** + * DOC: WOPCM Layout + * + * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and + * offset registers whose values are calculated and determined by HuC/GuC + * firmware size and set of hardware requirements/restrictions as shown below: + * + * :: + * + * +=========> +====================+ <== WOPCM Top + * ^ | HW contexts RSVD | + * | +===> +====================+ <== GuC WOPCM Top + * | ^ | | + * | | | | + * | | | | + * | GuC | | + * | WOPCM | | + * | Size +--------------------+ + * WOPCM | | GuC FW RSVD | + * | | +--------------------+ + * | | | GuC Stack RSVD | + * | | +------------------- + + * | v | GuC WOPCM RSVD | + * | +===> +====================+ <== GuC WOPCM base + * | | WOPCM RSVD | + * | +------------------- + <== HuC Firmware Top + * v | HuC FW | + * +=========> +====================+ <== WOPCM Base + * + * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top. + * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6 + * context). + */ + +/* Default WOPCM size 1MB. */ +#define GEN9_WOPCM_SIZE (1024 * 1024) +/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */ +#define WOPCM_RESERVED_SIZE (16 * 1024) + +/* 16KB reserved at the beginning of GuC WOPCM. 
*/ +#define GUC_WOPCM_RESERVED (16 * 1024) +/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */ +#define GUC_WOPCM_STACK_RESERVED (8 * 1024) + +/* GuC WOPCM Offset value needs to be aligned to 16KB. */ +#define GUC_WOPCM_OFFSET_ALIGNMENT (1UL << GUC_WOPCM_OFFSET_SHIFT) + +/* 24KB at the end of WOPCM is reserved for RC6 CTX on BXT. */ +#define BXT_WOPCM_RC6_CTX_RESERVED (24 * 1024) +/* 36KB WOPCM reserved at the end of WOPCM on CNL. */ +#define CNL_WOPCM_HW_CTX_RESERVED (36 * 1024) + +/* 128KB from GUC_WOPCM_RESERVED is reserved for FW on Gen9. */ +#define GEN9_GUC_FW_RESERVED (128 * 1024) +#define GEN9_GUC_WOPCM_OFFSET (GUC_WOPCM_RESERVED + GEN9_GUC_FW_RESERVED) + +/** + * intel_wopcm_init_early() - Early initialization of the WOPCM. + * @wopcm: pointer to intel_wopcm. + * + * Set up the size of the WOPCM, which will be used later on for WOPCM partitioning. + */ +void intel_wopcm_init_early(struct intel_wopcm *wopcm) +{ + wopcm->size = GEN9_WOPCM_SIZE; + + DRM_DEBUG_DRIVER("WOPCM size: %uKiB\n", wopcm->size / 1024); +} + +static inline u32 context_reserved_size(struct drm_i915_private *i915) +{ + if (IS_GEN9_LP(i915)) + return BXT_WOPCM_RC6_CTX_RESERVED; + else if (INTEL_GEN(i915) >= 10) + return CNL_WOPCM_HW_CTX_RESERVED; + else + return 0; +} + +static inline int gen9_check_dword_gap(u32 guc_wopcm_base, u32 guc_wopcm_size) +{ + u32 offset; + + /* + * GuC WOPCM size shall be at least a dword larger than the offset from + * WOPCM base (GuC WOPCM offset from WOPCM base + GEN9_GUC_WOPCM_OFFSET) + * due to hardware limitation on Gen9. + */ + offset = guc_wopcm_base + GEN9_GUC_WOPCM_OFFSET; + if (offset > guc_wopcm_size || + (guc_wopcm_size - offset) < sizeof(u32)) { + DRM_ERROR("GuC WOPCM size %uKiB is too small. %uKiB needed.\n", + guc_wopcm_size / 1024, + (u32)(offset + sizeof(u32)) / 1024); + return -E2BIG; + } + + return 0; +} + +static inline int gen9_check_huc_fw_fits(u32 guc_wopcm_size, u32 huc_fw_size) +{ + /* + * On Gen9 & CNL A0, hardware requires the total available GuC WOPCM + * size to be larger than or equal to HuC firmware size. Otherwise, + * firmware uploading would fail. + */ + if (huc_fw_size > guc_wopcm_size - GUC_WOPCM_RESERVED) { + DRM_ERROR("HuC FW (%uKiB) won't fit in GuC WOPCM (%uKiB).\n", + huc_fw_size / 1024, + (guc_wopcm_size - GUC_WOPCM_RESERVED) / 1024); + return -E2BIG; + } + + return 0; +} + +static inline int check_hw_restriction(struct drm_i915_private *i915, + u32 guc_wopcm_base, u32 guc_wopcm_size, + u32 huc_fw_size) +{ + int err = 0; + + if (IS_GEN9(i915)) + err = gen9_check_dword_gap(guc_wopcm_base, guc_wopcm_size); + + if (!err && + (IS_GEN9(i915) || IS_CNL_REVID(i915, CNL_REVID_A0, CNL_REVID_A0))) + err = gen9_check_huc_fw_fits(guc_wopcm_size, huc_fw_size); + + return err; +} + +/** + * intel_wopcm_init() - Initialize the WOPCM structure. + * @wopcm: pointer to intel_wopcm. + * + * This function will partition WOPCM space based on GuC and HuC firmware sizes + * and will allocate the maximum remaining space for use by GuC. This function will also + * enforce platform dependent hardware restrictions on GuC WOPCM offset and + * size. It will fail the WOPCM init if any of these checks fail, so that + * the subsequent GuC firmware upload is aborted. + * + * Return: 0 on success, non-zero error code on failure.
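 *
 * (Editorial worked example with illustrative, assumed sizes rather than real
 * firmware numbers: given the 1024KiB Gen9 WOPCM, a 192KiB HuC blob and BXT's
 * 24KiB RC6 context reservation,
 *
 *	guc_wopcm_base = ALIGN(192K + 16K, 16K) = 208KiB
 *	guc_wopcm_size = 1024K - 208K - 24K = 792KiB
 *
 * so the GuC firmware plus the 24KiB GuC reserved/stack area must fit within
 * 792KiB.)
 *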
+ */ +int intel_wopcm_init(struct intel_wopcm *wopcm) +{ + struct drm_i915_private *i915 = wopcm_to_i915(wopcm); + u32 guc_fw_size = intel_uc_fw_get_upload_size(&i915->guc.fw); + u32 huc_fw_size = intel_uc_fw_get_upload_size(&i915->huc.fw); + u32 ctx_rsvd = context_reserved_size(i915); + u32 guc_wopcm_base; + u32 guc_wopcm_size; + u32 guc_wopcm_rsvd; + int err; + + GEM_BUG_ON(!wopcm->size); + + if (guc_fw_size >= wopcm->size) { + DRM_ERROR("GuC FW (%uKiB) is too big to fit in WOPCM.", + guc_fw_size / 1024); + return -E2BIG; + } + + if (huc_fw_size >= wopcm->size) { + DRM_ERROR("HuC FW (%uKiB) is too big to fit in WOPCM.", + huc_fw_size / 1024); + return -E2BIG; + } + + guc_wopcm_base = ALIGN(huc_fw_size + WOPCM_RESERVED_SIZE, + GUC_WOPCM_OFFSET_ALIGNMENT); + if ((guc_wopcm_base + ctx_rsvd) >= wopcm->size) { + DRM_ERROR("GuC WOPCM base (%uKiB) is too big.\n", + guc_wopcm_base / 1024); + return -E2BIG; + } + + guc_wopcm_size = wopcm->size - guc_wopcm_base - ctx_rsvd; + guc_wopcm_size &= GUC_WOPCM_SIZE_MASK; + + DRM_DEBUG_DRIVER("Calculated GuC WOPCM Region: [%uKiB, %uKiB)\n", + guc_wopcm_base / 1024, guc_wopcm_size / 1024); + + guc_wopcm_rsvd = GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED; + if ((guc_fw_size + guc_wopcm_rsvd) > guc_wopcm_size) { + DRM_ERROR("Need %uKiB WOPCM for GuC, %uKiB available.\n", + (guc_fw_size + guc_wopcm_rsvd) / 1024, + guc_wopcm_size / 1024); + return -E2BIG; + } + + err = check_hw_restriction(i915, guc_wopcm_base, guc_wopcm_size, + huc_fw_size); + if (err) + return err; + + wopcm->guc.base = guc_wopcm_base; + wopcm->guc.size = guc_wopcm_size; + + return 0; +} + +static inline int write_and_verify(struct drm_i915_private *dev_priv, + i915_reg_t reg, u32 val, u32 mask, + u32 locked_bit) +{ + u32 reg_val; + + GEM_BUG_ON(val & ~mask); + + I915_WRITE(reg, val); + + reg_val = I915_READ(reg); + + return (reg_val & mask) != (val | locked_bit) ? -EIO : 0; +} + +/** + * intel_wopcm_init_hw() - Setup GuC WOPCM registers. + * @wopcm: pointer to intel_wopcm. + * + * Setup the GuC WOPCM size and offset registers with the calculated values. It + * will verify the register values to make sure the registers are locked with + * correct values. + * + * Return: 0 on success. -EIO if registers were locked with incorrect values. + */ +int intel_wopcm_init_hw(struct intel_wopcm *wopcm) +{ + struct drm_i915_private *dev_priv = wopcm_to_i915(wopcm); + u32 huc_agent; + u32 mask; + int err; + + if (!USES_GUC(dev_priv)) + return 0; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + GEM_BUG_ON(!wopcm->guc.size); + GEM_BUG_ON(!wopcm->guc.base); + + err = write_and_verify(dev_priv, GUC_WOPCM_SIZE, wopcm->guc.size, + GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED, + GUC_WOPCM_SIZE_LOCKED); + if (err) + goto err_out; + + huc_agent = USES_HUC(dev_priv) ? 
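+ /* editor's note: with the HuC in use, the GuC acts as the HuC loading agent (inferred from the flag name) */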
HUC_LOADING_AGENT_GUC : 0; + mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; + err = write_and_verify(dev_priv, DMA_GUC_WOPCM_OFFSET, + wopcm->guc.base | huc_agent, mask, + GUC_WOPCM_OFFSET_VALID); + if (err) + goto err_out; + + return 0; + +err_out: + DRM_ERROR("Failed to init WOPCM registers:\n"); + DRM_ERROR("DMA_GUC_WOPCM_OFFSET=%#x\n", + I915_READ(DMA_GUC_WOPCM_OFFSET)); + DRM_ERROR("GUC_WOPCM_SIZE=%#x\n", I915_READ(GUC_WOPCM_SIZE)); + + return err; +} diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h new file mode 100644 index 000000000000..6298910a384c --- /dev/null +++ b/drivers/gpu/drm/i915/intel_wopcm.h @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#ifndef _INTEL_WOPCM_H_ +#define _INTEL_WOPCM_H_ + +#include <linux/types.h> + +/** + * struct intel_wopcm - Overall WOPCM info and WOPCM regions. + * @size: Size of overall WOPCM. + * @guc: GuC WOPCM Region info. + * @guc.base: GuC WOPCM base which is offset from WOPCM base. + * @guc.size: Size of the GuC WOPCM region. + */ +struct intel_wopcm { + u32 size; + struct { + u32 base; + u32 size; + } guc; +}; + +void intel_wopcm_init_early(struct intel_wopcm *wopcm); +int intel_wopcm_init(struct intel_wopcm *wopcm); +int intel_wopcm_init_hw(struct intel_wopcm *wopcm); + +#endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c new file mode 100644 index 000000000000..2df3538ceba5 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -0,0 +1,949 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_workarounds.h" + +/** + * DOC: Hardware workarounds + * + * This file is intended as a central place to implement most [1]_ of the + * required workarounds for hardware to work as originally intended. They fall + * into five basic categories depending on how/when they are applied: + * + * - Workarounds that touch registers that are saved/restored to/from the HW + * context image. The list is emitted (via Load Register Immediate commands) + * every time a new context is created. + * - GT workarounds. The list of these WAs is applied whenever these registers + * revert to default values (on GPU reset, suspend/resume [2]_, etc.). + * - Display workarounds. The list is applied during display clock-gating + * initialization. + * - Workarounds that whitelist a privileged register, so that UMDs can manage + * them directly. This is just a special case of an MMIO workaround (as we + * write the list of these to-be-whitelisted registers to some special HW + * registers). + * - Workaround batchbuffers, which get executed automatically by the hardware + * on every HW context restore. + * + * .. [1] Please note that there are other WAs that, due to their nature, + * cannot be applied from a central place. Those are peppered around the rest + * of the code, as needed. + * + * .. [2] Technically, some registers are power-context saved & restored, so they + * survive a suspend/resume. In practice, writing them again is not too + * costly and simplifies things. We can revisit this in the future. + * + * Layout + * '''''' + * + * Keep things in this file ordered by WA type, as per the above (context, GT, + * display, register whitelist, batchbuffer). 
Then, inside each type, keep the + * following order: + * + * - Infrastructure functions and macros + * - WAs per platform in standard gen/chrono order + * - Public functions to init or apply the given workaround type. + */ + +static int wa_add(struct drm_i915_private *dev_priv, + i915_reg_t addr, + const u32 mask, const u32 val) +{ + const unsigned int idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; +} + +#define WA_REG(addr, mask, val) do { \ + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ + if (r) \ + return r; \ + } while (0) + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) + +static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw,chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + + /* WaDisablePartialInstShootdown:bdw,chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for BDW and CHV; turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); + + return 0; +} + +static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen8_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); + + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(dev_priv) ? 
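+ /* editor's note: only BDW GT3 parts take the SLM-fence disable; other SKUs OR in 0 */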
HDC_FENCE_DEST_SLM_DISABLE : 0)); + + return 0; +} + +static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen8_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* Improve HiZ throughput on CHV. */ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + + return 0; +} + +static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + if (HAS_LLC(dev_priv)) { + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl + * + * Must match Display Engine. See + * WaCompressedResourceDisplayNewHashMode. + */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN9_PBE_COMPRESSED_HASH_SELECTION); + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); + } + + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ + if (!IS_COFFEELAKE(dev_priv)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | + GEN9_ENABLE_GPGPU_PREEMPTION); + + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, + GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); + + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been a source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ + + /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + + /* + * Supporting preemption with fine granularity requires changes in the + * batch buffer programming. Since we can't break old userspace, we + * need to set our default preemption level to a safe value. Userspace is + * still able to use more fine-grained preemption levels, since in + * WaEnablePreemptionGranularityControlByUMD we're whitelisting the + * per-ctx register. 
As such, WaDisable{3D,GPGPU}MidCmdPreemption are + * not real HW workarounds, but merely a way to start using preemption + * while maintaining the old contract with userspace. + */ + + /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + + /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) + WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); + + return 0; +} + +static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv) +{ + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return 0; + + /* Tune IZ hashing. See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); + + return 0; +} + +static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + return skl_tune_iz_hashing(dev_priv); +} + +static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bxt */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:bxt */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + return 0; +} + +static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:cfl */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableSbeCacheDispatchPortSharing:cfl */ + 
WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + return 0; +} + +static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + /* WaForceContextSaveRestoreNonCoherent:cnl */ + WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); + + /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); + + /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ + if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); + + /* WaPushConstantDereferenceHoldDisable:cnl */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); + + /* FtrEnableFastAnisoL1BankingFix:cnl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); + + /* WaDisable3DMidCmdPreemption:cnl */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:cnl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + + /* WaDisableEarlyEOT:cnl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); + + return 0; +} + +static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + /* Wa_1604370585:icl (pre-prod) + * Formerly known as WaPushConstantDereferenceHoldDisable + */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + PUSH_CONSTANT_DEREF_DISABLE); + + /* WaForceEnableNonCoherent:icl + * This is not the same workaround as in early Gen9 platforms, where + * lacking this could cause system hangs, but coherency performance + * overhead is high and only a few compute workloads really need it + * (the register is whitelisted in hardware now, so UMDs can opt in + * for coherency if they have a good reason). 
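+ * + * Editor's aside: ICL_HDC_MODE is a masked register, so the helper used here latches the write-enable mask in the upper 16 bits; under that assumption, WA_SET_BIT_MASKED(reg, bit) records roughly value = (bit << 16) | bit (i.e. _MASKED_BIT_ENABLE(bit)), which intel_ctx_workarounds_emit() later writes into each new context image via MI_LOAD_REGISTER_IMM.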
+ */ + WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + + return 0; +} + +int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int err = 0; + + dev_priv->workarounds.count = 0; + + if (INTEL_GEN(dev_priv) < 8) + err = 0; + else if (IS_BROADWELL(dev_priv)) + err = bdw_ctx_workarounds_init(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + err = chv_ctx_workarounds_init(dev_priv); + else if (IS_SKYLAKE(dev_priv)) + err = skl_ctx_workarounds_init(dev_priv); + else if (IS_BROXTON(dev_priv)) + err = bxt_ctx_workarounds_init(dev_priv); + else if (IS_KABYLAKE(dev_priv)) + err = kbl_ctx_workarounds_init(dev_priv); + else if (IS_GEMINILAKE(dev_priv)) + err = glk_ctx_workarounds_init(dev_priv); + else if (IS_COFFEELAKE(dev_priv)) + err = cfl_ctx_workarounds_init(dev_priv); + else if (IS_CANNONLAKE(dev_priv)) + err = cnl_ctx_workarounds_init(dev_priv); + else if (IS_ICELAKE(dev_priv)) + err = icl_ctx_workarounds_init(dev_priv); + else + MISSING_CASE(INTEL_GEN(dev_priv)); + if (err) + return err; + + DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n", + dev_priv->workarounds.count); + return 0; +} + +int intel_ctx_workarounds_emit(struct i915_request *rq) +{ + struct i915_workarounds *w = &rq->i915->workarounds; + u32 *cs; + int ret, i; + + if (w->count == 0) + return 0; + + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(rq, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(w->count); + for (i = 0; i < w->count; i++) { + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); + if (ret) + return ret; + + return 0; +} + +static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ +} + +static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ +} + +static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, + _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaDisableKillLogic:bxt,skl,kbl */ + if (!IS_COFFEELAKE(dev_priv)) + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + ECOCHK_DIS_TLB); + + if (HAS_LLC(dev_priv)) { + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl + * + * Must match Display Engine. See + * WaCompressedResourceDisplayNewHashMode. 
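+ * + * Editor's note: unlike the context workarounds above, which are emitted into the context image, the GT-level fixes in this function are plain MMIO read-modify-write updates (I915_WRITE of I915_READ plus the new bits), and so must be replayed whenever the registers revert to their defaults, e.g. across GPU reset or suspend/resume.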
+ */ + I915_WRITE(MMCD_MISC_CTRL, + I915_READ(MMCD_MISC_CTRL) | + MMCD_PCLA | + MMCD_HOTSPOT_EN); + } + + /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) { + u32 val = I915_READ(GEN8_L3SQCREG1); + + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); + I915_WRITE(GEN8_L3SQCREG1, val); + } + + /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ + I915_WRITE(GEN8_L3SQCREG4, + I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES); + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); +} + +static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); + + /* WaEnableGapsTsvCreditFix:skl */ + I915_WRITE(GEN8_GARBCNTL, + I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE); + + /* WaDisableGafsUnitClkGating:skl */ + I915_WRITE(GEN7_UCGCTL4, + I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); + + /* WaDisablePooledEuLoadBalancingFix:bxt */ + I915_WRITE(FF_SLICE_CS_CHICKEN2, + _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); + + /* WaInPlaceDecompressionHang:bxt */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); + + /* WaEnableGapsTsvCreditFix:kbl */ + I915_WRITE(GEN8_GARBCNTL, + I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + I915_WRITE(GAMT_CHKN_BIT_REG, + I915_READ(GAMT_CHKN_BIT_REG) | + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* WaDisableGafsUnitClkGating:kbl */ + I915_WRITE(GEN7_UCGCTL4, + I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:kbl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); +} + +static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); + + /* WaEnableGapsTsvCreditFix:cfl */ + I915_WRITE(GEN8_GARBCNTL, + I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE); + + /* WaDisableGafsUnitClkGating:cfl */ + I915_WRITE(GEN7_UCGCTL4, + I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:cfl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) + I915_WRITE(GAMT_CHKN_BIT_REG, + I915_READ(GAMT_CHKN_BIT_REG) | + GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT); + + /* WaInPlaceDecompressionHang:cnl 
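+ * (editor's note: the same GAMT_ECO in-place decompression fix is applied for skl, bxt, kbl, cfl and icl elsewhere in this file)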
*/ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); +} + +static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + /* This is not a Wa. Enable for better image quality */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); + + /* WaInPlaceDecompressionHang:icl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaPipelineFlushCoherentLines:icl */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES); + + /* Wa_1405543622:icl + * Formerly known as WaGAPZPriorityScheme + */ + I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) | + GEN11_ARBITRATION_PRIO_ORDER_MASK); + + /* Wa_1604223664:icl + * Formerly known as WaL3BankAddressHashing + */ + I915_WRITE(GEN8_GARBCNTL, + (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) | + GEN11_HASH_CTRL_EXCL_BIT0); + I915_WRITE(GEN11_GLBLINVL, + (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) | + GEN11_BANK_HASH_ADDR_EXCL_BIT0); + + /* WaModifyGamTlbPartitioning:icl */ + I915_WRITE(GEN11_GACB_PERF_CTRL, + (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) | + GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); + + /* Wa_1405733216:icl + * Formerly known as WaDisableCleanEvicts + */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN11_LQSC_CLEAN_EVICT_DISABLE); + + /* Wa_1405766107:icl + * Formerly known as WaCL2SFHalfMaxAlloc + */ + I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) | + GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | + GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); + + /* Wa_220166154:icl + * Formerly known as WaDisCtxReload + */ + I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) | + GAMW_ECO_DEV_CTX_RELOAD_DISABLE); + + /* Wa_1405779004:icl (pre-prod) */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0)) + I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, + I915_READ(SLICE_UNIT_LEVEL_CLKGATE) | + MSCUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl */ + I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, + I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) | + GWUNIT_CLKGATE_DIS); + + /* Wa_1604302699:icl */ + I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER, + I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) | + GEN11_I2M_WRITE_DISABLE); + + /* Wa_1406838659:icl (pre-prod) */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + I915_WRITE(INF_UNIT_LEVEL_CLKGATE, + I915_READ(INF_UNIT_LEVEL_CLKGATE) | + CGPSF_CLKGATE_DIS); + + /* WaForwardProgressSoftReset:icl */ + I915_WRITE(GEN10_SCRATCH_LNCF2, + I915_READ(GEN10_SCRATCH_LNCF2) | + PMFLUSHDONE_LNICRSDROP | + PMFLUSH_GAPL3UNBLOCK | + PMFLUSHDONE_LNEBLK); +} + +void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) < 8) + return; + else if (IS_BROADWELL(dev_priv)) + bdw_gt_workarounds_apply(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + chv_gt_workarounds_apply(dev_priv); + else if (IS_SKYLAKE(dev_priv)) + skl_gt_workarounds_apply(dev_priv); + else if (IS_BROXTON(dev_priv)) + bxt_gt_workarounds_apply(dev_priv); + else if (IS_KABYLAKE(dev_priv)) + kbl_gt_workarounds_apply(dev_priv); + else if (IS_GEMINILAKE(dev_priv)) + glk_gt_workarounds_apply(dev_priv); + else if (IS_COFFEELAKE(dev_priv)) + cfl_gt_workarounds_apply(dev_priv); + else if (IS_CANNONLAKE(dev_priv)) 
+ cnl_gt_workarounds_apply(dev_priv); + else if (IS_ICELAKE(dev_priv)) + icl_gt_workarounds_apply(dev_priv); + else + MISSING_CASE(INTEL_GEN(dev_priv)); +} + +struct whitelist { + i915_reg_t reg[RING_MAX_NONPRIV_SLOTS]; + unsigned int count; + u32 nopid; +}; + +static void whitelist_reg(struct whitelist *w, i915_reg_t reg) +{ + if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS)) + return; + + w->reg[w->count++] = reg; +} + +static void bdw_whitelist_build(struct whitelist *w) +{ +} + +static void chv_whitelist_build(struct whitelist *w) +{ +} + +static void gen9_whitelist_build(struct whitelist *w) +{ + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ + whitelist_reg(w, GEN9_CTX_PREEMPT_REG); + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ + whitelist_reg(w, GEN8_CS_CHICKEN1); + + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ + whitelist_reg(w, GEN8_HDC_CHICKEN1); +} + +static void skl_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); + + /* WaDisableLSQCROPERFforOCL:skl */ + whitelist_reg(w, GEN8_L3SQCREG4); +} + +static void bxt_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); +} + +static void kbl_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); + + /* WaDisableLSQCROPERFforOCL:kbl */ + whitelist_reg(w, GEN8_L3SQCREG4); +} + +static void glk_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); + + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ + whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); +} + +static void cfl_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); +} + +static void cnl_whitelist_build(struct whitelist *w) +{ + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + whitelist_reg(w, GEN8_CS_CHICKEN1); +} + +static void icl_whitelist_build(struct whitelist *w) +{ +} + +static struct whitelist *whitelist_build(struct intel_engine_cs *engine, + struct whitelist *w) +{ + struct drm_i915_private *i915 = engine->i915; + + GEM_BUG_ON(engine->id != RCS); + + w->count = 0; + w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base)); + + if (INTEL_GEN(i915) < 8) + return NULL; + else if (IS_BROADWELL(i915)) + bdw_whitelist_build(w); + else if (IS_CHERRYVIEW(i915)) + chv_whitelist_build(w); + else if (IS_SKYLAKE(i915)) + skl_whitelist_build(w); + else if (IS_BROXTON(i915)) + bxt_whitelist_build(w); + else if (IS_KABYLAKE(i915)) + kbl_whitelist_build(w); + else if (IS_GEMINILAKE(i915)) + glk_whitelist_build(w); + else if (IS_COFFEELAKE(i915)) + cfl_whitelist_build(w); + else if (IS_CANNONLAKE(i915)) + cnl_whitelist_build(w); + else if (IS_ICELAKE(i915)) + icl_whitelist_build(w); + else + MISSING_CASE(INTEL_GEN(i915)); + + return w; +} + +static void whitelist_apply(struct intel_engine_cs *engine, + const struct whitelist *w) +{ + struct drm_i915_private *dev_priv = engine->i915; + const u32 base = engine->mmio_base; + unsigned int i; + + if (!w) + return; + + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + + for (i = 0; i < w->count; i++) + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), + i915_mmio_reg_offset(w->reg[i])); + + /* And clear the rest just in case of garbage */ + for (; i < RING_MAX_NONPRIV_SLOTS; i++) + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); +} + +void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) +{ + struct whitelist w; + + whitelist_apply(engine, whitelist_build(engine, &w)); +} + +#if 
IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_workarounds.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h new file mode 100644 index 000000000000..b11d0623e626 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -0,0 +1,17 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef _I915_WORKAROUNDS_H_ +#define _I915_WORKAROUNDS_H_ + +int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv); +int intel_ctx_workarounds_emit(struct i915_request *rq); + +void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv); + +void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 05bbef363fff..91c72911be3c 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1091,7 +1091,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, out_vma_unpin: i915_vma_unpin(vma); out_vma_close: - i915_vma_close(vma); + i915_vma_destroy(vma); return err; } @@ -1757,6 +1757,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) goto out_unlock; } + if (ctx->ppgtt) + ctx->ppgtt->base.scrub_64K = true; + err = i915_subtests(tests, ctx); out_unlock: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 7ecaed50d0b9..ddb03f009232 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -23,6 +23,7 @@ */ #include "../i915_selftest.h" +#include "igt_flush_test.h" #include "mock_drm.h" #include "huge_gem_object.h" @@ -411,6 +412,8 @@ static int igt_ctx_exec(void *arg) } out_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 9c76f0305b6a..a00e2bd08bce 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,6 +11,7 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) +selftest(workarounds, intel_workarounds_live_selftests) selftest(requests, i915_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) @@ -20,4 +21,5 @@ selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) +selftest(execlists, intel_execlists_live_selftests) selftest(guc, intel_guc_live_selftest) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 9a48aa441743..d16d74178e9d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests) selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) +selftest(engine, intel_engine_cs_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(timelines, 
i915_gem_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 3000e6a7d82d..19f1c6a5c8fb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -1,25 +1,7 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ #include "../i915_selftest.h" @@ -35,21 +17,21 @@ struct __igt_sync { bool set; }; -static int __igt_sync(struct intel_timeline *tl, +static int __igt_sync(struct i915_timeline *tl, u64 ctx, const struct __igt_sync *p, const char *name) { int ret; - if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", name, p->name, ctx, p->seqno, yesno(p->expected)); return -EINVAL; } if (p->set) { - ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + ret = __i915_timeline_sync_set(tl, ctx, p->seqno); if (ret) return ret; } @@ -77,37 +59,31 @@ static int igt_sync(void *arg) { "unwrap", UINT_MAX, true, false }, {}, }, *p; - struct intel_timeline *tl; + struct i915_timeline tl; int order, offset; int ret = -ENODEV; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - + mock_timeline_init(&tl, 0); for (p = pass; p->name; p++) { for (order = 1; order < 64; order++) { for (offset = -1; offset <= (order > 1); offset++) { u64 ctx = BIT_ULL(order) + offset; - ret = __igt_sync(tl, ctx, p, "1"); + ret = __igt_sync(&tl, ctx, p, "1"); if (ret) goto out; } } } - mock_timeline_destroy(tl); - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_fini(&tl); + mock_timeline_init(&tl, 0); for (order = 1; order < 64; order++) { for (offset = -1; offset <= (order > 1); offset++) { u64 ctx = BIT_ULL(order) + offset; for (p = pass; p->name; p++) { - ret = __igt_sync(tl, ctx, p, "2"); + ret = __igt_sync(&tl, ctx, p, "2"); if (ret) goto out; } @@ -115,7 +91,7 @@ static int igt_sync(void *arg) } out: - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); return ret; } @@ -127,15 +103,13 @@ static unsigned int random_engine(struct rnd_state *rnd) static int bench_sync(void *arg) { struct rnd_state prng; - struct intel_timeline *tl; + struct i915_timeline tl; unsigned 
long end_time, count; u64 prng32_1M; ktime_t kt; int order, last_order; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Lookups from cache are very fast and so the random number generation * and the loop itself becomes a significant factor in the per-iteration @@ -167,7 +141,7 @@ static int bench_sync(void *arg) do { u64 id = i915_prandom_u64_state(&prng); - __intel_timeline_sync_set(tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); @@ -182,8 +156,8 @@ static int bench_sync(void *arg) while (end_time--) { u64 id = i915_prandom_u64_state(&prng); - if (!__intel_timeline_sync_is_later(tl, id, 0)) { - mock_timeline_destroy(tl); + if (!__i915_timeline_sync_is_later(&tl, id, 0)) { + mock_timeline_fini(&tl); pr_err("Lookup of %llu failed\n", id); return -EINVAL; } @@ -193,19 +167,17 @@ static int bench_sync(void *arg) pr_info("%s: %lu random lookups, %lluns/lookup\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Benchmark setting the first N (in order) contexts */ count = 0; kt = ktime_get(); end_time = jiffies + HZ/10; do { - __intel_timeline_sync_set(tl, count++, 0); + __i915_timeline_sync_set(&tl, count++, 0); } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); pr_info("%s: %lu in-order insertions, %lluns/insert\n", @@ -215,9 +187,9 @@ static int bench_sync(void *arg) end_time = count; kt = ktime_get(); while (end_time--) { - if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { + if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { pr_err("Lookup of %lu failed\n", end_time); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); return -EINVAL; } } @@ -225,12 +197,10 @@ static int bench_sync(void *arg) pr_info("%s: %lu in-order lookups, %lluns/lookup\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Benchmark searching for a random context id and maybe changing it */ prandom_seed_state(&prng, i915_selftest.random_seed); @@ -241,8 +211,8 @@ static int bench_sync(void *arg) u32 id = random_engine(&prng); u32 seqno = prandom_u32_state(&prng); - if (!__intel_timeline_sync_is_later(tl, id, seqno)) - __intel_timeline_sync_set(tl, id, seqno); + if (!__i915_timeline_sync_is_later(&tl, id, seqno)) + __i915_timeline_sync_set(&tl, id, seqno); count++; } while (!time_after(jiffies, end_time)); @@ -250,7 +220,7 @@ static int bench_sync(void *arg) kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); /* Benchmark searching for a known context id and changing the seqno */ @@ -258,9 +228,7 @@ static int bench_sync(void *arg) ({ int tmp = last_order; last_order = order; order += tmp; })) { unsigned int mask = BIT(order) - 1; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); count = 0; kt = ktime_get(); @@ -272,8 +240,8 @@ static int bench_sync(void *arg) */ u64 id = (u64)(count & mask) << order; - __intel_timeline_sync_is_later(tl, id, 0); - __intel_timeline_sync_set(tl, id, 0); + 
__i915_timeline_sync_is_later(&tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); @@ -281,7 +249,7 @@ static int bench_sync(void *arg) pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", __func__, count, order, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index eb89e301b602..e90f97236e50 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -81,7 +81,7 @@ checked_vma_instance(struct drm_i915_gem_object *obj, } if (i915_vma_compare(vma, vm, view)) { - pr_err("i915_vma_compare failed with create parmaters!\n"); + pr_err("i915_vma_compare failed with create parameters!\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c new file mode 100644 index 000000000000..0d06f559243f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_drv.h" + +#include "../i915_selftest.h" +#include "igt_flush_test.h" + +struct wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const void *symbol; +}; + +static void wedge_me(struct work_struct *work) +{ + struct wedge_me *w = container_of(work, typeof(*w), work.work); + + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); + + GEM_TRACE("%pS timed out.\n", w->symbol); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(w->i915); +} + +static void __init_wedge(struct wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const void *symbol) +{ + w->i915 = i915; + w->symbol = symbol; + + INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +static void __fini_wedge(struct wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} + +#define wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ + (W)->i915; \ + __fini_wedge((W))) + +int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) +{ + struct wedge_me w; + + cond_resched(); + + if (flags & I915_WAIT_LOCKED && + i915_gem_switch_to_kernel_context(i915)) { + pr_err("Failed to switch back to kernel context; declaring wedged\n"); + i915_gem_set_wedged(i915); + } + + wedge_on_timeout(&w, i915, HZ) + i915_gem_wait_for_idle(i915, flags); + + return i915_terminally_wedged(&i915->gpu_error) ? 
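+ /* editor's note: -EIO reports that the GPU is wedged, so callers abandon further subtests */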
-EIO : 0; +} diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h new file mode 100644 index 000000000000..63e009927c43 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef IGT_FLUSH_TEST_H +#define IGT_FLUSH_TEST_H + +struct drm_i915_private; + +int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); + +#endif /* IGT_FLUSH_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 46580026c7fc..d6926e7820e5 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -412,10 +412,11 @@ static int igt_wakeup(void *arg) * that they are ready for the next test. We wait until all * threads are complete and waiting for us (i.e. not a seqno). */ - err = wait_var_event_timeout(&done, !atomic_read(&done), 10 * HZ); - if (err) { + if (!wait_var_event_timeout(&done, + !atomic_read(&done), 10 * HZ)) { pr_err("Timed out waiting for %d remaining waiters\n", atomic_read(&done)); + err = -ETIMEDOUT; break; } diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/selftests/intel_engine_cs.c new file mode 100644 index 000000000000..cfaa6b296835 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_engine_cs.c @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +static int intel_mmio_bases_check(void *arg) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { + const struct engine_info *info = &intel_engines[i]; + char name[INTEL_ENGINE_CS_MAX_NAME]; + u8 prev = U8_MAX; + + __sprint_engine_name(name, info); + + for (j = 0; j < MAX_MMIO_BASES; j++) { + u8 gen = info->mmio_bases[j].gen; + u32 base = info->mmio_bases[j].base; + + if (gen >= prev) { + pr_err("%s: %s: mmio base for gen %x " + "is before the one for gen %x\n", + __func__, name, prev, gen); + return -EINVAL; + } + + if (gen == 0) + break; + + if (!base) { + pr_err("%s: %s: invalid mmio base (%x) " + "for gen %x at entry %u\n", + __func__, name, base, gen, j); + return -EINVAL; + } + + prev = gen; + } + + pr_info("%s: min gen supported for %s = %d\n", + __func__, name, prev); + } + + return 0; +} + +int intel_engine_cs_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_mmio_bases_check), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index df7898c8edcb..438e0b045a2c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -25,10 +25,14 @@ #include <linux/kthread.h> #include "../i915_selftest.h" +#include "i915_random.h" +#include "igt_flush_test.h" #include "mock_context.h" #include "mock_drm.h" +#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ + struct hang { struct drm_i915_private *i915; struct drm_i915_gem_object *hws; @@ -250,58 +254,6 @@ static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); } -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const void *symbol; -}; - -static void wedge_me(struct work_struct *work) 
-{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%pS timed out, cancelling all further testing.\n", - w->symbol); - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const void *symbol) -{ - w->i915 = i915; - w->symbol = symbol; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ - (W)->i915; \ - __fini_wedge((W))) - -static noinline int -flush_test(struct drm_i915_private *i915, unsigned int flags) -{ - struct wedge_me w; - - cond_resched(); - - wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags); - - return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; -} - static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; @@ -315,10 +267,10 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - flush_test(h->i915, I915_WAIT_LOCKED); + igt_flush_test(h->i915, I915_WAIT_LOCKED); } -static bool wait_for_hang(struct hang *h, struct i915_request *rq) +static bool wait_until_running(struct hang *h, struct i915_request *rq) { return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), rq->fence.seqno), @@ -433,7 +385,7 @@ static int igt_global_reset(void *arg) mutex_lock(&i915->drm.struct_mutex); reset_count = i915_reset_count(&i915->gpu_error); - i915_reset(i915, I915_RESET_QUIET); + i915_reset(i915, ALL_ENGINES, NULL); if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); @@ -450,6 +402,11 @@ static int igt_global_reset(void *arg) return err; } +static bool wait_for_idle(struct intel_engine_cs *engine) +{ + return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; +} + static int __igt_reset_engine(struct drm_i915_private *i915, bool active) { struct intel_engine_cs *engine; @@ -477,12 +434,21 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) if (active && !intel_engine_can_store_dword(engine)) continue; + if (!wait_for_idle(engine)) { + pr_err("%s failed to idle before reset\n", + engine->name); + err = -EIO; + break; + } + reset_count = i915_reset_count(&i915->gpu_error); reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { + u32 seqno = intel_engine_get_seqno(engine); + if (active) { struct i915_request *rq; @@ -498,7 +464,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); - if (!wait_for_hang(&h, rq)) { + if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("%s: Failed to start request %x, at %x\n", @@ -511,14 +477,12 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } + GEM_BUG_ON(!rq->global_seqno); + seqno = rq->global_seqno - 1; i915_request_put(rq); } - engine->hangcheck.stalled = true; - engine->hangcheck.seqno = - intel_engine_get_seqno(engine); - - err = i915_reset_engine(engine, I915_RESET_QUIET); + err = i915_reset_engine(engine, NULL); if (err) { pr_err("i915_reset_engine failed\n"); break; @@ -539,14 +503,25 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool 
active) break; } - engine->hangcheck.stalled = false; + if (!wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("%s failed to idle after reset\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); if (err) break; - err = flush_test(i915, 0); + err = igt_flush_test(i915, 0); if (err) break; } @@ -573,11 +548,25 @@ static int igt_reset_active_engine(void *arg) return __igt_reset_engine(arg, true); } +struct active_engine { + struct task_struct *task; + struct intel_engine_cs *engine; + unsigned long resets; + unsigned int flags; +}; + +#define TEST_ACTIVE BIT(0) +#define TEST_OTHERS BIT(1) +#define TEST_SELF BIT(2) +#define TEST_PRIORITY BIT(3) + static int active_engine(void *data) { - struct intel_engine_cs *engine = data; - struct i915_request *rq[2] = {}; - struct i915_gem_context *ctx[2]; + I915_RND_STATE(prng); + struct active_engine *arg = data; + struct intel_engine_cs *engine = arg->engine; + struct i915_request *rq[8] = {}; + struct i915_gem_context *ctx[ARRAY_SIZE(rq)]; struct drm_file *file; unsigned long count = 0; int err = 0; @@ -586,25 +575,20 @@ static int active_engine(void *data) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&engine->i915->drm.struct_mutex); - ctx[0] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); - if (IS_ERR(ctx[0])) { - err = PTR_ERR(ctx[0]); - goto err_file; - } - - mutex_lock(&engine->i915->drm.struct_mutex); - ctx[1] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); - if (IS_ERR(ctx[1])) { - err = PTR_ERR(ctx[1]); - i915_gem_context_put(ctx[0]); - goto err_file; + for (count = 0; count < ARRAY_SIZE(ctx); count++) { + mutex_lock(&engine->i915->drm.struct_mutex); + ctx[count] = live_context(engine->i915, file); + mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(ctx[count])) { + err = PTR_ERR(ctx[count]); + while (--count) + i915_gem_context_put(ctx[count]); + goto err_file; + } } while (!kthread_should_stop()) { - unsigned int idx = count++ & 1; + unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; @@ -616,14 +600,28 @@ static int active_engine(void *data) break; } + if (arg->flags & TEST_PRIORITY) + ctx[idx]->sched.priority = + i915_prandom_u32_max_state(512, &prng); + rq[idx] = i915_request_get(new); i915_request_add(new); mutex_unlock(&engine->i915->drm.struct_mutex); if (old) { - i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); + if (i915_request_wait(old, 0, HZ) < 0) { + GEM_TRACE("%s timed out.\n", engine->name); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(engine->i915); + i915_request_put(old); + err = -EIO; + break; + } i915_request_put(old); } + + cond_resched(); } for (count = 0; count < ARRAY_SIZE(rq); count++) @@ -634,8 +632,9 @@ err_file: return err; } -static int __igt_reset_engine_others(struct drm_i915_private *i915, - bool active) +static int __igt_reset_engines(struct drm_i915_private *i915, + const char *test_name, + unsigned int flags) { struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; @@ -649,50 +648,68 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, if (!intel_has_reset_engine(i915)) return 0; - if (active) { + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); err = hang_init(&h, i915); mutex_unlock(&i915->drm.struct_mutex); if 
(err) return err; + + if (flags & TEST_PRIORITY) + h.ctx->sched.priority = 1024; } for_each_engine(engine, i915, id) { - struct task_struct *threads[I915_NUM_ENGINES] = {}; - unsigned long resets[I915_NUM_ENGINES]; + struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long global = i915_reset_count(&i915->gpu_error); - unsigned long count = 0; + unsigned long count = 0, reported; IGT_TIMEOUT(end_time); - if (active && !intel_engine_can_store_dword(engine)) + if (flags & TEST_ACTIVE && + !intel_engine_can_store_dword(engine)) continue; + if (!wait_for_idle(engine)) { + pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", + engine->name, test_name); + err = -EIO; + break; + } + memset(threads, 0, sizeof(threads)); for_each_engine(other, i915, tmp) { struct task_struct *tsk; - resets[tmp] = i915_reset_engine_count(&i915->gpu_error, - other); + threads[tmp].resets = + i915_reset_engine_count(&i915->gpu_error, + other); + + if (!(flags & TEST_OTHERS)) + continue; - if (other == engine) + if (other == engine && !(flags & TEST_SELF)) continue; - tsk = kthread_run(active_engine, other, + threads[tmp].engine = other; + threads[tmp].flags = flags; + + tsk = kthread_run(active_engine, &threads[tmp], "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); goto unwind; } - threads[tmp] = tsk; + threads[tmp].task = tsk; get_task_struct(tsk); } set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - if (active) { - struct i915_request *rq; + u32 seqno = intel_engine_get_seqno(engine); + struct i915_request *rq = NULL; + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { @@ -705,7 +722,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); - if (!wait_for_hang(&h, rq)) { + if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("%s: Failed to start request %x, at %x\n", @@ -718,33 +735,48 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, break; } - i915_request_put(rq); + GEM_BUG_ON(!rq->global_seqno); + seqno = rq->global_seqno - 1; } - engine->hangcheck.stalled = true; - engine->hangcheck.seqno = - intel_engine_get_seqno(engine); - - err = i915_reset_engine(engine, I915_RESET_QUIET); + err = i915_reset_engine(engine, NULL); if (err) { - pr_err("i915_reset_engine(%s:%s) failed, err=%d\n", - engine->name, active ? "active" : "idle", err); + pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", + engine->name, test_name, err); break; } - engine->hangcheck.stalled = false; count++; + + if (rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("i915_reset_engine(%s:%s):" + " failed to idle after reset\n", + engine->name, test_name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); pr_info("i915_reset_engine(%s:%s): %lu resets\n", - engine->name, active ? "active" : "idle", count); - - if (i915_reset_engine_count(&i915->gpu_error, engine) - - resets[engine->id] != (active ? count : 0)) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", - engine->name, active ? 
"active" : "idle", count, - i915_reset_engine_count(&i915->gpu_error, - engine) - resets[engine->id]); + engine->name, test_name, count); + + reported = i915_reset_engine_count(&i915->gpu_error, engine); + reported -= threads[engine->id].resets; + if (reported != (flags & TEST_ACTIVE ? count : 0)) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", + engine->name, test_name, count, reported, + (flags & TEST_ACTIVE ? count : 0)); if (!err) err = -EINVAL; } @@ -753,24 +785,26 @@ unwind: for_each_engine(other, i915, tmp) { int ret; - if (!threads[tmp]) + if (!threads[tmp].task) continue; - ret = kthread_stop(threads[tmp]); + ret = kthread_stop(threads[tmp].task); if (ret) { pr_err("kthread for other engine %s failed, err=%d\n", other->name, ret); if (!err) err = ret; } - put_task_struct(threads[tmp]); + put_task_struct(threads[tmp].task); - if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error, - other)) { + if (other != engine && + threads[tmp].resets != + i915_reset_engine_count(&i915->gpu_error, other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", other->name, i915_reset_engine_count(&i915->gpu_error, - other) - resets[tmp]); + other) - + threads[tmp].resets); if (!err) err = -EINVAL; } @@ -786,7 +820,7 @@ unwind: if (err) break; - err = flush_test(i915, 0); + err = igt_flush_test(i915, 0); if (err) break; } @@ -794,7 +828,7 @@ unwind: if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; - if (active) { + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); hang_fini(&h); mutex_unlock(&i915->drm.struct_mutex); @@ -803,27 +837,56 @@ unwind: return err; } -static int igt_reset_idle_engine_others(void *arg) +static int igt_reset_engines(void *arg) { - return __igt_reset_engine_others(arg, false); -} + static const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "idle", 0 }, + { "active", TEST_ACTIVE }, + { "others-idle", TEST_OTHERS }, + { "others-active", TEST_OTHERS | TEST_ACTIVE }, + { + "others-priority", + TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY + }, + { + "self-priority", + TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, + }, + { } + }; + struct drm_i915_private *i915 = arg; + typeof(*phases) *p; + int err; -static int igt_reset_active_engine_others(void *arg) -{ - return __igt_reset_engine_others(arg, true); + for (p = phases; p->name; p++) { + if (p->flags & TEST_PRIORITY) { + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + continue; + } + + err = __igt_reset_engines(arg, p->name, p->flags); + if (err) + return err; + } + + return 0; } -static u32 fake_hangcheck(struct i915_request *rq) +static u32 fake_hangcheck(struct i915_request *rq, u32 mask) { - u32 reset_count; + struct i915_gpu_error *error = &rq->i915->gpu_error; + u32 reset_count = i915_reset_count(error); - rq->engine->hangcheck.stalled = true; - rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); + error->stalled_mask = mask; - reset_count = i915_reset_count(&rq->i915->gpu_error); + /* set_bit() must be after we have setup the backchannel (mask) */ + smp_mb__before_atomic(); + set_bit(I915_RESET_HANDOFF, &error->flags); - set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags); - wake_up_all(&rq->i915->gpu_error.wait_queue); + wake_up_all(&error->wait_queue); return reset_count; } @@ -858,21 +921,20 @@ static int igt_wait_reset(void *arg) i915_request_get(rq); __i915_request_add(rq, true); - if (!wait_for_hang(&h, rq)) { + if (!wait_until_running(&h, rq)) { struct drm_printer p = 
drm_info_printer(i915->drm.dev); pr_err("%s: Failed to start request %x, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; goto out_rq; } - reset_count = fake_hangcheck(rq); + reset_count = fake_hangcheck(rq, ALL_ENGINES); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { @@ -903,6 +965,23 @@ unlock: return err; } +static int wait_for_others(struct drm_i915_private *i915, + struct intel_engine_cs *exclude) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (engine == exclude) + continue; + + if (!wait_for_idle(engine)) + return -EIO; + } + + return 0; +} + static int igt_reset_queue(void *arg) { struct drm_i915_private *i915 = arg; @@ -951,27 +1030,49 @@ static int igt_reset_queue(void *arg) i915_request_get(rq); __i915_request_add(rq, true); - if (!wait_for_hang(&h, prev)) { + /* + * XXX We don't handle resetting the kernel context + * very well. If we trigger a device reset twice in + * quick succession while the kernel context is + * executing, we may end up skipping the breadcrumb. + * This is really only a problem for the selftest as + * normally there is a large interlude between resets + * (hangcheck), or we focus on resetting just one + * engine and so avoid repeatedly resetting innocents. + */ + err = wait_for_others(i915, engine); + if (err) { + pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", + __func__, engine->name); + i915_request_put(rq); + i915_request_put(prev); + + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + goto fini; + } + + if (!wait_until_running(&h, prev)) { struct drm_printer p = drm_info_printer(i915->drm.dev); - pr_err("%s: Failed to start request %x, at %x\n", - __func__, prev->fence.seqno, hws_seqno(&h, prev)); - intel_engine_dump(prev->engine, &p, - "%s\n", prev->engine->name); + pr_err("%s(%s): Failed to start request %x, at %x\n", + __func__, engine->name, + prev->fence.seqno, hws_seqno(&h, prev)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); i915_request_put(rq); i915_request_put(prev); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; goto fini; } - reset_count = fake_hangcheck(prev); + reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - i915_reset(i915, I915_RESET_QUIET); + i915_reset(i915, ENGINE_MASK(id), NULL); GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); @@ -1013,7 +1114,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = flush_test(i915, I915_WAIT_LOCKED); + err = igt_flush_test(i915, I915_WAIT_LOCKED); if (err) break; } @@ -1044,7 +1145,7 @@ static int igt_handle_error(void *arg) if (!intel_has_reset_engine(i915)) return 0; - if (!intel_engine_can_store_dword(i915->engine[RCS])) + if (!engine || !intel_engine_can_store_dword(engine)) return 0; mutex_lock(&i915->drm.struct_mutex); @@ -1062,14 +1163,13 @@ static int igt_handle_error(void *arg) i915_request_get(rq); __i915_request_add(rq, true); - if (!wait_for_hang(&h, rq)) { + if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("%s: Failed to start request %x, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; @@ -1081,10 +1181,7 @@ static int igt_handle_error(void *arg) /* Temporarily disable error capture */ error = 
xchg(&i915->gpu_error.first_error, (void *)-1); - engine->hangcheck.stalled = true; - engine->hangcheck.seqno = intel_engine_get_seqno(engine); - - i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__); + i915_handle_error(i915, ENGINE_MASK(engine->id), 0, NULL); xchg(&i915->gpu_error.first_error, error); @@ -1112,8 +1209,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_hang_sanitycheck), SUBTEST(igt_reset_idle_engine), SUBTEST(igt_reset_active_engine), - SUBTEST(igt_reset_idle_engine_others), - SUBTEST(igt_reset_active_engine_others), + SUBTEST(igt_reset_engines), SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), @@ -1129,6 +1225,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) err = i915_subtests(tests, i915); + mutex_lock(&i915->drm.struct_mutex); + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(i915); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c new file mode 100644 index 000000000000..1b8a07125150 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -0,0 +1,459 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" +#include "igt_flush_test.h" + +#include "mock_context.h" + +struct spinner { + struct drm_i915_private *i915; + struct drm_i915_gem_object *hws; + struct drm_i915_gem_object *obj; + u32 *batch; + void *seqno; +}; + +static int spinner_init(struct spinner *spin, struct drm_i915_private *i915) +{ + unsigned int mode; + void *vaddr; + int err; + + GEM_BUG_ON(INTEL_GEN(i915) < 8); + + memset(spin, 0, sizeof(*spin)); + spin->i915 = i915; + + spin->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(spin->hws)) { + err = PTR_ERR(spin->hws); + goto err; + } + + spin->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(spin->obj)) { + err = PTR_ERR(spin->obj); + goto err_hws; + } + + i915_gem_object_set_cache_level(spin->hws, I915_CACHE_LLC); + vaddr = i915_gem_object_pin_map(spin->hws, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + spin->seqno = memset(vaddr, 0xff, PAGE_SIZE); + + mode = HAS_LLC(i915) ? 
I915_MAP_WB : I915_MAP_WC; + vaddr = i915_gem_object_pin_map(spin->obj, mode); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_unpin_hws; + } + spin->batch = vaddr; + + return 0; + +err_unpin_hws: + i915_gem_object_unpin_map(spin->hws); +err_obj: + i915_gem_object_put(spin->obj); +err_hws: + i915_gem_object_put(spin->hws); +err: + return err; +} + +static unsigned int seqno_offset(u64 fence) +{ + return offset_in_page(sizeof(u32) * fence); +} + +static u64 hws_address(const struct i915_vma *hws, + const struct i915_request *rq) +{ + return hws->node.start + seqno_offset(rq->fence.context); +} + +static int emit_recurse_batch(struct spinner *spin, + struct i915_request *rq, + u32 arbitration_command) +{ + struct i915_address_space *vm = &rq->ctx->ppgtt->base; + struct i915_vma *hws, *vma; + u32 *batch; + int err; + + vma = i915_vma_instance(spin->obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + hws = i915_vma_instance(spin->hws, vm, NULL); + if (IS_ERR(hws)) + return PTR_ERR(hws); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + err = i915_vma_pin(hws, 0, 0, PIN_USER); + if (err) + goto unpin_vma; + + i915_vma_move_to_active(vma, rq, 0); + if (!i915_gem_object_has_active_reference(vma->obj)) { + i915_gem_object_get(vma->obj); + i915_gem_object_set_active_reference(vma->obj); + } + + i915_vma_move_to_active(hws, rq, 0); + if (!i915_gem_object_has_active_reference(hws->obj)) { + i915_gem_object_get(hws->obj); + i915_gem_object_set_active_reference(hws->obj); + } + + batch = spin->batch; + + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + + *batch++ = arbitration_command; + + *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *batch++ = lower_32_bits(vma->node.start); + *batch++ = upper_32_bits(vma->node.start); + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + + i915_gem_chipset_flush(spin->i915); + + err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); + + i915_vma_unpin(hws); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static struct i915_request * +spinner_create_request(struct spinner *spin, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 arbitration_command) +{ + struct i915_request *rq; + int err; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return rq; + + err = emit_recurse_batch(spin, rq, arbitration_command); + if (err) { + __i915_request_add(rq, false); + return ERR_PTR(err); + } + + return rq; +} + +static u32 hws_seqno(const struct spinner *spin, const struct i915_request *rq) +{ + u32 *seqno = spin->seqno + seqno_offset(rq->fence.context); + + return READ_ONCE(*seqno); +} + +static void spinner_end(struct spinner *spin) +{ + *spin->batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(spin->i915); +} + +static void spinner_fini(struct spinner *spin) +{ + spinner_end(spin); + + i915_gem_object_unpin_map(spin->obj); + i915_gem_object_put(spin->obj); + + i915_gem_object_unpin_map(spin->hws); + i915_gem_object_put(spin->hws); +} + +static bool wait_for_spinner(struct spinner *spin, struct i915_request *rq) +{ + if (!wait_event_timeout(rq->execute, + READ_ONCE(rq->global_seqno), + msecs_to_jiffies(10))) + return false; + + return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(spin, rq), + rq->fence.seqno), + 1000)); +} + +static int live_sanitycheck(void *arg) +{ + struct 
drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + struct spinner spin; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + if (spinner_init(&spin, i915)) + goto err_unlock; + + ctx = kernel_context(i915); + if (!ctx) + goto err_spin; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = spinner_create_request(&spin, ctx, engine, MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin, rq)) { + GEM_TRACE("spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx; + } + + spinner_end(&spin); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx; + } + } + + err = 0; +err_ctx: + kernel_context_close(ctx); +err_spin: + spinner_fini(&spin); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + if (spinner_init(&spin_hi, i915)) + goto err_unlock; + + if (spinner_init(&spin_lo, i915)) + goto err_spin_hi; + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin_lo, rq)) { + GEM_TRACE("lo spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin_hi, rq)) { + GEM_TRACE("hi spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + spinner_end(&spin_hi); + spinner_end(&spin_lo); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + spinner_fini(&spin_lo); +err_spin_hi: + spinner_fini(&spin_hi); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_late_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = {}; + enum intel_engine_id id; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + if (spinner_init(&spin_hi, i915)) + goto err_unlock; + + if (spinner_init(&spin_lo, i915)) + goto err_spin_hi; + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_spin_lo; + + ctx_lo = 
kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin_lo, rq)) { + pr_err("First context failed to start\n"); + goto err_wedged; + } + + rq = spinner_create_request(&spin_hi, ctx_hi, engine, MI_NOOP); + if (IS_ERR(rq)) { + spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (wait_for_spinner(&spin_hi, rq)) { + pr_err("Second context overtook first?\n"); + goto err_wedged; + } + + attr.priority = I915_PRIORITY_MAX; + engine->schedule(rq, &attr); + + if (!wait_for_spinner(&spin_hi, rq)) { + pr_err("High priority context failed to preempt the low priority context\n"); + GEM_TRACE_DUMP(); + goto err_wedged; + } + + spinner_end(&spin_hi); + spinner_end(&spin_lo); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + spinner_fini(&spin_lo); +err_spin_hi: + spinner_fini(&spin_hi); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + spinner_end(&spin_hi); + spinner_end(&spin_lo); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; +} + +int intel_execlists_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_sanitycheck), + SUBTEST(live_preempt), + SUBTEST(live_late_preempt), + }; + + if (!HAS_EXECLISTS(i915)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c new file mode 100644 index 000000000000..17444a3abbb9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -0,0 +1,291 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +#include "mock_context.h" + +static struct drm_i915_gem_object * +read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *result; + struct i915_request *rq; + struct i915_vma *vma; + const u32 base = engine->mmio_base; + u32 srm, *cs; + int err; + int i; + + result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(result)) + return result; + + i915_gem_object_set_cache_level(result, I915_CACHE_LLC); + + cs = i915_gem_object_pin_map(result, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_obj; + } + memset(cs, 0xc5, PAGE_SIZE); + i915_gem_object_unpin_map(result); + + vma = i915_vma_instance(result, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_obj; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_pin; + } + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i)); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + 
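+	/*
+	 * Layout of each 4-dword SRM packet emitted by the loop above
+	 * (on gen8+ the opcode is bumped by one for the extra address
+	 * dword; on earlier gens the trailing zero is effectively an
+	 * MI_NOOP pad):
+	 *
+	 *   dw0: MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT
+	 *   dw1: i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i))
+	 *   dw2: i915_ggtt_offset(vma) + sizeof(u32) * i
+	 *   dw3: 0
+	 */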
intel_ring_advance(rq, cs); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + + i915_gem_object_get(result); + i915_gem_object_set_active_reference(result); + + __i915_request_add(rq, true); + i915_vma_unpin(vma); + + return result; + +err_req: + i915_request_add(rq); +err_pin: + i915_vma_unpin(vma); +err_obj: + i915_gem_object_put(result); + return ERR_PTR(err); +} + +static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i) +{ + return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid; +} + +static void print_results(const struct whitelist *w, const u32 *results) +{ + unsigned int i; + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(w, i); + u32 actual = results[i]; + + pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n", + i, expected, actual); + } +} + +static int check_whitelist(const struct whitelist *w, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *results; + u32 *vaddr; + int err; + int i; + + results = read_nonprivs(ctx, engine); + if (IS_ERR(results)) + return PTR_ERR(results); + + err = i915_gem_object_set_to_cpu_domain(results, false); + if (err) + goto out_put; + + vaddr = i915_gem_object_pin_map(results, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(w, i); + u32 actual = vaddr[i]; + + if (expected != actual) { + print_results(w, vaddr); + pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n", + i, expected, actual); + + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(results); +out_put: + i915_gem_object_put(results); + return err; +} + +static int do_device_reset(struct intel_engine_cs *engine) +{ + i915_reset(engine->i915, ENGINE_MASK(engine->id), NULL); + return 0; +} + +static int do_engine_reset(struct intel_engine_cs *engine) +{ + return i915_reset_engine(engine, NULL); +} + +static int switch_to_scratch_context(struct intel_engine_cs *engine) +{ + struct i915_gem_context *ctx; + struct i915_request *rq; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + rq = i915_request_alloc(engine, ctx); + kernel_context_close(ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + + return 0; +} + +static int check_whitelist_across_reset(struct intel_engine_cs *engine, + int (*reset)(struct intel_engine_cs *), + const struct whitelist *w, + const char *name) +{ + struct i915_gem_context *ctx; + int err; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Invalid whitelist *before* %s reset!\n", name); + goto out; + } + + err = switch_to_scratch_context(engine); + if (err) + goto out; + + err = reset(engine); + if (err) { + pr_err("%s reset failed\n", name); + goto out; + } + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Whitelist not preserved in context across %s reset!\n", + name); + goto out; + } + + kernel_context_close(ctx); + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Invalid whitelist *after* %s reset in fresh context!\n", + name); + goto out; + } + +out: + kernel_context_close(ctx); + return err; 
+} + +static int live_reset_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine = i915->engine[RCS]; + struct i915_gpu_error *error = &i915->gpu_error; + struct whitelist w; + int err = 0; + + /* If we reset the gpu, we should not lose the RING_NONPRIV */ + + if (!engine) + return 0; + + if (!whitelist_build(engine, &w)) + return 0; + + pr_info("Checking %d whitelisted registers (RING_NONPRIV)\n", w.count); + + set_bit(I915_RESET_BACKOFF, &error->flags); + set_bit(I915_RESET_ENGINE + engine->id, &error->flags); + + if (intel_has_reset_engine(i915)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, &w, + "engine"); + if (err) + goto out; + } + + if (intel_has_gpu_reset(i915)) { + err = check_whitelist_across_reset(engine, + do_device_reset, &w, + "device"); + if (err) + goto out; + } + +out: + clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); + clear_bit(I915_RESET_BACKOFF, &error->flags); + return err; +} + +int intel_workarounds_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_reset_whitelist), + }; + int err; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 78a89efa1119..26bf29d97007 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -25,6 +25,11 @@ #include "mock_engine.h" #include "mock_request.h" +struct mock_ring { + struct intel_ring base; + struct i915_timeline timeline; +}; + static struct mock_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -71,14 +76,21 @@ static struct intel_ring * mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_get(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!ce->pin_count++) + i915_gem_context_get(ctx); + return engine->buffer; } static void mock_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_put(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!--ce->pin_count) + i915_gem_context_put(ctx); } static int mock_request_alloc(struct i915_request *request) @@ -125,7 +137,7 @@ static void mock_submit_request(struct i915_request *request) static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; - struct intel_ring *ring; + struct mock_ring *ring; BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz); @@ -133,14 +145,25 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - ring->size = sz; - ring->effective_size = sz; - ring->vaddr = (void *)(ring + 1); + i915_timeline_init(engine->i915, &ring->timeline, engine->name); + + ring->base.size = sz; + ring->base.effective_size = sz; + ring->base.vaddr = (void *)(ring + 1); + ring->base.timeline = &ring->timeline; - INIT_LIST_HEAD(&ring->request_list); - intel_ring_update_space(ring); + INIT_LIST_HEAD(&ring->base.request_list); + intel_ring_update_space(&ring->base); - return ring; + return &ring->base; +} + +static void mock_ring_free(struct intel_ring *base) +{ + struct mock_ring *ring = container_of(base, typeof(*ring), base); + + i915_timeline_fini(&ring->timeline); + kfree(ring); } struct intel_engine_cs *mock_engine(struct 
drm_i915_private *i915, @@ -155,12 +178,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, if (!engine) return NULL; - engine->base.buffer = mock_ring(&engine->base); - if (!engine->base.buffer) { - kfree(engine); - return NULL; - } - /* minimal engine setup for requests */ engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); @@ -174,9 +191,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - engine->base.timeline = - &i915->gt.global_timeline.engine[engine->base.id]; - + i915_timeline_init(i915, &engine->base.timeline, engine->base.name); intel_engine_init_breadcrumbs(&engine->base); engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ @@ -185,7 +200,17 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) + goto err_breadcrumbs; + return &engine->base; + +err_breadcrumbs: + intel_engine_fini_breadcrumbs(&engine->base); + i915_timeline_fini(&engine->base.timeline); + kfree(engine); + return NULL; } void mock_engine_flush(struct intel_engine_cs *engine) @@ -217,10 +242,12 @@ void mock_engine_free(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); + + mock_ring_free(engine->buffer); intel_engine_fini_breadcrumbs(engine); + i915_timeline_fini(&engine->timeline); - kfree(engine->buffer); kfree(engine); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index e6d4b882599a..94baedfa0f74 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -44,6 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915) mock_engine_flush(engine); i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); } static void mock_device_release(struct drm_device *dev) @@ -72,8 +73,8 @@ static void mock_device_release(struct drm_device *dev) mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(i915); - i915_gem_timeline_fini(&i915->gt.global_timeline); mutex_unlock(&i915->drm.struct_mutex); + WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -223,26 +224,25 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - mutex_lock(&i915->drm.struct_mutex); INIT_LIST_HEAD(&i915->gt.timelines); - err = i915_gem_timeline_init__global(i915); - if (err) { - mutex_unlock(&i915->drm.struct_mutex); - goto err_priorities; - } + INIT_LIST_HEAD(&i915->gt.active_rings); + INIT_LIST_HEAD(&i915->gt.closed_vma); + + mutex_lock(&i915->drm.struct_mutex); mock_init_ggtt(i915); - mutex_unlock(&i915->drm.struct_mutex); mkwrite_device_info(i915)->ring_mask = BIT(0); i915->engine[RCS] = mock_engine(i915, "mock", RCS); if (!i915->engine[RCS]) - goto err_priorities; + goto err_unlock; i915->kernel_context = mock_context(i915, NULL); if (!i915->kernel_context) goto err_engine; + mutex_unlock(&i915->drm.struct_mutex); + WARN_ON(i915_gemfs_init(i915)); return i915; @@ -250,7 +250,8 @@ struct drm_i915_private *mock_gem_device(void) err_engine: for_each_engine(engine, i915, id) 
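		/* Tear down any mock engines created before the failure. */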
mock_engine_free(engine); -err_priorities: +err_unlock: + mutex_unlock(&i915->drm.struct_mutex); kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index e96873f96116..36c112088940 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -76,7 +76,6 @@ mock_ppgtt(struct drm_i915_private *i915, INIT_LIST_HEAD(&ppgtt->base.global_link); drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); - i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); ppgtt->base.clear_range = nop_clear_range; ppgtt->base.insert_page = mock_insert_page; diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index 47b1f47c5812..dcf3b16f5a07 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -1,45 +1,28 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
+ * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ +#include "../i915_timeline.h" + #include "mock_timeline.h" -struct intel_timeline *mock_timeline(u64 context) +void mock_timeline_init(struct i915_timeline *timeline, u64 context) { - static struct lock_class_key class; - struct intel_timeline *tl; + timeline->fence_context = context; + + spin_lock_init(&timeline->lock); - tl = kzalloc(sizeof(*tl), GFP_KERNEL); - if (!tl) - return NULL; + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); - __intel_timeline_init(tl, NULL, context, &class, "mock"); + i915_syncmap_init(&timeline->sync); - return tl; + INIT_LIST_HEAD(&timeline->link); } -void mock_timeline_destroy(struct intel_timeline *tl) +void mock_timeline_fini(struct i915_timeline *timeline) { - __intel_timeline_fini(tl); - kfree(tl); + i915_timeline_fini(timeline); } diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h index c27ff4639b8b..b6deaa61110d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.h +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h @@ -1,33 +1,15 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ #ifndef __MOCK_TIMELINE__ #define __MOCK_TIMELINE__ -#include "../i915_gem_timeline.h" +struct i915_timeline; -struct intel_timeline *mock_timeline(u64 context); -void mock_timeline_destroy(struct intel_timeline *tl); +void mock_timeline_init(struct i915_timeline *timeline, u64 context); +void mock_timeline_fini(struct i915_timeline *timeline); #endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 294de4549922..119ec0a21de2 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -11,6 +11,7 @@ config DRM_MEDIATEK select DRM_PANEL select MEMORY select MTK_SMI + select VIDEOMODE_HELPERS help Choose this option if you have a Mediatek SoCs. 
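	  VIDEOMODE_HELPERS is selected here because the DPI and DSI
	  encoders below now derive their timings via
	  drm_display_mode_to_videomode().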
The module will be called mediatek-drm diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index e80a603e5fb0..6c0ea39d5739 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <linux/clk.h> +#include <video/videomode.h> #include "mtk_dpi_regs.h" #include "mtk_drm_ddp_comp.h" @@ -429,34 +430,35 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, struct mtk_dpi_sync_param vsync_leven = { 0 }; struct mtk_dpi_sync_param vsync_rodd = { 0 }; struct mtk_dpi_sync_param vsync_reven = { 0 }; - unsigned long pix_rate; + struct videomode vm = { 0 }; unsigned long pll_rate; unsigned int factor; /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */ - pix_rate = 1000UL * mode->clock; + if (mode->clock <= 27000) - factor = 16 * 3; + factor = 3 << 4; else if (mode->clock <= 84000) - factor = 8 * 3; + factor = 3 << 3; else if (mode->clock <= 167000) - factor = 4 * 3; + factor = 3 << 2; else - factor = 2 * 3; - pll_rate = pix_rate * factor; + factor = 3 << 1; + drm_display_mode_to_videomode(mode, &vm); + pll_rate = vm.pixelclock * factor; dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, pix_rate); + pll_rate, vm.pixelclock); clk_set_rate(dpi->tvd_clk, pll_rate); pll_rate = clk_get_rate(dpi->tvd_clk); - pix_rate = pll_rate / factor; - clk_set_rate(dpi->pixel_clk, pix_rate); - pix_rate = clk_get_rate(dpi->pixel_clk); + vm.pixelclock = pll_rate / factor; + clk_set_rate(dpi->pixel_clk, vm.pixelclock); + vm.pixelclock = clk_get_rate(dpi->pixel_clk); dev_dbg(dpi->dev, "Got PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, pix_rate); + pll_rate, vm.pixelclock); limit.c_bottom = 0x0010; limit.c_top = 0x0FE0; @@ -465,33 +467,31 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, dpi_pol.ck_pol = MTK_DPI_POLARITY_FALLING; dpi_pol.de_pol = MTK_DPI_POLARITY_RISING; - dpi_pol.hsync_pol = mode->flags & DRM_MODE_FLAG_PHSYNC ? + dpi_pol.hsync_pol = vm.flags & DISPLAY_FLAGS_HSYNC_HIGH ? MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING; - dpi_pol.vsync_pol = mode->flags & DRM_MODE_FLAG_PVSYNC ? + dpi_pol.vsync_pol = vm.flags & DISPLAY_FLAGS_VSYNC_HIGH ? 
MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING; - - hsync.sync_width = mode->hsync_end - mode->hsync_start; - hsync.back_porch = mode->htotal - mode->hsync_end; - hsync.front_porch = mode->hsync_start - mode->hdisplay; + hsync.sync_width = vm.hsync_len; + hsync.back_porch = vm.hback_porch; + hsync.front_porch = vm.hfront_porch; hsync.shift_half_line = false; - - vsync_lodd.sync_width = mode->vsync_end - mode->vsync_start; - vsync_lodd.back_porch = mode->vtotal - mode->vsync_end; - vsync_lodd.front_porch = mode->vsync_start - mode->vdisplay; + vsync_lodd.sync_width = vm.vsync_len; + vsync_lodd.back_porch = vm.vback_porch; + vsync_lodd.front_porch = vm.vfront_porch; vsync_lodd.shift_half_line = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE && + if (vm.flags & DISPLAY_FLAGS_INTERLACED && mode->flags & DRM_MODE_FLAG_3D_MASK) { vsync_leven = vsync_lodd; vsync_rodd = vsync_lodd; vsync_reven = vsync_lodd; vsync_leven.shift_half_line = true; vsync_reven.shift_half_line = true; - } else if (mode->flags & DRM_MODE_FLAG_INTERLACE && + } else if (vm.flags & DISPLAY_FLAGS_INTERLACED && !(mode->flags & DRM_MODE_FLAG_3D_MASK)) { vsync_leven = vsync_lodd; vsync_leven.shift_half_line = true; - } else if (!(mode->flags & DRM_MODE_FLAG_INTERLACE) && + } else if (!(vm.flags & DISPLAY_FLAGS_INTERLACED) && mode->flags & DRM_MODE_FLAG_3D_MASK) { vsync_rodd = vsync_lodd; } @@ -505,12 +505,12 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, mtk_dpi_config_vsync_reven(dpi, &vsync_reven); mtk_dpi_config_3d(dpi, !!(mode->flags & DRM_MODE_FLAG_3D_MASK)); - mtk_dpi_config_interface(dpi, !!(mode->flags & - DRM_MODE_FLAG_INTERLACE)); - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay / 2); + mtk_dpi_config_interface(dpi, !!(vm.flags & + DISPLAY_FLAGS_INTERLACED)); + if (vm.flags & DISPLAY_FLAGS_INTERLACED) + mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive >> 1); else - mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay); + mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive); mtk_dpi_config_channel_limit(dpi, &limit); mtk_dpi_config_bit_num(dpi, dpi->bit_num); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index f595ac816b55..259b7b0de1d2 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -220,7 +220,7 @@ struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev, mtk_gem = mtk_drm_gem_init(dev, attach->dmabuf->size); if (IS_ERR(mtk_gem)) - return ERR_PTR(PTR_ERR(mtk_gem)); + return ERR_CAST(mtk_gem); expected = sg_dma_address(sg->sgl); for_each_sg(sg->sgl, s, sg->nents, i) { diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 7e5e24c2152a..aa0943ec32b0 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -551,13 +551,12 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) } /** - * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000 * htotal_time = htotal * byte_per_pixel / num_lanes * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit * mipi_ratio = (htotal_time + overhead_time) / htotal_time * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes; */ - pixel_clock = dsi->vm.pixelclock * 1000; + pixel_clock = dsi->vm.pixelclock; htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch + dsi->vm.hsync_len; htotal_bits = htotal * bit_per_pixel; @@ -725,16 +724,7 @@ static void 
mtk_dsi_encoder_mode_set(struct drm_encoder *encoder, { struct mtk_dsi *dsi = encoder_to_dsi(encoder); - dsi->vm.pixelclock = adjusted->clock; - dsi->vm.hactive = adjusted->hdisplay; - dsi->vm.hback_porch = adjusted->htotal - adjusted->hsync_end; - dsi->vm.hfront_porch = adjusted->hsync_start - adjusted->hdisplay; - dsi->vm.hsync_len = adjusted->hsync_end - adjusted->hsync_start; - - dsi->vm.vactive = adjusted->vdisplay; - dsi->vm.vback_porch = adjusted->vtotal - adjusted->vsync_end; - dsi->vm.vfront_porch = adjusted->vsync_start - adjusted->vdisplay; - dsi->vm.vsync_len = adjusted->vsync_end - adjusted->vsync_start; + drm_display_mode_to_videomode(adjusted, &dsi->vm); } static void mtk_dsi_encoder_disable(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index fb50a9ddaae8..8918539a19aa 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1586,7 +1586,7 @@ static uint32_t mga_vga_calculate_mode_bandwidth(struct drm_display_mode *mode, #define MODE_BANDWIDTH MODE_BAD -static int mga_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status mga_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 8e0cb161754b..0ae5ace65462 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -168,7 +168,6 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) if (gpu->funcs->debugfs_init) { gpu->funcs->debugfs_init(gpu, dev->primary); gpu->funcs->debugfs_init(gpu, dev->render); - gpu->funcs->debugfs_init(gpu, dev->control); } #endif diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index ba74cb4f94df..1ff3fda245d1 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -140,9 +140,6 @@ int msm_debugfs_late_init(struct drm_device *dev) if (ret) return ret; ret = late_init_minor(dev->render); - if (ret) - return ret; - ret = late_init_minor(dev->control); return ret; } diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index 5cae8db9dcd4..ffe5137ccaf8 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -99,7 +99,8 @@ static const struct drm_mode_config_funcs mxsfb_mode_config_funcs = { }; static void mxsfb_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); @@ -125,12 +126,6 @@ static void mxsfb_pipe_update(struct drm_simple_display_pipe *pipe, mxsfb_plane_atomic_update(mxsfb, plane_state); } -static int mxsfb_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state) -{ - return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); -} - static int mxsfb_pipe_enable_vblank(struct drm_simple_display_pipe *pipe) { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); @@ -159,7 +154,7 @@ static struct drm_simple_display_pipe_funcs mxsfb_funcs = { .enable = mxsfb_pipe_enable, .disable = mxsfb_pipe_disable, .update = mxsfb_pipe_update, - .prepare_fb = mxsfb_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, .enable_vblank = mxsfb_pipe_enable_vblank, 
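	/*
	 * .prepare_fb above now points at the generic helper, which is
	 * expected to do exactly what the removed wrapper did: call
	 * drm_gem_fb_prepare_fb() on the pipe's plane so any implicit
	 * fence on the GEM object is attached to the new plane state.
	 */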
.disable_vblank = mxsfb_pipe_disable_vblank, }; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 009713404cc4..7d0bec8dd03d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -338,11 +338,9 @@ static struct nouveau_drm_prop_enum_list dither_depth[] = { if (c) { \ p = drm_property_create(dev, DRM_MODE_PROP_ENUM, n, c); \ l = (list); \ - c = 0; \ while (p && l->gen_mask) { \ if (l->gen_mask & (1 << (gen))) { \ - drm_property_add_enum(p, c, l->type, l->name); \ - c++; \ + drm_property_add_enum(p, l->type, l->name); \ } \ l++; \ } \ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c index e4c8d310d870..81c3567d4e67 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c @@ -134,7 +134,7 @@ nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate, nvkm_volt_map(volt, volt->max2_id, clk->temp)); for (cstate = start; &cstate->head != &pstate->list; - cstate = list_entry(cstate->head.prev, typeof(*cstate), head)) { + cstate = list_prev_entry(cstate, head)) { if (nvkm_cstate_valid(clk, cstate, max_volt, clk->temp)) break; } diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 3632854c2b91..ef3b0e3571ec 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -319,6 +319,9 @@ static int omap_modeset_init(struct drm_device *dev) dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; + /* We want the zpos to be normalized */ + dev->mode_config.normalize_zpos = true; + dev->mode_config.funcs = &omap_mode_config_funcs; dev->mode_config.helper_private = &omap_mode_config_helper_funcs; diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 2899435cad6e..161233cbc9a0 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -65,7 +65,7 @@ static void omap_plane_atomic_update(struct drm_plane *plane, info.rotation_type = OMAP_DSS_ROT_NONE; info.rotation = DRM_MODE_ROTATE_0; info.global_alpha = 0xff; - info.zorder = state->zpos; + info.zorder = state->normalized_zpos; /* update scanout: */ omap_framebuffer_update_scanout(state->fb, state, &info); diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index d964d454e4ae..2c9c9722734f 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -238,12 +238,6 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts, static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) { -#if 0 - /* The firmware uses LP DSI transactions like this to bring up - * the hardware, which should be faster than using I2C to then - * pass to the Toshiba. However, I was unable to get it to - * work. 
- */ u8 msg[] = { reg, reg >> 8, @@ -253,13 +247,7 @@ static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) val >> 24, }; - mipi_dsi_dcs_write_buffer(ts->dsi, msg, sizeof(msg)); -#else - rpi_touchscreen_i2c_write(ts, REG_WR_ADDRH, reg >> 8); - rpi_touchscreen_i2c_write(ts, REG_WR_ADDRL, reg); - rpi_touchscreen_i2c_write(ts, REG_WRITEH, val >> 8); - rpi_touchscreen_i2c_write(ts, REG_WRITEL, val); -#endif + mipi_dsi_generic_write(ts->dsi, msg, sizeof(msg)); return 0; } diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile index 9c5e8dba8ac6..19a8189dc54f 100644 --- a/drivers/gpu/drm/pl111/Makefile +++ b/drivers/gpu/drm/pl111/Makefile @@ -3,6 +3,7 @@ pl111_drm-y += pl111_display.o \ pl111_versatile.o \ pl111_drv.o +pl111_drm-$(CONFIG_ARCH_VEXPRESS) += pl111_vexpress.o pl111_drm-$(CONFIG_DEBUG_FS) += pl111_debugfs.o obj-$(CONFIG_DRM_PL111) += pl111_drm.o diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c index 310646427907..19b0d006a54a 100644 --- a/drivers/gpu/drm/pl111/pl111_display.c +++ b/drivers/gpu/drm/pl111/pl111_display.c @@ -120,7 +120,8 @@ static int pl111_display_check(struct drm_simple_display_pipe *pipe, } static void pl111_display_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *cstate) + struct drm_crtc_state *cstate, + struct drm_plane_state *plane_state) { struct drm_crtc *crtc = &pipe->crtc; struct drm_plane *plane = &pipe->plane; @@ -376,19 +377,13 @@ static void pl111_display_disable_vblank(struct drm_simple_display_pipe *pipe) writel(0, priv->regs + priv->ienb); } -static int pl111_display_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state) -{ - return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); -} - static struct drm_simple_display_pipe_funcs pl111_display_funcs = { .mode_valid = pl111_mode_valid, .check = pl111_display_check, .enable = pl111_display_enable, .disable = pl111_display_disable, .update = pl111_display_update, - .prepare_fb = pl111_display_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static int pl111_clk_div_choose_div(struct clk_hw *hw, unsigned long rate, diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h index 8639b2d4ddf7..ce4501d0ab48 100644 --- a/drivers/gpu/drm/pl111/pl111_drm.h +++ b/drivers/gpu/drm/pl111/pl111_drm.h @@ -79,6 +79,7 @@ struct pl111_drm_dev_private { const struct pl111_variant_data *variant; void (*variant_display_enable) (struct drm_device *drm, u32 format); void (*variant_display_disable) (struct drm_device *drm); + bool use_device_memory; }; int pl111_display_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c index 4621259d5387..454ff0804642 100644 --- a/drivers/gpu/drm/pl111/pl111_drv.c +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -60,6 +60,7 @@ #include <linux/slab.h> #include <linux/of.h> #include <linux/of_graph.h> +#include <linux/of_reserved_mem.h> #include <drm/drmP.h> #include <drm/drm_atomic_helper.h> @@ -207,6 +208,24 @@ finish: return ret; } +static struct drm_gem_object * +pl111_gem_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct pl111_drm_dev_private *priv = dev->dev_private; + + /* + * When using device-specific reserved memory we can't import + * DMA buffers: those are passed by reference in any global + * memory and we can only handle a specific range of memory. 
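+	 *
+	 * Put differently: an imported dma-buf may sit anywhere in
+	 * system memory, while scanout here is restricted to the
+	 * dedicated reserved-memory region, so such buffers are refused
+	 * up front and only locally allocated buffers are used.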
+ */ + if (priv->use_device_memory) + return ERR_PTR(-EINVAL); + + return drm_gem_cma_prime_import_sg_table(dev, attach, sgt); +} + DEFINE_DRM_GEM_CMA_FOPS(drm_fops); static struct drm_driver pl111_drm_driver = { @@ -227,7 +246,7 @@ static struct drm_driver pl111_drm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_import_sg_table = pl111_gem_import_sg_table, .gem_prime_export = drm_gem_prime_export, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, @@ -257,6 +276,12 @@ static int pl111_amba_probe(struct amba_device *amba_dev, drm->dev_private = priv; priv->variant = variant; + ret = of_reserved_mem_device_init(dev); + if (!ret) { + dev_info(dev, "using device-specific reserved memory\n"); + priv->use_device_memory = true; + } + if (of_property_read_u32(dev->of_node, "max-memory-bandwidth", &priv->memory_bw)) { dev_info(dev, "no max memory bandwidth specified, assume unlimited\n"); @@ -275,7 +300,8 @@ static int pl111_amba_probe(struct amba_device *amba_dev, priv->regs = devm_ioremap_resource(dev, &amba_dev->res); if (IS_ERR(priv->regs)) { dev_err(dev, "%s failed mmio\n", __func__); - return PTR_ERR(priv->regs); + ret = PTR_ERR(priv->regs); + goto dev_unref; } /* This may override some variant settings */ @@ -305,11 +331,14 @@ static int pl111_amba_probe(struct amba_device *amba_dev, dev_unref: drm_dev_unref(drm); + of_reserved_mem_device_release(dev); + return ret; } static int pl111_amba_remove(struct amba_device *amba_dev) { + struct device *dev = &amba_dev->dev; struct drm_device *drm = amba_get_drvdata(amba_dev); struct pl111_drm_dev_private *priv = drm->dev_private; @@ -319,6 +348,7 @@ static int pl111_amba_remove(struct amba_device *amba_dev) drm_panel_bridge_remove(priv->bridge); drm_mode_config_cleanup(drm); drm_dev_unref(drm); + of_reserved_mem_device_release(dev); return 0; } diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c index 9302f516045e..b9baefdba38a 100644 --- a/drivers/gpu/drm/pl111/pl111_versatile.c +++ b/drivers/gpu/drm/pl111/pl111_versatile.c @@ -1,12 +1,14 @@ #include <linux/amba/clcd-regs.h> #include <linux/device.h> #include <linux/of.h> +#include <linux/of_platform.h> #include <linux/regmap.h> #include <linux/mfd/syscon.h> #include <linux/bitops.h> #include <linux/module.h> #include <drm/drmP.h> #include "pl111_versatile.h" +#include "pl111_vexpress.h" #include "pl111_drm.h" static struct regmap *versatile_syscon_map; @@ -22,6 +24,7 @@ enum versatile_clcd { REALVIEW_CLCD_PB11MP, REALVIEW_CLCD_PBA8, REALVIEW_CLCD_PBX, + VEXPRESS_CLCD_V2M, }; static const struct of_device_id versatile_clcd_of_match[] = { @@ -53,6 +56,10 @@ static const struct of_device_id versatile_clcd_of_match[] = { .compatible = "arm,realview-pbx-syscon", .data = (void *)REALVIEW_CLCD_PBX, }, + { + .compatible = "arm,vexpress-muxfpga", + .data = (void *)VEXPRESS_CLCD_V2M, + }, {}, }; @@ -286,12 +293,26 @@ static const struct pl111_variant_data pl111_realview = { .fb_bpp = 16, }; +/* + * Versatile Express PL111 variant, again we just push the maximum + * BPP to 16 to be able to get 1024x768 without saturating the memory + * bus. The clockdivider also seems broken on the Versatile Express. 
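+ *
+ * This variant is wired up in pl111_versatile_init() below, which also
+ * registers the muxfpga driver and calls pl111_vexpress_clcd_init() to
+ * route the DVI output.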
+ */ +static const struct pl111_variant_data pl111_vexpress = { + .name = "PL111 Versatile Express", + .formats = pl111_realview_pixel_formats, + .nformats = ARRAY_SIZE(pl111_realview_pixel_formats), + .fb_bpp = 16, + .broken_clockdivider = true, +}; + int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) { const struct of_device_id *clcd_id; enum versatile_clcd versatile_clcd_type; struct device_node *np; struct regmap *map; + int ret; np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match, &clcd_id); @@ -301,7 +322,33 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) } versatile_clcd_type = (enum versatile_clcd)clcd_id->data; - map = syscon_node_to_regmap(np); + /* Versatile Express special handling */ + if (versatile_clcd_type == VEXPRESS_CLCD_V2M) { + struct platform_device *pdev; + + /* Registers a driver for the muxfpga */ + ret = vexpress_muxfpga_init(); + if (ret) { + dev_err(dev, "unable to initialize muxfpga driver\n"); + return ret; + } + + /* Call into deep Vexpress configuration API */ + pdev = of_find_device_by_node(np); + if (!pdev) { + dev_err(dev, "can't find the sysreg device, deferring\n"); + return -EPROBE_DEFER; + } + map = dev_get_drvdata(&pdev->dev); + if (!map) { + dev_err(dev, "sysreg has not yet probed\n"); + platform_device_put(pdev); + return -EPROBE_DEFER; + } + } else { + map = syscon_node_to_regmap(np); + } + if (IS_ERR(map)) { dev_err(dev, "no Versatile syscon regmap\n"); return PTR_ERR(map); @@ -340,6 +387,13 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) priv->variant_display_disable = pl111_realview_clcd_disable; dev_info(dev, "set up callbacks for RealView PL111\n"); break; + case VEXPRESS_CLCD_V2M: + priv->variant = &pl111_vexpress; + dev_info(dev, "initializing Versatile Express PL111\n"); + ret = pl111_vexpress_clcd_init(dev, priv, map); + if (ret) + return ret; + break; default: dev_info(dev, "unknown Versatile system controller\n"); break; diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.c b/drivers/gpu/drm/pl111/pl111_vexpress.c new file mode 100644 index 000000000000..a534b225e31b --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_vexpress.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Versatile Express PL111 handling + * Copyright (C) 2018 Linus Walleij + * + * This module binds to the "arm,vexpress-muxfpga" device on the + * Versatile Express configuration bus and sets up which CLCD instance + * gets muxed out on the DVI bridge. + */ +#include <linux/device.h> +#include <linux/module.h> +#include <linux/regmap.h> +#include <linux/vexpress.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include "pl111_drm.h" +#include "pl111_vexpress.h" + +#define VEXPRESS_FPGAMUX_MOTHERBOARD 0x00 +#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_1 0x01 +#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_2 0x02 + +int pl111_vexpress_clcd_init(struct device *dev, + struct pl111_drm_dev_private *priv, + struct regmap *map) +{ + struct device_node *root; + struct device_node *child; + struct device_node *ct_clcd = NULL; + bool has_coretile_clcd = false; + bool has_coretile_hdlcd = false; + bool mux_motherboard = true; + u32 val; + int ret; + + /* + * Check if we have a CLCD or HDLCD on the core tile by checking if a + * CLCD or HDLCD is available in the root of the device tree. 
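+	 *
+	 * The checks below then decide the muxing: a core tile HDLCD
+	 * (when CONFIG_DRM_HDLCD is enabled) or a core tile CLCD takes
+	 * precedence over the motherboard CLCD, and the instance that
+	 * loses the mux bails out with -ENODEV.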
+ */
+	root = of_find_node_by_path("/");
+	if (!root)
+		return -EINVAL;
+
+	for_each_available_child_of_node(root, child) {
+		if (of_device_is_compatible(child, "arm,pl111")) {
+			has_coretile_clcd = true;
+			ct_clcd = child;
+			break;
+		}
+		if (of_device_is_compatible(child, "arm,hdlcd")) {
+			has_coretile_hdlcd = true;
+			break;
+		}
+	}
+
+	/*
+	 * If there is a coretile HDLCD and it has a driver,
+	 * do not mux the CLCD on the motherboard to the DVI.
+	 */
+	if (has_coretile_hdlcd && IS_ENABLED(CONFIG_DRM_HDLCD))
+		mux_motherboard = false;
+
+	/*
+	 * On the Vexpress CA9 we let the CLCD on the coretile
+	 * take precedence, so also in this case do not mux the
+	 * motherboard to the DVI.
+	 */
+	if (has_coretile_clcd)
+		mux_motherboard = false;
+
+	if (mux_motherboard) {
+		dev_info(dev, "DVI muxed to motherboard CLCD\n");
+		val = VEXPRESS_FPGAMUX_MOTHERBOARD;
+	} else if (ct_clcd == dev->of_node) {
+		dev_info(dev,
+			 "DVI muxed to daughterboard 1 (core tile) CLCD\n");
+		val = VEXPRESS_FPGAMUX_DAUGHTERBOARD_1;
+	} else {
+		dev_info(dev, "core tile graphics present\n");
+		dev_info(dev, "this device will be deactivated\n");
+		return -ENODEV;
+	}
+
+	ret = regmap_write(map, 0, val);
+	if (ret) {
+		dev_err(dev, "error setting DVI muxmode\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * This sets up the regmap pointer that will then be retrieved by
+ * the detection code in pl111_versatile.c and passed in to the
+ * pl111_vexpress_clcd_init() function above.
+ */
+static int vexpress_muxfpga_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct regmap *map;
+
+	map = devm_regmap_init_vexpress_config(&pdev->dev);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	dev_set_drvdata(dev, map);
+
+	return 0;
+}
+
+static const struct of_device_id vexpress_muxfpga_match[] = {
+	{ .compatible = "arm,vexpress-muxfpga", },
+	{ /* sentinel: of_match_table must be NULL-terminated */ }
+};
+
+static struct platform_driver vexpress_muxfpga_driver = {
+	.driver = {
+		.name = "vexpress-muxfpga",
+		.of_match_table = of_match_ptr(vexpress_muxfpga_match),
+	},
+	.probe = vexpress_muxfpga_probe,
+};
+
+int vexpress_muxfpga_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&vexpress_muxfpga_driver);
+	/* -EBUSY just means this driver is already registered */
+	if (ret == -EBUSY)
+		ret = 0;
+	return ret;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.h b/drivers/gpu/drm/pl111/pl111_vexpress.h
new file mode 100644
index 000000000000..5d3681bb4c00
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_vexpress.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+struct device;
+struct pl111_drm_dev_private;
+struct regmap;
+
+#ifdef CONFIG_ARCH_VEXPRESS
+
+int pl111_vexpress_clcd_init(struct device *dev,
+			     struct pl111_drm_dev_private *priv,
+			     struct regmap *map);
+
+int vexpress_muxfpga_init(void);
+
+#else
+
+static inline int pl111_vexpress_clcd_init(struct device *dev,
+					   struct pl111_drm_dev_private *priv,
+					   struct regmap *map)
+{
+	return -ENODEV;
+}
+
+static inline int vexpress_muxfpga_init(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 01665b98c57e..208af9f37914 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -339,12 +339,9 @@ int qxl_io_update_area(struct qxl_device *qdev, struct qxl_bo *surf,
 	surface_height = surf->surf.height;
 
 	if (area->left < 0 || area->top < 0 ||
-	    area->right > surface_width || area->bottom > surface_height) {
-		qxl_io_log(qdev, "%s: not doing area update for "
-			   "%d, (%d,%d,%d,%d) (%d,%d)\n", __func__, surface_id,
area->left, - area->top, area->right, area->bottom, surface_width, surface_height); + area->right > surface_width || area->bottom > surface_height) return -EINVAL; - } + mutex_lock(&qdev->update_area_mutex); qdev->ram_header->update_area = *area; qdev->ram_header->update_surface = surface_id; @@ -372,6 +369,7 @@ void qxl_io_flush_surfaces(struct qxl_device *qdev) void qxl_io_destroy_primary(struct qxl_device *qdev) { wait_for_io_cmd(qdev, 0, QXL_IO_DESTROY_PRIMARY_ASYNC); + qdev->primary_created = false; } void qxl_io_create_primary(struct qxl_device *qdev, @@ -397,6 +395,7 @@ void qxl_io_create_primary(struct qxl_device *qdev, create->type = QXL_SURF_TYPE_PRIMARY; wait_for_io_cmd(qdev, 0, QXL_IO_CREATE_PRIMARY_ASYNC); + qdev->primary_created = true; } void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id) @@ -405,20 +404,6 @@ void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id) wait_for_io_cmd(qdev, id, QXL_IO_MEMSLOT_ADD_ASYNC); } -void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...) -{ - va_list args; - - va_start(args, fmt); - vsnprintf(qdev->ram_header->log_buf, QXL_LOG_BUF_SIZE, fmt, args); - va_end(args); - /* - * DO not do a DRM output here - this will call printk, which will - * call back into qxl for rendering (qxl_fb) - */ - outb(0, qdev->io_base + QXL_IO_LOG); -} - void qxl_io_reset(struct qxl_device *qdev) { outb(0, qdev->io_base + QXL_IO_RESET); @@ -426,19 +411,6 @@ void qxl_io_reset(struct qxl_device *qdev) void qxl_io_monitors_config(struct qxl_device *qdev) { - qxl_io_log(qdev, "%s: %d [%dx%d+%d+%d]\n", __func__, - qdev->monitors_config ? - qdev->monitors_config->count : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].width : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].height : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].x : -1, - qdev->monitors_config && qdev->monitors_config->count ? 
- qdev->monitors_config->heads[0].y : -1 - ); - wait_for_io_cmd(qdev, 0, QXL_IO_MONITORS_CONFIG_ASYNC); } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index ecb35ed0eac8..b8cda9449241 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -48,12 +48,8 @@ static void qxl_alloc_client_monitors_config(struct qxl_device *qdev, unsigned c qdev->client_monitors_config = kzalloc( sizeof(struct qxl_monitors_config) + sizeof(struct qxl_head) * count, GFP_KERNEL); - if (!qdev->client_monitors_config) { - qxl_io_log(qdev, - "%s: allocation failure for %u heads\n", - __func__, count); + if (!qdev->client_monitors_config) return; - } } qdev->client_monitors_config->count = count; } @@ -74,12 +70,8 @@ static int qxl_display_copy_rom_client_monitors_config(struct qxl_device *qdev) num_monitors = qdev->rom->client_monitors_config.count; crc = crc32(0, (const uint8_t *)&qdev->rom->client_monitors_config, sizeof(qdev->rom->client_monitors_config)); - if (crc != qdev->rom->client_monitors_config_crc) { - qxl_io_log(qdev, "crc mismatch: have %X (%zd) != %X\n", crc, - sizeof(qdev->rom->client_monitors_config), - qdev->rom->client_monitors_config_crc); + if (crc != qdev->rom->client_monitors_config_crc) return MONITORS_CONFIG_BAD_CRC; - } if (!num_monitors) { DRM_DEBUG_KMS("no client monitors configured\n"); return status; @@ -170,12 +162,10 @@ void qxl_display_read_client_monitors_config(struct qxl_device *qdev) udelay(5); } if (status == MONITORS_CONFIG_BAD_CRC) { - qxl_io_log(qdev, "config: bad crc\n"); DRM_DEBUG_KMS("ignoring client monitors config: bad crc"); return; } if (status == MONITORS_CONFIG_UNCHANGED) { - qxl_io_log(qdev, "config: unchanged\n"); DRM_DEBUG_KMS("ignoring client monitors config: unchanged"); return; } @@ -268,6 +258,89 @@ static int qxl_add_common_modes(struct drm_connector *connector, return i - 1; } +static void qxl_send_monitors_config(struct qxl_device *qdev) +{ + int i; + + BUG_ON(!qdev->ram_header->monitors_config); + + if (qdev->monitors_config->count == 0) + return; + + for (i = 0 ; i < qdev->monitors_config->count ; ++i) { + struct qxl_head *head = &qdev->monitors_config->heads[i]; + + if (head->y > 8192 || head->x > 8192 || + head->width > 8192 || head->height > 8192) { + DRM_ERROR("head %d wrong: %dx%d+%d+%d\n", + i, head->width, head->height, + head->x, head->y); + return; + } + } + qxl_io_monitors_config(qdev); +} + +static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc, + const char *reason) +{ + struct drm_device *dev = crtc->dev; + struct qxl_device *qdev = dev->dev_private; + struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); + struct qxl_head head; + int oldcount, i = qcrtc->index; + + if (!qdev->primary_created) { + DRM_DEBUG_KMS("no primary surface, skip (%s)\n", reason); + return; + } + + if (!qdev->monitors_config || + qdev->monitors_config->max_allowed <= i) + return; + + head.id = i; + head.flags = 0; + oldcount = qdev->monitors_config->count; + if (crtc->state->active) { + struct drm_display_mode *mode = &crtc->mode; + head.width = mode->hdisplay; + head.height = mode->vdisplay; + head.x = crtc->x; + head.y = crtc->y; + if (qdev->monitors_config->count < i + 1) + qdev->monitors_config->count = i + 1; + } else if (i > 0) { + head.width = 0; + head.height = 0; + head.x = 0; + head.y = 0; + if (qdev->monitors_config->count == i + 1) + qdev->monitors_config->count = i; + } else { + DRM_DEBUG_KMS("inactive head 0, skip (%s)\n", reason); + return; + } + + if (head.width == 
qdev->monitors_config->heads[i].width && + head.height == qdev->monitors_config->heads[i].height && + head.x == qdev->monitors_config->heads[i].x && + head.y == qdev->monitors_config->heads[i].y && + oldcount == qdev->monitors_config->count) + return; + + DRM_DEBUG_KMS("head %d, %dx%d, at +%d+%d, %s (%s)\n", + i, head.width, head.height, head.x, head.y, + crtc->state->active ? "on" : "off", reason); + if (oldcount != qdev->monitors_config->count) + DRM_DEBUG_KMS("active heads %d -> %d (%d total)\n", + oldcount, qdev->monitors_config->count, + qdev->monitors_config->max_allowed); + + qdev->monitors_config->heads[i] = head; + qxl_send_monitors_config(qdev); +} + static void qxl_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { @@ -283,6 +356,8 @@ static void qxl_crtc_atomic_flush(struct drm_crtc *crtc, drm_crtc_send_vblank_event(crtc, event); spin_unlock_irqrestore(&dev->event_lock, flags); } + + qxl_crtc_update_monitors_config(crtc, "flush"); } static void qxl_crtc_destroy(struct drm_crtc *crtc) @@ -381,95 +456,19 @@ qxl_framebuffer_init(struct drm_device *dev, return 0; } -static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct drm_device *dev = crtc->dev; - struct qxl_device *qdev = dev->dev_private; - - qxl_io_log(qdev, "%s: (%d,%d) => (%d,%d)\n", - __func__, - mode->hdisplay, mode->vdisplay, - adjusted_mode->hdisplay, - adjusted_mode->vdisplay); - return true; -} - -static void -qxl_send_monitors_config(struct qxl_device *qdev) -{ - int i; - - BUG_ON(!qdev->ram_header->monitors_config); - - if (qdev->monitors_config->count == 0) { - qxl_io_log(qdev, "%s: 0 monitors??\n", __func__); - return; - } - for (i = 0 ; i < qdev->monitors_config->count ; ++i) { - struct qxl_head *head = &qdev->monitors_config->heads[i]; - - if (head->y > 8192 || head->x > 8192 || - head->width > 8192 || head->height > 8192) { - DRM_ERROR("head %d wrong: %dx%d+%d+%d\n", - i, head->width, head->height, - head->x, head->y); - return; - } - } - qxl_io_monitors_config(qdev); -} - -static void qxl_monitors_config_set(struct qxl_device *qdev, - int index, - unsigned x, unsigned y, - unsigned width, unsigned height, - unsigned surf_id) -{ - DRM_DEBUG_KMS("%d:%dx%d+%d+%d\n", index, width, height, x, y); - qdev->monitors_config->heads[index].x = x; - qdev->monitors_config->heads[index].y = y; - qdev->monitors_config->heads[index].width = width; - qdev->monitors_config->heads[index].height = height; - qdev->monitors_config->heads[index].surface_id = surf_id; - -} - -static void qxl_mode_set_nofb(struct drm_crtc *crtc) -{ - struct qxl_device *qdev = crtc->dev->dev_private; - struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); - struct drm_display_mode *mode = &crtc->mode; - - DRM_DEBUG("Mode set (%d,%d)\n", - mode->hdisplay, mode->vdisplay); - - qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, - mode->hdisplay, mode->vdisplay, 0); - -} - static void qxl_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { - DRM_DEBUG("\n"); + qxl_crtc_update_monitors_config(crtc, "enable"); } static void qxl_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { - struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); - struct qxl_device *qdev = crtc->dev->dev_private; - - qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, 0, 0, 0); - - qxl_send_monitors_config(qdev); + qxl_crtc_update_monitors_config(crtc, "disable"); } static const struct drm_crtc_helper_funcs qxl_crtc_helper_funcs = { - 
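	/*
	 * The monitors config is now kept up to date from the atomic
	 * flush/enable/disable hooks below, which all funnel into
	 * qxl_crtc_update_monitors_config(), so no mode_fixup or
	 * mode_set_nofb hooks are needed any more.
	 */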
.mode_fixup = qxl_crtc_mode_fixup, - .mode_set_nofb = qxl_mode_set_nofb, .atomic_flush = qxl_crtc_atomic_flush, .atomic_enable = qxl_crtc_atomic_enable, .atomic_disable = qxl_crtc_atomic_disable, @@ -613,12 +612,6 @@ static void qxl_primary_atomic_disable(struct drm_plane *plane, } } -static int qxl_plane_atomic_check(struct drm_plane *plane, - struct drm_plane_state *state) -{ - return 0; -} - static void qxl_cursor_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { @@ -824,7 +817,6 @@ static const uint32_t qxl_cursor_plane_formats[] = { }; static const struct drm_plane_helper_funcs qxl_cursor_helper_funcs = { - .atomic_check = qxl_plane_atomic_check, .atomic_update = qxl_cursor_atomic_update, .atomic_disable = qxl_cursor_atomic_disable, .prepare_fb = qxl_plane_prepare_fb, @@ -949,81 +941,6 @@ free_mem: return r; } -static void qxl_enc_dpms(struct drm_encoder *encoder, int mode) -{ - DRM_DEBUG("\n"); -} - -static void qxl_enc_prepare(struct drm_encoder *encoder) -{ - DRM_DEBUG("\n"); -} - -static void qxl_write_monitors_config_for_encoder(struct qxl_device *qdev, - struct drm_encoder *encoder) -{ - int i; - struct qxl_output *output = drm_encoder_to_qxl_output(encoder); - struct qxl_head *head; - struct drm_display_mode *mode; - - BUG_ON(!encoder); - /* TODO: ugly, do better */ - i = output->index; - if (!qdev->monitors_config || - qdev->monitors_config->max_allowed <= i) { - DRM_ERROR( - "head number too large or missing monitors config: %p, %d", - qdev->monitors_config, - qdev->monitors_config ? - qdev->monitors_config->max_allowed : -1); - return; - } - if (!encoder->crtc) { - DRM_ERROR("missing crtc on encoder %p\n", encoder); - return; - } - if (i != 0) - DRM_DEBUG("missing for multiple monitors: no head holes\n"); - head = &qdev->monitors_config->heads[i]; - head->id = i; - if (encoder->crtc->enabled) { - mode = &encoder->crtc->mode; - head->width = mode->hdisplay; - head->height = mode->vdisplay; - head->x = encoder->crtc->x; - head->y = encoder->crtc->y; - if (qdev->monitors_config->count < i + 1) - qdev->monitors_config->count = i + 1; - } else { - head->width = 0; - head->height = 0; - head->x = 0; - head->y = 0; - } - DRM_DEBUG_KMS("setting head %d to +%d+%d %dx%d out of %d\n", - i, head->x, head->y, head->width, head->height, qdev->monitors_config->count); - head->flags = 0; - /* TODO - somewhere else to call this for multiple monitors - * (config_commit?) 
*/ - qxl_send_monitors_config(qdev); -} - -static void qxl_enc_commit(struct drm_encoder *encoder) -{ - struct qxl_device *qdev = encoder->dev->dev_private; - - qxl_write_monitors_config_for_encoder(qdev, encoder); - DRM_DEBUG("\n"); -} - -static void qxl_enc_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - DRM_DEBUG("\n"); -} - static int qxl_conn_get_modes(struct drm_connector *connector) { unsigned pwidth = 1024; @@ -1037,7 +954,7 @@ static int qxl_conn_get_modes(struct drm_connector *connector) return ret; } -static int qxl_conn_mode_valid(struct drm_connector *connector, +static enum drm_mode_status qxl_conn_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *ddev = connector->dev; @@ -1069,10 +986,6 @@ static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector) static const struct drm_encoder_helper_funcs qxl_enc_helper_funcs = { - .dpms = qxl_enc_dpms, - .prepare = qxl_enc_prepare, - .mode_set = qxl_enc_mode_set, - .commit = qxl_enc_commit, }; static const struct drm_connector_helper_funcs qxl_connector_helper_funcs = { @@ -1100,21 +1013,11 @@ static enum drm_connector_status qxl_conn_detect( qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]); DRM_DEBUG("#%d connected: %d\n", output->index, connected); - if (!connected) - qxl_monitors_config_set(qdev, output->index, 0, 0, 0, 0, 0); return connected ? connector_status_connected : connector_status_disconnected; } -static int qxl_conn_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t value) -{ - DRM_DEBUG("\n"); - return 0; -} - static void qxl_conn_destroy(struct drm_connector *connector) { struct qxl_output *qxl_output = @@ -1129,7 +1032,6 @@ static const struct drm_connector_funcs qxl_connector_funcs = { .dpms = drm_helper_connector_dpms, .detect = qxl_conn_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = qxl_conn_set_property, .destroy = qxl_conn_destroy, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 864b456080c4..01220d386b0a 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -299,9 +299,6 @@ struct qxl_device { int monitors_config_height; }; -/* forward declaration for QXL_INFO_IO */ -__printf(2,3) void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...); - extern const struct drm_ioctl_desc qxl_ioctls[]; extern int qxl_max_ioctl; diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 338891401f35..9a6752606079 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -185,8 +185,6 @@ static int qxlfb_framebuffer_dirty(struct drm_framebuffer *fb, /* * we are using a shadow draw buffer, at qdev->surface0_shadow */ - qxl_io_log(qdev, "dirty x[%d, %d], y[%d, %d]\n", clips->x1, clips->x2, - clips->y1, clips->y2); image->dx = clips->x1; image->dy = clips->y1; image->width = clips->x2 - clips->x1; diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c index 23a40106ab53..3bb31add6350 100644 --- a/drivers/gpu/drm/qxl/qxl_irq.c +++ b/drivers/gpu/drm/qxl/qxl_irq.c @@ -57,10 +57,9 @@ irqreturn_t qxl_irq_handler(int irq, void *arg) * to avoid endless loops). 
*/ qdev->irq_received_error++; - qxl_io_log(qdev, "%s: driver is in bug mode.\n", __func__); + DRM_WARN("driver is in bug mode\n"); } if (pending & QXL_INTERRUPT_CLIENT_MONITORS_CONFIG) { - qxl_io_log(qdev, "QXL_INTERRUPT_CLIENT_MONITORS_CONFIG\n"); schedule_work(&qdev->client_monitors_config_work); } qdev->ram_header->int_mask = QXL_INTERRUPT_MASK; diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index ee2340e31f06..86a1fb32f6db 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -105,16 +105,16 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev) static struct vm_operations_struct qxl_ttm_vm_ops; static const struct vm_operations_struct *ttm_vm_ops; -static int qxl_ttm_fault(struct vm_fault *vmf) +static vm_fault_t qxl_ttm_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo; - int r; + vm_fault_t ret; bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data; if (bo == NULL) return VM_FAULT_NOPAGE; - r = ttm_vm_ops->fault(vmf); - return r; + ret = ttm_vm_ops->fault(vmf); + return ret; } int qxl_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 02baaaf20e9d..efbd5816082d 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1145,7 +1145,6 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; - struct radeon_framebuffer *radeon_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; @@ -1164,19 +1163,15 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - radeon_fb = to_radeon_framebuffer(fb); + if (atomic) target_fb = fb; - } - else { - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = radeon_fb->obj; + obj = target_fb->obj[0]; rbo = gem_to_radeon_bo(obj); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -1441,8 +1436,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - radeon_fb = to_radeon_framebuffer(fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; @@ -1463,7 +1457,6 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; - struct radeon_framebuffer *radeon_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; struct drm_framebuffer *target_fb; @@ -1481,16 +1474,12 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - radeon_fb = to_radeon_framebuffer(fb); + if (atomic) target_fb = fb; - } - else { - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } - obj = radeon_fb->obj; + obj = target_fb->obj[0]; rbo = gem_to_radeon_bo(obj); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -1641,8 +1630,7 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3); if (!atomic && fb && fb != 
crtc->primary->fb) { - radeon_fb = to_radeon_framebuffer(fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; @@ -2149,11 +2137,9 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct radeon_framebuffer *radeon_fb; struct radeon_bo *rbo; - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(crtc->primary->fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r)) DRM_ERROR("failed to reserve rbo before unpin\n"); diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 40be4068ca69..fa5fadaa9bbb 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -526,7 +526,7 @@ static int radeon_atpx_init(void) * look up whether we are the integrated or discrete GPU (all asics). * Returns the client id. */ -static int radeon_atpx_get_client_id(struct pci_dev *pdev) +static enum vga_switcheroo_client_id radeon_atpx_get_client_id(struct pci_dev *pdev) { if (radeon_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev)) return VGA_SWITCHEROO_IGD; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index df9469a8fdb1..2aea2bdff99b 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -852,7 +852,7 @@ static int radeon_lvds_get_modes(struct drm_connector *connector) return ret; } -static int radeon_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -1012,7 +1012,7 @@ static int radeon_vga_get_modes(struct drm_connector *connector) return ret; } -static int radeon_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1156,7 +1156,7 @@ static int radeon_tv_get_modes(struct drm_connector *connector) return 1; } -static int radeon_tv_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if ((mode->hdisplay > 1024) || (mode->vdisplay > 768)) @@ -1498,7 +1498,7 @@ static void radeon_dvi_force(struct drm_connector *connector) radeon_connector->use_digital = true; } -static int radeon_dvi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_dvi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1800,7 +1800,7 @@ out: return ret; } -static int radeon_dp_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 48d0e6bd0508..59c8a6647ff2 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1591,7 +1591,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, /* unpin the front buffers and cursors */ list_for_each_entry(crtc, 
&dev->mode_config.crtc_list, head) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); + struct drm_framebuffer *fb = crtc->primary->fb; struct radeon_bo *robj; if (radeon_crtc->cursor_bo) { @@ -1603,10 +1603,10 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, } } - if (rfb == NULL || rfb->obj == NULL) { + if (fb == NULL || fb->obj[0] == NULL) { continue; } - robj = gem_to_radeon_bo(rfb->obj); + robj = gem_to_radeon_bo(fb->obj[0]); /* don't unpin kernel fb objects */ if (!radeon_fbdev_robj_is_fb(rdev, robj)) { r = radeon_bo_reserve(robj, false); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 26129b2b082d..9d3ac8b981da 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -32,6 +32,7 @@ #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_plane_helper.h> #include <drm/drm_edid.h> @@ -478,8 +479,6 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_framebuffer *old_radeon_fb; - struct radeon_framebuffer *new_radeon_fb; struct drm_gem_object *obj; struct radeon_flip_work *work; struct radeon_bo *new_rbo; @@ -501,15 +500,13 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; /* schedule unpin of the old buffer */ - old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb); - obj = old_radeon_fb->obj; + obj = crtc->primary->fb->obj[0]; /* take a reference to the old object */ drm_gem_object_get(obj); work->old_rbo = gem_to_radeon_bo(obj); - new_radeon_fb = to_radeon_framebuffer(fb); - obj = new_radeon_fb->obj; + obj = fb->obj[0]; new_rbo = gem_to_radeon_bo(obj); /* pin the new buffer */ @@ -1285,41 +1282,23 @@ void radeon_compute_pll_legacy(struct radeon_pll *pll, } -static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb) -{ - struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb); - - drm_gem_object_put_unlocked(radeon_fb->obj); - drm_framebuffer_cleanup(fb); - kfree(radeon_fb); -} - -static int radeon_user_framebuffer_create_handle(struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) -{ - struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb); - - return drm_gem_handle_create(file_priv, radeon_fb->obj, handle); -} - static const struct drm_framebuffer_funcs radeon_fb_funcs = { - .destroy = radeon_user_framebuffer_destroy, - .create_handle = radeon_user_framebuffer_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; int radeon_framebuffer_init(struct drm_device *dev, - struct radeon_framebuffer *rfb, + struct drm_framebuffer *fb, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; - rfb->obj = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - ret = drm_framebuffer_init(dev, &rfb->base, &radeon_fb_funcs); + fb->obj[0] = obj; + drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + ret = drm_framebuffer_init(dev, fb, &radeon_fb_funcs); if (ret) { - rfb->obj = NULL; + fb->obj[0] = NULL; return ret; } return 0; @@ -1331,7 +1310,7 @@ radeon_user_framebuffer_create(struct drm_device *dev, const struct 
drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj; - struct radeon_framebuffer *radeon_fb; + struct drm_framebuffer *fb; int ret; obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); @@ -1347,20 +1326,20 @@ radeon_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-EINVAL); } - radeon_fb = kzalloc(sizeof(*radeon_fb), GFP_KERNEL); - if (radeon_fb == NULL) { + fb = kzalloc(sizeof(*fb), GFP_KERNEL); + if (fb == NULL) { drm_gem_object_put_unlocked(obj); return ERR_PTR(-ENOMEM); } - ret = radeon_framebuffer_init(dev, radeon_fb, mode_cmd, obj); + ret = radeon_framebuffer_init(dev, fb, mode_cmd, obj); if (ret) { - kfree(radeon_fb); + kfree(fb); drm_gem_object_put_unlocked(obj); return ERR_PTR(ret); } - return &radeon_fb->base; + return fb; } static const struct drm_mode_config_funcs radeon_mode_funcs = { diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index b28288a781ef..2a7977a23b31 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -168,7 +168,12 @@ int radeon_no_wb; int radeon_modeset = -1; int radeon_dynclks = -1; int radeon_r4xx_atom = 0; +#ifdef __powerpc__ +/* Default to PCI on PowerPC (fdo #95017) */ +int radeon_agpmode = -1; +#else int radeon_agpmode = 0; +#endif int radeon_vram_limit = 0; int radeon_gart_size = -1; /* auto */ int radeon_benchmarking = 0; diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 57c5404a1654..1179034024ae 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -43,7 +43,7 @@ */ struct radeon_fbdev { struct drm_fb_helper helper; - struct radeon_framebuffer rfb; + struct drm_framebuffer fb; struct radeon_device *rdev; }; @@ -246,13 +246,13 @@ static int radeonfb_create(struct drm_fb_helper *helper, info->par = rfbdev; - ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->fb, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto out; } - fb = &rfbdev->rfb.base; + fb = &rfbdev->fb; /* setup helper */ rfbdev->helper.fb = fb; @@ -308,15 +308,15 @@ out: static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfbdev) { - struct radeon_framebuffer *rfb = &rfbdev->rfb; + struct drm_framebuffer *fb = &rfbdev->fb; drm_fb_helper_unregister_fbi(&rfbdev->helper); - if (rfb->obj) { - radeonfb_destroy_pinned_object(rfb->obj); - rfb->obj = NULL; - drm_framebuffer_unregister_private(&rfb->base); - drm_framebuffer_cleanup(&rfb->base); + if (fb->obj[0]) { + radeonfb_destroy_pinned_object(fb->obj[0]); + fb->obj[0] = NULL; + drm_framebuffer_unregister_private(fb); + drm_framebuffer_cleanup(fb); } drm_fb_helper_fini(&rfbdev->helper); @@ -400,7 +400,7 @@ bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj) if (!rdev->mode_info.rfbdev) return false; - if (robj == gem_to_radeon_bo(rdev->mode_info.rfbdev->rfb.obj)) + if (robj == gem_to_radeon_bo(rdev->mode_info.rfbdev->fb.obj[0])) return true; return false; } diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 1f1856e0b1e0..35a205ae4318 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -374,7 +374,6 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_crtc *radeon_crtc = 
to_radeon_crtc(crtc); - struct radeon_framebuffer *radeon_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; @@ -393,14 +392,10 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - radeon_fb = to_radeon_framebuffer(fb); + if (atomic) target_fb = fb; - } - else { - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } switch (target_fb->format->cpp[0] * 8) { case 8: @@ -423,7 +418,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, } /* Pin framebuffer & get tilling informations */ - obj = radeon_fb->obj; + obj = target_fb->obj[0]; rbo = gem_to_radeon_bo(obj); retry: r = radeon_bo_reserve(rbo, false); @@ -451,7 +446,7 @@ retry: struct radeon_bo *old_rbo; unsigned long nsize, osize; - old_rbo = gem_to_radeon_bo(to_radeon_framebuffer(fb)->obj); + old_rbo = gem_to_radeon_bo(fb->obj[0]); osize = radeon_bo_size(old_rbo); nsize = radeon_bo_size(rbo); if (nsize <= osize && !radeon_bo_reserve(old_rbo, false)) { @@ -558,8 +553,7 @@ retry: WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch); if (!atomic && fb && fb != crtc->primary->fb) { - radeon_fb = to_radeon_framebuffer(fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; @@ -1093,11 +1087,9 @@ static void radeon_crtc_disable(struct drm_crtc *crtc) radeon_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct radeon_framebuffer *radeon_fb; struct radeon_bo *rbo; - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(crtc->primary->fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r)) DRM_ERROR("failed to reserve rbo before unpin\n"); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 3243e5e01432..fd470d6bf3f4 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -46,7 +46,6 @@ struct radeon_device; #define to_radeon_crtc(x) container_of(x, struct radeon_crtc, base) #define to_radeon_connector(x) container_of(x, struct radeon_connector, base) #define to_radeon_encoder(x) container_of(x, struct radeon_encoder, base) -#define to_radeon_framebuffer(x) container_of(x, struct radeon_framebuffer, base) #define RADEON_MAX_HPD_PINS 7 #define RADEON_MAX_CRTCS 6 @@ -574,11 +573,6 @@ struct radeon_connector { int enabled_attribs; }; -struct radeon_framebuffer { - struct drm_framebuffer base; - struct drm_gem_object *obj; -}; - #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ ((em) == ATOM_ENCODER_MODE_DP_MST)) @@ -932,7 +926,7 @@ radeon_combios_encoder_crtc_scratch_regs(struct drm_encoder *encoder, int crtc); extern void radeon_combios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on); int radeon_framebuffer_init(struct drm_device *dev, - struct radeon_framebuffer *rfb, + struct drm_framebuffer *rfb, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index d71d709fe3d9..15dc9caa128b 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -909,7 +909,8 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) * Initialization */ -int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) +int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int 
swindex,
+			unsigned int hwindex)
 {
 	static const unsigned int mmio_offsets[] = {
 		DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET, DU3_REG_OFFSET
@@ -917,7 +918,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
 	struct rcar_du_device *rcdu = rgrp->dev;
 	struct platform_device *pdev = to_platform_device(rcdu->dev);
-	struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index];
+	struct rcar_du_crtc *rcrtc = &rcdu->crtcs[swindex];
 	struct drm_crtc *crtc = &rcrtc->crtc;
 	struct drm_plane *primary;
 	unsigned int irqflags;
@@ -929,7 +930,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
 	/* Get the CRTC clock and the optional external clock. */
 	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
-		sprintf(clk_name, "du.%u", index);
+		sprintf(clk_name, "du.%u", hwindex);
 		name = clk_name;
 	} else {
 		name = NULL;
@@ -937,16 +938,16 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
 	rcrtc->clock = devm_clk_get(rcdu->dev, name);
 	if (IS_ERR(rcrtc->clock)) {
-		dev_err(rcdu->dev, "no clock for CRTC %u\n", index);
+		dev_err(rcdu->dev, "no clock for DU channel %u\n", hwindex);
 		return PTR_ERR(rcrtc->clock);
 	}
 
-	sprintf(clk_name, "dclkin.%u", index);
+	sprintf(clk_name, "dclkin.%u", hwindex);
 	clk = devm_clk_get(rcdu->dev, clk_name);
 	if (!IS_ERR(clk)) {
 		rcrtc->extclock = clk;
 	} else if (PTR_ERR(clk) == -EPROBE_DEFER) {
-		dev_info(rcdu->dev, "can't get external clock %u\n", index);
+		dev_info(rcdu->dev, "can't get external clock %u\n", hwindex);
 		return -EPROBE_DEFER;
 	}
 
@@ -955,13 +956,13 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
 	spin_lock_init(&rcrtc->vblank_lock);
 
 	rcrtc->group = rgrp;
-	rcrtc->mmio_offset = mmio_offsets[index];
-	rcrtc->index = index;
+	rcrtc->mmio_offset = mmio_offsets[hwindex];
+	rcrtc->index = hwindex;
 
 	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE))
 		primary = &rcrtc->vsp->planes[rcrtc->vsp_pipe].plane;
 	else
-		primary = &rgrp->planes[index % 2].plane;
+		primary = &rgrp->planes[swindex % 2].plane;
 
 	ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, primary, NULL,
 					rcdu->info->gen <= 2 ?
@@ -977,7 +978,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
 	/* Register the interrupt handler. */
 	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
-		irq = platform_get_irq(pdev, index);
+		/* The IRQs are associated with the CRTC (sw)index.
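+		 * The clocks ("du.%u" and "dclkin.%u" above), by contrast,
+		 * are looked up by the hardware channel index (hwindex).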
*/ + irq = platform_get_irq(pdev, swindex); irqflags = 0; } else { irq = platform_get_irq(pdev, 0); @@ -985,7 +987,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) } if (irq < 0) { - dev_err(rcdu->dev, "no IRQ for CRTC %u\n", index); + dev_err(rcdu->dev, "no IRQ for CRTC %u\n", swindex); return irq; } @@ -993,7 +995,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) dev_name(rcdu->dev), rcrtc); if (ret < 0) { dev_err(rcdu->dev, - "failed to register IRQ for CRTC %u\n", index); + "failed to register IRQ for CRTC %u\n", swindex); return ret; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index 07a718232309..7680cb2636c8 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -95,7 +95,8 @@ enum rcar_du_output { RCAR_DU_OUTPUT_MAX, }; -int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index); +int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex, + unsigned int hwindex); void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc); void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index 3917d839c04c..02aee6cb0e53 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -40,7 +40,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7743_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7743 has one RGB output and one LVDS output @@ -61,7 +61,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7745 has two RGB outputs @@ -80,7 +80,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = { static const struct rcar_du_device_info rcar_du_r8a7779_info = { .gen = 2, .features = 0, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7779 has two RGB outputs and one (currently unsupported) @@ -102,7 +102,7 @@ static const struct rcar_du_device_info rcar_du_r8a7790_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, .quirks = RCAR_DU_QUIRK_ALIGN_128B, - .num_crtcs = 3, + .channels_mask = BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7790 has one RGB output, two LVDS outputs and one @@ -129,7 +129,7 @@ static const struct rcar_du_device_info rcar_du_r8a7791_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A779[13] has one RGB output, one LVDS output and one @@ -151,7 +151,7 @@ static const struct rcar_du_device_info rcar_du_r8a7792_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* R8A7792 has two RGB outputs. 
*/ [RCAR_DU_OUTPUT_DPAD0] = { @@ -169,7 +169,7 @@ static const struct rcar_du_device_info rcar_du_r8a7794_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7794 has two RGB outputs and one (currently unsupported) @@ -191,7 +191,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 4, + .channels_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7795 has one RGB output, two HDMI outputs and one @@ -215,7 +215,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = { }, }, .num_lvds = 1, - .dpll_ch = BIT(1) | BIT(2), + .dpll_ch = BIT(2) | BIT(1), }; static const struct rcar_du_device_info rcar_du_r8a7796_info = { @@ -223,7 +223,7 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 3, + .channels_mask = BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7796 has one RGB output, one LVDS output and one HDMI @@ -246,12 +246,40 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = { .dpll_ch = BIT(1), }; +static const struct rcar_du_device_info rcar_du_r8a77965_info = { + .gen = 3, + .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK + | RCAR_DU_FEATURE_EXT_CTRL_REGS + | RCAR_DU_FEATURE_VSP1_SOURCE, + .channels_mask = BIT(3) | BIT(1) | BIT(0), + .routes = { + /* + * R8A77965 has one RGB output, one LVDS output and one HDMI + * output. + */ + [RCAR_DU_OUTPUT_DPAD0] = { + .possible_crtcs = BIT(2), + .port = 0, + }, + [RCAR_DU_OUTPUT_HDMI0] = { + .possible_crtcs = BIT(1), + .port = 1, + }, + [RCAR_DU_OUTPUT_LVDS0] = { + .possible_crtcs = BIT(0), + .port = 2, + }, + }, + .num_lvds = 1, + .dpll_ch = BIT(1), +}; + static const struct rcar_du_device_info rcar_du_r8a77970_info = { .gen = 3, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 1, + .channels_mask = BIT(0), .routes = { /* R8A77970 has one RGB output and one LVDS output. 
*/ [RCAR_DU_OUTPUT_DPAD0] = { @@ -277,6 +305,7 @@ static const struct of_device_id rcar_du_of_table[] = { { .compatible = "renesas,du-r8a7794", .data = &rcar_du_r8a7794_info }, { .compatible = "renesas,du-r8a7795", .data = &rcar_du_r8a7795_info }, { .compatible = "renesas,du-r8a7796", .data = &rcar_du_r8a7796_info }, + { .compatible = "renesas,du-r8a77965", .data = &rcar_du_r8a77965_info }, { .compatible = "renesas,du-r8a77970", .data = &rcar_du_r8a77970_info }, { } }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h index 5c7ec15818c7..b3a25e8e07d0 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h @@ -52,7 +52,7 @@ struct rcar_du_output_routing { * @gen: device generation (2 or 3) * @features: device features (RCAR_DU_FEATURE_*) * @quirks: device quirks (RCAR_DU_QUIRK_*) - * @num_crtcs: total number of CRTCs + * @channels_mask: bit mask of available DU channels * @routes: array of CRTC to output routes, indexed by output (RCAR_DU_OUTPUT_*) * @num_lvds: number of internal LVDS encoders */ @@ -60,7 +60,7 @@ struct rcar_du_device_info { unsigned int gen; unsigned int features; unsigned int quirks; - unsigned int num_crtcs; + unsigned int channels_mask; struct rcar_du_output_routing routes[RCAR_DU_OUTPUT_MAX]; unsigned int num_lvds; unsigned int dpll_ch; @@ -87,7 +87,6 @@ struct rcar_du_device { struct rcar_du_vsp vsps[RCAR_DU_MAX_VSPS]; struct { - struct drm_property *alpha; struct drm_property *colorkey; } props; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c index 2f37ea901873..d539cb290a35 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c @@ -46,10 +46,13 @@ void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data) static void rcar_du_group_setup_pins(struct rcar_du_group *rgrp) { - u32 defr6 = DEFR6_CODE | DEFR6_ODPM12_DISP; + u32 defr6 = DEFR6_CODE; - if (rgrp->num_crtcs > 1) - defr6 |= DEFR6_ODPM22_DISP; + if (rgrp->channels_mask & BIT(0)) + defr6 |= DEFR6_ODPM02_DISP; + + if (rgrp->channels_mask & BIT(1)) + defr6 |= DEFR6_ODPM12_DISP; rcar_du_group_write(rgrp, DEFR6, defr6); } @@ -80,10 +83,11 @@ static void rcar_du_group_setup_defr8(struct rcar_du_group *rgrp) * On Gen3 VSPD routing can't be configured, but DPAD routing * needs to be set despite having a single option available. 
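 * The CRTC found via possible_crtcs is a software index into
 * rcdu->crtcs[]; it is resolved to its DU channel number below, since
 * DEFR8_DRGBS_DU() takes the hardware channel index (crtc->index).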
*/ - u32 crtc = ffs(possible_crtcs) - 1; + unsigned int rgb_crtc = ffs(possible_crtcs) - 1; + struct rcar_du_crtc *crtc = &rcdu->crtcs[rgb_crtc]; - if (crtc / 2 == rgrp->index) - defr8 |= DEFR8_DRGBS_DU(crtc); + if (crtc->index / 2 == rgrp->index) + defr8 |= DEFR8_DRGBS_DU(crtc->index); } rcar_du_group_write(rgrp, DEFR8, defr8); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h index 5e3adc6b31b5..42105aedecc8 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h @@ -25,6 +25,7 @@ struct rcar_du_device; * @dev: the DU device * @mmio_offset: registers offset in the device memory map * @index: group index + * @channels_mask: bitmask of populated DU channels in this group * @num_crtcs: number of CRTCs in this group (1 or 2) * @use_count: number of users of the group (rcar_du_group_(get|put)) * @used_crtcs: number of CRTCs currently in use @@ -39,6 +40,7 @@ struct rcar_du_group { unsigned int mmio_offset; unsigned int index; + unsigned int channels_mask; unsigned int num_crtcs; unsigned int use_count; unsigned int used_crtcs; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 0329b354bfa0..f0bc7cc0e913 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -233,15 +233,7 @@ static int rcar_du_atomic_check(struct drm_device *dev, struct rcar_du_device *rcdu = dev->dev_private; int ret; - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; - - ret = drm_atomic_normalize_zpos(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); + ret = drm_atomic_helper_check(dev, state); if (ret) return ret; @@ -415,11 +407,6 @@ static int rcar_du_encoders_init(struct rcar_du_device *rcdu) static int rcar_du_properties_init(struct rcar_du_device *rcdu) { - rcdu->props.alpha = - drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255); - if (rcdu->props.alpha == NULL) - return -ENOMEM; - /* * The color key is expressed as an RGB888 triplet stored in a 32-bit * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0) @@ -441,7 +428,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) struct { struct device_node *np; unsigned int crtcs_mask; - } vsps[RCAR_DU_MAX_VSPS] = { { 0, }, }; + } vsps[RCAR_DU_MAX_VSPS] = { { NULL, }, }; unsigned int vsps_count = 0; unsigned int cells; unsigned int i; @@ -520,6 +507,8 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) struct drm_fbdev_cma *fbdev; unsigned int num_encoders; unsigned int num_groups; + unsigned int swindex; + unsigned int hwindex; unsigned int i; int ret; @@ -529,10 +518,11 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) dev->mode_config.min_height = 0; dev->mode_config.max_width = 4095; dev->mode_config.max_height = 2047; + dev->mode_config.normalize_zpos = true; dev->mode_config.funcs = &rcar_du_mode_config_funcs; dev->mode_config.helper_private = &rcar_du_mode_config_helper; - rcdu->num_crtcs = rcdu->info->num_crtcs; + rcdu->num_crtcs = hweight8(rcdu->info->channels_mask); ret = rcar_du_properties_init(rcdu); if (ret < 0) @@ -542,7 +532,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) * Initialize vertical blanking interrupts handling. Start with vblank * disabled for all CRTCs. 
*/ - ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1); + ret = drm_vblank_init(dev, (1 << rcdu->num_crtcs) - 1); if (ret < 0) return ret; @@ -557,7 +547,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) rgrp->dev = rcdu; rgrp->mmio_offset = mmio_offsets[i]; rgrp->index = i; - rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U); + /* Extract the channel mask for this group only. */ + rgrp->channels_mask = (rcdu->info->channels_mask >> (2 * i)) + & GENMASK(1, 0); + rgrp->num_crtcs = hweight8(rgrp->channels_mask); /* * If we have more than one CRTCs in this group pre-associate @@ -584,10 +577,16 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) } /* Create the CRTCs. */ - for (i = 0; i < rcdu->num_crtcs; ++i) { - struct rcar_du_group *rgrp = &rcdu->groups[i / 2]; + for (swindex = 0, hwindex = 0; swindex < rcdu->num_crtcs; ++hwindex) { + struct rcar_du_group *rgrp; + + /* Skip unpopulated DU channels. */ + if (!(rcdu->info->channels_mask & BIT(hwindex))) + continue; + + rgrp = &rcdu->groups[hwindex / 2]; - ret = rcar_du_crtc_create(rgrp, i); + ret = rcar_du_crtc_create(rgrp, swindex++, hwindex); if (ret < 0) return ret; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.c b/drivers/gpu/drm/rcar-du/rcar_du_of.c index 68a0b82cb17e..afef69669bb4 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_of.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_of.c @@ -18,6 +18,7 @@ #include "rcar_du_crtc.h" #include "rcar_du_drv.h" +#include "rcar_du_of.h" /* ----------------------------------------------------------------------------- * Generic Overlay Handling diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c index 68556bd9dad2..c20f7ed48c8d 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c @@ -423,7 +423,7 @@ static void rcar_du_plane_setup_mode(struct rcar_du_group *rgrp, rcar_du_plane_write(rgrp, index, PnALPHAR, PnALPHAR_ABIT_0); else rcar_du_plane_write(rgrp, index, PnALPHAR, - PnALPHAR_ABIT_X | state->alpha); + PnALPHAR_ABIT_X | state->state.alpha >> 8); pnmr = PnMR_BM_MD | state->format->pnmr; @@ -692,11 +692,11 @@ static void rcar_du_plane_reset(struct drm_plane *plane) state->hwindex = -1; state->source = RCAR_DU_PLANE_MEMORY; - state->alpha = 255; state->colorkey = RCAR_DU_COLORKEY_NONE; state->state.zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 
0 : 1; plane->state = &state->state; + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; plane->state->plane = plane; } @@ -708,9 +708,7 @@ static int rcar_du_plane_atomic_set_property(struct drm_plane *plane, struct rcar_du_plane_state *rstate = to_rcar_plane_state(state); struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev; - if (property == rcdu->props.alpha) - rstate->alpha = val; - else if (property == rcdu->props.colorkey) + if (property == rcdu->props.colorkey) rstate->colorkey = val; else return -EINVAL; @@ -726,9 +724,7 @@ static int rcar_du_plane_atomic_get_property(struct drm_plane *plane, container_of(state, const struct rcar_du_plane_state, state); struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev; - if (property == rcdu->props.alpha) - *val = rstate->alpha; - else if (property == rcdu->props.colorkey) + if (property == rcdu->props.colorkey) *val = rstate->colorkey; else return -EINVAL; @@ -797,10 +793,9 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp) continue; drm_object_attach_property(&plane->plane.base, - rcdu->props.alpha, 255); - drm_object_attach_property(&plane->plane.base, rcdu->props.colorkey, RCAR_DU_COLORKEY_NONE); + drm_plane_create_alpha_property(&plane->plane); drm_plane_create_zpos_property(&plane->plane, 1, 1, 7); } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.h b/drivers/gpu/drm/rcar-du/rcar_du_plane.h index 890321b4665d..5c19c69e4691 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.h @@ -50,7 +50,6 @@ static inline struct rcar_du_plane *to_rcar_plane(struct drm_plane *plane) * @state: base DRM plane state * @format: information about the pixel format used by the plane * @hwindex: 0-based hardware plane index, -1 means unused - * @alpha: value of the plane alpha property * @colorkey: value of the plane colorkey property */ struct rcar_du_plane_state { @@ -60,7 +59,6 @@ struct rcar_du_plane_state { int hwindex; enum rcar_du_plane_source source; - unsigned int alpha; unsigned int colorkey; }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h index d5bae99d3cfe..9dfd220ceda1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h @@ -187,14 +187,14 @@ #define DEFR6 0x000e8 #define DEFR6_CODE (0x7778 << 16) -#define DEFR6_ODPM22_DSMR (0 << 10) -#define DEFR6_ODPM22_DISP (2 << 10) -#define DEFR6_ODPM22_CDE (3 << 10) -#define DEFR6_ODPM22_MASK (3 << 10) -#define DEFR6_ODPM12_DSMR (0 << 8) -#define DEFR6_ODPM12_DISP (2 << 8) -#define DEFR6_ODPM12_CDE (3 << 8) -#define DEFR6_ODPM12_MASK (3 << 8) +#define DEFR6_ODPM12_DSMR (0 << 10) +#define DEFR6_ODPM12_DISP (2 << 10) +#define DEFR6_ODPM12_CDE (3 << 10) +#define DEFR6_ODPM12_MASK (3 << 10) +#define DEFR6_ODPM02_DSMR (0 << 8) +#define DEFR6_ODPM02_DISP (2 << 8) +#define DEFR6_ODPM02_CDE (3 << 8) +#define DEFR6_ODPM02_MASK (3 << 8) #define DEFR6_TCNE1 (1 << 6) #define DEFR6_TCNE0 (1 << 4) #define DEFR6_MLOS1 (1 << 2) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index af7822a66dee..4a98470626d5 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -17,6 +17,7 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_plane_helper.h> #include <linux/bitops.h> @@ -56,6 +57,7 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) }; struct rcar_du_plane_state state 
= { .state = {
+			.alpha = DRM_BLEND_ALPHA_OPAQUE,
 			.crtc = &crtc->crtc,
 			.dst.x1 = 0,
 			.dst.y1 = 0,
@@ -69,7 +71,6 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc)
 		},
 		.format = rcar_du_format_info(DRM_FORMAT_ARGB8888),
 		.source = RCAR_DU_PLANE_VSPD1,
-		.alpha = 255,
 		.colorkey = 0,
 	};
@@ -181,7 +182,7 @@ static void rcar_du_vsp_plane_setup(struct rcar_du_vsp_plane *plane)
 	struct vsp1_du_atomic_config cfg = {
 		.pixelformat = 0,
 		.pitch = fb->pitches[0],
-		.alpha = state->alpha,
+		.alpha = state->state.alpha >> 8,
 		.zpos = state->state.zpos,
 	};
 	unsigned int i;
@@ -245,6 +246,10 @@ static int rcar_du_vsp_plane_prepare_fb(struct drm_plane *plane,
 		}
 	}
 
+	ret = drm_gem_fb_prepare_fb(plane, state);
+	if (ret)
+		goto fail;
+
 	return 0;
 
 fail:
@@ -307,18 +312,17 @@ static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = {
 static struct drm_plane_state *
 rcar_du_vsp_plane_atomic_duplicate_state(struct drm_plane *plane)
 {
-	struct rcar_du_vsp_plane_state *state;
 	struct rcar_du_vsp_plane_state *copy;
 
 	if (WARN_ON(!plane->state))
 		return NULL;
 
-	state = to_rcar_vsp_plane_state(plane->state);
-	copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
+	copy = kzalloc(sizeof(*copy), GFP_KERNEL);
 	if (copy == NULL)
 		return NULL;
 
 	__drm_atomic_helper_plane_duplicate_state(plane, &copy->state);
+	copy->format = to_rcar_vsp_plane_state(plane->state)->format;
 
 	return &copy->state;
 }
@@ -343,44 +347,13 @@ static void rcar_du_vsp_plane_reset(struct drm_plane *plane)
 	if (state == NULL)
 		return;
 
-	state->alpha = 255;
+	state->state.alpha = DRM_BLEND_ALPHA_OPAQUE;
 	state->state.zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1;
 
 	plane->state = &state->state;
 	plane->state->plane = plane;
 }
 
-static int rcar_du_vsp_plane_atomic_set_property(struct drm_plane *plane,
-	struct drm_plane_state *state, struct drm_property *property,
-	uint64_t val)
-{
-	struct rcar_du_vsp_plane_state *rstate = to_rcar_vsp_plane_state(state);
-	struct rcar_du_device *rcdu = to_rcar_vsp_plane(plane)->vsp->dev;
-
-	if (property == rcdu->props.alpha)
-		rstate->alpha = val;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static int rcar_du_vsp_plane_atomic_get_property(struct drm_plane *plane,
-	const struct drm_plane_state *state, struct drm_property *property,
-	uint64_t *val)
-{
-	const struct rcar_du_vsp_plane_state *rstate =
-		container_of(state, const struct rcar_du_vsp_plane_state, state);
-	struct rcar_du_device *rcdu = to_rcar_vsp_plane(plane)->vsp->dev;
-
-	if (property == rcdu->props.alpha)
-		*val = rstate->alpha;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
 static const struct drm_plane_funcs rcar_du_vsp_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
@@ -388,8 +361,6 @@ static const struct drm_plane_funcs rcar_du_vsp_plane_funcs = {
 	.destroy = drm_plane_cleanup,
 	.atomic_duplicate_state = rcar_du_vsp_plane_atomic_duplicate_state,
 	.atomic_destroy_state = rcar_du_vsp_plane_atomic_destroy_state,
-	.atomic_set_property = rcar_du_vsp_plane_atomic_set_property,
-	.atomic_get_property = rcar_du_vsp_plane_atomic_get_property,
 };
 
 int rcar_du_vsp_init(struct rcar_du_vsp *vsp, struct device_node *np,
@@ -446,8 +417,7 @@ int rcar_du_vsp_init(struct rcar_du_vsp *vsp, struct device_node *np,
 		if (type == DRM_PLANE_TYPE_PRIMARY)
 			continue;
 
-		drm_object_attach_property(&plane->plane.base,
-					   rcdu->props.alpha, 255);
+		drm_plane_create_alpha_property(&plane->plane);
 		drm_plane_create_zpos_property(&plane->plane, 1, 1,
 					       vsp->num_planes - 1);
 	}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h
b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h index 4c5d7bbce6aa..8a8a25c8c8e8 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h @@ -44,15 +44,12 @@ static inline struct rcar_du_vsp_plane *to_rcar_vsp_plane(struct drm_plane *p) * @state: base DRM plane state * @format: information about the pixel format used by the plane * @sg_tables: scatter-gather tables for the frame buffer memory - * @alpha: value of the plane alpha property */ struct rcar_du_vsp_plane_state { struct drm_plane_state state; const struct rcar_du_format_info *format; struct sg_table sg_tables[3]; - - unsigned int alpha; }; static inline struct rcar_du_vsp_plane_state * diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 3e8bf79bea58..080f05352195 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -77,13 +77,13 @@ struct rockchip_dp_device { struct analogix_dp_plat_data plat_data; }; -static void analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled) +static int analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled) { struct rockchip_dp_device *dp = to_dp(encoder); int ret; if (!analogix_dp_psr_enabled(dp->adp)) - return; + return 0; DRM_DEV_DEBUG(dp->dev, "%s PSR...\n", enabled ? "Entry" : "Exit"); @@ -91,13 +91,13 @@ static void analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled) PSR_WAIT_LINE_FLAG_TIMEOUT_MS); if (ret) { DRM_DEV_ERROR(dp->dev, "line flag interrupt did not arrive\n"); - return; + return -ETIMEDOUT; } if (enabled) - analogix_dp_enable_psr(dp->adp); + return analogix_dp_enable_psr(dp->adp); else - analogix_dp_disable_psr(dp->adp); + return analogix_dp_disable_psr(dp->adp); } static int rockchip_dp_pre_init(struct rockchip_dp_device *dp) @@ -109,7 +109,7 @@ static int rockchip_dp_pre_init(struct rockchip_dp_device *dp) return 0; } -static int rockchip_dp_poweron(struct analogix_dp_plat_data *plat_data) +static int rockchip_dp_poweron_start(struct analogix_dp_plat_data *plat_data) { struct rockchip_dp_device *dp = to_dp(plat_data); int ret; @@ -127,7 +127,14 @@ static int rockchip_dp_poweron(struct analogix_dp_plat_data *plat_data) return ret; } - return rockchip_drm_psr_activate(&dp->encoder); + return ret; +} + +static int rockchip_dp_poweron_end(struct analogix_dp_plat_data *plat_data) +{ + struct rockchip_dp_device *dp = to_dp(plat_data); + + return rockchip_drm_psr_inhibit_put(&dp->encoder); } static int rockchip_dp_powerdown(struct analogix_dp_plat_data *plat_data) @@ -135,7 +142,7 @@ static int rockchip_dp_powerdown(struct analogix_dp_plat_data *plat_data) struct rockchip_dp_device *dp = to_dp(plat_data); int ret; - ret = rockchip_drm_psr_deactivate(&dp->encoder); + ret = rockchip_drm_psr_inhibit_get(&dp->encoder); if (ret != 0) return ret; @@ -218,6 +225,7 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder, struct drm_connector_state *conn_state) { struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state); + struct drm_display_info *di = &conn_state->connector->display_info; /* * The hardware IC designed that VOP must output the RGB10 video @@ -229,6 +237,7 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder, s->output_mode = ROCKCHIP_OUT_MODE_AAAA; s->output_type = DRM_MODE_CONNECTOR_eDP; + s->output_bpc = di->bpc; return 0; } @@ -328,7 +337,8 @@ static int rockchip_dp_bind(struct device *dev, struct device *master, dp->plat_data.encoder = &dp->encoder; 
dp->plat_data.dev_type = dp->data->chip_type; - dp->plat_data.power_on = rockchip_dp_poweron; + dp->plat_data.power_on_start = rockchip_dp_poweron_start; + dp->plat_data.power_on_end = rockchip_dp_poweron_end; dp->plat_data.power_off = rockchip_dp_powerdown; dp->plat_data.get_modes = rockchip_dp_get_modes; @@ -358,6 +368,8 @@ static void rockchip_dp_unbind(struct device *dev, struct device *master, analogix_dp_unbind(dp->adp); rockchip_drm_psr_unregister(&dp->encoder); dp->encoder.funcs->destroy(&dp->encoder); + + dp->adp = ERR_PTR(-ENODEV); } static const struct component_ops rockchip_dp_component_ops = { @@ -381,6 +393,7 @@ static int rockchip_dp_probe(struct platform_device *pdev) return -ENOMEM; dp->dev = dev; + dp->adp = ERR_PTR(-ENODEV); dp->plat_data.panel = panel; ret = rockchip_dp_of_probe(dp); @@ -404,6 +417,9 @@ static int rockchip_dp_suspend(struct device *dev) { struct rockchip_dp_device *dp = dev_get_drvdata(dev); + if (IS_ERR(dp->adp)) + return 0; + return analogix_dp_suspend(dp->adp); } @@ -411,6 +427,9 @@ static int rockchip_dp_resume(struct device *dev) { struct rockchip_dp_device *dp = dev_get_drvdata(dev); + if (IS_ERR(dp->adp)) + return 0; + return analogix_dp_resume(dp->adp); } #endif diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index 9c064a40458b..3a6ebfc26036 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -36,6 +36,7 @@ struct rockchip_crtc_state { struct drm_crtc_state base; int output_type; int output_mode; + int output_bpc; }; #define to_rockchip_crtc_state(s) \ container_of(s, struct rockchip_crtc_state, base) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index e266539e04e5..d4f4118b482d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -167,8 +167,67 @@ err_gem_object_unreference: return ERR_PTR(ret); } +static void +rockchip_drm_psr_inhibit_get_state(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *crtc_state; + struct drm_encoder *encoder; + u32 encoder_mask = 0; + int i; + + for_each_old_crtc_in_state(state, crtc, crtc_state, i) { + encoder_mask |= crtc_state->encoder_mask; + encoder_mask |= crtc->state->encoder_mask; + } + + drm_for_each_encoder_mask(encoder, state->dev, encoder_mask) + rockchip_drm_psr_inhibit_get(encoder); +} + +static void +rockchip_drm_psr_inhibit_put_state(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *crtc_state; + struct drm_encoder *encoder; + u32 encoder_mask = 0; + int i; + + for_each_old_crtc_in_state(state, crtc, crtc_state, i) { + encoder_mask |= crtc_state->encoder_mask; + encoder_mask |= crtc->state->encoder_mask; + } + + drm_for_each_encoder_mask(encoder, state->dev, encoder_mask) + rockchip_drm_psr_inhibit_put(encoder); +} + +static void +rockchip_atomic_helper_commit_tail_rpm(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + + rockchip_drm_psr_inhibit_get_state(old_state); + + drm_atomic_helper_commit_modeset_disables(dev, old_state); + + drm_atomic_helper_commit_modeset_enables(dev, old_state); + + drm_atomic_helper_commit_planes(dev, old_state, + DRM_PLANE_COMMIT_ACTIVE_ONLY); + + rockchip_drm_psr_inhibit_put_state(old_state); + + drm_atomic_helper_commit_hw_done(old_state); + + drm_atomic_helper_wait_for_vblanks(dev, old_state); + + drm_atomic_helper_cleanup_planes(dev, old_state); +} 
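A note on the commit tail above: it wraps the standard atomic phases in a PSR inhibit get/put pair, so PSR is guaranteed to be off on every affected encoder while the hardware is reprogrammed, and only re-enters after the flush delay once the last inhibit reference is dropped. Below is a minimal sketch of the same contract as seen from an encoder driver; my_encoder_update is a hypothetical name, and only the two inhibit calls come from this patch:

static int my_encoder_update(struct drm_encoder *encoder)
{
	int ret;

	/* Forces PSR off and bumps the inhibit count (hypothetical caller). */
	ret = rockchip_drm_psr_inhibit_get(encoder);
	if (ret)
		return ret;

	/* ... safely touch encoder registers here; PSR stays off ... */

	/*
	 * Drop the reference; when the count reaches zero, the delayed
	 * flush work re-enables PSR after PSR_FLUSH_TIMEOUT_MS.
	 */
	return rockchip_drm_psr_inhibit_put(encoder);
}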
+ static const struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = { - .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, + .atomic_commit_tail = rockchip_atomic_helper_commit_tail_rpm, }; static const struct drm_mode_config_funcs rockchip_drm_mode_config_funcs = { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 074db7a92809..a8db758d523e 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -357,8 +357,8 @@ err_free_rk_obj: } /* - * rockchip_gem_free_object - (struct drm_driver)->gem_free_object callback - * function + * rockchip_gem_free_object - (struct drm_driver)->gem_free_object_unlocked + * callback function */ void rockchip_gem_free_object(struct drm_gem_object *obj) { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_psr.c b/drivers/gpu/drm/rockchip/rockchip_drm_psr.c index b339ca943139..79d00d861a31 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_psr.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_psr.c @@ -20,42 +20,19 @@ #define PSR_FLUSH_TIMEOUT_MS 100 -enum psr_state { - PSR_FLUSH, - PSR_ENABLE, - PSR_DISABLE, -}; - struct psr_drv { struct list_head list; struct drm_encoder *encoder; struct mutex lock; - bool active; - enum psr_state state; + int inhibit_count; + bool enabled; struct delayed_work flush_work; - void (*set)(struct drm_encoder *encoder, bool enable); + int (*set)(struct drm_encoder *encoder, bool enable); }; -static struct psr_drv *find_psr_by_crtc(struct drm_crtc *crtc) -{ - struct rockchip_drm_private *drm_drv = crtc->dev->dev_private; - struct psr_drv *psr; - - mutex_lock(&drm_drv->psr_list_lock); - list_for_each_entry(psr, &drm_drv->psr_list, list) { - if (psr->encoder->crtc == crtc) - goto out; - } - psr = ERR_PTR(-ENODEV); - -out: - mutex_unlock(&drm_drv->psr_list_lock); - return psr; -} - static struct psr_drv *find_psr_by_encoder(struct drm_encoder *encoder) { struct rockchip_drm_private *drm_drv = encoder->dev->dev_private; @@ -73,46 +50,22 @@ out: return psr; } -static void psr_set_state_locked(struct psr_drv *psr, enum psr_state state) +static int psr_set_state_locked(struct psr_drv *psr, bool enable) { - /* - * Allowed finite state machine: - * - * PSR_ENABLE < = = = = = > PSR_FLUSH - * | ^ | - * | | | - * v | | - * PSR_DISABLE < - - - - - - - - - - */ - if (state == psr->state || !psr->active) - return; - - /* Already disabled in flush, change the state, but not the hardware */ - if (state == PSR_DISABLE && psr->state == PSR_FLUSH) { - psr->state = state; - return; - } + int ret; - psr->state = state; + if (psr->inhibit_count > 0) + return -EINVAL; - /* Actually commit the state change to hardware */ - switch (psr->state) { - case PSR_ENABLE: - psr->set(psr->encoder, true); - break; + if (enable == psr->enabled) + return 0; - case PSR_DISABLE: - case PSR_FLUSH: - psr->set(psr->encoder, false); - break; - } -} + ret = psr->set(psr->encoder, enable); + if (ret) + return ret; -static void psr_set_state(struct psr_drv *psr, enum psr_state state) -{ - mutex_lock(&psr->lock); - psr_set_state_locked(psr, state); - mutex_unlock(&psr->lock); + psr->enabled = enable; + return 0; } static void psr_flush_handler(struct work_struct *work) @@ -120,21 +73,24 @@ static void psr_flush_handler(struct work_struct *work) struct psr_drv *psr = container_of(to_delayed_work(work), struct psr_drv, flush_work); - /* If the state has changed since we initiated the flush, do nothing */ mutex_lock(&psr->lock); - if (psr->state == 
PSR_FLUSH) - psr_set_state_locked(psr, PSR_ENABLE); + psr_set_state_locked(psr, true); mutex_unlock(&psr->lock); } /** - * rockchip_drm_psr_activate - activate PSR on the given pipe + * rockchip_drm_psr_inhibit_put - release PSR inhibit on given encoder * @encoder: encoder to obtain the PSR encoder * + * Decrements PSR inhibit count on given encoder. Should be called only + * for a PSR inhibit count increment done before. If PSR inhibit counter + * reaches zero, PSR flush work is scheduled to make the hardware enter + * PSR mode in PSR_FLUSH_TIMEOUT_MS. + * * Returns: * Zero on success, negative errno on failure. */ -int rockchip_drm_psr_activate(struct drm_encoder *encoder) +int rockchip_drm_psr_inhibit_put(struct drm_encoder *encoder) { struct psr_drv *psr = find_psr_by_encoder(encoder); @@ -142,21 +98,30 @@ int rockchip_drm_psr_activate(struct drm_encoder *encoder) return PTR_ERR(psr); mutex_lock(&psr->lock); - psr->active = true; + --psr->inhibit_count; + WARN_ON(psr->inhibit_count < 0); + if (!psr->inhibit_count) + mod_delayed_work(system_wq, &psr->flush_work, + PSR_FLUSH_TIMEOUT_MS); mutex_unlock(&psr->lock); return 0; } -EXPORT_SYMBOL(rockchip_drm_psr_activate); +EXPORT_SYMBOL(rockchip_drm_psr_inhibit_put); /** - * rockchip_drm_psr_deactivate - deactivate PSR on the given pipe + * rockchip_drm_psr_inhibit_get - acquire PSR inhibit on given encoder * @encoder: encoder to obtain the PSR encoder * + * Increments PSR inhibit count on given encoder. This function guarantees + * that after it returns PSR is turned off on given encoder and no PSR-related + * hardware state change occurs at least until a matching call to + * rockchip_drm_psr_inhibit_put() is done. + * * Returns: * Zero on success, negative errno on failure. */ -int rockchip_drm_psr_deactivate(struct drm_encoder *encoder) +int rockchip_drm_psr_inhibit_get(struct drm_encoder *encoder) { struct psr_drv *psr = find_psr_by_encoder(encoder); @@ -164,37 +129,25 @@ int rockchip_drm_psr_deactivate(struct drm_encoder *encoder) return PTR_ERR(psr); mutex_lock(&psr->lock); - psr->active = false; + psr_set_state_locked(psr, false); + ++psr->inhibit_count; mutex_unlock(&psr->lock); cancel_delayed_work_sync(&psr->flush_work); return 0; } -EXPORT_SYMBOL(rockchip_drm_psr_deactivate); +EXPORT_SYMBOL(rockchip_drm_psr_inhibit_get); static void rockchip_drm_do_flush(struct psr_drv *psr) { - psr_set_state(psr, PSR_FLUSH); - mod_delayed_work(system_wq, &psr->flush_work, PSR_FLUSH_TIMEOUT_MS); -} - -/** - * rockchip_drm_psr_flush - flush a single pipe - * @crtc: CRTC of the pipe to flush - * - * Returns: - * 0 on success, -errno on fail - */ -int rockchip_drm_psr_flush(struct drm_crtc *crtc) -{ - struct psr_drv *psr = find_psr_by_crtc(crtc); - if (IS_ERR(psr)) - return PTR_ERR(psr); + cancel_delayed_work_sync(&psr->flush_work); - rockchip_drm_do_flush(psr); - return 0; + mutex_lock(&psr->lock); + if (!psr_set_state_locked(psr, false)) + mod_delayed_work(system_wq, &psr->flush_work, + PSR_FLUSH_TIMEOUT_MS); + mutex_unlock(&psr->lock); } -EXPORT_SYMBOL(rockchip_drm_psr_flush); /** * rockchip_drm_psr_flush_all - force to flush all registered PSR encoders @@ -225,11 +178,16 @@ EXPORT_SYMBOL(rockchip_drm_psr_flush_all); * @encoder: encoder that obtain the PSR function * @psr_set: call back to set PSR state * + * The function returns with PSR inhibit counter initialized with one + * and the caller (typically encoder driver) needs to call + * rockchip_drm_psr_inhibit_put() when it becomes ready to accept PSR + * enable request. 
+ * * Returns: * Zero on success, negative errno on failure. */ int rockchip_drm_psr_register(struct drm_encoder *encoder, - void (*psr_set)(struct drm_encoder *, bool enable)) + int (*psr_set)(struct drm_encoder *, bool enable)) { struct rockchip_drm_private *drm_drv = encoder->dev->dev_private; struct psr_drv *psr; @@ -244,8 +202,8 @@ int rockchip_drm_psr_register(struct drm_encoder *encoder, INIT_DELAYED_WORK(&psr->flush_work, psr_flush_handler); mutex_init(&psr->lock); - psr->active = true; - psr->state = PSR_DISABLE; + psr->inhibit_count = 1; + psr->enabled = false; psr->encoder = encoder; psr->set = psr_set; @@ -262,6 +220,11 @@ EXPORT_SYMBOL(rockchip_drm_psr_register); * @encoder: encoder that obtain the PSR function * @psr_set: call back to set PSR state * + * It is expected that the PSR inhibit counter is 1 when this function is + * called, which corresponds to a state when related encoder has been + * disconnected from any CRTCs and its driver called + * rockchip_drm_psr_inhibit_get() to stop the PSR logic. + * * Returns: * Zero on success, negative errno on failure. */ @@ -273,7 +236,12 @@ void rockchip_drm_psr_unregister(struct drm_encoder *encoder) mutex_lock(&drm_drv->psr_list_lock); list_for_each_entry_safe(psr, n, &drm_drv->psr_list, list) { if (psr->encoder == encoder) { - cancel_delayed_work_sync(&psr->flush_work); + /* + * Any other value would mean that the encoder + * is still in use. + */ + WARN_ON(psr->inhibit_count != 1); + list_del(&psr->list); kfree(psr); } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_psr.h b/drivers/gpu/drm/rockchip/rockchip_drm_psr.h index b1ea0155e57c..860c62494496 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_psr.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_psr.h @@ -16,13 +16,12 @@ #define __ROCKCHIP_DRM_PSR___ void rockchip_drm_psr_flush_all(struct drm_device *dev); -int rockchip_drm_psr_flush(struct drm_crtc *crtc); -int rockchip_drm_psr_activate(struct drm_encoder *encoder); -int rockchip_drm_psr_deactivate(struct drm_encoder *encoder); +int rockchip_drm_psr_inhibit_put(struct drm_encoder *encoder); +int rockchip_drm_psr_inhibit_get(struct drm_encoder *encoder); int rockchip_drm_psr_register(struct drm_encoder *encoder, - void (*psr_set)(struct drm_encoder *, bool enable)); + int (*psr_set)(struct drm_encoder *, bool enable)); void rockchip_drm_psr_unregister(struct drm_encoder *encoder); #endif /* __ROCKCHIP_DRM_PSR__ */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 53d4afe15278..2121345a61af 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -76,6 +76,9 @@ #define VOP_WIN_GET_YRGBADDR(vop, win) \ vop_readl(vop, win->base + win->phy->yrgb_mst.offset) +#define VOP_WIN_TO_INDEX(vop_win) \ + ((vop_win) - (vop_win)->vop->win) + #define to_vop(x) container_of(x, struct vop, crtc) #define to_vop_win(x) container_of(x, struct vop_win, base) @@ -708,6 +711,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane, dma_addr_t dma_addr; uint32_t val; bool rb_swap; + int win_index = VOP_WIN_TO_INDEX(vop_win); int format; /* @@ -777,7 +781,14 @@ static void vop_plane_atomic_update(struct drm_plane *plane, rb_swap = has_rb_swapped(fb->format->format); VOP_WIN_SET(vop, win, rb_swap, rb_swap); - if (fb->format->has_alpha) { + /* + * Blending win0 with the background color doesn't seem to work + * correctly. We only get the background color, no matter the contents + * of the win0 framebuffer. 
However, blending pre-multiplied color + with the default opaque black background color is a no-op, + so we can just disable blending to get the correct result. + */ + if (fb->format->has_alpha && win_index > 0) { VOP_WIN_SET(vop, win, dst_alpha_ctl, DST_FACTOR_M0(ALPHA_SRC_INVERSE)); val = SRC_ALPHA_EN(1) | SRC_COLOR_M0(ALPHA_SRC_PRE_MUL) | @@ -925,6 +936,12 @@ static void vop_crtc_atomic_enable(struct drm_crtc *crtc, if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA && !(vop_data->feature & VOP_FEATURE_OUTPUT_RGB10)) s->output_mode = ROCKCHIP_OUT_MODE_P888; + + if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA && s->output_bpc == 8) + VOP_REG_SET(vop, common, pre_dither_down, 1); + else + VOP_REG_SET(vop, common, pre_dither_down, 0); + VOP_REG_SET(vop, common, out_mode, s->output_mode); VOP_REG_SET(vop, modeset, htotal_pw, (htotal << 16) | hsync_len); @@ -1017,22 +1034,15 @@ static void vop_crtc_atomic_flush(struct drm_crtc *crtc, continue; drm_framebuffer_get(old_plane_state->fb); + WARN_ON(drm_crtc_vblank_get(crtc) != 0); drm_flip_work_queue(&vop->fb_unref_work, old_plane_state->fb); set_bit(VOP_PENDING_FB_UNREF, &vop->pending); - WARN_ON(drm_crtc_vblank_get(crtc) != 0); } } -static void vop_crtc_atomic_begin(struct drm_crtc *crtc, - struct drm_crtc_state *old_crtc_state) -{ - rockchip_drm_psr_flush(crtc); -} - static const struct drm_crtc_helper_funcs vop_crtc_helper_funcs = { .mode_fixup = vop_crtc_mode_fixup, .atomic_flush = vop_crtc_atomic_flush, - .atomic_begin = vop_crtc_atomic_begin, .atomic_enable = vop_crtc_atomic_enable, .atomic_disable = vop_crtc_atomic_disable, }; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h index 56bbd2e2a8ef..084acdd0019a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h @@ -67,6 +67,7 @@ struct vop_common { struct vop_reg cfg_done; struct vop_reg dsp_blank; struct vop_reg data_blank; + struct vop_reg pre_dither_down; struct vop_reg dither_down; struct vop_reg dither_up; struct vop_reg gate_en; diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c index 2e4eea3459fe..08023d3ecb76 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c @@ -264,6 +264,7 @@ static const struct vop_common rk3288_common = { .standby = VOP_REG_SYNC(RK3288_SYS_CTRL, 0x1, 22), .gate_en = VOP_REG(RK3288_SYS_CTRL, 0x1, 23), .mmu_en = VOP_REG(RK3288_SYS_CTRL, 0x1, 20), + .pre_dither_down = VOP_REG(RK3288_DSP_CTRL1, 0x1, 1), .dither_down = VOP_REG(RK3288_DSP_CTRL1, 0xf, 1), .dither_up = VOP_REG(RK3288_DSP_CTRL1, 0x1, 6), .data_blank = VOP_REG(RK3288_DSP_CTRL0, 0x1, 19), diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 0d95888ccc3e..a364fc0b38c3 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -30,7 +30,7 @@ #include <drm/spsc_queue.h> #define CREATE_TRACE_POINTS -#include <drm/gpu_scheduler_trace.h> +#include "gpu_scheduler_trace.h" #define to_drm_sched_job(sched_job) \ container_of((sched_job), struct drm_sched_job, queue_node) @@ -117,15 +117,15 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) * @sched The pointer to the scheduler * @entity The pointer to a valid drm_sched_entity * @rq The run queue this entity belongs to - * @kernel If this is an entity for the kernel - * @jobs The max number of jobs in the job queue + * @guilty atomic_t set to 1 when a
job on this queue + * is found to be guilty causing a timeout * * return 0 on success, negative error code on failure */ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, - uint32_t jobs, atomic_t *guilty) + atomic_t *guilty) { if (!(sched && entity && rq)) return -EINVAL; @@ -135,6 +135,8 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; entity->guilty = guilty; + entity->fini_status = 0; + entity->last_scheduled = NULL; spin_lock_init(&entity->rq_lock); spin_lock_init(&entity->queue_lock); @@ -196,19 +198,30 @@ static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity) return true; } +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct drm_sched_job *job = container_of(cb, struct drm_sched_job, + finish_cb); + drm_sched_fence_finished(job->s_fence); + WARN_ON(job->s_fence->parent); + dma_fence_put(&job->s_fence->finished); + job->sched->ops->free_job(job); +} + + /** * Destroy a context entity * * @sched Pointer to scheduler instance * @entity The pointer to a valid scheduler entity * - * Cleanup and free the allocated resources. + * Splitting drm_sched_entity_fini() into two functions, the first one does the waiting, + * removes the entity from the runqueue and returns an error when the process was killed. */ -void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, +void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { - int r; - if (!drm_sched_entity_is_initialized(sched, entity)) return; /** @@ -216,13 +229,28 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, * queued IBs or discard them on SIGKILL */ if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL) - r = -ERESTARTSYS; + entity->fini_status = -ERESTARTSYS; else - r = wait_event_killable(sched->job_scheduled, + entity->fini_status = wait_event_killable(sched->job_scheduled, drm_sched_entity_is_idle(entity)); drm_sched_entity_set_rq(entity, NULL); - if (r) { +} +EXPORT_SYMBOL(drm_sched_entity_do_release); + +/** + * Destroy a context entity + * + * @sched Pointer to scheduler instance + * @entity The pointer to a valid scheduler entity + * + * The second one then goes over the entity and signals all jobs with an error code. + */ +void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) +{ + if (entity->fini_status) { struct drm_sched_job *job; + int r; /* Park the kernel for a moment to make sure it isn't processing * our entity.
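The hunks above split entity teardown into two phases, with drm_sched_entity_fini() kept as a wrapper that calls both in sequence (see the hunk that follows). A hedged sketch of the resulting driver-side pattern; my_ctx_fini and the intermediate step are invented for illustration, and only the two exported scheduler calls come from this patch:

static void my_ctx_fini(struct drm_gpu_scheduler *sched,
			struct drm_sched_entity *entity)
{
	/*
	 * Phase 1: wait (killably) for the entity to idle and detach it
	 * from its run queue; the outcome lands in entity->fini_status.
	 */
	drm_sched_entity_do_release(sched, entity);

	/* ... driver-specific flushing may happen in between ... */

	/*
	 * Phase 2: if the wait was interrupted, signal leftover jobs with
	 * -ESRCH via the last_scheduled fence callback, then drop the
	 * last_scheduled reference.
	 */
	drm_sched_entity_cleanup(sched, entity);
}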
@@ -240,12 +268,25 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_fence *s_fence = job->s_fence; drm_sched_fence_scheduled(s_fence); dma_fence_set_error(&s_fence->finished, -ESRCH); - drm_sched_fence_finished(s_fence); - WARN_ON(s_fence->parent); - dma_fence_put(&s_fence->finished); - sched->ops->free_job(job); + r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb, + drm_sched_entity_kill_jobs_cb); + if (r == -ENOENT) + drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + else if (r) + DRM_ERROR("fence add callback failed (%d)\n", r); } } + + dma_fence_put(entity->last_scheduled); + entity->last_scheduled = NULL; +} +EXPORT_SYMBOL(drm_sched_entity_cleanup); + +void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) +{ + drm_sched_entity_do_release(sched, entity); + drm_sched_entity_cleanup(sched, entity); } EXPORT_SYMBOL(drm_sched_entity_fini); @@ -360,6 +401,9 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (entity->guilty && atomic_read(entity->guilty)) dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED); + dma_fence_put(entity->last_scheduled); + entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished); + spsc_queue_pop(&entity->job_queue); return sched_job; } @@ -529,6 +573,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) spin_unlock(&sched->job_list_lock); fence = sched->ops->run_job(s_job); atomic_inc(&sched->hw_rq_count); + if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &s_fence->cb, @@ -555,6 +600,7 @@ int drm_sched_job_init(struct drm_sched_job *job, void *owner) { job->sched = sched; + job->entity = entity; job->s_priority = entity->rq - sched->sched_rq; job->s_fence = drm_sched_fence_create(entity, owner); if (!job->s_fence) diff --git a/include/drm/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index 0789e8d0a0e1..4998ad950a48 100644 --- a/include/drm/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -78,5 +78,5 @@ TRACE_EVENT(drm_sched_process_job, /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/scheduler #include <trace/define_trace.h> diff --git a/drivers/gpu/drm/selftests/Makefile b/drivers/gpu/drm/selftests/Makefile index 4aebfc7f27d4..9fc349fa18e9 100644 --- a/drivers/gpu/drm/selftests/Makefile +++ b/drivers/gpu/drm/selftests/Makefile @@ -1 +1 @@ -obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += test-drm_mm.o +obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm-helper.o diff --git a/drivers/gpu/drm/selftests/drm_helper_selftests.h b/drivers/gpu/drm/selftests/drm_helper_selftests.h new file mode 100644 index 000000000000..9771290ed228 --- /dev/null +++ b/drivers/gpu/drm/selftests/drm_helper_selftests.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as igt__name to create + * a module parameter. It must be unique and legal for a C identifier. 
+ * + * Tests are executed in order by igt/drm_selftests_helper + */ +selftest(check_plane_state, igt_check_plane_state) diff --git a/drivers/gpu/drm/selftests/test-drm-helper.c b/drivers/gpu/drm/selftests/test-drm-helper.c new file mode 100644 index 000000000000..a015712b43e8 --- /dev/null +++ b/drivers/gpu/drm/selftests/test-drm-helper.c @@ -0,0 +1,247 @@ +/* + * Test cases for the drm_kms_helper functions + */ + +#define pr_fmt(fmt) "drm_kms_helper: " fmt + +#include <linux/module.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_modes.h> + +#define TESTS "drm_helper_selftests.h" +#include "drm_selftest.h" + +#define FAIL(test, msg, ...) \ + do { \ + if (test) { \ + pr_err("%s/%u: " msg, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + return -EINVAL; \ + } \ + } while (0) + +#define FAIL_ON(x) FAIL((x), "%s", "FAIL_ON(" __stringify(x) ")\n") + +static void set_src(struct drm_plane_state *plane_state, + unsigned src_x, unsigned src_y, + unsigned src_w, unsigned src_h) +{ + plane_state->src_x = src_x; + plane_state->src_y = src_y; + plane_state->src_w = src_w; + plane_state->src_h = src_h; +} + +static bool check_src_eq(struct drm_plane_state *plane_state, + unsigned src_x, unsigned src_y, + unsigned src_w, unsigned src_h) +{ + if (plane_state->src.x1 < 0) { + pr_err("src x coordinate %x should never be below 0.\n", plane_state->src.x1); + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + if (plane_state->src.y1 < 0) { + pr_err("src y coordinate %x should never be below 0.\n", plane_state->src.y1); + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + + if (plane_state->src.x1 != src_x || + plane_state->src.y1 != src_y || + drm_rect_width(&plane_state->src) != src_w || + drm_rect_height(&plane_state->src) != src_h) { + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + + return true; +} + +static void set_crtc(struct drm_plane_state *plane_state, + int crtc_x, int crtc_y, + unsigned crtc_w, unsigned crtc_h) +{ + plane_state->crtc_x = crtc_x; + plane_state->crtc_y = crtc_y; + plane_state->crtc_w = crtc_w; + plane_state->crtc_h = crtc_h; +} + +static bool check_crtc_eq(struct drm_plane_state *plane_state, + int crtc_x, int crtc_y, + unsigned crtc_w, unsigned crtc_h) +{ + if (plane_state->dst.x1 != crtc_x || + plane_state->dst.y1 != crtc_y || + drm_rect_width(&plane_state->dst) != crtc_w || + drm_rect_height(&plane_state->dst) != crtc_h) { + drm_rect_debug_print("dst: ", &plane_state->dst, false); + + return false; + } + + return true; +} + +static int igt_check_plane_state(void *ignored) +{ + int ret; + + const struct drm_crtc_state crtc_state = { + .crtc = ZERO_SIZE_PTR, + .enable = true, + .active = true, + .mode = { + DRM_MODE("1024x768", 0, 65000, 1024, 1048, + 1184, 1344, 0, 768, 771, 777, 806, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) + }, + }; + struct drm_framebuffer fb = { + .width = 2048, + .height = 2048 + }; + struct drm_plane_state plane_state = { + .crtc = ZERO_SIZE_PTR, + .fb = &fb, + .rotation = DRM_MODE_ROTATE_0 + }; + + /* Simple clipping, no scaling. 
*/ + set_src(&plane_state, 0, 0, fb.width << 16, fb.height << 16); + set_crtc(&plane_state, 0, 0, fb.width, fb.height); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Simple clipping check should pass\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + /* Rotated clipping + reflection, no scaling. */ + plane_state.rotation = DRM_MODE_ROTATE_90 | DRM_MODE_REFLECT_X; + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Rotated clipping check should pass\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 768 << 16, 1024 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + plane_state.rotation = DRM_MODE_ROTATE_0; + + /* Check whether positioning works correctly. */ + set_src(&plane_state, 0, 0, 1023 << 16, 767 << 16); + set_crtc(&plane_state, 0, 0, 1023, 767); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(!ret, "Should not be able to position on the crtc with can_position=false\n"); + + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, false); + FAIL(ret < 0, "Simple positioning should work\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1023 << 16, 767 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1023, 767)); + + /* Simple scaling tests. */ + set_src(&plane_state, 0, 0, 512 << 16, 384 << 16); + set_crtc(&plane_state, 0, 0, 1024, 768); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0x8001, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(!ret, "Upscaling out of range should fail.\n"); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0x8000, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Upscaling exactly 2x should work\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 512 << 16, 384 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + set_src(&plane_state, 0, 0, 2048 << 16, 1536 << 16); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x1ffff, false, false); + FAIL(!ret, "Downscaling out of range should fail.\n"); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x20000, false, false); + FAIL(ret < 0, "Should succeed with exact scaling limit\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2048 << 16, 1536 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + /* Testing rounding errors. 
*/ + set_src(&plane_state, 0, 0, 0x40001, 0x40001); + set_crtc(&plane_state, 1022, 766, 4, 4); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x10001, + true, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2)); + + set_src(&plane_state, 0x20001, 0x20001, 0x4040001, 0x3040001); + set_crtc(&plane_state, -2, -2, 1028, 772); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x10001, + false, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0x40002, 0x40002, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + set_src(&plane_state, 0, 0, 0x3ffff, 0x3ffff); + set_crtc(&plane_state, 1022, 766, 4, 4); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0xffff, + DRM_PLANE_HELPER_NO_SCALING, + true, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + /* Should not be rounded to 0x20001, which would be upscaling. */ + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2)); + + set_src(&plane_state, 0x1ffff, 0x1ffff, 0x403ffff, 0x303ffff); + set_crtc(&plane_state, -2, -2, 1028, 772); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0xffff, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0x3fffe, 0x3fffe, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + return 0; +} + +#include "drm_selftest.c" + +static int __init test_drm_helper_init(void) +{ + int err; + + err = run_selftests(selftests, ARRAY_SIZE(selftests), NULL); + + return err > 0 ? 
0 : err; +} + +module_init(test_drm_helper_init); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/sti/Kconfig b/drivers/gpu/drm/sti/Kconfig index cca4b3c9aeb5..1963cc1b1cc5 100644 --- a/drivers/gpu/drm/sti/Kconfig +++ b/drivers/gpu/drm/sti/Kconfig @@ -1,6 +1,6 @@ config DRM_STI tristate "DRM Support for STMicroelectronics SoC stiH4xx Series" - depends on DRM && (ARCH_STI || ARCH_MULTIPLATFORM) + depends on OF && DRM && (ARCH_STI || ARCH_MULTIPLATFORM) select RESET_CONTROLLER select DRM_KMS_HELPER select DRM_GEM_CMA_HELPER @@ -8,6 +8,5 @@ config DRM_STI select DRM_PANEL select FW_LOADER select SND_SOC_HDMI_CODEC if SND_SOC - select OF help Choose this option to enable DRM on STM stiH4xx chipset diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c index 21e50d7b1f86..5824e6aca8f4 100644 --- a/drivers/gpu/drm/sti/sti_crtc.c +++ b/drivers/gpu/drm/sti/sti_crtc.c @@ -357,7 +357,7 @@ int sti_crtc_init(struct drm_device *drm_dev, struct sti_mixer *mixer, res = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor, &sti_crtc_funcs, NULL); if (res) { - DRM_ERROR("Can't initialze CRTC\n"); + DRM_ERROR("Can't initialize CRTC\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c index 55b6967d27e1..90c46b49c931 100644 --- a/drivers/gpu/drm/sti/sti_drv.c +++ b/drivers/gpu/drm/sti/sti_drv.c @@ -119,30 +119,10 @@ err: return ret; } -static int sti_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state) -{ - int ret; - - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; - - ret = drm_atomic_normalize_zpos(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); - if (ret) - return ret; - - return ret; -} - static const struct drm_mode_config_funcs sti_mode_config_funcs = { .fb_create = drm_gem_fb_create, .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = sti_atomic_check, + .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; @@ -160,6 +140,8 @@ static void sti_mode_config_init(struct drm_device *dev) dev->mode_config.max_height = STI_MAX_FB_HEIGHT; dev->mode_config.funcs = &sti_mode_config_funcs; + + dev->mode_config.normalize_zpos = true; } DEFINE_DRM_GEM_CMA_FOPS(sti_driver_fops); diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c index b074609c960a..b48cd86e0250 100644 --- a/drivers/gpu/drm/sti/sti_plane.c +++ b/drivers/gpu/drm/sti/sti_plane.c @@ -40,6 +40,7 @@ void sti_plane_update_fps(struct sti_plane *plane, bool new_frame, bool new_field) { + struct drm_plane_state *state = plane->drm_plane.state; ktime_t now; struct sti_fps_info *fps; int fpks, fipks, ms_since_last, num_frames, num_fields; @@ -66,14 +67,14 @@ void sti_plane_update_fps(struct sti_plane *plane, fps->last_timestamp = now; fps->last_frame_counter = fps->curr_frame_counter; - if (plane->drm_plane.fb) { + if (state->fb) { fpks = (num_frames * 1000000) / ms_since_last; snprintf(plane->fps_info.fps_str, FPS_LENGTH, "%-8s %4dx%-4d %.4s @ %3d.%-3.3d fps (%s)", plane->drm_plane.name, - plane->drm_plane.fb->width, - plane->drm_plane.fb->height, - (char *)&plane->drm_plane.fb->format->format, + state->fb->width, + state->fb->height, + (char *)&state->fb->format->format, fpks / 1000, fpks % 1000, sti_plane_to_str(plane)); } diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c index 9ab00a87f7cc..8698e08313e1 100644 --- a/drivers/gpu/drm/stm/drv.c +++ 
b/drivers/gpu/drm/stm/drv.c @@ -72,8 +72,6 @@ static struct drm_driver drv_driver = { .gem_prime_vmap = drm_gem_cma_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_mmap = drm_gem_cma_prime_mmap, - .enable_vblank = ltdc_crtc_enable_vblank, - .disable_vblank = ltdc_crtc_disable_vblank, }; static int drv_load(struct drm_device *ddev) diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 1a3277e483d5..d997a6014d6c 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -392,9 +392,6 @@ static void ltdc_crtc_update_clut(struct drm_crtc *crtc) u32 val; int i; - if (!crtc || !crtc->state) - return; - if (!crtc->state->color_mgmt_changed || !crtc->state->gamma_lut) return; @@ -448,6 +445,43 @@ static void ltdc_crtc_atomic_disable(struct drm_crtc *crtc, reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR); } +#define CLK_TOLERANCE_HZ 50 + +static enum drm_mode_status +ltdc_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + int target = mode->clock * 1000; + int target_min = target - CLK_TOLERANCE_HZ; + int target_max = target + CLK_TOLERANCE_HZ; + int result; + + /* + * Accept all "preferred" modes: + * - this is important for panels because panel clock tolerances are + * bigger than hdmi ones and there is no reason to not accept them + * (the fps may vary a little but it is not a problem). + * - the hdmi preferred mode will be accepted too, but userland will + * be able to use other hdmi "valid" modes if necessary. + */ + if (mode->type & DRM_MODE_TYPE_PREFERRED) + return MODE_OK; + + result = clk_round_rate(ldev->pixel_clk, target); + + DRM_DEBUG_DRIVER("clk rate target %d, available %d\n", target, result); + + /* + * Filter modes according to the clock value, particularly useful for + * hdmi modes that require precise pixel clocks.
+ */ + if (result < target_min || result > target_max) + return MODE_CLOCK_RANGE; + + return MODE_OK; +} + static bool ltdc_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -562,6 +596,7 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc, } static const struct drm_crtc_helper_funcs ltdc_crtc_helper_funcs = { + .mode_valid = ltdc_crtc_mode_valid, .mode_fixup = ltdc_crtc_mode_fixup, .mode_set_nofb = ltdc_crtc_mode_set_nofb, .atomic_flush = ltdc_crtc_atomic_flush, @@ -569,9 +604,9 @@ static const struct drm_crtc_helper_funcs ltdc_crtc_helper_funcs = { .atomic_disable = ltdc_crtc_atomic_disable, }; -int ltdc_crtc_enable_vblank(struct drm_device *ddev, unsigned int pipe) +static int ltdc_crtc_enable_vblank(struct drm_crtc *crtc) { - struct ltdc_device *ldev = ddev->dev_private; + struct ltdc_device *ldev = crtc_to_ltdc(crtc); DRM_DEBUG_DRIVER("\n"); reg_set(ldev->regs, LTDC_IER, IER_LIE); @@ -579,9 +614,9 @@ int ltdc_crtc_enable_vblank(struct drm_device *ddev, unsigned int pipe) return 0; } -void ltdc_crtc_disable_vblank(struct drm_device *ddev, unsigned int pipe) +static void ltdc_crtc_disable_vblank(struct drm_crtc *crtc) { - struct ltdc_device *ldev = ddev->dev_private; + struct ltdc_device *ldev = crtc_to_ltdc(crtc); DRM_DEBUG_DRIVER("\n"); reg_clear(ldev->regs, LTDC_IER, IER_LIE); @@ -594,6 +629,8 @@ static const struct drm_crtc_funcs ltdc_crtc_funcs = { .reset = drm_atomic_helper_crtc_reset, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .enable_vblank = ltdc_crtc_enable_vblank, + .disable_vblank = ltdc_crtc_disable_vblank, .gamma_set = drm_atomic_helper_legacy_gamma_set, }; @@ -727,6 +764,8 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, reg_update_bits(ldev->regs, LTDC_L1CR + lofs, LXCR_LEN | LXCR_CLUTEN, val); + ldev->plane_fpsi[plane->index].counter++; + mutex_lock(&ldev->err_lock); if (ldev->error_status & ISR_FUIF) { DRM_DEBUG_DRIVER("Fifo underrun\n"); @@ -752,6 +791,25 @@ static void ltdc_plane_atomic_disable(struct drm_plane *plane, oldstate->crtc->base.id, plane->base.id); } +static void ltdc_plane_atomic_print_state(struct drm_printer *p, + const struct drm_plane_state *state) +{ + struct drm_plane *plane = state->plane; + struct ltdc_device *ldev = plane_to_ltdc(plane); + struct fps_info *fpsi = &ldev->plane_fpsi[plane->index]; + int ms_since_last; + ktime_t now; + + now = ktime_get(); + ms_since_last = ktime_to_ms(ktime_sub(now, fpsi->last_timestamp)); + + drm_printf(p, "\tuser_updates=%dfps\n", + DIV_ROUND_CLOSEST(fpsi->counter * 1000, ms_since_last)); + + fpsi->last_timestamp = now; + fpsi->counter = 0; +} + static const struct drm_plane_funcs ltdc_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -759,6 +817,7 @@ static const struct drm_plane_funcs ltdc_plane_funcs = { .reset = drm_atomic_helper_plane_reset, .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, + .atomic_print_state = ltdc_plane_atomic_print_state, }; static const struct drm_plane_helper_funcs ltdc_plane_helper_funcs = { @@ -801,13 +860,13 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev, plane = devm_kzalloc(dev, sizeof(*plane), GFP_KERNEL); if (!plane) - return 0; + return NULL; ret = drm_universal_plane_init(ddev, plane, possible_crtcs, &ltdc_plane_funcs, formats,
nb_fmt, NULL, type, NULL); if (ret < 0) - return 0; + return NULL; drm_plane_helper_add(plane, &ltdc_plane_helper_funcs); @@ -966,14 +1025,13 @@ int ltdc_load(struct drm_device *ddev) &bridge[i]); /* - * If at least one endpoint is ready, continue probing, - * else if at least one endpoint is -EPROBE_DEFER and - * there is no previous ready endpoints, defer probing. + * If at least one endpoint is -EPROBE_DEFER, defer probing, + * else if at least one endpoint is ready, continue probing. */ - if (!ret) + if (ret == -EPROBE_DEFER) + return ret; + else if (!ret) endpoint_not_ready = 0; - else if (ret == -EPROBE_DEFER && endpoint_not_ready) - endpoint_not_ready = -EPROBE_DEFER; } if (endpoint_not_ready) diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h index edb268129c54..1e16d6afb0d2 100644 --- a/drivers/gpu/drm/stm/ltdc.h +++ b/drivers/gpu/drm/stm/ltdc.h @@ -20,6 +20,13 @@ struct ltdc_caps { bool non_alpha_only_l1; /* non-native no-alpha formats on layer 1 */ }; +#define LTDC_MAX_LAYER 4 + +struct fps_info { + unsigned int counter; + ktime_t last_timestamp; +}; + struct ltdc_device { void __iomem *regs; struct clk *pixel_clk; /* lcd pixel clock */ @@ -27,10 +34,9 @@ struct ltdc_device { struct ltdc_caps caps; u32 error_status; u32 irq_status; + struct fps_info plane_fpsi[LTDC_MAX_LAYER]; }; -int ltdc_crtc_enable_vblank(struct drm_device *dev, unsigned int pipe); -void ltdc_crtc_disable_vblank(struct drm_device *dev, unsigned int pipe); int ltdc_load(struct drm_device *ddev); void ltdc_unload(struct drm_device *ddev); diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index eee6bc0eaf97..156a865c3e6d 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -40,6 +40,16 @@ config DRM_SUN4I_BACKEND do some alpha blending and feed graphics to TCON. If M is selected the module will be called sun4i-backend. +config DRM_SUN6I_DSI + tristate "Allwinner A31 MIPI-DSI Controller Support" + default MACH_SUN8I + select CRC_CCITT + select DRM_MIPI_DSI + help + Choose this option if you want to have an Allwinner SoC with + MIPI-DSI support. If M is selected the module will be called + sun6i-dsi. + config DRM_SUN8I_DW_HDMI tristate "Support for Allwinner version of DesignWare HDMI" depends on DRM_SUN4I diff --git a/drivers/gpu/drm/sun4i/Makefile b/drivers/gpu/drm/sun4i/Makefile index 330843ce4280..2589f4acd5ae 100644 --- a/drivers/gpu/drm/sun4i/Makefile +++ b/drivers/gpu/drm/sun4i/Makefile @@ -24,6 +24,9 @@ sun4i-tcon-y += sun4i_lvds.o sun4i-tcon-y += sun4i_tcon.o sun4i-tcon-y += sun4i_rgb.o +sun6i-dsi-y += sun6i_mipi_dphy.o +sun6i-dsi-y += sun6i_mipi_dsi.o + obj-$(CONFIG_DRM_SUN4I) += sun4i-drm.o obj-$(CONFIG_DRM_SUN4I) += sun4i-tcon.o obj-$(CONFIG_DRM_SUN4I) += sun4i_tv.o @@ -31,5 +34,6 @@ obj-$(CONFIG_DRM_SUN4I) += sun6i_drc.o obj-$(CONFIG_DRM_SUN4I_BACKEND) += sun4i-backend.o sun4i-frontend.o obj-$(CONFIG_DRM_SUN4I_HDMI) += sun4i-drm-hdmi.o +obj-$(CONFIG_DRM_SUN6I_DSI) += sun6i-dsi.o obj-$(CONFIG_DRM_SUN8I_DW_HDMI) += sun8i-drm-hdmi.o obj-$(CONFIG_DRM_SUN8I_MIXER) += sun8i-mixer.o diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 9bad54f3de38..de0a76dfa1a2 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -295,6 +295,15 @@ int sun4i_backend_update_layer_formats(struct sun4i_backend *backend, DRM_DEBUG_DRIVER("Switching display backend interlaced mode %s\n", interlaced ?
"on" : "off"); + val = SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA(state->alpha >> 8); + if (state->alpha != DRM_BLEND_ALPHA_OPAQUE) + val |= SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN; + regmap_update_bits(backend->engine.regs, + SUN4I_BACKEND_ATTCTL_REG0(layer), + SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_MASK | + SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN, + val); + if (sun4i_backend_format_is_yuv(fb->format->format)) return sun4i_backend_update_yuv_format(backend, layer, plane); @@ -490,7 +499,7 @@ static int sun4i_backend_atomic_check(struct sunxi_engine *engine, DRM_DEBUG_DRIVER("Plane FB format is %s\n", drm_get_format_name(fb->format->format, &format_name)); - if (fb->format->has_alpha) + if (fb->format->has_alpha || (plane_state->alpha != DRM_BLEND_ALPHA_OPAQUE)) num_alpha_planes++; if (sun4i_backend_format_is_yuv(fb->format->format)) { @@ -548,7 +557,8 @@ static int sun4i_backend_atomic_check(struct sunxi_engine *engine, } /* We can't have an alpha plane at the lowest position */ - if (plane_states[0]->fb->format->has_alpha) + if (plane_states[0]->fb->format->has_alpha || + (plane_states[0]->alpha != DRM_BLEND_ALPHA_OPAQUE)) return -EINVAL; for (i = 1; i < num_planes; i++) { @@ -560,7 +570,7 @@ static int sun4i_backend_atomic_check(struct sunxi_engine *engine, * The only alpha position is the lowest plane of the * second pipe. */ - if (fb->format->has_alpha) + if (fb->format->has_alpha || (p_state->alpha != DRM_BLEND_ALPHA_OPAQUE)) current_pipe++; s_state->pipe = current_pipe; diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.h b/drivers/gpu/drm/sun4i/sun4i_backend.h index 316f2179e9e1..4caee0392fa4 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.h +++ b/drivers/gpu/drm/sun4i/sun4i_backend.h @@ -68,12 +68,15 @@ #define SUN4I_BACKEND_CKMIN_REG 0x884 #define SUN4I_BACKEND_CKCFG_REG 0x888 #define SUN4I_BACKEND_ATTCTL_REG0(l) (0x890 + (0x4 * (l))) +#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_MASK GENMASK(31, 24) +#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA(x) ((x) << 24) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL_MASK BIT(15) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL(x) ((x) << 15) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PRISEL_MASK GENMASK(11, 10) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PRISEL(x) ((x) << 10) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_YUVEN BIT(2) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_VDOEN BIT(1) +#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN BIT(0) #define SUN4I_BACKEND_ATTCTL_REG1(l) (0x8a0 + (0x4 * (l))) #define SUN4I_BACKEND_ATTCTL_REG1_LAY_HSCAFCT GENMASK(15, 14) diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.c b/drivers/gpu/drm/sun4i/sun4i_layer.c index 2949a3c912c1..750ad24de1d7 100644 --- a/drivers/gpu/drm/sun4i/sun4i_layer.c +++ b/drivers/gpu/drm/sun4i/sun4i_layer.c @@ -37,6 +37,7 @@ static void sun4i_backend_layer_reset(struct drm_plane *plane) if (state) { plane->state = &state->state; plane->state->plane = plane; + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; plane->state->zpos = layer->id; } } @@ -167,6 +168,7 @@ static struct sun4i_layer *sun4i_layer_init_one(struct drm_device *drm, &sun4i_backend_layer_helper_funcs); layer->backend = backend; + drm_plane_create_alpha_property(&layer->plane); drm_plane_create_zpos_property(&layer->plane, 0, 0, SUN4I_BACKEND_NUM_LAYERS - 1); diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index c3d92d537240..08747fc3ee71 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -35,6 +35,7 @@ #include "sun4i_lvds.h" #include "sun4i_rgb.h" #include 
"sun4i_tcon.h" +#include "sun6i_mipi_dsi.h" #include "sunxi_engine.h" static struct drm_connector *sun4i_tcon_get_connector(const struct drm_encoder *encoder) @@ -169,6 +170,7 @@ void sun4i_tcon_set_status(struct sun4i_tcon *tcon, case DRM_MODE_ENCODER_LVDS: is_lvds = true; /* Fallthrough */ + case DRM_MODE_ENCODER_DSI: case DRM_MODE_ENCODER_NONE: channel = 0; break; @@ -201,7 +203,8 @@ void sun4i_tcon_enable_vblank(struct sun4i_tcon *tcon, bool enable) DRM_DEBUG_DRIVER("%sabling VBLANK interrupt\n", enable ? "En" : "Dis"); mask = SUN4I_TCON_GINT0_VBLANK_ENABLE(0) | - SUN4I_TCON_GINT0_VBLANK_ENABLE(1); + SUN4I_TCON_GINT0_VBLANK_ENABLE(1) | + SUN4I_TCON_GINT0_TCON0_TRI_FINISH_ENABLE; if (enable) val = mask; @@ -273,6 +276,71 @@ static void sun4i_tcon0_mode_set_common(struct sun4i_tcon *tcon, SUN4I_TCON0_BASIC0_Y(mode->crtc_vdisplay)); } +static void sun4i_tcon0_mode_set_cpu(struct sun4i_tcon *tcon, + struct mipi_dsi_device *device, + const struct drm_display_mode *mode) +{ + u8 bpp = mipi_dsi_pixel_format_to_bpp(device->format); + u8 lanes = device->lanes; + u32 block_space, start_delay; + u32 tcon_div; + + tcon->dclk_min_div = 4; + tcon->dclk_max_div = 127; + + sun4i_tcon0_mode_set_common(tcon, mode); + + regmap_update_bits(tcon->regs, SUN4I_TCON0_CTL_REG, + SUN4I_TCON0_CTL_IF_MASK, + SUN4I_TCON0_CTL_IF_8080); + + regmap_write(tcon->regs, SUN4I_TCON_ECC_FIFO_REG, + SUN4I_TCON_ECC_FIFO_EN); + + regmap_write(tcon->regs, SUN4I_TCON0_CPU_IF_REG, + SUN4I_TCON0_CPU_IF_MODE_DSI | + SUN4I_TCON0_CPU_IF_TRI_FIFO_FLUSH | + SUN4I_TCON0_CPU_IF_TRI_FIFO_EN | + SUN4I_TCON0_CPU_IF_TRI_EN); + + /* + * This looks suspicious, but it works... + * + * The datasheet says that this should be set higher than 20 * + * pixel cycle, but it's not clear what a pixel cycle is. + */ + regmap_read(tcon->regs, SUN4I_TCON0_DCLK_REG, &tcon_div); + tcon_div &= GENMASK(6, 0); + block_space = mode->htotal * bpp / (tcon_div * lanes); + block_space -= mode->hdisplay + 40; + + regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI0_REG, + SUN4I_TCON0_CPU_TRI0_BLOCK_SPACE(block_space) | + SUN4I_TCON0_CPU_TRI0_BLOCK_SIZE(mode->hdisplay)); + + regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI1_REG, + SUN4I_TCON0_CPU_TRI1_BLOCK_NUM(mode->vdisplay)); + + start_delay = (mode->crtc_vtotal - mode->crtc_vdisplay - 10 - 1); + start_delay = start_delay * mode->crtc_htotal * 149; + start_delay = start_delay / (mode->crtc_clock / 1000) / 8; + regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI2_REG, + SUN4I_TCON0_CPU_TRI2_TRANS_START_SET(10) | + SUN4I_TCON0_CPU_TRI2_START_DELAY(start_delay)); + + /* + * The Allwinner BSP has a comment that the period should be + * the display clock * 15, but uses an hardcoded 3000... + */ + regmap_write(tcon->regs, SUN4I_TCON_SAFE_PERIOD_REG, + SUN4I_TCON_SAFE_PERIOD_NUM(3000) | + SUN4I_TCON_SAFE_PERIOD_MODE(3)); + + /* Enable the output on the pins */ + regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG, + 0xe0000000); +} + static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon, const struct drm_encoder *encoder, const struct drm_display_mode *mode) @@ -538,7 +606,17 @@ void sun4i_tcon_mode_set(struct sun4i_tcon *tcon, const struct drm_encoder *encoder, const struct drm_display_mode *mode) { + struct sun6i_dsi *dsi; + switch (encoder->encoder_type) { + case DRM_MODE_ENCODER_DSI: + /* + * This is not really elegant, but it's the "cleaner" + * way I could think of... 
+ */ + dsi = encoder_to_sun6i_dsi(encoder); + sun4i_tcon0_mode_set_cpu(tcon, dsi->device, mode); + break; case DRM_MODE_ENCODER_LVDS: sun4i_tcon0_mode_set_lvds(tcon, encoder, mode); break; @@ -582,7 +660,8 @@ static irqreturn_t sun4i_tcon_handler(int irq, void *private) regmap_read(tcon->regs, SUN4I_TCON_GINT0_REG, &status); if (!(status & (SUN4I_TCON_GINT0_VBLANK_INT(0) | - SUN4I_TCON_GINT0_VBLANK_INT(1)))) + SUN4I_TCON_GINT0_VBLANK_INT(1) | + SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT))) return IRQ_NONE; drm_crtc_handle_vblank(&scrtc->crtc); @@ -591,7 +670,8 @@ static irqreturn_t sun4i_tcon_handler(int irq, void *private) /* Acknowledge the interrupt */ regmap_update_bits(tcon->regs, SUN4I_TCON_GINT0_REG, SUN4I_TCON_GINT0_VBLANK_INT(0) | - SUN4I_TCON_GINT0_VBLANK_INT(1), + SUN4I_TCON_GINT0_VBLANK_INT(1) | + SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT, 0); if (engine->ops->vblank_quirk) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h index 161e09427124..f6a071cd5a6f 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.h +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h @@ -28,13 +28,32 @@ #define SUN4I_TCON_GINT0_REG 0x4 #define SUN4I_TCON_GINT0_VBLANK_ENABLE(pipe) BIT(31 - (pipe)) +#define SUN4I_TCON_GINT0_TCON0_TRI_FINISH_ENABLE BIT(27) +#define SUN4I_TCON_GINT0_TCON0_TRI_COUNTER_ENABLE BIT(26) #define SUN4I_TCON_GINT0_VBLANK_INT(pipe) BIT(15 - (pipe)) +#define SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT BIT(11) +#define SUN4I_TCON_GINT0_TCON0_TRI_COUNTER_INT BIT(10) #define SUN4I_TCON_GINT1_REG 0x8 + #define SUN4I_TCON_FRM_CTL_REG 0x10 +#define SUN4I_TCON_FRM_CTL_EN BIT(31) + +#define SUN4I_TCON_FRM_SEED_PR_REG 0x14 +#define SUN4I_TCON_FRM_SEED_PG_REG 0x18 +#define SUN4I_TCON_FRM_SEED_PB_REG 0x1c +#define SUN4I_TCON_FRM_SEED_LR_REG 0x20 +#define SUN4I_TCON_FRM_SEED_LG_REG 0x24 +#define SUN4I_TCON_FRM_SEED_LB_REG 0x28 +#define SUN4I_TCON_FRM_TBL0_REG 0x2c +#define SUN4I_TCON_FRM_TBL1_REG 0x30 +#define SUN4I_TCON_FRM_TBL2_REG 0x34 +#define SUN4I_TCON_FRM_TBL3_REG 0x38 #define SUN4I_TCON0_CTL_REG 0x40 #define SUN4I_TCON0_CTL_TCON_ENABLE BIT(31) +#define SUN4I_TCON0_CTL_IF_MASK GENMASK(25, 24) +#define SUN4I_TCON0_CTL_IF_8080 (1 << 24) #define SUN4I_TCON0_CTL_CLK_DELAY_MASK GENMASK(8, 4) #define SUN4I_TCON0_CTL_CLK_DELAY(delay) ((delay << 4) & SUN4I_TCON0_CTL_CLK_DELAY_MASK) #define SUN4I_TCON0_CTL_SRC_SEL_MASK GENMASK(2, 0) @@ -61,7 +80,14 @@ #define SUN4I_TCON0_BASIC3_V_SYNC(height) (((height) - 1) & 0x7ff) #define SUN4I_TCON0_HV_IF_REG 0x58 + #define SUN4I_TCON0_CPU_IF_REG 0x60 +#define SUN4I_TCON0_CPU_IF_MODE_MASK GENMASK(31, 28) +#define SUN4I_TCON0_CPU_IF_MODE_DSI (1 << 28) +#define SUN4I_TCON0_CPU_IF_TRI_FIFO_FLUSH BIT(16) +#define SUN4I_TCON0_CPU_IF_TRI_FIFO_EN BIT(2) +#define SUN4I_TCON0_CPU_IF_TRI_EN BIT(0) + #define SUN4I_TCON0_CPU_WR_REG 0x64 #define SUN4I_TCON0_CPU_RD0_REG 0x68 #define SUN4I_TCON0_CPU_RDA_REG 0x6c @@ -128,6 +154,10 @@ #define SUN4I_TCON1_IO_POL_REG 0xf0 #define SUN4I_TCON1_IO_TRI_REG 0xf4 + +#define SUN4I_TCON_ECC_FIFO_REG 0xf8 +#define SUN4I_TCON_ECC_FIFO_EN BIT(3) + #define SUN4I_TCON_CEU_CTL_REG 0x100 #define SUN4I_TCON_CEU_MUL_RR_REG 0x110 #define SUN4I_TCON_CEU_MUL_RG_REG 0x114 @@ -144,6 +174,22 @@ #define SUN4I_TCON_CEU_RANGE_R_REG 0x140 #define SUN4I_TCON_CEU_RANGE_G_REG 0x144 #define SUN4I_TCON_CEU_RANGE_B_REG 0x148 + +#define SUN4I_TCON0_CPU_TRI0_REG 0x160 +#define SUN4I_TCON0_CPU_TRI0_BLOCK_SPACE(space) ((((space) - 1) & 0xfff) << 16) +#define SUN4I_TCON0_CPU_TRI0_BLOCK_SIZE(size) (((size) - 1) & 0xfff) + +#define SUN4I_TCON0_CPU_TRI1_REG 0x164 
+#define SUN4I_TCON0_CPU_TRI1_BLOCK_NUM(num) (((num) - 1) & 0xffff) + +#define SUN4I_TCON0_CPU_TRI2_REG 0x168 +#define SUN4I_TCON0_CPU_TRI2_START_DELAY(delay) (((delay) & 0xffff) << 16) +#define SUN4I_TCON0_CPU_TRI2_TRANS_START_SET(set) ((set) & 0xfff) + +#define SUN4I_TCON_SAFE_PERIOD_REG 0x1f0 +#define SUN4I_TCON_SAFE_PERIOD_NUM(num) (((num) & 0xfff) << 16) +#define SUN4I_TCON_SAFE_PERIOD_MODE(mode) ((mode) & 0x3) + #define SUN4I_TCON_MUX_CTRL_REG 0x200 #define SUN4I_TCON0_LVDS_ANA0_REG 0x220 diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c new file mode 100644 index 000000000000..e4d19431fa0e --- /dev/null +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2016 Allwinnertech Co., Ltd. + * Copyright (C) 2017-2018 Bootlin + * + * Maxime Ripard <maxime.ripard@free-electrons.com> + */ + +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/of_address.h> +#include <linux/regmap.h> +#include <linux/reset.h> + +#include "sun6i_mipi_dsi.h" + +#define SUN6I_DPHY_GCTL_REG 0x00 +#define SUN6I_DPHY_GCTL_LANE_NUM(n) ((((n) - 1) & 3) << 4) +#define SUN6I_DPHY_GCTL_EN BIT(0) + +#define SUN6I_DPHY_TX_CTL_REG 0x04 +#define SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT BIT(28) + +#define SUN6I_DPHY_TX_TIME0_REG 0x10 +#define SUN6I_DPHY_TX_TIME0_HS_TRAIL(n) (((n) & 0xff) << 24) +#define SUN6I_DPHY_TX_TIME0_HS_PREPARE(n) (((n) & 0xff) << 16) +#define SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(n) ((n) & 0xff) + +#define SUN6I_DPHY_TX_TIME1_REG 0x14 +#define SUN6I_DPHY_TX_TIME1_CLK_POST(n) (((n) & 0xff) << 24) +#define SUN6I_DPHY_TX_TIME1_CLK_PRE(n) (((n) & 0xff) << 16) +#define SUN6I_DPHY_TX_TIME1_CLK_ZERO(n) (((n) & 0xff) << 8) +#define SUN6I_DPHY_TX_TIME1_CLK_PREPARE(n) ((n) & 0xff) + +#define SUN6I_DPHY_TX_TIME2_REG 0x18 +#define SUN6I_DPHY_TX_TIME2_CLK_TRAIL(n) ((n) & 0xff) + +#define SUN6I_DPHY_TX_TIME3_REG 0x1c + +#define SUN6I_DPHY_TX_TIME4_REG 0x20 +#define SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(n) (((n) & 0xff) << 8) +#define SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(n) ((n) & 0xff) + +#define SUN6I_DPHY_ANA0_REG 0x4c +#define SUN6I_DPHY_ANA0_REG_PWS BIT(31) +#define SUN6I_DPHY_ANA0_REG_DMPC BIT(28) +#define SUN6I_DPHY_ANA0_REG_DMPD(n) (((n) & 0xf) << 24) +#define SUN6I_DPHY_ANA0_REG_SLV(n) (((n) & 7) << 12) +#define SUN6I_DPHY_ANA0_REG_DEN(n) (((n) & 0xf) << 8) + +#define SUN6I_DPHY_ANA1_REG 0x50 +#define SUN6I_DPHY_ANA1_REG_VTTMODE BIT(31) +#define SUN6I_DPHY_ANA1_REG_CSMPS(n) (((n) & 3) << 28) +#define SUN6I_DPHY_ANA1_REG_SVTT(n) (((n) & 0xf) << 24) + +#define SUN6I_DPHY_ANA2_REG 0x54 +#define SUN6I_DPHY_ANA2_EN_P2S_CPU(n) (((n) & 0xf) << 24) +#define SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK GENMASK(27, 24) +#define SUN6I_DPHY_ANA2_EN_CK_CPU BIT(4) +#define SUN6I_DPHY_ANA2_REG_ENIB BIT(1) + +#define SUN6I_DPHY_ANA3_REG 0x58 +#define SUN6I_DPHY_ANA3_EN_VTTD(n) (((n) & 0xf) << 28) +#define SUN6I_DPHY_ANA3_EN_VTTD_MASK GENMASK(31, 28) +#define SUN6I_DPHY_ANA3_EN_VTTC BIT(27) +#define SUN6I_DPHY_ANA3_EN_DIV BIT(26) +#define SUN6I_DPHY_ANA3_EN_LDOC BIT(25) +#define SUN6I_DPHY_ANA3_EN_LDOD BIT(24) +#define SUN6I_DPHY_ANA3_EN_LDOR BIT(18) + +#define SUN6I_DPHY_ANA4_REG 0x5c +#define SUN6I_DPHY_ANA4_REG_DMPLVC BIT(24) +#define SUN6I_DPHY_ANA4_REG_DMPLVD(n) (((n) & 0xf) << 20) +#define SUN6I_DPHY_ANA4_REG_CKDV(n) (((n) & 0x1f) << 12) +#define SUN6I_DPHY_ANA4_REG_TMSC(n) (((n) & 3) << 10) +#define SUN6I_DPHY_ANA4_REG_TMSD(n) (((n) & 3) << 8) +#define SUN6I_DPHY_ANA4_REG_TXDNSC(n) (((n) & 3) << 6) +#define 
SUN6I_DPHY_ANA4_REG_TXDNSD(n) (((n) & 3) << 4) +#define SUN6I_DPHY_ANA4_REG_TXPUSC(n) (((n) & 3) << 2) +#define SUN6I_DPHY_ANA4_REG_TXPUSD(n) ((n) & 3) + +#define SUN6I_DPHY_DBG5_REG 0xf4 + +int sun6i_dphy_init(struct sun6i_dphy *dphy, unsigned int lanes) +{ + reset_control_deassert(dphy->reset); + clk_prepare_enable(dphy->mod_clk); + clk_set_rate_exclusive(dphy->mod_clk, 150000000); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG, + SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG, + SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) | + SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) | + SUN6I_DPHY_TX_TIME0_HS_TRAIL(10)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG, + SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) | + SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) | + SUN6I_DPHY_TX_TIME1_CLK_PRE(3) | + SUN6I_DPHY_TX_TIME1_CLK_POST(10)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG, + SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG, + SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | + SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); + + regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG, + SUN6I_DPHY_GCTL_LANE_NUM(lanes) | + SUN6I_DPHY_GCTL_EN); + + return 0; +} + +int sun6i_dphy_power_on(struct sun6i_dphy *dphy, unsigned int lanes) +{ + u8 lanes_mask = GENMASK(lanes - 1, 0); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, + SUN6I_DPHY_ANA0_REG_PWS | + SUN6I_DPHY_ANA0_REG_DMPC | + SUN6I_DPHY_ANA0_REG_SLV(7) | + SUN6I_DPHY_ANA0_REG_DMPD(lanes_mask) | + SUN6I_DPHY_ANA0_REG_DEN(lanes_mask)); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA1_REG, + SUN6I_DPHY_ANA1_REG_CSMPS(1) | + SUN6I_DPHY_ANA1_REG_SVTT(7)); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA4_REG, + SUN6I_DPHY_ANA4_REG_CKDV(1) | + SUN6I_DPHY_ANA4_REG_TMSC(1) | + SUN6I_DPHY_ANA4_REG_TMSD(1) | + SUN6I_DPHY_ANA4_REG_TXDNSC(1) | + SUN6I_DPHY_ANA4_REG_TXDNSD(1) | + SUN6I_DPHY_ANA4_REG_TXPUSC(1) | + SUN6I_DPHY_ANA4_REG_TXPUSD(1) | + SUN6I_DPHY_ANA4_REG_DMPLVC | + SUN6I_DPHY_ANA4_REG_DMPLVD(lanes_mask)); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_REG_ENIB); + udelay(5); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA3_REG, + SUN6I_DPHY_ANA3_EN_LDOR | + SUN6I_DPHY_ANA3_EN_LDOC | + SUN6I_DPHY_ANA3_EN_LDOD); + udelay(1); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG, + SUN6I_DPHY_ANA3_EN_VTTC | + SUN6I_DPHY_ANA3_EN_VTTD_MASK, + SUN6I_DPHY_ANA3_EN_VTTC | + SUN6I_DPHY_ANA3_EN_VTTD(lanes_mask)); + udelay(1); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG, + SUN6I_DPHY_ANA3_EN_DIV, + SUN6I_DPHY_ANA3_EN_DIV); + udelay(1); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_EN_CK_CPU, + SUN6I_DPHY_ANA2_EN_CK_CPU); + udelay(1); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA1_REG, + SUN6I_DPHY_ANA1_REG_VTTMODE, + SUN6I_DPHY_ANA1_REG_VTTMODE); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK, + SUN6I_DPHY_ANA2_EN_P2S_CPU(lanes_mask)); + + return 0; +} + +int sun6i_dphy_power_off(struct sun6i_dphy *dphy) +{ + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA1_REG, + SUN6I_DPHY_ANA1_REG_VTTMODE, 0); + + return 0; +} + +int sun6i_dphy_exit(struct sun6i_dphy *dphy) +{ + clk_rate_exclusive_put(dphy->mod_clk); + clk_disable_unprepare(dphy->mod_clk); + reset_control_assert(dphy->reset); + + return 0; +} + +static struct regmap_config sun6i_dphy_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = SUN6I_DPHY_DBG5_REG, + .name = "mipi-dphy", +}; + +static 
const struct of_device_id sun6i_dphy_of_table[] = { + { .compatible = "allwinner,sun6i-a31-mipi-dphy" }, + { } +}; + +int sun6i_dphy_probe(struct sun6i_dsi *dsi, struct device_node *node) +{ + struct sun6i_dphy *dphy; + struct resource res; + void __iomem *regs; + int ret; + + if (!of_match_node(sun6i_dphy_of_table, node)) { + dev_err(dsi->dev, "Incompatible D-PHY\n"); + return -EINVAL; + } + + dphy = devm_kzalloc(dsi->dev, sizeof(*dphy), GFP_KERNEL); + if (!dphy) + return -ENOMEM; + + ret = of_address_to_resource(node, 0, &res); + if (ret) { + dev_err(dsi->dev, "phy: Couldn't get our resources\n"); + return ret; + } + + regs = devm_ioremap_resource(dsi->dev, &res); + if (IS_ERR(regs)) { + dev_err(dsi->dev, "Couldn't map the DPHY encoder registers\n"); + return PTR_ERR(regs); + } + + dphy->regs = devm_regmap_init_mmio(dsi->dev, regs, + &sun6i_dphy_regmap_config); + if (IS_ERR(dphy->regs)) { + dev_err(dsi->dev, "Couldn't create the DPHY encoder regmap\n"); + return PTR_ERR(dphy->regs); + } + + dphy->reset = of_reset_control_get_shared(node, NULL); + if (IS_ERR(dphy->reset)) { + dev_err(dsi->dev, "Couldn't get our reset line\n"); + return PTR_ERR(dphy->reset); + } + + dphy->bus_clk = of_clk_get_by_name(node, "bus"); + if (IS_ERR(dphy->bus_clk)) { + dev_err(dsi->dev, "Couldn't get the DPHY bus clock\n"); + ret = PTR_ERR(dphy->bus_clk); + goto err_free_reset; + } + regmap_mmio_attach_clk(dphy->regs, dphy->bus_clk); + + dphy->mod_clk = of_clk_get_by_name(node, "mod"); + if (IS_ERR(dphy->mod_clk)) { + dev_err(dsi->dev, "Couldn't get the DPHY mod clock\n"); + ret = PTR_ERR(dphy->mod_clk); + goto err_free_bus; + } + + dsi->dphy = dphy; + + return 0; + +err_free_bus: + regmap_mmio_detach_clk(dphy->regs); + clk_put(dphy->bus_clk); +err_free_reset: + reset_control_put(dphy->reset); + return ret; +} + +int sun6i_dphy_remove(struct sun6i_dsi *dsi) +{ + struct sun6i_dphy *dphy = dsi->dphy; + + regmap_mmio_detach_clk(dphy->regs); + clk_put(dphy->mod_clk); + clk_put(dphy->bus_clk); + reset_control_put(dphy->reset); + + return 0; +} diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c new file mode 100644 index 000000000000..bfbf761f0c1d --- /dev/null +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c @@ -0,0 +1,1107 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2016 Allwinnertech Co., Ltd. 
+ * Copyright (C) 2017-2018 Bootlin + * + * Maxime Ripard <maxime.ripard@bootlin.com> + */ + +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/crc-ccitt.h> +#include <linux/of_address.h> +#include <linux/pm_runtime.h> +#include <linux/regmap.h> +#include <linux/reset.h> + +#include <linux/phy/phy.h> + +#include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> + +#include "sun4i_drv.h" +#include "sun6i_mipi_dsi.h" + +#include <video/mipi_display.h> + +#define SUN6I_DSI_CTL_REG 0x000 +#define SUN6I_DSI_CTL_EN BIT(0) + +#define SUN6I_DSI_BASIC_CTL_REG 0x00c +#define SUN6I_DSI_BASIC_CTL_HBP_DIS BIT(2) +#define SUN6I_DSI_BASIC_CTL_HSA_HSE_DIS BIT(1) +#define SUN6I_DSI_BASIC_CTL_VIDEO_BURST BIT(0) + +#define SUN6I_DSI_BASIC_CTL0_REG 0x010 +#define SUN6I_DSI_BASIC_CTL0_HS_EOTP_EN BIT(18) +#define SUN6I_DSI_BASIC_CTL0_CRC_EN BIT(17) +#define SUN6I_DSI_BASIC_CTL0_ECC_EN BIT(16) +#define SUN6I_DSI_BASIC_CTL0_INST_ST BIT(0) + +#define SUN6I_DSI_BASIC_CTL1_REG 0x014 +#define SUN6I_DSI_BASIC_CTL1_VIDEO_ST_DELAY(n) (((n) & 0x1fff) << 4) +#define SUN6I_DSI_BASIC_CTL1_VIDEO_FILL BIT(2) +#define SUN6I_DSI_BASIC_CTL1_VIDEO_PRECISION BIT(1) +#define SUN6I_DSI_BASIC_CTL1_VIDEO_MODE BIT(0) + +#define SUN6I_DSI_BASIC_SIZE0_REG 0x018 +#define SUN6I_DSI_BASIC_SIZE0_VBP(n) (((n) & 0xfff) << 16) +#define SUN6I_DSI_BASIC_SIZE0_VSA(n) ((n) & 0xfff) + +#define SUN6I_DSI_BASIC_SIZE1_REG 0x01c +#define SUN6I_DSI_BASIC_SIZE1_VT(n) (((n) & 0xfff) << 16) +#define SUN6I_DSI_BASIC_SIZE1_VACT(n) ((n) & 0xfff) + +#define SUN6I_DSI_INST_FUNC_REG(n) (0x020 + (n) * 0x04) +#define SUN6I_DSI_INST_FUNC_INST_MODE(n) (((n) & 0xf) << 28) +#define SUN6I_DSI_INST_FUNC_ESCAPE_ENTRY(n) (((n) & 0xf) << 24) +#define SUN6I_DSI_INST_FUNC_TRANS_PACKET(n) (((n) & 0xf) << 20) +#define SUN6I_DSI_INST_FUNC_LANE_CEN BIT(4) +#define SUN6I_DSI_INST_FUNC_LANE_DEN(n) ((n) & 0xf) + +#define SUN6I_DSI_INST_LOOP_SEL_REG 0x040 + +#define SUN6I_DSI_INST_LOOP_NUM_REG(n) (0x044 + (n) * 0x10) +#define SUN6I_DSI_INST_LOOP_NUM_N1(n) (((n) & 0xfff) << 16) +#define SUN6I_DSI_INST_LOOP_NUM_N0(n) ((n) & 0xfff) + +#define SUN6I_DSI_INST_JUMP_SEL_REG 0x048 + +#define SUN6I_DSI_INST_JUMP_CFG_REG(n) (0x04c + (n) * 0x04) +#define SUN6I_DSI_INST_JUMP_CFG_TO(n) (((n) & 0xf) << 20) +#define SUN6I_DSI_INST_JUMP_CFG_POINT(n) (((n) & 0xf) << 16) +#define SUN6I_DSI_INST_JUMP_CFG_NUM(n) ((n) & 0xffff) + +#define SUN6I_DSI_TRANS_START_REG 0x060 + +#define SUN6I_DSI_TRANS_ZERO_REG 0x078 + +#define SUN6I_DSI_TCON_DRQ_REG 0x07c +#define SUN6I_DSI_TCON_DRQ_ENABLE_MODE BIT(28) +#define SUN6I_DSI_TCON_DRQ_SET(n) ((n) & 0x3ff) + +#define SUN6I_DSI_PIXEL_CTL0_REG 0x080 +#define SUN6I_DSI_PIXEL_CTL0_PD_PLUG_DISABLE BIT(16) +#define SUN6I_DSI_PIXEL_CTL0_FORMAT(n) ((n) & 0xf) + +#define SUN6I_DSI_PIXEL_CTL1_REG 0x084 + +#define SUN6I_DSI_PIXEL_PH_REG 0x090 +#define SUN6I_DSI_PIXEL_PH_ECC(n) (((n) & 0xff) << 24) +#define SUN6I_DSI_PIXEL_PH_WC(n) (((n) & 0xffff) << 8) +#define SUN6I_DSI_PIXEL_PH_VC(n) (((n) & 3) << 6) +#define SUN6I_DSI_PIXEL_PH_DT(n) ((n) & 0x3f) + +#define SUN6I_DSI_PIXEL_PF0_REG 0x098 +#define SUN6I_DSI_PIXEL_PF0_CRC_FORCE(n) ((n) & 0xffff) + +#define SUN6I_DSI_PIXEL_PF1_REG 0x09c +#define SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINEN(n) (((n) & 0xffff) << 16) +#define SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINE0(n) ((n) & 0xffff) + +#define SUN6I_DSI_SYNC_HSS_REG 0x0b0 + +#define SUN6I_DSI_SYNC_HSE_REG 0x0b4 + +#define SUN6I_DSI_SYNC_VSS_REG 0x0b8 + +#define SUN6I_DSI_SYNC_VSE_REG 
0x0bc + +#define SUN6I_DSI_BLK_HSA0_REG 0x0c0 + +#define SUN6I_DSI_BLK_HSA1_REG 0x0c4 +#define SUN6I_DSI_BLK_PF(n) (((n) & 0xffff) << 16) +#define SUN6I_DSI_BLK_PD(n) ((n) & 0xff) + +#define SUN6I_DSI_BLK_HBP0_REG 0x0c8 + +#define SUN6I_DSI_BLK_HBP1_REG 0x0cc + +#define SUN6I_DSI_BLK_HFP0_REG 0x0d0 + +#define SUN6I_DSI_BLK_HFP1_REG 0x0d4 + +#define SUN6I_DSI_BLK_HBLK0_REG 0x0e0 + +#define SUN6I_DSI_BLK_HBLK1_REG 0x0e4 + +#define SUN6I_DSI_BLK_VBLK0_REG 0x0e8 + +#define SUN6I_DSI_BLK_VBLK1_REG 0x0ec + +#define SUN6I_DSI_BURST_LINE_REG 0x0f0 +#define SUN6I_DSI_BURST_LINE_SYNC_POINT(n) (((n) & 0xffff) << 16) +#define SUN6I_DSI_BURST_LINE_NUM(n) ((n) & 0xffff) + +#define SUN6I_DSI_BURST_DRQ_REG 0x0f4 +#define SUN6I_DSI_BURST_DRQ_EDGE1(n) (((n) & 0xffff) << 16) +#define SUN6I_DSI_BURST_DRQ_EDGE0(n) ((n) & 0xffff) + +#define SUN6I_DSI_CMD_CTL_REG 0x200 +#define SUN6I_DSI_CMD_CTL_RX_OVERFLOW BIT(26) +#define SUN6I_DSI_CMD_CTL_RX_FLAG BIT(25) +#define SUN6I_DSI_CMD_CTL_TX_FLAG BIT(9) + +#define SUN6I_DSI_CMD_RX_REG(n) (0x240 + (n) * 0x04) + +#define SUN6I_DSI_DEBUG_DATA_REG 0x2f8 + +#define SUN6I_DSI_CMD_TX_REG(n) (0x300 + (n) * 0x04) + +enum sun6i_dsi_start_inst { + DSI_START_LPRX, + DSI_START_LPTX, + DSI_START_HSC, + DSI_START_HSD, +}; + +enum sun6i_dsi_inst_id { + DSI_INST_ID_LP11 = 0, + DSI_INST_ID_TBA, + DSI_INST_ID_HSC, + DSI_INST_ID_HSD, + DSI_INST_ID_LPDT, + DSI_INST_ID_HSCEXIT, + DSI_INST_ID_NOP, + DSI_INST_ID_DLY, + DSI_INST_ID_END = 15, +}; + +enum sun6i_dsi_inst_mode { + DSI_INST_MODE_STOP = 0, + DSI_INST_MODE_TBA, + DSI_INST_MODE_HS, + DSI_INST_MODE_ESCAPE, + DSI_INST_MODE_HSCEXIT, + DSI_INST_MODE_NOP, +}; + +enum sun6i_dsi_inst_escape { + DSI_INST_ESCA_LPDT = 0, + DSI_INST_ESCA_ULPS, + DSI_INST_ESCA_UN1, + DSI_INST_ESCA_UN2, + DSI_INST_ESCA_RESET, + DSI_INST_ESCA_UN3, + DSI_INST_ESCA_UN4, + DSI_INST_ESCA_UN5, +}; + +enum sun6i_dsi_inst_packet { + DSI_INST_PACK_PIXEL = 0, + DSI_INST_PACK_COMMAND, +}; + +static const u32 sun6i_dsi_ecc_array[] = { + [0] = (BIT(0) | BIT(1) | BIT(2) | BIT(4) | BIT(5) | BIT(7) | BIT(10) | + BIT(11) | BIT(13) | BIT(16) | BIT(20) | BIT(21) | BIT(22) | + BIT(23)), + [1] = (BIT(0) | BIT(1) | BIT(3) | BIT(4) | BIT(6) | BIT(8) | BIT(10) | + BIT(12) | BIT(14) | BIT(17) | BIT(20) | BIT(21) | BIT(22) | + BIT(23)), + [2] = (BIT(0) | BIT(2) | BIT(3) | BIT(5) | BIT(6) | BIT(9) | BIT(11) | + BIT(12) | BIT(15) | BIT(18) | BIT(20) | BIT(21) | BIT(22)), + [3] = (BIT(1) | BIT(2) | BIT(3) | BIT(7) | BIT(8) | BIT(9) | BIT(13) | + BIT(14) | BIT(15) | BIT(19) | BIT(20) | BIT(21) | BIT(23)), + [4] = (BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(8) | BIT(9) | BIT(16) | + BIT(17) | BIT(18) | BIT(19) | BIT(20) | BIT(22) | BIT(23)), + [5] = (BIT(10) | BIT(11) | BIT(12) | BIT(13) | BIT(14) | BIT(15) | + BIT(16) | BIT(17) | BIT(18) | BIT(19) | BIT(21) | BIT(22) | + BIT(23)), +}; + +static u32 sun6i_dsi_ecc_compute(unsigned int data) +{ + int i; + u8 ecc = 0; + + for (i = 0; i < ARRAY_SIZE(sun6i_dsi_ecc_array); i++) { + u32 field = sun6i_dsi_ecc_array[i]; + bool init = false; + u8 val = 0; + int j; + + for (j = 0; j < 24; j++) { + if (!(BIT(j) & field)) + continue; + + if (!init) { + val = (BIT(j) & data) ? 1 : 0; + init = true; + } else { + val ^= (BIT(j) & data) ? 
1 : 0; + } + } + + ecc |= val << i; + } + + return ecc; +} + +static u16 sun6i_dsi_crc_compute(u8 const *buffer, size_t len) +{ + return crc_ccitt(0xffff, buffer, len); +} + +static u16 sun6i_dsi_crc_repeat_compute(u8 pd, size_t len) +{ + u8 buffer[len]; + + memset(buffer, pd, len); + + return sun6i_dsi_crc_compute(buffer, len); +} + +static u32 sun6i_dsi_build_sync_pkt(u8 dt, u8 vc, u8 d0, u8 d1) +{ + u32 val = dt & 0x3f; + + val |= (vc & 3) << 6; + val |= (d0 & 0xff) << 8; + val |= (d1 & 0xff) << 16; + val |= sun6i_dsi_ecc_compute(val) << 24; + + return val; +} + +static u32 sun6i_dsi_build_blk0_pkt(u8 vc, u16 wc) +{ + return sun6i_dsi_build_sync_pkt(MIPI_DSI_BLANKING_PACKET, vc, + wc & 0xff, wc >> 8); +} + +static u32 sun6i_dsi_build_blk1_pkt(u16 pd, size_t len) +{ + u32 val = SUN6I_DSI_BLK_PD(pd); + + return val | SUN6I_DSI_BLK_PF(sun6i_dsi_crc_repeat_compute(pd, len)); +} + +static void sun6i_dsi_inst_abort(struct sun6i_dsi *dsi) +{ + regmap_update_bits(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG, + SUN6I_DSI_BASIC_CTL0_INST_ST, 0); +} + +static void sun6i_dsi_inst_commit(struct sun6i_dsi *dsi) +{ + regmap_update_bits(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG, + SUN6I_DSI_BASIC_CTL0_INST_ST, + SUN6I_DSI_BASIC_CTL0_INST_ST); +} + +static int sun6i_dsi_inst_wait_for_completion(struct sun6i_dsi *dsi) +{ + u32 val; + + return regmap_read_poll_timeout(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG, + val, + !(val & SUN6I_DSI_BASIC_CTL0_INST_ST), + 100, 5000); +} + +static void sun6i_dsi_inst_setup(struct sun6i_dsi *dsi, + enum sun6i_dsi_inst_id id, + enum sun6i_dsi_inst_mode mode, + bool clock, u8 data, + enum sun6i_dsi_inst_packet packet, + enum sun6i_dsi_inst_escape escape) +{ + regmap_write(dsi->regs, SUN6I_DSI_INST_FUNC_REG(id), + SUN6I_DSI_INST_FUNC_INST_MODE(mode) | + SUN6I_DSI_INST_FUNC_ESCAPE_ENTRY(escape) | + SUN6I_DSI_INST_FUNC_TRANS_PACKET(packet) | + (clock ? 
SUN6I_DSI_INST_FUNC_LANE_CEN : 0) | + SUN6I_DSI_INST_FUNC_LANE_DEN(data)); +} + +static void sun6i_dsi_inst_init(struct sun6i_dsi *dsi, + struct mipi_dsi_device *device) +{ + u8 lanes_mask = GENMASK(device->lanes - 1, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_LP11, DSI_INST_MODE_STOP, + true, lanes_mask, 0, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_TBA, DSI_INST_MODE_TBA, + false, 1, 0, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSC, DSI_INST_MODE_HS, + true, 0, DSI_INST_PACK_PIXEL, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSD, DSI_INST_MODE_HS, + false, lanes_mask, DSI_INST_PACK_PIXEL, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_LPDT, DSI_INST_MODE_ESCAPE, + false, 1, DSI_INST_PACK_COMMAND, + DSI_INST_ESCA_LPDT); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSCEXIT, DSI_INST_MODE_HSCEXIT, + true, 0, 0, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_NOP, DSI_INST_MODE_STOP, + false, lanes_mask, 0, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_DLY, DSI_INST_MODE_NOP, + true, lanes_mask, 0, 0); + + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_CFG_REG(0), + SUN6I_DSI_INST_JUMP_CFG_POINT(DSI_INST_ID_NOP) | + SUN6I_DSI_INST_JUMP_CFG_TO(DSI_INST_ID_HSCEXIT) | + SUN6I_DSI_INST_JUMP_CFG_NUM(1)); +}; + +static u16 sun6i_dsi_get_video_start_delay(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + return mode->vtotal - (mode->vsync_end - mode->vdisplay) + 1; +} + +static void sun6i_dsi_setup_burst(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + struct mipi_dsi_device *device = dsi->device; + u32 val = 0; + + if ((mode->hsync_end - mode->hdisplay) > 20) { + /* Maaaaaagic */ + u16 drq = (mode->hsync_end - mode->hdisplay) - 20; + + drq *= mipi_dsi_pixel_format_to_bpp(device->format); + drq /= 32; + + val = (SUN6I_DSI_TCON_DRQ_ENABLE_MODE | + SUN6I_DSI_TCON_DRQ_SET(drq)); + } + + regmap_write(dsi->regs, SUN6I_DSI_TCON_DRQ_REG, val); +} + +static void sun6i_dsi_setup_inst_loop(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + u16 delay = 50 - 1; + + regmap_write(dsi->regs, SUN6I_DSI_INST_LOOP_NUM_REG(0), + SUN6I_DSI_INST_LOOP_NUM_N0(50 - 1) | + SUN6I_DSI_INST_LOOP_NUM_N1(delay)); + regmap_write(dsi->regs, SUN6I_DSI_INST_LOOP_NUM_REG(1), + SUN6I_DSI_INST_LOOP_NUM_N0(50 - 1) | + SUN6I_DSI_INST_LOOP_NUM_N1(delay)); +} + +static void sun6i_dsi_setup_format(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + struct mipi_dsi_device *device = dsi->device; + u32 val = SUN6I_DSI_PIXEL_PH_VC(device->channel); + u8 dt, fmt; + u16 wc; + + /* + * TODO: The format defines are only valid in video mode and + * change in command mode. 
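+ * Command mode operation would need a separate table if it is
+ * ever supported.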
+ */ + switch (device->format) { + case MIPI_DSI_FMT_RGB888: + dt = MIPI_DSI_PACKED_PIXEL_STREAM_24; + fmt = 8; + break; + case MIPI_DSI_FMT_RGB666: + dt = MIPI_DSI_PIXEL_STREAM_3BYTE_18; + fmt = 9; + break; + case MIPI_DSI_FMT_RGB666_PACKED: + dt = MIPI_DSI_PACKED_PIXEL_STREAM_18; + fmt = 10; + break; + case MIPI_DSI_FMT_RGB565: + dt = MIPI_DSI_PACKED_PIXEL_STREAM_16; + fmt = 11; + break; + default: + return; + } + val |= SUN6I_DSI_PIXEL_PH_DT(dt); + + wc = mode->hdisplay * mipi_dsi_pixel_format_to_bpp(device->format) / 8; + val |= SUN6I_DSI_PIXEL_PH_WC(wc); + val |= SUN6I_DSI_PIXEL_PH_ECC(sun6i_dsi_ecc_compute(val)); + + regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PH_REG, val); + + regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PF0_REG, + SUN6I_DSI_PIXEL_PF0_CRC_FORCE(0xffff)); + + regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PF1_REG, + SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINE0(0xffff) | + SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINEN(0xffff)); + + regmap_write(dsi->regs, SUN6I_DSI_PIXEL_CTL0_REG, + SUN6I_DSI_PIXEL_CTL0_PD_PLUG_DISABLE | + SUN6I_DSI_PIXEL_CTL0_FORMAT(fmt)); +} + +static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + struct mipi_dsi_device *device = dsi->device; + unsigned int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8; + u16 hbp, hfp, hsa, hblk, vblk; + + regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL_REG, 0); + + regmap_write(dsi->regs, SUN6I_DSI_SYNC_HSS_REG, + sun6i_dsi_build_sync_pkt(MIPI_DSI_H_SYNC_START, + device->channel, + 0, 0)); + + regmap_write(dsi->regs, SUN6I_DSI_SYNC_HSE_REG, + sun6i_dsi_build_sync_pkt(MIPI_DSI_H_SYNC_END, + device->channel, + 0, 0)); + + regmap_write(dsi->regs, SUN6I_DSI_SYNC_VSS_REG, + sun6i_dsi_build_sync_pkt(MIPI_DSI_V_SYNC_START, + device->channel, + 0, 0)); + + regmap_write(dsi->regs, SUN6I_DSI_SYNC_VSE_REG, + sun6i_dsi_build_sync_pkt(MIPI_DSI_V_SYNC_END, + device->channel, + 0, 0)); + + regmap_write(dsi->regs, SUN6I_DSI_BASIC_SIZE0_REG, + SUN6I_DSI_BASIC_SIZE0_VSA(mode->vsync_end - + mode->vsync_start) | + SUN6I_DSI_BASIC_SIZE0_VBP(mode->vsync_start - + mode->vdisplay)); + + regmap_write(dsi->regs, SUN6I_DSI_BASIC_SIZE1_REG, + SUN6I_DSI_BASIC_SIZE1_VACT(mode->vdisplay) | + SUN6I_DSI_BASIC_SIZE1_VT(mode->vtotal)); + + /* + * A sync period is composed of a blanking packet (4 bytes + + * payload + 2 bytes) and a sync event packet (4 bytes). Its + * minimal size is therefore 10 bytes + */ +#define HSA_PACKET_OVERHEAD 10 + hsa = max((unsigned int)HSA_PACKET_OVERHEAD, + (mode->hsync_end - mode->hsync_start) * Bpp - HSA_PACKET_OVERHEAD); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HSA0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, hsa)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HSA1_REG, + sun6i_dsi_build_blk1_pkt(0, hsa)); + + /* + * The backporch is set using a blanking packet (4 bytes + + * payload + 2 bytes). Its minimal size is therefore 6 bytes + */ +#define HBP_PACKET_OVERHEAD 6 + hbp = max((unsigned int)HBP_PACKET_OVERHEAD, + (mode->hsync_start - mode->hdisplay) * Bpp - HBP_PACKET_OVERHEAD); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HBP0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, hbp)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HBP1_REG, + sun6i_dsi_build_blk1_pkt(0, hbp)); + + /* + * The frontporch is set using a blanking packet (4 bytes + + * payload + 2 bytes). 
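The 4-byte header
+ * includes the ECC byte, and the 2 trailing bytes are the
+ * payload CRC.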
Its minimal size is therefore 6 bytes + */ +#define HFP_PACKET_OVERHEAD 6 + hfp = max((unsigned int)HFP_PACKET_OVERHEAD, + (mode->htotal - mode->hsync_end) * Bpp - HFP_PACKET_OVERHEAD); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HFP0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, hfp)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HFP1_REG, + sun6i_dsi_build_blk1_pkt(0, hfp)); + + /* + * hblk seems to be the line + porches length. + */ + hblk = mode->htotal * Bpp - hsa; + regmap_write(dsi->regs, SUN6I_DSI_BLK_HBLK0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, hblk)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HBLK1_REG, + sun6i_dsi_build_blk1_pkt(0, hblk)); + + /* + * And I'm not entirely sure what vblk is about. The driver in + * the Allwinner BSP uses a rather convoluted calculation + * there, and only for 4 lanes. However, using 0 (the !4 lanes + * case) even with a 4-lane screen seems to work... + */ + vblk = 0; + regmap_write(dsi->regs, SUN6I_DSI_BLK_VBLK0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, vblk)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_VBLK1_REG, + sun6i_dsi_build_blk1_pkt(0, vblk)); +} + +static int sun6i_dsi_start(struct sun6i_dsi *dsi, + enum sun6i_dsi_start_inst func) +{ + switch (func) { + case DSI_START_LPTX: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_LPDT << (4 * DSI_INST_ID_LP11) | + DSI_INST_ID_END << (4 * DSI_INST_ID_LPDT)); + break; + case DSI_START_LPRX: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_LPDT << (4 * DSI_INST_ID_LP11) | + DSI_INST_ID_DLY << (4 * DSI_INST_ID_LPDT) | + DSI_INST_ID_TBA << (4 * DSI_INST_ID_DLY) | + DSI_INST_ID_END << (4 * DSI_INST_ID_TBA)); + break; + case DSI_START_HSC: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_HSC << (4 * DSI_INST_ID_LP11) | + DSI_INST_ID_END << (4 * DSI_INST_ID_HSC)); + break; + case DSI_START_HSD: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_NOP << (4 * DSI_INST_ID_LP11) | + DSI_INST_ID_HSD << (4 * DSI_INST_ID_NOP) | + DSI_INST_ID_DLY << (4 * DSI_INST_ID_HSD) | + DSI_INST_ID_NOP << (4 * DSI_INST_ID_DLY) | + DSI_INST_ID_END << (4 * DSI_INST_ID_HSCEXIT)); + break; + default: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_END << (4 * DSI_INST_ID_LP11)); + break; + } + + sun6i_dsi_inst_abort(dsi); + sun6i_dsi_inst_commit(dsi); + + if (func == DSI_START_HSC) + regmap_write_bits(dsi->regs, + SUN6I_DSI_INST_FUNC_REG(DSI_INST_ID_LP11), + SUN6I_DSI_INST_FUNC_LANE_CEN, 0); + + return 0; +} + +static void sun6i_dsi_encoder_enable(struct drm_encoder *encoder) +{ + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; + struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder); + struct mipi_dsi_device *device = dsi->device; + u16 delay; + + DRM_DEBUG_DRIVER("Enabling DSI output\n"); + + pm_runtime_get_sync(dsi->dev); + + delay = sun6i_dsi_get_video_start_delay(dsi, mode); + regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL1_REG, + SUN6I_DSI_BASIC_CTL1_VIDEO_ST_DELAY(delay) | + SUN6I_DSI_BASIC_CTL1_VIDEO_FILL | + SUN6I_DSI_BASIC_CTL1_VIDEO_PRECISION | + SUN6I_DSI_BASIC_CTL1_VIDEO_MODE); + + sun6i_dsi_setup_burst(dsi, mode); + sun6i_dsi_setup_inst_loop(dsi, mode); + sun6i_dsi_setup_format(dsi, mode); + sun6i_dsi_setup_timings(dsi, mode); + + sun6i_dphy_init(dsi->dphy, device->lanes); + sun6i_dphy_power_on(dsi->dphy, device->lanes); + + if (!IS_ERR(dsi->panel)) + drm_panel_prepare(dsi->panel); + + /* + * FIXME: This should be moved after the switch to HS mode.
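+ * ("this" being the drm_panel_enable() call below.)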
+ * + * Unfortunately, once in HS mode, it seems like we're not + * able to send DCS commands anymore, which would prevent any + * panel from sending DCS commands as part of its enable + * method, which is quite common. + * + * I haven't seen any artifact due to that sub-optimal + * ordering on the panels I've tested it with, so I guess this + * will do for now, until that IP is better understood. + */ + if (!IS_ERR(dsi->panel)) + drm_panel_enable(dsi->panel); + + sun6i_dsi_start(dsi, DSI_START_HSC); + + udelay(1000); + + sun6i_dsi_start(dsi, DSI_START_HSD); +} + +static void sun6i_dsi_encoder_disable(struct drm_encoder *encoder) +{ + struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder); + + DRM_DEBUG_DRIVER("Disabling DSI output\n"); + + if (!IS_ERR(dsi->panel)) { + drm_panel_disable(dsi->panel); + drm_panel_unprepare(dsi->panel); + } + + sun6i_dphy_power_off(dsi->dphy); + sun6i_dphy_exit(dsi->dphy); + + pm_runtime_put(dsi->dev); +} + +static int sun6i_dsi_get_modes(struct drm_connector *connector) +{ + struct sun6i_dsi *dsi = connector_to_sun6i_dsi(connector); + + return drm_panel_get_modes(dsi->panel); +} + +static struct drm_connector_helper_funcs sun6i_dsi_connector_helper_funcs = { + .get_modes = sun6i_dsi_get_modes, +}; + +static enum drm_connector_status +sun6i_dsi_connector_detect(struct drm_connector *connector, bool force) +{ + return connector_status_connected; +} + +static const struct drm_connector_funcs sun6i_dsi_connector_funcs = { + .detect = sun6i_dsi_connector_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const struct drm_encoder_helper_funcs sun6i_dsi_enc_helper_funcs = { + .disable = sun6i_dsi_encoder_disable, + .enable = sun6i_dsi_encoder_enable, +}; + +static const struct drm_encoder_funcs sun6i_dsi_enc_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static u32 sun6i_dsi_dcs_build_pkt_hdr(struct sun6i_dsi *dsi, + const struct mipi_dsi_msg *msg) +{ + u32 pkt = msg->type; + + if (msg->type == MIPI_DSI_DCS_LONG_WRITE) { + pkt |= ((msg->tx_len + 1) & 0xffff) << 8; + pkt |= (((msg->tx_len + 1) >> 8) & 0xffff) << 16; + } else { + pkt |= (((u8 *)msg->tx_buf)[0] << 8); + if (msg->tx_len > 1) + pkt |= (((u8 *)msg->tx_buf)[1] << 16); + } + + pkt |= sun6i_dsi_ecc_compute(pkt) << 24; + + return pkt; +} + +static int sun6i_dsi_dcs_write_short(struct sun6i_dsi *dsi, + const struct mipi_dsi_msg *msg) +{ + regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0), + sun6i_dsi_dcs_build_pkt_hdr(dsi, msg)); + regmap_write_bits(dsi->regs, SUN6I_DSI_CMD_CTL_REG, + 0xff, (4 - 1)); + + sun6i_dsi_start(dsi, DSI_START_LPTX); + + return msg->tx_len; +} + +static int sun6i_dsi_dcs_write_long(struct sun6i_dsi *dsi, + const struct mipi_dsi_msg *msg) +{ + int ret, len = 0; + u8 *bounce; + u16 crc; + + regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0), + sun6i_dsi_dcs_build_pkt_hdr(dsi, msg)); + + bounce = kzalloc(msg->tx_len + sizeof(crc), GFP_KERNEL); + if (!bounce) + return -ENOMEM; + + memcpy(bounce, msg->tx_buf, msg->tx_len); + len += msg->tx_len; + + crc = sun6i_dsi_crc_compute(bounce, msg->tx_len); + memcpy((u8 *)bounce + msg->tx_len, &crc, sizeof(crc)); + len += sizeof(crc); + + regmap_bulk_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(1), bounce, len); + regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG, len + 4 - 1); + kfree(bounce); + + 
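/* + * The TX FIFO now holds the 4-byte packet header followed by + * the payload and its 16-bit CRC, and CMD_CTL was programmed + * with that total length minus one. Kick off the transfer in + * LP mode and wait for it to finish. + */ + 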
sun6i_dsi_start(dsi, DSI_START_LPTX); + + ret = sun6i_dsi_inst_wait_for_completion(dsi); + if (ret < 0) { + sun6i_dsi_inst_abort(dsi); + return ret; + } + + /* + * TODO: There's some bits (reg 0x200, bits 8/9) that + * apparently can be used to check whether the data have been + * sent, but I couldn't get it to work reliably. + */ + return msg->tx_len; +} + +static int sun6i_dsi_dcs_read(struct sun6i_dsi *dsi, + const struct mipi_dsi_msg *msg) +{ + u32 val; + int ret; + u8 byte0; + + regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0), + sun6i_dsi_dcs_build_pkt_hdr(dsi, msg)); + regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG, + (4 - 1)); + + sun6i_dsi_start(dsi, DSI_START_LPRX); + + ret = sun6i_dsi_inst_wait_for_completion(dsi); + if (ret < 0) { + sun6i_dsi_inst_abort(dsi); + return ret; + } + + /* + * TODO: There's some bits (reg 0x200, bits 24/25) that + * apparently can be used to check whether the data have been + * received, but I couldn't get it to work reliably. + */ + regmap_read(dsi->regs, SUN6I_DSI_CMD_CTL_REG, &val); + if (val & SUN6I_DSI_CMD_CTL_RX_OVERFLOW) + return -EIO; + + regmap_read(dsi->regs, SUN6I_DSI_CMD_RX_REG(0), &val); + byte0 = val & 0xff; + if (byte0 == MIPI_DSI_RX_ACKNOWLEDGE_AND_ERROR_REPORT) + return -EIO; + + ((u8 *)msg->rx_buf)[0] = (val >> 8); + + return 1; +} + +static int sun6i_dsi_attach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct sun6i_dsi *dsi = host_to_sun6i_dsi(host); + + dsi->device = device; + dsi->panel = of_drm_find_panel(device->dev.of_node); + if (!dsi->panel) + return -EINVAL; + + dev_info(host->dev, "Attached device %s\n", device->name); + + return 0; +} + +static int sun6i_dsi_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct sun6i_dsi *dsi = host_to_sun6i_dsi(host); + + dsi->panel = NULL; + dsi->device = NULL; + + return 0; +} + +static ssize_t sun6i_dsi_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct sun6i_dsi *dsi = host_to_sun6i_dsi(host); + int ret; + + ret = sun6i_dsi_inst_wait_for_completion(dsi); + if (ret < 0) + sun6i_dsi_inst_abort(dsi); + + regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG, + SUN6I_DSI_CMD_CTL_RX_OVERFLOW | + SUN6I_DSI_CMD_CTL_RX_FLAG | + SUN6I_DSI_CMD_CTL_TX_FLAG); + + switch (msg->type) { + case MIPI_DSI_DCS_SHORT_WRITE: + case MIPI_DSI_DCS_SHORT_WRITE_PARAM: + ret = sun6i_dsi_dcs_write_short(dsi, msg); + break; + + case MIPI_DSI_DCS_LONG_WRITE: + ret = sun6i_dsi_dcs_write_long(dsi, msg); + break; + + case MIPI_DSI_DCS_READ: + if (msg->rx_len == 1) { + ret = sun6i_dsi_dcs_read(dsi, msg); + break; + } + + default: + ret = -EINVAL; + } + + return ret; +} + +static const struct mipi_dsi_host_ops sun6i_dsi_host_ops = { + .attach = sun6i_dsi_attach, + .detach = sun6i_dsi_detach, + .transfer = sun6i_dsi_transfer, +}; + +static const struct regmap_config sun6i_dsi_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = SUN6I_DSI_CMD_TX_REG(255), + .name = "mipi-dsi", +}; + +static int sun6i_dsi_bind(struct device *dev, struct device *master, + void *data) +{ + struct drm_device *drm = data; + struct sun4i_drv *drv = drm->dev_private; + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + int ret; + + if (!dsi->panel) + return -EPROBE_DEFER; + + dsi->drv = drv; + + drm_encoder_helper_add(&dsi->encoder, + &sun6i_dsi_enc_helper_funcs); + ret = drm_encoder_init(drm, + &dsi->encoder, + &sun6i_dsi_enc_funcs, + DRM_MODE_ENCODER_DSI, + NULL); + if (ret) { + dev_err(dsi->dev, "Couldn't initialise the DSI 
encoder\n"); + return ret; + } + dsi->encoder.possible_crtcs = BIT(0); + + drm_connector_helper_add(&dsi->connector, + &sun6i_dsi_connector_helper_funcs); + ret = drm_connector_init(drm, &dsi->connector, + &sun6i_dsi_connector_funcs, + DRM_MODE_CONNECTOR_DSI); + if (ret) { + dev_err(dsi->dev, + "Couldn't initialise the DSI connector\n"); + goto err_cleanup_connector; + } + + drm_mode_connector_attach_encoder(&dsi->connector, &dsi->encoder); + drm_panel_attach(dsi->panel, &dsi->connector); + + return 0; + +err_cleanup_connector: + drm_encoder_cleanup(&dsi->encoder); + return ret; +} + +static void sun6i_dsi_unbind(struct device *dev, struct device *master, + void *data) +{ + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + + drm_panel_detach(dsi->panel); +} + +static const struct component_ops sun6i_dsi_ops = { + .bind = sun6i_dsi_bind, + .unbind = sun6i_dsi_unbind, +}; + +static int sun6i_dsi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *dphy_node; + struct sun6i_dsi *dsi; + struct resource *res; + void __iomem *base; + int ret; + + dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL); + if (!dsi) + return -ENOMEM; + dev_set_drvdata(dev, dsi); + dsi->dev = dev; + dsi->host.ops = &sun6i_dsi_host_ops; + dsi->host.dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) { + dev_err(dev, "Couldn't map the DSI encoder registers\n"); + return PTR_ERR(base); + } + + dsi->regs = devm_regmap_init_mmio_clk(dev, "bus", base, + &sun6i_dsi_regmap_config); + if (IS_ERR(dsi->regs)) { + dev_err(dev, "Couldn't create the DSI encoder regmap\n"); + return PTR_ERR(dsi->regs); + } + + dsi->reset = devm_reset_control_get_shared(dev, NULL); + if (IS_ERR(dsi->reset)) { + dev_err(dev, "Couldn't get our reset line\n"); + return PTR_ERR(dsi->reset); + } + + dsi->mod_clk = devm_clk_get(dev, "mod"); + if (IS_ERR(dsi->mod_clk)) { + dev_err(dev, "Couldn't get the DSI mod clock\n"); + return PTR_ERR(dsi->mod_clk); + } + + /* + * In order to operate properly, that clock seems to be always + * set to 297MHz. + */ + clk_set_rate_exclusive(dsi->mod_clk, 297000000); + + dphy_node = of_parse_phandle(dev->of_node, "phys", 0); + ret = sun6i_dphy_probe(dsi, dphy_node); + of_node_put(dphy_node); + if (ret) { + dev_err(dev, "Couldn't get the MIPI D-PHY\n"); + goto err_unprotect_clk; + } + + pm_runtime_enable(dev); + + ret = mipi_dsi_host_register(&dsi->host); + if (ret) { + dev_err(dev, "Couldn't register MIPI-DSI host\n"); + goto err_remove_phy; + } + + ret = component_add(&pdev->dev, &sun6i_dsi_ops); + if (ret) { + dev_err(dev, "Couldn't register our component\n"); + goto err_remove_dsi_host; + } + + return 0; + +err_remove_dsi_host: + mipi_dsi_host_unregister(&dsi->host); +err_remove_phy: + pm_runtime_disable(dev); + sun6i_dphy_remove(dsi); +err_unprotect_clk: + clk_rate_exclusive_put(dsi->mod_clk); + return ret; +} + +static int sun6i_dsi_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + + component_del(&pdev->dev, &sun6i_dsi_ops); + mipi_dsi_host_unregister(&dsi->host); + pm_runtime_disable(dev); + sun6i_dphy_remove(dsi); + clk_rate_exclusive_put(dsi->mod_clk); + + return 0; +} + +static int sun6i_dsi_runtime_resume(struct device *dev) +{ + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + + reset_control_deassert(dsi->reset); + clk_prepare_enable(dsi->mod_clk); + + /* + * Enable the DSI block. 
+ * + * Some parts of it can only be done once we know the number + * of lanes; see sun6i_dsi_inst_init() + */ + regmap_write(dsi->regs, SUN6I_DSI_CTL_REG, SUN6I_DSI_CTL_EN); + + regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG, + SUN6I_DSI_BASIC_CTL0_ECC_EN | SUN6I_DSI_BASIC_CTL0_CRC_EN); + + regmap_write(dsi->regs, SUN6I_DSI_TRANS_START_REG, 10); + regmap_write(dsi->regs, SUN6I_DSI_TRANS_ZERO_REG, 0); + + if (dsi->device) + sun6i_dsi_inst_init(dsi, dsi->device); + + regmap_write(dsi->regs, SUN6I_DSI_DEBUG_DATA_REG, 0xff); + + return 0; +} + +static int sun6i_dsi_runtime_suspend(struct device *dev) +{ + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + + clk_disable_unprepare(dsi->mod_clk); + reset_control_assert(dsi->reset); + + return 0; +} + +static const struct dev_pm_ops sun6i_dsi_pm_ops = { + SET_RUNTIME_PM_OPS(sun6i_dsi_runtime_suspend, + sun6i_dsi_runtime_resume, + NULL) +}; + +static const struct of_device_id sun6i_dsi_of_table[] = { + { .compatible = "allwinner,sun6i-a31-mipi-dsi" }, + { } +}; +MODULE_DEVICE_TABLE(of, sun6i_dsi_of_table); + +static struct platform_driver sun6i_dsi_platform_driver = { + .probe = sun6i_dsi_probe, + .remove = sun6i_dsi_remove, + .driver = { + .name = "sun6i-mipi-dsi", + .of_match_table = sun6i_dsi_of_table, + .pm = &sun6i_dsi_pm_ops, + }, +}; +module_platform_driver(sun6i_dsi_platform_driver); + +MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>"); +MODULE_DESCRIPTION("Allwinner A31 DSI Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h new file mode 100644 index 000000000000..dbbc5b3ecbda --- /dev/null +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2016 Allwinnertech Co., Ltd. 
+ * Copyright (C) 2017-2018 Bootlin + * + * Maxime Ripard <maxime.ripard@bootlin.com> + */ + +#ifndef _SUN6I_MIPI_DSI_H_ +#define _SUN6I_MIPI_DSI_H_ + +#include <drm/drm_connector.h> +#include <drm/drm_encoder.h> +#include <drm/drm_mipi_dsi.h> + +struct sun6i_dphy { + struct clk *bus_clk; + struct clk *mod_clk; + struct regmap *regs; + struct reset_control *reset; +}; + +struct sun6i_dsi { + struct drm_connector connector; + struct drm_encoder encoder; + struct mipi_dsi_host host; + + struct clk *bus_clk; + struct clk *mod_clk; + struct regmap *regs; + struct reset_control *reset; + struct sun6i_dphy *dphy; + + struct device *dev; + struct sun4i_drv *drv; + struct mipi_dsi_device *device; + struct drm_panel *panel; +}; + +static inline struct sun6i_dsi *host_to_sun6i_dsi(struct mipi_dsi_host *host) +{ + return container_of(host, struct sun6i_dsi, host); +}; + +static inline struct sun6i_dsi *connector_to_sun6i_dsi(struct drm_connector *connector) +{ + return container_of(connector, struct sun6i_dsi, connector); +}; + +static inline struct sun6i_dsi *encoder_to_sun6i_dsi(const struct drm_encoder *encoder) +{ + return container_of(encoder, struct sun6i_dsi, encoder); +}; + +int sun6i_dphy_probe(struct sun6i_dsi *dsi, struct device_node *node); +int sun6i_dphy_remove(struct sun6i_dsi *dsi); + +int sun6i_dphy_init(struct sun6i_dphy *dphy, unsigned int lanes); +int sun6i_dphy_power_on(struct sun6i_dphy *dphy, unsigned int lanes); +int sun6i_dphy_power_off(struct sun6i_dphy *dphy); +int sun6i_dphy_exit(struct sun6i_dphy *dphy); + +#endif /* _SUN6I_MIPI_DSI_H_ */ diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 7afe2f635f74..a0519612ae2c 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -38,26 +38,11 @@ static int tegra_atomic_check(struct drm_device *drm, { int err; - err = drm_atomic_helper_check_modeset(drm, state); + err = drm_atomic_helper_check(drm, state); if (err < 0) return err; - err = tegra_display_hub_atomic_check(drm, state); - if (err < 0) - return err; - - err = drm_atomic_normalize_zpos(drm, state); - if (err < 0) - return err; - - err = drm_atomic_helper_check_planes(drm, state); - if (err < 0) - return err; - - if (state->legacy_cursor_update) - state->async_update = !drm_atomic_helper_async_check(drm, state); - - return 0; + return tegra_display_hub_atomic_check(drm, state); } static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = { @@ -151,6 +136,8 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) drm->mode_config.allow_fb_modifiers = true; + drm->mode_config.normalize_zpos = true; + drm->mode_config.funcs = &tegra_drm_mode_config_funcs; drm->mode_config.helper_private = &tegra_drm_mode_config_helpers; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 1b278a22c8b7..1067e702c22c 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -224,7 +224,7 @@ static void tilcdc_crtc_set_clk(struct drm_crtc *crtc) ret = clk_set_rate(priv->clk, req_rate * clkdiv); clk_rate = clk_get_rate(priv->clk); - if (ret < 0) { + if (ret < 0 || tilcdc_pclk_diff(req_rate, clk_rate) > 5) { /* * If we fail to set the clock rate (some architectures don't * use the common clock framework yet and may not implement diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c index 4c6616278c48..24a33bf862fa 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c +++ 
b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL(tinydrm_gem_cma_prime_import_sg_table); * GEM object state and frees the memory used to store the object itself using * drm_gem_cma_free_object(). It also handles PRIME buffers which has the kernel * virtual address set by tinydrm_gem_cma_prime_import_sg_table(). Drivers - * can use this as their &drm_driver->gem_free_object callback. + * can use this as their &drm_driver->gem_free_object_unlocked callback. */ void tinydrm_gem_cma_free_object(struct drm_gem_object *gem_obj) { diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c index d1c3ce9ab294..dcd390163a4a 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c @@ -78,6 +78,36 @@ bool tinydrm_merge_clips(struct drm_clip_rect *dst, } EXPORT_SYMBOL(tinydrm_merge_clips); +int tinydrm_fb_dirty(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int flags, unsigned int color, + struct drm_clip_rect *clips, + unsigned int num_clips) +{ + struct tinydrm_device *tdev = fb->dev->dev_private; + struct drm_plane *plane = &tdev->pipe.plane; + int ret = 0; + + drm_modeset_lock(&plane->mutex, NULL); + + /* fbdev can flush even when we're not interested */ + if (plane->state->fb == fb) { + mutex_lock(&tdev->dirty_lock); + ret = tdev->fb_dirty(fb, file_priv, flags, + color, clips, num_clips); + mutex_unlock(&tdev->dirty_lock); + } + + drm_modeset_unlock(&plane->mutex); + + if (ret) + dev_err_once(fb->dev->dev, + "Failed to update display %d\n", ret); + + return ret; +} +EXPORT_SYMBOL(tinydrm_fb_dirty); + /** * tinydrm_memcpy - Copy clip buffer * @dst: Destination buffer diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c index 11ae950b0fc9..7e8e24d0b7a7 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c @@ -125,9 +125,8 @@ void tinydrm_display_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_crtc *crtc = &tdev->pipe.crtc; if (fb && (fb != old_state->fb)) { - pipe->plane.fb = fb; - if (fb->funcs->dirty) - fb->funcs->dirty(fb, NULL, 0, 0, NULL, 0); + if (tdev->fb_dirty) + tdev->fb_dirty(fb, NULL, 0, 0, NULL, 0); } if (crtc->state->event) { @@ -139,23 +138,6 @@ void tinydrm_display_pipe_update(struct drm_simple_display_pipe *pipe, } EXPORT_SYMBOL(tinydrm_display_pipe_update); -/** - * tinydrm_display_pipe_prepare_fb - Display pipe prepare_fb helper - * @pipe: Simple display pipe - * @plane_state: Plane state - * - * This function uses drm_gem_fb_prepare_fb() to check if the plane FB has an - * dma-buf attached, extracts the exclusive fence and attaches it to plane - * state for the atomic helper to wait on. Drivers can use this as their - * &drm_simple_display_pipe_funcs->prepare_fb callback. 
- */ -int tinydrm_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state) -{ - return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); -} -EXPORT_SYMBOL(tinydrm_display_pipe_prepare_fb); - static int tinydrm_rotate_mode(struct drm_display_mode *mode, unsigned int rotation) { diff --git a/drivers/gpu/drm/tinydrm/ili9225.c b/drivers/gpu/drm/tinydrm/ili9225.c index a0759502b81a..841c69aba059 100644 --- a/drivers/gpu/drm/tinydrm/ili9225.c +++ b/drivers/gpu/drm/tinydrm/ili9225.c @@ -88,14 +88,8 @@ static int ili9225_fb_dirty(struct drm_framebuffer *fb, bool full; void *tr; - mutex_lock(&tdev->dirty_lock); - if (!mipi->enabled) - goto out_unlock; - - /* fbdev can flush even when we're not interested */ - if (tdev->pipe.plane.fb != fb) - goto out_unlock; + return 0; full = tinydrm_merge_clips(&clip, clips, num_clips, flags, fb->width, fb->height); @@ -108,7 +102,7 @@ static int ili9225_fb_dirty(struct drm_framebuffer *fb, tr = mipi->tx_buf; ret = mipi_dbi_buf_copy(mipi->tx_buf, fb, &clip, swap); if (ret) - goto out_unlock; + return ret; } else { tr = cma_obj->vaddr; } @@ -159,24 +153,18 @@ static int ili9225_fb_dirty(struct drm_framebuffer *fb, ret = mipi_dbi_command_buf(mipi, ILI9225_WRITE_DATA_TO_GRAM, tr, (clip.x2 - clip.x1) * (clip.y2 - clip.y1) * 2); -out_unlock: - mutex_unlock(&tdev->dirty_lock); - - if (ret) - dev_err_once(fb->dev->dev, "Failed to update display %d\n", - ret); - return ret; } static const struct drm_framebuffer_funcs ili9225_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = ili9225_fb_dirty, + .dirty = tinydrm_fb_dirty, }; static void ili9225_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev); @@ -268,7 +256,7 @@ static void ili9225_pipe_enable(struct drm_simple_display_pipe *pipe, ili9225_command(mipi, ILI9225_DISPLAY_CONTROL_1, 0x1017); - mipi_dbi_enable_flush(mipi); + mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } static void ili9225_pipe_disable(struct drm_simple_display_pipe *pipe) @@ -341,6 +329,8 @@ static int ili9225_init(struct device *dev, struct mipi_dbi *mipi, if (ret) return ret; + tdev->fb_dirty = ili9225_fb_dirty; + ret = tinydrm_display_pipe_init(tdev, pipe_funcs, DRM_MODE_CONNECTOR_VIRTUAL, ili9225_formats, @@ -364,7 +354,7 @@ static const struct drm_simple_display_pipe_funcs ili9225_pipe_funcs = { .enable = ili9225_pipe_enable, .disable = ili9225_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode ili9225_mode = { diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c index d8ed6e6f8e05..015d03f2acba 100644 --- a/drivers/gpu/drm/tinydrm/mi0283qt.c +++ b/drivers/gpu/drm/tinydrm/mi0283qt.c @@ -19,6 +19,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_modeset_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/tinydrm/mipi-dbi.h> #include <drm/tinydrm/tinydrm-helpers.h> #include <video/mipi_display.h> @@ -49,7 +50,8 @@ #define ILI9341_MADCTL_MY BIT(7) static void mi0283qt_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct 
tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev); @@ -83,24 +85,6 @@ static void mi0283qt_enable(struct drm_simple_display_pipe *pipe, /* Memory Access Control */ mipi_dbi_command(mipi, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT); - switch (mipi->rotation) { - default: - addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY | - ILI9341_MADCTL_MX; - break; - case 90: - addr_mode = ILI9341_MADCTL_MY; - break; - case 180: - addr_mode = ILI9341_MADCTL_MV; - break; - case 270: - addr_mode = ILI9341_MADCTL_MX; - break; - } - addr_mode |= ILI9341_MADCTL_BGR; - mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode); - /* Frame Rate */ mipi_dbi_command(mipi, ILI9341_FRMCTR1, 0x00, 0x1b); @@ -126,14 +110,37 @@ static void mi0283qt_enable(struct drm_simple_display_pipe *pipe, msleep(100); out_enable: - mipi_dbi_enable_flush(mipi); + /* The PiTFT (ili9340) has a hardware reset circuit that + * resets only on power-on and not on each reboot through + * a gpio like the rpi-display does. + * As a result, we need to always apply the rotation value + * regardless of the display "on/off" state. + */ + switch (mipi->rotation) { + default: + addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY | + ILI9341_MADCTL_MX; + break; + case 90: + addr_mode = ILI9341_MADCTL_MY; + break; + case 180: + addr_mode = ILI9341_MADCTL_MV; + break; + case 270: + addr_mode = ILI9341_MADCTL_MX; + break; + } + addr_mode |= ILI9341_MADCTL_BGR; + mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode); + mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } static const struct drm_simple_display_pipe_funcs mi0283qt_pipe_funcs = { .enable = mi0283qt_enable, .disable = mipi_dbi_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode mi0283qt_mode = { diff --git a/drivers/gpu/drm/tinydrm/mipi-dbi.c b/drivers/gpu/drm/tinydrm/mipi-dbi.c index 9e903812b573..4d1fb31a781f 100644 --- a/drivers/gpu/drm/tinydrm/mipi-dbi.c +++ b/drivers/gpu/drm/tinydrm/mipi-dbi.c @@ -219,14 +219,8 @@ static int mipi_dbi_fb_dirty(struct drm_framebuffer *fb, bool full; void *tr; - mutex_lock(&tdev->dirty_lock); - if (!mipi->enabled) - goto out_unlock; - - /* fbdev can flush even when we're not interested */ - if (tdev->pipe.plane.fb != fb) - goto out_unlock; + return 0; full = tinydrm_merge_clips(&clip, clips, num_clips, flags, fb->width, fb->height); @@ -239,7 +233,7 @@ static int mipi_dbi_fb_dirty(struct drm_framebuffer *fb, tr = mipi->tx_buf; ret = mipi_dbi_buf_copy(mipi->tx_buf, fb, &clip, swap); if (ret) - goto out_unlock; + return ret; } else { tr = cma_obj->vaddr; } @@ -254,20 +248,13 @@ static int mipi_dbi_fb_dirty(struct drm_framebuffer *fb, ret = mipi_dbi_command_buf(mipi, MIPI_DCS_WRITE_MEMORY_START, tr, (clip.x2 - clip.x1) * (clip.y2 - clip.y1) * 2); -out_unlock: - mutex_unlock(&tdev->dirty_lock); - - if (ret) - dev_err_once(fb->dev->dev, "Failed to update display %d\n", - ret); - return ret; } static const struct drm_framebuffer_funcs mipi_dbi_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = mipi_dbi_fb_dirty, + .dirty = tinydrm_fb_dirty, }; /** @@ -278,13 +265,16 @@ static const struct drm_framebuffer_funcs mipi_dbi_fb_funcs = { * enables the backlight. Drivers can use this in their * &drm_simple_display_pipe_funcs->enable callback. 
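* The framebuffer to flush is now taken from the new @plane_state
 * argument rather than from the plane's legacy fb pointer.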
*/ -void mipi_dbi_enable_flush(struct mipi_dbi *mipi) +void mipi_dbi_enable_flush(struct mipi_dbi *mipi, + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { - struct drm_framebuffer *fb = mipi->tinydrm.pipe.plane.fb; + struct tinydrm_device *tdev = &mipi->tinydrm; + struct drm_framebuffer *fb = plane_state->fb; mipi->enabled = true; if (fb) - fb->funcs->dirty(fb, NULL, 0, 0, NULL, 0); + tdev->fb_dirty(fb, NULL, 0, 0, NULL, 0); backlight_enable(mipi->backlight); } @@ -381,6 +371,8 @@ int mipi_dbi_init(struct device *dev, struct mipi_dbi *mipi, if (ret) return ret; + tdev->fb_dirty = mipi_dbi_fb_dirty; + /* TODO: Maybe add DRM_MODE_CONNECTOR_SPI */ ret = tinydrm_display_pipe_init(tdev, pipe_funcs, DRM_MODE_CONNECTOR_VIRTUAL, diff --git a/drivers/gpu/drm/tinydrm/repaper.c b/drivers/gpu/drm/tinydrm/repaper.c index 75740630c410..1ee6855212a0 100644 --- a/drivers/gpu/drm/tinydrm/repaper.c +++ b/drivers/gpu/drm/tinydrm/repaper.c @@ -540,14 +540,8 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, clip.y1 = 0; clip.y2 = fb->height; - mutex_lock(&tdev->dirty_lock); - if (!epd->enabled) - goto out_unlock; - - /* fbdev can flush even when we're not interested */ - if (tdev->pipe.plane.fb != fb) - goto out_unlock; + return 0; repaper_get_temperature(epd); @@ -555,16 +549,14 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, epd->factored_stage_time); buf = kmalloc(fb->width * fb->height, GFP_KERNEL); - if (!buf) { - ret = -ENOMEM; - goto out_unlock; - } + if (!buf) + return -ENOMEM; if (import_attach) { ret = dma_buf_begin_cpu_access(import_attach->dmabuf, DMA_FROM_DEVICE); if (ret) - goto out_unlock; + goto out_free; } tinydrm_xrgb8888_to_gray8(buf, cma_obj->vaddr, fb, &clip); @@ -573,7 +565,7 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, ret = dma_buf_end_cpu_access(import_attach->dmabuf, DMA_FROM_DEVICE); if (ret) - goto out_unlock; + goto out_free; } repaper_gray8_to_mono_reversed(buf, fb->width, fb->height); @@ -625,11 +617,7 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, } } -out_unlock: - mutex_unlock(&tdev->dirty_lock); - - if (ret) - DRM_DEV_ERROR(fb->dev->dev, "Failed to update display (%d)\n", ret); +out_free: kfree(buf); return ret; @@ -638,7 +626,7 @@ out_unlock: static const struct drm_framebuffer_funcs repaper_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = repaper_fb_dirty, + .dirty = tinydrm_fb_dirty, }; static void power_off(struct repaper_epd *epd) @@ -659,7 +647,8 @@ static void power_off(struct repaper_epd *epd) } static void repaper_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct repaper_epd *epd = epd_from_tinydrm(tdev); @@ -852,7 +841,7 @@ static const struct drm_simple_display_pipe_funcs repaper_pipe_funcs = { .enable = repaper_pipe_enable, .disable = repaper_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const uint32_t repaper_formats[] = { @@ -1069,6 +1058,8 @@ static int repaper_probe(struct spi_device *spi) if (ret) return ret; + tdev->fb_dirty = repaper_fb_dirty; + ret = tinydrm_display_pipe_init(tdev, &repaper_pipe_funcs, DRM_MODE_CONNECTOR_VIRTUAL, repaper_formats, diff --git a/drivers/gpu/drm/tinydrm/st7586.c b/drivers/gpu/drm/tinydrm/st7586.c index 
a6396ef9cc4a..5c29e3803ecb 100644 --- a/drivers/gpu/drm/tinydrm/st7586.c +++ b/drivers/gpu/drm/tinydrm/st7586.c @@ -120,14 +120,8 @@ static int st7586_fb_dirty(struct drm_framebuffer *fb, int start, end; int ret = 0; - mutex_lock(&tdev->dirty_lock); - if (!mipi->enabled) - goto out_unlock; - - /* fbdev can flush even when we're not interested */ - if (tdev->pipe.plane.fb != fb) - goto out_unlock; + return 0; tinydrm_merge_clips(&clip, clips, num_clips, flags, fb->width, fb->height); @@ -141,7 +135,7 @@ static int st7586_fb_dirty(struct drm_framebuffer *fb, ret = st7586_buf_copy(mipi->tx_buf, fb, &clip); if (ret) - goto out_unlock; + return ret; /* Pixels are packed 3 per byte */ start = clip.x1 / 3; @@ -158,24 +152,18 @@ static int st7586_fb_dirty(struct drm_framebuffer *fb, (u8 *)mipi->tx_buf, (end - start) * (clip.y2 - clip.y1)); -out_unlock: - mutex_unlock(&tdev->dirty_lock); - - if (ret) - dev_err_once(fb->dev->dev, "Failed to update display %d\n", - ret); - return ret; } static const struct drm_framebuffer_funcs st7586_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = st7586_fb_dirty, + .dirty = tinydrm_fb_dirty, }; static void st7586_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev); @@ -237,7 +225,7 @@ static void st7586_pipe_enable(struct drm_simple_display_pipe *pipe, mipi_dbi_command(mipi, MIPI_DCS_SET_DISPLAY_ON); - mipi_dbi_enable_flush(mipi); + mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } static void st7586_pipe_disable(struct drm_simple_display_pipe *pipe) @@ -277,6 +265,8 @@ static int st7586_init(struct device *dev, struct mipi_dbi *mipi, if (ret) return ret; + tdev->fb_dirty = st7586_fb_dirty; + ret = tinydrm_display_pipe_init(tdev, pipe_funcs, DRM_MODE_CONNECTOR_VIRTUAL, st7586_formats, @@ -300,7 +290,7 @@ static const struct drm_simple_display_pipe_funcs st7586_pipe_funcs = { .enable = st7586_pipe_enable, .disable = st7586_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode st7586_mode = { diff --git a/drivers/gpu/drm/tinydrm/st7735r.c b/drivers/gpu/drm/tinydrm/st7735r.c index 67d197ecfc4b..6c7b15c9da4f 100644 --- a/drivers/gpu/drm/tinydrm/st7735r.c +++ b/drivers/gpu/drm/tinydrm/st7735r.c @@ -37,7 +37,8 @@ #define ST7735R_MV BIT(5) static void jd_t18003_t01_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev); @@ -98,14 +99,14 @@ static void jd_t18003_t01_pipe_enable(struct drm_simple_display_pipe *pipe, msleep(20); - mipi_dbi_enable_flush(mipi); + mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } static const struct drm_simple_display_pipe_funcs jd_t18003_t01_pipe_funcs = { .enable = jd_t18003_t01_pipe_enable, .disable = mipi_dbi_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode jd_t18003_t01_mode = { diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c 
b/drivers/gpu/drm/ttm/ttm_agp_backend.c index 7c2485fe88d8..ea4d59eb8966 100644 --- a/drivers/gpu/drm/ttm/ttm_agp_backend.c +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 98e06f8bf23b..5d8688e522d1 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA @@ -1175,7 +1176,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, reservation_object_init(&bo->ttm_resv); atomic_inc(&bo->bdev->glob->bo_count); drm_vma_node_reset(&bo->vma_node); - bo->priority = 0; /* * For ttm_bo_type_device buffers, allocate diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c index a7c232dc39cb..18d3debcc949 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2007-2010 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2ebbae6067ab..f2c167702eef 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA @@ -39,6 +40,11 @@ #include <linux/module.h> #include <linux/reservation.h> +struct ttm_transfer_obj { + struct ttm_buffer_object base; + struct ttm_buffer_object *bo; +}; + void ttm_bo_free_old_node(struct ttm_buffer_object *bo) { ttm_bo_mem_put(bo, &bo->mem); @@ -454,7 +460,11 @@ EXPORT_SYMBOL(ttm_bo_move_memcpy); static void ttm_transfered_destroy(struct ttm_buffer_object *bo) { - kfree(bo); + struct ttm_transfer_obj *fbo; + + fbo = container_of(bo, struct ttm_transfer_obj, base); + ttm_bo_unref(&fbo->bo); + kfree(fbo); } /** @@ -475,14 +485,15 @@ static void ttm_transfered_destroy(struct ttm_buffer_object *bo) static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, struct ttm_buffer_object **new_obj) { - struct ttm_buffer_object *fbo; + struct ttm_transfer_obj *fbo; int ret; fbo = kmalloc(sizeof(*fbo), GFP_KERNEL); if (!fbo) return -ENOMEM; - *fbo = *bo; + fbo->base = *bo; + fbo->bo = ttm_bo_reference(bo); /** * Fix up members that we shouldn't copy directly: @@ -490,25 +501,25 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, */ atomic_inc(&bo->bdev->glob->bo_count); - INIT_LIST_HEAD(&fbo->ddestroy); - INIT_LIST_HEAD(&fbo->lru); - INIT_LIST_HEAD(&fbo->swap); - INIT_LIST_HEAD(&fbo->io_reserve_lru); - mutex_init(&fbo->wu_mutex); - fbo->moving = NULL; - drm_vma_node_reset(&fbo->vma_node); - atomic_set(&fbo->cpu_writers, 0); - - kref_init(&fbo->list_kref); - kref_init(&fbo->kref); - fbo->destroy = &ttm_transfered_destroy; - fbo->acc_size = 0; - fbo->resv = &fbo->ttm_resv; - reservation_object_init(fbo->resv); - ret = reservation_object_trylock(fbo->resv); + INIT_LIST_HEAD(&fbo->base.ddestroy); + INIT_LIST_HEAD(&fbo->base.lru); + INIT_LIST_HEAD(&fbo->base.swap); + 
INIT_LIST_HEAD(&fbo->base.io_reserve_lru); + mutex_init(&fbo->base.wu_mutex); + fbo->base.moving = NULL; + drm_vma_node_reset(&fbo->base.vma_node); + atomic_set(&fbo->base.cpu_writers, 0); + + kref_init(&fbo->base.list_kref); + kref_init(&fbo->base.kref); + fbo->base.destroy = &ttm_transfered_destroy; + fbo->base.acc_size = 0; + fbo->base.resv = &fbo->base.ttm_resv; + reservation_object_init(fbo->base.resv); + ret = reservation_object_trylock(fbo->base.resv); WARN_ON(!ret); - *new_obj = fbo; + *new_obj = &fbo->base; return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 8eba95b3c737..c7ece7613a6a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 3dca206e85f7..e73ae0d22897 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c index 913f4318cdc0..20694b8a01ca 100644 --- a/drivers/gpu/drm/ttm/ttm_lock.c +++ b/drivers/gpu/drm/ttm/ttm_lock.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index 27856c55dc84..450387c92b63 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index 66fc6395eb54..6ff40c041d79 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 1aa2baa83959..74f1b1eb1f8e 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2009-2013 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 7e672be987b5..a1e543972ca7 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/tve200/tve200_display.c b/drivers/gpu/drm/tve200/tve200_display.c index db397fcb345a..e8723a2412a6 100644 --- a/drivers/gpu/drm/tve200/tve200_display.c +++ 
b/drivers/gpu/drm/tve200/tve200_display.c @@ -120,7 +120,8 @@ static int tve200_display_check(struct drm_simple_display_pipe *pipe, } static void tve200_display_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *cstate) + struct drm_crtc_state *cstate, + struct drm_plane_state *plane_state) { struct drm_crtc *crtc = &pipe->crtc; struct drm_plane *plane = &pipe->plane; @@ -292,18 +293,12 @@ static void tve200_display_disable_vblank(struct drm_simple_display_pipe *pipe) writel(0, priv->regs + TVE200_INT_EN); } -static int tve200_display_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state) -{ - return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); -} - static const struct drm_simple_display_pipe_funcs tve200_display_funcs = { .check = tve200_display_check, .enable = tve200_display_enable, .disable = tve200_display_disable, .update = tve200_display_update, - .prepare_fb = tve200_display_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, .enable_vblank = tve200_display_enable_vblank, .disable_vblank = tve200_display_disable_vblank, }; diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index c3dc1fd20cb4..09dc585aa46f 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -105,7 +105,7 @@ static int udl_get_modes(struct drm_connector *connector) return 0; } -static int udl_mode_valid(struct drm_connector *connector, +static enum drm_mode_status udl_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct udl_device *udl = connector->dev->dev_private; diff --git a/drivers/gpu/drm/udl/udl_dmabuf.c b/drivers/gpu/drm/udl/udl_dmabuf.c index 2867ed155ff6..0a20695eb120 100644 --- a/drivers/gpu/drm/udl/udl_dmabuf.c +++ b/drivers/gpu/drm/udl/udl_dmabuf.c @@ -76,6 +76,7 @@ static struct sg_table *udl_map_dma_buf(struct dma_buf_attachment *attach, struct udl_drm_dmabuf_attachment *udl_attach = attach->priv; struct udl_gem_object *obj = to_udl_bo(attach->dmabuf->priv); struct drm_device *dev = obj->base.dev; + struct udl_device *udl = dev->dev_private; struct scatterlist *rd, *wr; struct sg_table *sgt = NULL; unsigned int i; @@ -112,7 +113,7 @@ static struct sg_table *udl_map_dma_buf(struct dma_buf_attachment *attach, return ERR_PTR(-ENOMEM); } - mutex_lock(&dev->struct_mutex); + mutex_lock(&udl->gem_lock); rd = obj->sg->sgl; wr = sgt->sgl; @@ -137,7 +138,7 @@ static struct sg_table *udl_map_dma_buf(struct dma_buf_attachment *attach, attach->priv = udl_attach; err_unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&udl->gem_lock); return sgt; } diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 3c45a3064726..9ef515df724b 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -53,7 +53,7 @@ static struct drm_driver driver = { .unload = udl_driver_unload, /* gem hooks */ - .gem_free_object = udl_gem_free_object, + .gem_free_object_unlocked = udl_gem_free_object, .gem_vm_ops = &udl_gem_vm_ops, .dumb_create = udl_dumb_create, diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 2a75ab80527a..55c0cc309198 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -54,6 +54,8 @@ struct udl_device { struct usb_device *udev; struct drm_crtc *crtc; + struct mutex gem_lock; + int sku_pixel_limit; struct urb_list urbs; diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index dee6bd9a3dd1..9a15cce22cce 
100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -214,9 +214,10 @@ int udl_gem_mmap(struct drm_file *file, struct drm_device *dev, { struct udl_gem_object *gobj; struct drm_gem_object *obj; + struct udl_device *udl = dev->dev_private; int ret = 0; - mutex_lock(&dev->struct_mutex); + mutex_lock(&udl->gem_lock); obj = drm_gem_object_lookup(file, handle); if (obj == NULL) { ret = -ENOENT; @@ -236,6 +237,6 @@ int udl_gem_mmap(struct drm_file *file, struct drm_device *dev, out: drm_gem_object_put(&gobj->base); unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&udl->gem_lock); return ret; } diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index f1ec4528a73e..d518de8f496b 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -324,6 +324,8 @@ int udl_driver_load(struct drm_device *dev, unsigned long flags) udl->ddev = dev; dev->dev_private = udl; + mutex_init(&udl->gem_lock); + if (!udl_parse_vendor_descriptor(dev, udl->udev)) { ret = -ENODEV; DRM_ERROR("firmware not recognized. Assume incompatible device\n"); diff --git a/drivers/gpu/drm/v3d/Kconfig b/drivers/gpu/drm/v3d/Kconfig new file mode 100644 index 000000000000..a0c0259355bd --- /dev/null +++ b/drivers/gpu/drm/v3d/Kconfig @@ -0,0 +1,9 @@ +config DRM_V3D + tristate "Broadcom V3D 3.x and newer" + depends on ARCH_BCM || ARCH_BCMSTB || COMPILE_TEST + depends on DRM + depends on COMMON_CLK + select DRM_SCHED + help + Choose this option if you have a system that has a Broadcom + V3D 3.x or newer GPU, such as BCM7268. diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile new file mode 100644 index 000000000000..34446e1de64f --- /dev/null +++ b/drivers/gpu/drm/v3d/Makefile @@ -0,0 +1,18 @@ +# Please keep these build lists sorted! + +# core driver code +v3d-y := \ + v3d_bo.o \ + v3d_drv.o \ + v3d_fence.o \ + v3d_gem.o \ + v3d_irq.o \ + v3d_mmu.o \ + v3d_trace_points.o \ + v3d_sched.o + +v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o + +obj-$(CONFIG_DRM_V3D) += v3d.o + +CFLAGS_v3d_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c new file mode 100644 index 000000000000..7b1e2a549a71 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +/** + * DOC: V3D GEM BO management support + * + * Compared to VC4 (V3D 2.x), V3D 3.3 introduces an MMU between the + * GPU and the bus, allowing us to use shmem objects for our storage + * instead of CMA. + * + * Physically contiguous objects may still be imported to V3D, but the + * driver doesn't allocate physically contiguous objects on its own. + * Display engines requiring physically contiguous allocations should + * look into Mesa's "renderonly" support (as used by the Mesa pl111 + * driver) for an example of how to integrate with V3D. + * + * Long term, we should support evicting pages from the MMU when under + * memory pressure (thus the v3d_bo_get_pages() refcounting), but + * that's not a high priority since our systems tend to not have swap. + */ + +#include <linux/dma-buf.h> +#include <linux/pfn_t.h> + +#include "v3d_drv.h" +#include "uapi/drm/v3d_drm.h" + +/* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and maps + * it for DMA. 
+ */ +static int +v3d_bo_get_pages(struct v3d_bo *bo) +{ + struct drm_gem_object *obj = &bo->base; + struct drm_device *dev = obj->dev; + int npages = obj->size >> PAGE_SHIFT; + int ret = 0; + + mutex_lock(&bo->lock); + if (bo->pages_refcount++ != 0) + goto unlock; + + if (!obj->import_attach) { + bo->pages = drm_gem_get_pages(obj); + if (IS_ERR(bo->pages)) { + ret = PTR_ERR(bo->pages); + goto unlock; + } + + bo->sgt = drm_prime_pages_to_sg(bo->pages, npages); + if (IS_ERR(bo->sgt)) { + ret = PTR_ERR(bo->sgt); + goto put_pages; + } + + /* Map the pages for use by the GPU. */ + dma_map_sg(dev->dev, bo->sgt->sgl, + bo->sgt->nents, DMA_BIDIRECTIONAL); + } else { + bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL); + if (!bo->pages) + goto put_pages; + + drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages, + NULL, npages); + + /* Note that dma-bufs come in mapped. */ + } + + mutex_unlock(&bo->lock); + + return 0; + +put_pages: + drm_gem_put_pages(obj, bo->pages, true, true); + bo->pages = NULL; +unlock: + bo->pages_refcount--; + mutex_unlock(&bo->lock); + return ret; +} + +static void +v3d_bo_put_pages(struct v3d_bo *bo) +{ + struct drm_gem_object *obj = &bo->base; + + mutex_lock(&bo->lock); + if (--bo->pages_refcount == 0) { + if (!obj->import_attach) { + dma_unmap_sg(obj->dev->dev, bo->sgt->sgl, + bo->sgt->nents, DMA_BIDIRECTIONAL); + sg_free_table(bo->sgt); + kfree(bo->sgt); + drm_gem_put_pages(obj, bo->pages, true, true); + } else { + kfree(bo->pages); + } + } + mutex_unlock(&bo->lock); +} + +static struct v3d_bo *v3d_bo_create_struct(struct drm_device *dev, + size_t unaligned_size) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_gem_object *obj; + struct v3d_bo *bo; + size_t size = roundup(unaligned_size, PAGE_SIZE); + int ret; + + if (size == 0) + return ERR_PTR(-EINVAL); + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + obj = &bo->base; + + INIT_LIST_HEAD(&bo->vmas); + INIT_LIST_HEAD(&bo->unref_head); + mutex_init(&bo->lock); + + ret = drm_gem_object_init(dev, obj, size); + if (ret) + goto free_bo; + + spin_lock(&v3d->mm_lock); + ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node, + obj->size >> PAGE_SHIFT, + GMP_GRANULARITY >> PAGE_SHIFT, 0, 0); + spin_unlock(&v3d->mm_lock); + if (ret) + goto free_obj; + + return bo; + +free_obj: + drm_gem_object_release(obj); +free_bo: + kfree(bo); + return ERR_PTR(ret); +} + +struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, + size_t unaligned_size) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_gem_object *obj; + struct v3d_bo *bo; + int ret; + + bo = v3d_bo_create_struct(dev, unaligned_size); + if (IS_ERR(bo)) + return bo; + obj = &bo->base; + + bo->resv = &bo->_resv; + reservation_object_init(bo->resv); + + ret = v3d_bo_get_pages(bo); + if (ret) + goto free_mm; + + v3d_mmu_insert_ptes(bo); + + mutex_lock(&v3d->bo_lock); + v3d->bo_stats.num_allocated++; + v3d->bo_stats.pages_allocated += obj->size >> PAGE_SHIFT; + mutex_unlock(&v3d->bo_lock); + + return bo; + +free_mm: + spin_lock(&v3d->mm_lock); + drm_mm_remove_node(&bo->node); + spin_unlock(&v3d->mm_lock); + + drm_gem_object_release(obj); + kfree(bo); + return ERR_PTR(ret); +} + +/* Called DRM core on the last userspace/kernel unreference of the + * BO. 
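A quick worked example of the address arithmetic used by v3d_bo_create_struct() above and by the offset-returning ioctls below (editor's note, not driver code):

/* GMP_GRANULARITY is 128 KiB; with 4 KiB pages the drm_mm allocator
 * therefore places BOs at multiples of 128K / 4K = 32 pages.  A BO
 * whose mm node starts at page 64 sits at GPU virtual address
 * 64 << PAGE_SHIFT = 0x40000, which is the value args->offset reports
 * back to userspace.
 */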
+ */ +void v3d_free_object(struct drm_gem_object *obj) +{ + struct v3d_dev *v3d = to_v3d_dev(obj->dev); + struct v3d_bo *bo = to_v3d_bo(obj); + + mutex_lock(&v3d->bo_lock); + v3d->bo_stats.num_allocated--; + v3d->bo_stats.pages_allocated -= obj->size >> PAGE_SHIFT; + mutex_unlock(&v3d->bo_lock); + + reservation_object_fini(&bo->_resv); + + v3d_bo_put_pages(bo); + + if (obj->import_attach) + drm_prime_gem_destroy(obj, bo->sgt); + + v3d_mmu_remove_ptes(bo); + spin_lock(&v3d->mm_lock); + drm_mm_remove_node(&bo->node); + spin_unlock(&v3d->mm_lock); + + mutex_destroy(&bo->lock); + + drm_gem_object_release(obj); + kfree(bo); +} + +struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj) +{ + struct v3d_bo *bo = to_v3d_bo(obj); + + return bo->resv; +} + +static void +v3d_set_mmap_vma_flags(struct vm_area_struct *vma) +{ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_MIXEDMAP; + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); +} + +int v3d_gem_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *obj = vma->vm_private_data; + struct v3d_bo *bo = to_v3d_bo(obj); + unsigned long pfn; + pgoff_t pgoff; + int ret; + + /* We don't use vmf->pgoff since that has the fake offset: */ + pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + pfn = page_to_pfn(bo->pages[pgoff]); + + ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); + + switch (ret) { + case -EAGAIN: + case 0: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. + */ + return VM_FAULT_NOPAGE; + case -ENOMEM: + return VM_FAULT_OOM; + default: + return VM_FAULT_SIGBUS; + } +} + +int v3d_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; + + v3d_set_mmap_vma_flags(vma); + + return ret; +} + +int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + ret = drm_gem_mmap_obj(obj, obj->size, vma); + if (ret < 0) + return ret; + + v3d_set_mmap_vma_flags(vma); + + return 0; +} + +struct sg_table * +v3d_prime_get_sg_table(struct drm_gem_object *obj) +{ + struct v3d_bo *bo = to_v3d_bo(obj); + int npages = obj->size >> PAGE_SHIFT; + + return drm_prime_pages_to_sg(bo->pages, npages); +} + +struct drm_gem_object * +v3d_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct drm_gem_object *obj; + struct v3d_bo *bo; + + bo = v3d_bo_create_struct(dev, attach->dmabuf->size); + if (IS_ERR(bo)) + return ERR_CAST(bo); + obj = &bo->base; + + bo->resv = attach->dmabuf->resv; + + bo->sgt = sgt; + v3d_bo_get_pages(bo); + + v3d_mmu_insert_ptes(bo); + + return obj; +} + +int v3d_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_create_bo *args = data; + struct v3d_bo *bo = NULL; + int ret; + + if (args->flags != 0) { + DRM_INFO("unknown create_bo flags: %d\n", args->flags); + return -EINVAL; + } + + bo = v3d_bo_create(dev, file_priv, PAGE_ALIGN(args->size)); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + args->offset = bo->node.start << PAGE_SHIFT; + + ret = drm_gem_handle_create(file_priv, &bo->base, &args->handle); + drm_gem_object_put_unlocked(&bo->base); + + return ret; +} + +int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_mmap_bo *args = data; + struct drm_gem_object *gem_obj; + int ret; + + if 
(args->flags != 0) { + DRM_INFO("unknown mmap_bo flags: %d\n", args->flags); + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + ret = drm_gem_create_mmap_offset(gem_obj); + if (ret == 0) + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} + +int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_get_bo_offset *args = data; + struct drm_gem_object *gem_obj; + struct v3d_bo *bo; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_v3d_bo(gem_obj); + + args->offset = bo->node.start << PAGE_SHIFT; + + drm_gem_object_put_unlocked(gem_obj); + return 0; +} diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c new file mode 100644 index 000000000000..4db62c545748 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +#include <linux/circ_buf.h> +#include <linux/ctype.h> +#include <linux/debugfs.h> +#include <linux/pm_runtime.h> +#include <linux/seq_file.h> +#include <drm/drmP.h> + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define REGDEF(reg) { reg, #reg } +struct v3d_reg_def { + u32 reg; + const char *name; +}; + +static const struct v3d_reg_def v3d_hub_reg_defs[] = { + REGDEF(V3D_HUB_AXICFG), + REGDEF(V3D_HUB_UIFCFG), + REGDEF(V3D_HUB_IDENT0), + REGDEF(V3D_HUB_IDENT1), + REGDEF(V3D_HUB_IDENT2), + REGDEF(V3D_HUB_IDENT3), + REGDEF(V3D_HUB_INT_STS), + REGDEF(V3D_HUB_INT_MSK_STS), +}; + +static const struct v3d_reg_def v3d_gca_reg_defs[] = { + REGDEF(V3D_GCA_SAFE_SHUTDOWN), + REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK), +}; + +static const struct v3d_reg_def v3d_core_reg_defs[] = { + REGDEF(V3D_CTL_IDENT0), + REGDEF(V3D_CTL_IDENT1), + REGDEF(V3D_CTL_IDENT2), + REGDEF(V3D_CTL_MISCCFG), + REGDEF(V3D_CTL_INT_STS), + REGDEF(V3D_CTL_INT_MSK_STS), + REGDEF(V3D_CLE_CT0CS), + REGDEF(V3D_CLE_CT0CA), + REGDEF(V3D_CLE_CT0EA), + REGDEF(V3D_CLE_CT1CS), + REGDEF(V3D_CLE_CT1CA), + REGDEF(V3D_CLE_CT1EA), + + REGDEF(V3D_PTB_BPCA), + REGDEF(V3D_PTB_BPCS), + + REGDEF(V3D_MMU_CTL), + REGDEF(V3D_MMU_VIO_ADDR), + + REGDEF(V3D_GMP_STATUS), + REGDEF(V3D_GMP_CFG), + REGDEF(V3D_GMP_VIO_ADDR), +}; + +static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + int i, core; + + for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg, + V3D_READ(v3d_hub_reg_defs[i].reg)); + } + + for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg, + V3D_GCA_READ(v3d_gca_reg_defs[i].reg)); + } + + for (core = 0; core < v3d->cores; core++) { + for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) { + seq_printf(m, "core %d %s (0x%04x): 0x%08x\n", + core, + v3d_core_reg_defs[i].name, + v3d_core_reg_defs[i].reg, + V3D_CORE_READ(core, + v3d_core_reg_defs[i].reg)); + } + } + + return 0; +} + +static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node 
*)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + u32 ident0, ident1, ident2, ident3, cores; + int ret, core; + + ret = pm_runtime_get_sync(v3d->dev); + if (ret < 0) + return ret; + + ident0 = V3D_READ(V3D_HUB_IDENT0); + ident1 = V3D_READ(V3D_HUB_IDENT1); + ident2 = V3D_READ(V3D_HUB_IDENT2); + ident3 = V3D_READ(V3D_HUB_IDENT3); + cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); + + seq_printf(m, "Revision: %d.%d.%d.%d\n", + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER), + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV), + V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV), + V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPIDX)); + seq_printf(m, "MMU: %s\n", + (ident2 & V3D_HUB_IDENT2_WITH_MMU) ? "yes" : "no"); + seq_printf(m, "TFU: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_TFU) ? "yes" : "no"); + seq_printf(m, "TSY: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_TSY) ? "yes" : "no"); + seq_printf(m, "MSO: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_MSO) ? "yes" : "no"); + seq_printf(m, "L3C: %s (%dkb)\n", + (ident1 & V3D_HUB_IDENT1_WITH_L3C) ? "yes" : "no", + V3D_GET_FIELD(ident2, V3D_HUB_IDENT2_L3C_NKB)); + + for (core = 0; core < cores; core++) { + u32 misccfg; + u32 nslc, ntmu, qups; + + ident0 = V3D_CORE_READ(core, V3D_CTL_IDENT0); + ident1 = V3D_CORE_READ(core, V3D_CTL_IDENT1); + ident2 = V3D_CORE_READ(core, V3D_CTL_IDENT2); + misccfg = V3D_CORE_READ(core, V3D_CTL_MISCCFG); + + nslc = V3D_GET_FIELD(ident1, V3D_IDENT1_NSLC); + ntmu = V3D_GET_FIELD(ident1, V3D_IDENT1_NTMU); + qups = V3D_GET_FIELD(ident1, V3D_IDENT1_QUPS); + + seq_printf(m, "Core %d:\n", core); + seq_printf(m, " Revision: %d.%d\n", + V3D_GET_FIELD(ident0, V3D_IDENT0_VER), + V3D_GET_FIELD(ident1, V3D_IDENT1_REV)); + seq_printf(m, " Slices: %d\n", nslc); + seq_printf(m, " TMUs: %d\n", nslc * ntmu); + seq_printf(m, " QPUs: %d\n", nslc * qups); + seq_printf(m, " Semaphores: %d\n", + V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM)); + seq_printf(m, " BCG int: %d\n", + (ident2 & V3D_IDENT2_BCG_INT) != 0); + seq_printf(m, " Override TMU: %d\n", + (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0); + } + + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + + return 0; +} + +static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + + mutex_lock(&v3d->bo_lock); + seq_printf(m, "allocated bos: %d\n", + v3d->bo_stats.num_allocated); + seq_printf(m, "allocated bo size (kb): %ld\n", + (long)v3d->bo_stats.pages_allocated << (PAGE_SHIFT - 10)); + mutex_unlock(&v3d->bo_lock); + + return 0; +} + +static const struct drm_info_list v3d_debugfs_list[] = { + {"v3d_ident", v3d_v3d_debugfs_ident, 0}, + {"v3d_regs", v3d_v3d_debugfs_regs, 0}, + {"bo_stats", v3d_debugfs_bo_stats, 0}, +}; + +int +v3d_debugfs_init(struct drm_minor *minor) +{ + return drm_debugfs_create_files(v3d_debugfs_list, + ARRAY_SIZE(v3d_debugfs_list), + minor->debugfs_root, minor); +} diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c new file mode 100644 index 000000000000..cdb582043b4f --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +/** + * DOC: Broadcom V3D Graphics Driver + * + * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs. + * For V3D 2.x support, see the VC4 driver. 
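Putting the BO ioctls above together, a userspace client would typically create a buffer and then map it through the fake mmap offset. A hedged sketch follows (error handling elided; the DRM_IOCTL_V3D_* wrappers and struct layouts are assumed to come from the new uapi/drm/v3d_drm.h header, and the field names match the args-> accesses in the handlers above):

#include <sys/ioctl.h>
#include <sys/mman.h>
#include "v3d_drm.h"        /* uapi header added by this series */

static void *v3d_alloc_and_map(int fd, __u32 size, __u32 *handle)
{
        struct drm_v3d_create_bo create = { .size = size };
        struct drm_v3d_mmap_bo map = { 0 };

        ioctl(fd, DRM_IOCTL_V3D_CREATE_BO, &create); /* sets .handle, .offset */
        *handle = create.handle;

        map.handle = create.handle;
        ioctl(fd, DRM_IOCTL_V3D_MMAP_BO, &map);      /* sets the fake .offset */

        /* map.offset is an offset into the DRM node, per the code above */
        return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, map.offset);
}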
+ * + * Currently only single-core rendering using the binner and renderer + * is supported. The TFU (texture formatting unit) and V3D 4.x's CSD + * (compute shader dispatch) are not yet supported. + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_fb_helper.h> + +#include "uapi/drm/v3d_drm.h" +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define DRIVER_NAME "v3d" +#define DRIVER_DESC "Broadcom V3D graphics" +#define DRIVER_DATE "20180419" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 0 + +#ifdef CONFIG_PM +static int v3d_runtime_suspend(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + + v3d_irq_disable(v3d); + + clk_disable_unprepare(v3d->clk); + + return 0; +} + +static int v3d_runtime_resume(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + int ret; + + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; + + /* XXX: VPM base */ + + v3d_mmu_set_page_table(v3d); + v3d_irq_enable(v3d); + + return 0; +} +#endif + +static const struct dev_pm_ops v3d_v3d_pm_ops = { + SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL) +}; + +static int v3d_get_param_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_v3d_get_param *args = data; + int ret; + static const u32 reg_map[] = { + [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG, + [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1, + [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2, + [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3, + [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0, + [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1, + [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2, + }; + + if (args->pad != 0) + return -EINVAL; + + /* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need + * to explicitly allow it in the "the register in our + * parameter map" check. + */ + if (args->param < ARRAY_SIZE(reg_map) && + (reg_map[args->param] || + args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) { + u32 offset = reg_map[args->param]; + + if (args->value != 0) + return -EINVAL; + + ret = pm_runtime_get_sync(v3d->dev); + if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 && + args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) { + args->value = V3D_CORE_READ(0, offset); + } else { + args->value = V3D_READ(offset); + } + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + return 0; + } + + /* Any params that aren't just register reads would go here. 
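To make the parameter-map mechanism above concrete, a hedged userspace sketch (printf from <stdio.h>; note the handler requires both pad and value to be zero on input, which a zero-initialized struct satisfies):

struct drm_v3d_get_param param = {
        .param = DRM_V3D_PARAM_V3D_HUB_IDENT1, /* .pad and .value stay 0 */
};

if (ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &param) == 0)
        printf("HUB_IDENT1: 0x%llx\n", (unsigned long long)param.value);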
*/ + + DRM_DEBUG("Unknown parameter %d\n", args->param); + return -EINVAL; +} + +static int +v3d_open(struct drm_device *dev, struct drm_file *file) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv; + int i; + + v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL); + if (!v3d_priv) + return -ENOMEM; + + v3d_priv->v3d = v3d; + + for (i = 0; i < V3D_MAX_QUEUES; i++) { + drm_sched_entity_init(&v3d->queue[i].sched, + &v3d_priv->sched_entity[i], + &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], + NULL); + } + + file->driver_priv = v3d_priv; + + return 0; +} + +static void +v3d_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file->driver_priv; + enum v3d_queue q; + + for (q = 0; q < V3D_MAX_QUEUES; q++) { + drm_sched_entity_fini(&v3d->queue[q].sched, + &v3d_priv->sched_entity[q]); + } + + kfree(v3d_priv); +} + +static const struct file_operations v3d_drm_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .mmap = v3d_mmap, + .poll = drm_poll, + .read = drm_read, + .compat_ioctl = drm_compat_ioctl, + .llseek = noop_llseek, +}; + +/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP + * protection between clients. Note that render nodes would be be + * able to submit CLs that could access BOs from clients authenticated + * with the master node. + */ +static const struct drm_ioctl_desc v3d_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), + DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW), +}; + +static const struct vm_operations_struct v3d_vm_ops = { + .fault = v3d_gem_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static struct drm_driver v3d_drm_driver = { + .driver_features = (DRIVER_GEM | + DRIVER_RENDER | + DRIVER_PRIME | + DRIVER_SYNCOBJ), + + .open = v3d_open, + .postclose = v3d_postclose, + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = v3d_debugfs_init, +#endif + + .gem_free_object_unlocked = v3d_free_object, + .gem_vm_ops = &v3d_vm_ops, + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_res_obj = v3d_prime_res_obj, + .gem_prime_get_sg_table = v3d_prime_get_sg_table, + .gem_prime_import_sg_table = v3d_prime_import_sg_table, + .gem_prime_mmap = v3d_prime_mmap, + + .ioctls = v3d_drm_ioctls, + .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls), + .fops = &v3d_drm_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +static const struct of_device_id v3d_of_match[] = { + { .compatible = "brcm,7268-v3d" }, + { .compatible = "brcm,7278-v3d" }, + {}, +}; +MODULE_DEVICE_TABLE(of, v3d_of_match); + +static int +map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name) +{ + struct resource *res = + platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name); + + *regs = devm_ioremap_resource(v3d->dev, res); + return PTR_ERR_OR_ZERO(*regs); +} 
+ +static int v3d_platform_drm_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct drm_device *drm; + struct v3d_dev *v3d; + int ret; + u32 ident1; + + dev->coherent_dma_mask = DMA_BIT_MASK(36); + + v3d = kzalloc(sizeof(*v3d), GFP_KERNEL); + if (!v3d) + return -ENOMEM; + v3d->dev = dev; + v3d->pdev = pdev; + drm = &v3d->drm; + + ret = map_regs(v3d, &v3d->bridge_regs, "bridge"); + if (ret) + goto dev_free; + + ret = map_regs(v3d, &v3d->hub_regs, "hub"); + if (ret) + goto dev_free; + + ret = map_regs(v3d, &v3d->core_regs[0], "core0"); + if (ret) + goto dev_free; + + ident1 = V3D_READ(V3D_HUB_IDENT1); + v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 + + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV)); + v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); + WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ + + if (v3d->ver < 41) { + ret = map_regs(v3d, &v3d->gca_regs, "gca"); + if (ret) + goto dev_free; + } + + v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!v3d->mmu_scratch) { + dev_err(dev, "Failed to allocate MMU scratch page\n"); + ret = -ENOMEM; + goto dev_free; + } + + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_enable(dev); + + ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev); + if (ret) + goto dma_free; + + platform_set_drvdata(pdev, drm); + drm->dev_private = v3d; + + ret = v3d_gem_init(drm); + if (ret) + goto dev_destroy; + + v3d_irq_init(v3d); + + ret = drm_dev_register(drm, 0); + if (ret) + goto gem_destroy; + + return 0; + +gem_destroy: + v3d_gem_destroy(drm); +dev_destroy: + drm_dev_put(drm); +dma_free: + dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); +dev_free: + kfree(v3d); + return ret; +} + +static int v3d_platform_drm_remove(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + struct v3d_dev *v3d = to_v3d_dev(drm); + + drm_dev_unregister(drm); + + v3d_gem_destroy(drm); + + drm_dev_put(drm); + + dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); + + return 0; +} + +static struct platform_driver v3d_platform_driver = { + .probe = v3d_platform_drm_probe, + .remove = v3d_platform_drm_remove, + .driver = { + .name = "v3d", + .of_match_table = v3d_of_match, + }, +}; + +static int __init v3d_drm_register(void) +{ + return platform_driver_register(&v3d_platform_driver); +} + +static void __exit v3d_drm_unregister(void) +{ + platform_driver_unregister(&v3d_platform_driver); +} + +module_init(v3d_drm_register); +module_exit(v3d_drm_unregister); + +MODULE_ALIAS("platform:v3d-drm"); +MODULE_DESCRIPTION("Broadcom V3D DRM Driver"); +MODULE_AUTHOR("Eric Anholt <eric@anholt.net>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h new file mode 100644 index 000000000000..a043ac3aae98 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +#include <linux/reservation.h> +#include <drm/drmP.h> +#include <drm/drm_encoder.h> +#include <drm/drm_gem.h> +#include <drm/gpu_scheduler.h> + +#define GMP_GRANULARITY (128 * 1024) + +/* Enum for each of the V3D queues. We maintain various queue + * tracking as an array because at some point we'll want to support + * the TFU (texture formatting unit) as another queue. 
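The comment above anticipates the TFU becoming a third queue. Purely as an illustration of how the array-based tracking would grow (hypothetical, not part of this commit):

enum v3d_queue {
        V3D_BIN,
        V3D_RENDER,
        V3D_TFU,                /* hypothetical: texture formatting unit */
};

#define V3D_MAX_QUEUES (V3D_TFU + 1)    /* everything is sized off the last entry */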
+ */ +enum v3d_queue { + V3D_BIN, + V3D_RENDER, +}; + +#define V3D_MAX_QUEUES (V3D_RENDER + 1) + +struct v3d_queue_state { + struct drm_gpu_scheduler sched; + + u64 fence_context; + u64 emit_seqno; + u64 finished_seqno; +}; + +struct v3d_dev { + struct drm_device drm; + + /* Short representation (e.g. 33, 41) of the V3D tech version + * and revision. + */ + int ver; + + struct device *dev; + struct platform_device *pdev; + void __iomem *hub_regs; + void __iomem *core_regs[3]; + void __iomem *bridge_regs; + void __iomem *gca_regs; + struct clk *clk; + + /* Virtual and DMA addresses of the single shared page table. */ + volatile u32 *pt; + dma_addr_t pt_paddr; + + /* Virtual and DMA addresses of the MMU's scratch page. When + * a read or write is invalid in the MMU, it will be + * redirected here. + */ + void *mmu_scratch; + dma_addr_t mmu_scratch_paddr; + + /* Number of V3D cores. */ + u32 cores; + + /* Allocator managing the address space. All units are in + * number of pages. + */ + struct drm_mm mm; + spinlock_t mm_lock; + + struct work_struct overflow_mem_work; + + struct v3d_exec_info *bin_job; + struct v3d_exec_info *render_job; + + struct v3d_queue_state queue[V3D_MAX_QUEUES]; + + /* Spinlock used to synchronize the overflow memory + * management against bin job submission. + */ + spinlock_t job_lock; + + /* Protects bo_stats */ + struct mutex bo_lock; + + /* Lock taken when resetting the GPU, to keep multiple + * processes from trying to park the scheduler threads and + * reset at once. + */ + struct mutex reset_lock; + + struct { + u32 num_allocated; + u32 pages_allocated; + } bo_stats; +}; + +static inline struct v3d_dev * +to_v3d_dev(struct drm_device *dev) +{ + return (struct v3d_dev *)dev->dev_private; +} + +/* The per-fd struct, which tracks the MMU mappings. 
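A side note on the to_v3d_dev() helper above: the probe code points dev_private back at the containing struct, so the cast works; and because struct drm_device is embedded as the "drm" member, a container_of() spelling would be equivalent (shown for clarity only):

static inline struct v3d_dev *
to_v3d_dev_alt(struct drm_device *dev)
{
        return container_of(dev, struct v3d_dev, drm);
}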
*/ +struct v3d_file_priv { + struct v3d_dev *v3d; + + struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; +}; + +/* Tracks a mapping of a BO into a per-fd address space */ +struct v3d_vma { + struct v3d_page_table *pt; + struct list_head list; /* entry in v3d_bo.vmas */ +}; + +struct v3d_bo { + struct drm_gem_object base; + + struct mutex lock; + + struct drm_mm_node node; + + u32 pages_refcount; + struct page **pages; + struct sg_table *sgt; + void *vaddr; + + struct list_head vmas; /* list of v3d_vma */ + + /* List entry for the BO's position in + * v3d_exec_info->unref_list + */ + struct list_head unref_head; + + /* normally (resv == &_resv) except for imported BOs */ + struct reservation_object *resv; + struct reservation_object _resv; +}; + +static inline struct v3d_bo * +to_v3d_bo(struct drm_gem_object *bo) +{ + return (struct v3d_bo *)bo; +} + +struct v3d_fence { + struct dma_fence base; + struct drm_device *dev; + /* v3d seqno for signaled() test */ + u64 seqno; + enum v3d_queue queue; +}; + +static inline struct v3d_fence * +to_v3d_fence(struct dma_fence *fence) +{ + return (struct v3d_fence *)fence; +} + +#define V3D_READ(offset) readl(v3d->hub_regs + offset) +#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset) + +#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset) +#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset) + +#define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset) +#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset) + +#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset) +#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset) + +struct v3d_job { + struct drm_sched_job base; + + struct v3d_exec_info *exec; + + /* An optional fence userspace can pass in for the job to depend on. */ + struct dma_fence *in_fence; + + /* v3d fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *done_fence; + + /* GPU virtual addresses of the start/end of the CL job. */ + u32 start, end; +}; + +struct v3d_exec_info { + struct v3d_dev *v3d; + + struct v3d_job bin, render; + + /* Fence for when the scheduler considers the binner to be + * done, for render to depend on. + */ + struct dma_fence *bin_done_fence; + + struct kref refcount; + + /* This is the array of BOs that were looked up at the start of exec. */ + struct v3d_bo **bo; + u32 bo_count; + + /* List of overflow BOs used in the job that need to be + * released once the job is complete. + */ + struct list_head unref_list; + + /* Submitted tile memory allocation start/size, tile state. */ + u32 qma, qms, qts; +}; + +/** + * _wait_for - magic (register) wait macro + * + * Does the right thing for modeset paths when run under kgdb or similar atomic + * contexts. Note that it's important that we check the condition again after + * having timed out, since the timeout could be due to preemption or similar and + * we've never had a chance to check the condition before the timeout.
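Typical use of wait_for(), as in the cache-maintenance paths later in v3d_gem.c: poll a register until a bit clears, with a 100 ms upper bound:

        if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
                       V3D_L2TCACTL_L2TFLS), 100))
                DRM_ERROR("Timeout waiting for L2T flush\n");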
+ */ +#define wait_for(COND, MS) ({ \ + unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ + int ret__ = 0; \ + while (!(COND)) { \ + if (time_after(jiffies, timeout__)) { \ + if (!(COND)) \ + ret__ = -ETIMEDOUT; \ + break; \ + } \ + msleep(1); \ + } \ + ret__; \ +}) + +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) +{ + /* nsecs_to_jiffies64() does not guard against overflow */ + if (NSEC_PER_SEC % HZ && + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) + return MAX_JIFFY_OFFSET; + + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); +} + +/* v3d_bo.c */ +void v3d_free_object(struct drm_gem_object *gem_obj); +struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, + size_t size); +int v3d_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_gem_fault(struct vm_fault *vmf); +int v3d_mmap(struct file *filp, struct vm_area_struct *vma); +struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj); +int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct sg_table *v3d_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); + +/* v3d_debugfs.c */ +int v3d_debugfs_init(struct drm_minor *minor); + +/* v3d_fence.c */ +extern const struct dma_fence_ops v3d_fence_ops; +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue); + +/* v3d_gem.c */ +int v3d_gem_init(struct drm_device *dev); +void v3d_gem_destroy(struct drm_device *dev); +int v3d_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +void v3d_exec_put(struct v3d_exec_info *exec); +void v3d_reset(struct v3d_dev *v3d); +void v3d_invalidate_caches(struct v3d_dev *v3d); +void v3d_flush_caches(struct v3d_dev *v3d); + +/* v3d_irq.c */ +void v3d_irq_init(struct v3d_dev *v3d); +void v3d_irq_enable(struct v3d_dev *v3d); +void v3d_irq_disable(struct v3d_dev *v3d); +void v3d_irq_reset(struct v3d_dev *v3d); + +/* v3d_mmu.c */ +int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo, + u32 *offset); +int v3d_mmu_set_page_table(struct v3d_dev *v3d); +void v3d_mmu_insert_ptes(struct v3d_bo *bo); +void v3d_mmu_remove_ptes(struct v3d_bo *bo); + +/* v3d_sched.c */ +int v3d_sched_init(struct v3d_dev *v3d); +void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c new file mode 100644 index 000000000000..087d49c8cb12 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_fence.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +#include "v3d_drv.h" + +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue) +{ + struct v3d_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->dev = &v3d->drm; + fence->queue = queue; + fence->seqno = ++v3d->queue[queue].emit_seqno; + dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock, + v3d->queue[queue].fence_context, fence->seqno); + + return &fence->base; +} + +static const char *v3d_fence_get_driver_name(struct dma_fence 
*fence) +{ + return "v3d"; +} + +static const char *v3d_fence_get_timeline_name(struct dma_fence *fence) +{ + struct v3d_fence *f = to_v3d_fence(fence); + + if (f->queue == V3D_BIN) + return "v3d-bin"; + else + return "v3d-render"; +} + +static bool v3d_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static bool v3d_fence_signaled(struct dma_fence *fence) +{ + struct v3d_fence *f = to_v3d_fence(fence); + struct v3d_dev *v3d = to_v3d_dev(f->dev); + + return v3d->queue[f->queue].finished_seqno >= f->seqno; +} + +const struct dma_fence_ops v3d_fence_ops = { + .get_driver_name = v3d_fence_get_driver_name, + .get_timeline_name = v3d_fence_get_timeline_name, + .enable_signaling = v3d_fence_enable_signaling, + .signaled = v3d_fence_signaled, + .wait = dma_fence_default_wait, + .release = dma_fence_free, +}; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c new file mode 100644 index 000000000000..b513f9189caf --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -0,0 +1,668 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +#include <drm/drmP.h> +#include <drm/drm_syncobj.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/sched/signal.h> + +#include "uapi/drm/v3d_drm.h" +#include "v3d_drv.h" +#include "v3d_regs.h" +#include "v3d_trace.h" + +static void +v3d_init_core(struct v3d_dev *v3d, int core) +{ + /* Set OVRTMUOUT, which means that the texture sampler uniform + * configuration's tmu output type field is used, instead of + * using the hardware default behavior based on the texture + * type. If you want the default behavior, you can still put + * "2" in the indirect texture state's output_type field. + */ + V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT); + + /* Whenever we flush the L2T cache, we always want to flush + * the whole thing. + */ + V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0); + V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0); +} + +/* Sets invariant state for the HW. */ +static void +v3d_init_hw_state(struct v3d_dev *v3d) +{ + v3d_init_core(v3d, 0); +} + +static void +v3d_idle_axi(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ); + + if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) & + (V3D_GMP_STATUS_RD_COUNT_MASK | + V3D_GMP_STATUS_WR_COUNT_MASK | + V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) { + DRM_ERROR("Failed to wait for safe GMP shutdown\n"); + } +} + +static void +v3d_idle_gca(struct v3d_dev *v3d) +{ + if (v3d->ver >= 41) + return; + + V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN); + + if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) & + V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) == + V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) { + DRM_ERROR("Failed to wait for safe GCA shutdown\n"); + } +} + +static void +v3d_reset_v3d(struct v3d_dev *v3d) +{ + int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION); + + if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) { + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, + V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0); + + /* GFXH-1383: The SW_INIT may cause a stray write to address 0 + * of the unit, so reset it to its power-on value here. 
+ */ + V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK); + } else { + WARN_ON_ONCE(V3D_GET_FIELD(version, + V3D_TOP_GR_BRIDGE_MAJOR) != 7); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, + V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0); + } + + v3d_init_hw_state(v3d); +} + +void +v3d_reset(struct v3d_dev *v3d) +{ + struct drm_device *dev = &v3d->drm; + + DRM_ERROR("Resetting GPU.\n"); + trace_v3d_reset_begin(dev); + + /* XXX: only needed for safe powerdown, not reset. */ + if (false) + v3d_idle_axi(v3d, 0); + + v3d_idle_gca(v3d); + v3d_reset_v3d(v3d); + + v3d_mmu_set_page_table(v3d); + v3d_irq_reset(v3d); + + trace_v3d_reset_end(dev); +} + +static void +v3d_flush_l3(struct v3d_dev *v3d) +{ + if (v3d->ver < 41) { + u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL); + + V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, + gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH); + + if (v3d->ver < 33) { + V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, + gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH); + } + } +} + +/* Invalidates the (read-only) L2 cache. */ +static void +v3d_invalidate_l2(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_L2CACTL, + V3D_L2CACTL_L2CCLR | + V3D_L2CACTL_L2CENA); +} + +static void +v3d_invalidate_l1td(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); + } +} + +/* Invalidates texture L2 cachelines */ +static void +v3d_flush_l2t(struct v3d_dev *v3d, int core) +{ + v3d_invalidate_l1td(v3d, core); + + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, + V3D_L2TCACTL_L2TFLS | + V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM)); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L2T flush\n"); + } +} + +/* Invalidates the slice caches. These are read-only caches. */ +static void +v3d_invalidate_slices(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_SLCACTL, + V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC)); +} + +/* Invalidates texture L2 cachelines */ +static void +v3d_invalidate_l2t(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, + V3D_CTL_L2TCACTL, + V3D_L2TCACTL_L2TFLS | + V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM)); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L2T invalidate\n"); + } +} + +void +v3d_invalidate_caches(struct v3d_dev *v3d) +{ + v3d_flush_l3(v3d); + + v3d_invalidate_l2(v3d, 0); + v3d_invalidate_slices(v3d, 0); + v3d_flush_l2t(v3d, 0); +} + +void +v3d_flush_caches(struct v3d_dev *v3d) +{ + v3d_invalidate_l1td(v3d, 0); + v3d_invalidate_l2t(v3d, 0); +} + +static void +v3d_attach_object_fences(struct v3d_exec_info *exec) +{ + struct dma_fence *out_fence = &exec->render.base.s_fence->finished; + struct v3d_bo *bo; + int i; + + for (i = 0; i < exec->bo_count; i++) { + bo = to_v3d_bo(&exec->bo[i]->base); + + /* XXX: Use shared fences for read-only objects. 
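On the XXX above: the reservation-locking path already calls reservation_object_reserve_shared() for every BO, so once per-BO read/write information exists, the attach loop could distinguish readers from writers. A hedged sketch with a hypothetical bo_is_written() predicate (not part of this commit):

        for (i = 0; i < exec->bo_count; i++) {
                bo = to_v3d_bo(&exec->bo[i]->base);

                if (bo_is_written(exec, i))     /* hypothetical predicate */
                        reservation_object_add_excl_fence(bo->resv, out_fence);
                else
                        reservation_object_add_shared_fence(bo->resv, out_fence);
        }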
*/ + reservation_object_add_excl_fence(bo->resv, out_fence); + } +} + +static void +v3d_unlock_bo_reservations(struct drm_device *dev, + struct v3d_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int i; + + for (i = 0; i < exec->bo_count; i++) { + struct v3d_bo *bo = to_v3d_bo(&exec->bo[i]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + ww_acquire_fini(acquire_ctx); +} + +/* Takes the reservation lock on all the BOs being referenced, so that + * at queue submit time we can update the reservations. + * + * We don't lock the RCL the tile alloc/state BOs, or overflow memory + * (all of which are on exec->unref_list). They're entirely private + * to v3d, so we don't attach dma-buf fences to them. + */ +static int +v3d_lock_bo_reservations(struct drm_device *dev, + struct v3d_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int contended_lock = -1; + int i, ret; + struct v3d_bo *bo; + + ww_acquire_init(acquire_ctx, &reservation_ww_class); + +retry: + if (contended_lock != -1) { + bo = to_v3d_bo(&exec->bo[contended_lock]->base); + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + acquire_ctx); + if (ret) { + ww_acquire_done(acquire_ctx); + return ret; + } + } + + for (i = 0; i < exec->bo_count; i++) { + if (i == contended_lock) + continue; + + bo = to_v3d_bo(&exec->bo[i]->base); + + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); + if (ret) { + int j; + + for (j = 0; j < i; j++) { + bo = to_v3d_bo(&exec->bo[j]->base); + ww_mutex_unlock(&bo->resv->lock); + } + + if (contended_lock != -1 && contended_lock >= i) { + bo = to_v3d_bo(&exec->bo[contended_lock]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + if (ret == -EDEADLK) { + contended_lock = i; + goto retry; + } + + ww_acquire_done(acquire_ctx); + return ret; + } + } + + ww_acquire_done(acquire_ctx); + + /* Reserve space for our shared (read-only) fence references, + * before we commit the CL to the hardware. + */ + for (i = 0; i < exec->bo_count; i++) { + bo = to_v3d_bo(&exec->bo[i]->base); + + ret = reservation_object_reserve_shared(bo->resv); + if (ret) { + v3d_unlock_bo_reservations(dev, exec, acquire_ctx); + return ret; + } + } + + return 0; +} + +/** + * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd + * @exec: V3D job being set up + * + * The command validator needs to reference BOs by their index within + * the submitted job's BO list. This does the validation of the job's + * BO list and reference counting for the lifetime of the job. + * + * Note that this function doesn't need to unreference the BOs on + * failure, because that will happen at v3d_exec_cleanup() time. + */ +static int +v3d_cl_lookup_bos(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_v3d_submit_cl *args, + struct v3d_exec_info *exec) +{ + u32 *handles; + int ret = 0; + int i; + + exec->bo_count = args->bo_handle_count; + + if (!exec->bo_count) { + /* See comment on bo_index for why we have to check + * this. 
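A summary of the reservation-locking dance in v3d_lock_bo_reservations() above, since the control flow is easy to lose in the retry path:

/* ww_acquire_init(ctx)
 * retry:
 *   if a previous pass hit -EDEADLK, slow-lock that contended BO first
 *   lock every other BO with ww_mutex_lock_interruptible();
 *     on -EDEADLK: unlock everything taken so far, remember the
 *     contended index, and goto retry
 * ww_acquire_done(ctx)
 *
 * The ww_mutex class guarantees that two submitters locking
 * overlapping BO sets in different orders cannot deadlock: one of
 * them backs off and retries in the other's order.
 */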
+ */ + DRM_DEBUG("Rendering requires BOs\n"); + return -EINVAL; + } + + exec->bo = kvmalloc_array(exec->bo_count, + sizeof(struct drm_gem_cma_object *), + GFP_KERNEL | __GFP_ZERO); + if (!exec->bo) { + DRM_DEBUG("Failed to allocate validated BO pointers\n"); + return -ENOMEM; + } + + handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL); + if (!handles) { + ret = -ENOMEM; + DRM_DEBUG("Failed to allocate incoming GEM handles\n"); + goto fail; + } + + if (copy_from_user(handles, + (void __user *)(uintptr_t)args->bo_handles, + exec->bo_count * sizeof(u32))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy in GEM handles\n"); + goto fail; + } + + spin_lock(&file_priv->table_lock); + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_object *bo = idr_find(&file_priv->object_idr, + handles[i]); + if (!bo) { + DRM_DEBUG("Failed to look up GEM BO %d: %d\n", + i, handles[i]); + ret = -ENOENT; + spin_unlock(&file_priv->table_lock); + goto fail; + } + drm_gem_object_get(bo); + exec->bo[i] = to_v3d_bo(bo); + } + spin_unlock(&file_priv->table_lock); + +fail: + kvfree(handles); + return ret; +} + +static void +v3d_exec_cleanup(struct kref *ref) +{ + struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info, + refcount); + struct v3d_dev *v3d = exec->v3d; + unsigned int i; + struct v3d_bo *bo, *save; + + dma_fence_put(exec->bin.in_fence); + dma_fence_put(exec->render.in_fence); + + dma_fence_put(exec->bin.done_fence); + dma_fence_put(exec->render.done_fence); + + dma_fence_put(exec->bin_done_fence); + + for (i = 0; i < exec->bo_count; i++) + drm_gem_object_put_unlocked(&exec->bo[i]->base); + kvfree(exec->bo); + + list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { + drm_gem_object_put_unlocked(&bo->base); + } + + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + + kfree(exec); +} + +void v3d_exec_put(struct v3d_exec_info *exec) +{ + kref_put(&exec->refcount, v3d_exec_cleanup); +} + +int +v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + int ret; + struct drm_v3d_wait_bo *args = data; + struct drm_gem_object *gem_obj; + struct v3d_bo *bo; + ktime_t start = ktime_get(); + u64 delta_ns; + unsigned long timeout_jiffies = + nsecs_to_jiffies_timeout(args->timeout_ns); + + if (args->pad != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -EINVAL; + } + bo = to_v3d_bo(gem_obj); + + ret = reservation_object_wait_timeout_rcu(bo->resv, + true, true, + timeout_jiffies); + + if (ret == 0) + ret = -ETIME; + else if (ret > 0) + ret = 0; + + /* Decrement the user's timeout, in case we got interrupted + * such that the ioctl will be restarted. + */ + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); + if (delta_ns < args->timeout_ns) + args->timeout_ns -= delta_ns; + else + args->timeout_ns = 0; + + /* Asked to wait beyond the jiffie/scheduler precision? */ + if (ret == -ETIME && args->timeout_ns) + ret = -EAGAIN; + + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} + +/** + * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * This is the main entrypoint for userspace to submit a 3D frame to + * the GPU. 
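+ *
+ * Very roughly, the steps below are (all helpers from this file):
+ *
+ *   v3d_cl_lookup_bos()           resolve the BO handle list
+ *   v3d_lock_bo_reservations()    take each BO's reservation ww-mutex
+ *   drm_sched_job_init() and
+ *   drm_sched_entity_push_job()   queue the bin, then the render job
+ *   v3d_attach_object_fences()    publish the render job's done fence
+ *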
Userspace provides the binner command list (if + * applicable), and the kernel sets up the render command list to draw + * to the framebuffer described in the ioctl, using the command lists + * that the 3D engine's binner will produce. + */ +int +v3d_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_submit_cl *args = data; + struct v3d_exec_info *exec; + struct ww_acquire_ctx acquire_ctx; + struct drm_syncobj *sync_out; + int ret = 0; + + if (args->pad != 0) { + DRM_INFO("pad must be zero: %d\n", args->pad); + return -EINVAL; + } + + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); + if (!exec) + return -ENOMEM; + + ret = pm_runtime_get_sync(v3d->dev); + if (ret < 0) { + kfree(exec); + return ret; + } + + kref_init(&exec->refcount); + + ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl, + &exec->bin.in_fence); + if (ret == -EINVAL) + goto fail; + + ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl, + &exec->render.in_fence); + if (ret == -EINVAL) + goto fail; + + exec->qma = args->qma; + exec->qms = args->qms; + exec->qts = args->qts; + exec->bin.exec = exec; + exec->bin.start = args->bcl_start; + exec->bin.end = args->bcl_end; + exec->render.exec = exec; + exec->render.start = args->rcl_start; + exec->render.end = args->rcl_end; + exec->v3d = v3d; + INIT_LIST_HEAD(&exec->unref_list); + + ret = v3d_cl_lookup_bos(dev, file_priv, args, exec); + if (ret) + goto fail; + + ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx); + if (ret) + goto fail; + + if (exec->bin.start != exec->bin.end) { + ret = drm_sched_job_init(&exec->bin.base, + &v3d->queue[V3D_BIN].sched, + &v3d_priv->sched_entity[V3D_BIN], + v3d_priv); + if (ret) + goto fail_unreserve; + + exec->bin_done_fence = + dma_fence_get(&exec->bin.base.s_fence->finished); + + kref_get(&exec->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&exec->bin.base, + &v3d_priv->sched_entity[V3D_BIN]); + } + + ret = drm_sched_job_init(&exec->render.base, + &v3d->queue[V3D_RENDER].sched, + &v3d_priv->sched_entity[V3D_RENDER], + v3d_priv); + if (ret) + goto fail_unreserve; + + kref_get(&exec->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&exec->render.base, + &v3d_priv->sched_entity[V3D_RENDER]); + + v3d_attach_object_fences(exec); + + v3d_unlock_bo_reservations(dev, exec, &acquire_ctx); + + /* Update the return sync object for the */ + sync_out = drm_syncobj_find(file_priv, args->out_sync); + if (sync_out) { + drm_syncobj_replace_fence(sync_out, + &exec->render.base.s_fence->finished); + drm_syncobj_put(sync_out); + } + + v3d_exec_put(exec); + + return 0; + +fail_unreserve: + v3d_unlock_bo_reservations(dev, exec, &acquire_ctx); +fail: + v3d_exec_put(exec); + + return ret; +} + +int +v3d_gem_init(struct drm_device *dev) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + u32 pt_size = 4096 * 1024; + int ret, i; + + for (i = 0; i < V3D_MAX_QUEUES; i++) + v3d->queue[i].fence_context = dma_fence_context_alloc(1); + + spin_lock_init(&v3d->mm_lock); + spin_lock_init(&v3d->job_lock); + mutex_init(&v3d->bo_lock); + mutex_init(&v3d->reset_lock); + + /* Note: We don't allocate address 0. Various bits of HW + * treat 0 as special, such as the occlusion query counters + * where 0 means "disabled". 
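+	 *
+	 * The sizing works out as follows (4KB pages, 4-byte PTEs):
+	 *
+	 *   pt_size / sizeof(u32)  =  4MB / 4   =  1M page-table entries
+	 *   1M entries * 4KB/page  =  4GB of V3D address space
+	 *
+	 * so the allocator below hands out page numbers 1..(1M - 1).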
+ */ + drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1); + + v3d->pt = dma_alloc_wc(v3d->dev, pt_size, + &v3d->pt_paddr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!v3d->pt) { + drm_mm_takedown(&v3d->mm); + dev_err(v3d->dev, + "Failed to allocate page tables. " + "Please ensure you have CMA enabled.\n"); + return -ENOMEM; + } + + v3d_init_hw_state(v3d); + v3d_mmu_set_page_table(v3d); + + ret = v3d_sched_init(v3d); + if (ret) { + drm_mm_takedown(&v3d->mm); + dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, + v3d->pt_paddr); + } + + return 0; +} + +void +v3d_gem_destroy(struct drm_device *dev) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + enum v3d_queue q; + + v3d_sched_fini(v3d); + + /* Waiting for exec to finish would need to be done before + * unregistering V3D. + */ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + WARN_ON(v3d->queue[q].emit_seqno != + v3d->queue[q].finished_seqno); + } + + drm_mm_takedown(&v3d->mm); + + dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr); +} diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c new file mode 100644 index 000000000000..77e1fa046c10 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +/** + * DOC: Interrupt management for the V3D engine + * + * When we take a binning or rendering flush done interrupt, we need + * to signal the fence for that job so that the scheduler can queue up + * the next one and unblock any waiters. + * + * When we take the binner out of memory interrupt, we need to + * allocate some new memory and pass it to the binner so that the + * current job can make progress. + */ + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \ + V3D_INT_FLDONE | \ + V3D_INT_FRDONE | \ + V3D_INT_GMPV)) + +#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \ + V3D_HUB_INT_MMU_PTI | \ + V3D_HUB_INT_MMU_CAP)) + +static void +v3d_overflow_mem_work(struct work_struct *work) +{ + struct v3d_dev *v3d = + container_of(work, struct v3d_dev, overflow_mem_work); + struct drm_device *dev = &v3d->drm; + struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024); + unsigned long irqflags; + + if (IS_ERR(bo)) { + DRM_ERROR("Couldn't allocate binner overflow mem\n"); + return; + } + + /* We lost a race, and our work task came in after the bin job + * completed and exited. This can happen because the HW + * signals OOM before it's fully OOM, so the binner might just + * barely complete. + * + * If we lose the race and our work task comes in after a new + * bin job got scheduled, that's fine. We'll just give them + * some binner pool anyway. + */ + spin_lock_irqsave(&v3d->job_lock, irqflags); + if (!v3d->bin_job) { + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + goto out; + } + + drm_gem_object_get(&bo->base); + list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list); + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + + V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT); + V3D_CORE_WRITE(0, V3D_PTB_BPOS, bo->base.size); + +out: + drm_gem_object_put_unlocked(&bo->base); +} + +static irqreturn_t +v3d_irq(int irq, void *arg) +{ + struct v3d_dev *v3d = arg; + u32 intsts; + irqreturn_t status = IRQ_NONE; + + intsts = V3D_CORE_READ(0, V3D_CTL_INT_STS); + + /* Acknowledge the interrupts we're handling here. 
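+	 *
+	 * This is the usual snapshot-and-re-arm sequence:
+	 *
+	 *   intsts = V3D_CORE_READ(0, V3D_CTL_INT_STS);   (snapshot)
+	 *   V3D_CORE_WRITE(0, V3D_CTL_INT_CLR, intsts);   (re-arm)
+	 *
+	 * An event that fires again while we work through the snapshot
+	 * latches in INT_STS and re-raises the interrupt instead of
+	 * being lost.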
*/ + V3D_CORE_WRITE(0, V3D_CTL_INT_CLR, intsts); + + if (intsts & V3D_INT_OUTOMEM) { + /* Note that the OOM status is edge signaled, so the + * interrupt won't happen again until the we actually + * add more memory. + */ + schedule_work(&v3d->overflow_mem_work); + status = IRQ_HANDLED; + } + + if (intsts & V3D_INT_FLDONE) { + v3d->queue[V3D_BIN].finished_seqno++; + dma_fence_signal(v3d->bin_job->bin.done_fence); + status = IRQ_HANDLED; + } + + if (intsts & V3D_INT_FRDONE) { + v3d->queue[V3D_RENDER].finished_seqno++; + dma_fence_signal(v3d->render_job->render.done_fence); + + status = IRQ_HANDLED; + } + + /* We shouldn't be triggering these if we have GMP in + * always-allowed mode. + */ + if (intsts & V3D_INT_GMPV) + dev_err(v3d->dev, "GMP violation\n"); + + return status; +} + +static irqreturn_t +v3d_hub_irq(int irq, void *arg) +{ + struct v3d_dev *v3d = arg; + u32 intsts; + irqreturn_t status = IRQ_NONE; + + intsts = V3D_READ(V3D_HUB_INT_STS); + + /* Acknowledge the interrupts we're handling here. */ + V3D_WRITE(V3D_HUB_INT_CLR, intsts); + + if (intsts & (V3D_HUB_INT_MMU_WRV | + V3D_HUB_INT_MMU_PTI | + V3D_HUB_INT_MMU_CAP)) { + u32 axi_id = V3D_READ(V3D_MMU_VIO_ID); + u64 vio_addr = (u64)V3D_READ(V3D_MMU_VIO_ADDR) << 8; + + dev_err(v3d->dev, "MMU error from client %d at 0x%08llx%s%s%s\n", + axi_id, (long long)vio_addr, + ((intsts & V3D_HUB_INT_MMU_WRV) ? + ", write violation" : ""), + ((intsts & V3D_HUB_INT_MMU_PTI) ? + ", pte invalid" : ""), + ((intsts & V3D_HUB_INT_MMU_CAP) ? + ", cap exceeded" : "")); + status = IRQ_HANDLED; + } + + return status; +} + +void +v3d_irq_init(struct v3d_dev *v3d) +{ + int ret, core; + + INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); + + /* Clear any pending interrupts someone might have left around + * for us. + */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS); + + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0), + v3d_hub_irq, IRQF_SHARED, + "v3d_hub", v3d); + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 1), + v3d_irq, IRQF_SHARED, + "v3d_core0", v3d); + if (ret) + dev_err(v3d->dev, "IRQ setup failed: %d\n", ret); + + v3d_irq_enable(v3d); +} + +void +v3d_irq_enable(struct v3d_dev *v3d) +{ + int core; + + /* Enable our set of interrupts, masking out any others. */ + for (core = 0; core < v3d->cores; core++) { + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS); + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS); + } + + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS); + V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS); +} + +void +v3d_irq_disable(struct v3d_dev *v3d) +{ + int core; + + /* Disable all interrupts. */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + + /* Clear any pending interrupts we might have left. */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS); + + cancel_work_sync(&v3d->overflow_mem_work); +} + +/** Reinitializes interrupt registers when a GPU reset is performed. 
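+ *
+ * For reference, the reset path in v3d_gem.c runs, in order:
+ *
+ *   v3d_idle_gca(v3d);
+ *   v3d_reset_v3d(v3d);
+ *   v3d_mmu_set_page_table(v3d);
+ *   v3d_irq_reset(v3d);
+ *
+ * so the freshly reset core has its interrupt masks restored before
+ * any new job can be pushed to it.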
*/ +void v3d_irq_reset(struct v3d_dev *v3d) +{ + v3d_irq_enable(v3d); +} diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c new file mode 100644 index 000000000000..b00f97c31b70 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +/** + * DOC: Broadcom V3D MMU + * + * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has + * a single level of page tables for the V3D's 4GB address space to + * map to AXI bus addresses, thus it could need up to 4MB of + * physically contiguous memory to store the PTEs. + * + * Because the 4MB of contiguous memory for page tables is precious, + * and switching between them is expensive, we load all BOs into the + * same 4GB address space. + * + * To protect clients from each other, we should use the GMP to + * quickly mask out (at 128kb granularity) what pages are available to + * each client. This is not yet implemented. + */ + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define V3D_MMU_PAGE_SHIFT 12 + +/* Note: All PTEs for the 1MB superpage must be filled with the + * superpage bit set. + */ +#define V3D_PTE_SUPERPAGE BIT(31) +#define V3D_PTE_WRITEABLE BIT(29) +#define V3D_PTE_VALID BIT(28) + +static int v3d_mmu_flush_all(struct v3d_dev *v3d) +{ + int ret; + + /* Make sure that another flush isn't already running when we + * start this one. + */ + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & + V3D_MMU_CTL_TLB_CLEARING), 100); + if (ret) + dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n"); + + V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) | + V3D_MMU_CTL_TLB_CLEAR); + + V3D_WRITE(V3D_MMUC_CONTROL, + V3D_MMUC_CONTROL_FLUSH | + V3D_MMUC_CONTROL_ENABLE); + + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & + V3D_MMU_CTL_TLB_CLEARING), 100); + if (ret) { + dev_err(v3d->dev, "TLB clear wait idle failed\n"); + return ret; + } + + ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) & + V3D_MMUC_CONTROL_FLUSHING), 100); + if (ret) + dev_err(v3d->dev, "MMUC flush wait idle failed\n"); + + return ret; +} + +int v3d_mmu_set_page_table(struct v3d_dev *v3d) +{ + V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT); + V3D_WRITE(V3D_MMU_CTL, + V3D_MMU_CTL_ENABLE | + V3D_MMU_CTL_PT_INVALID | + V3D_MMU_CTL_PT_INVALID_ABORT | + V3D_MMU_CTL_WRITE_VIOLATION_ABORT | + V3D_MMU_CTL_CAP_EXCEEDED_ABORT); + V3D_WRITE(V3D_MMU_ILLEGAL_ADDR, + (v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) | + V3D_MMU_ILLEGAL_ADDR_ENABLE); + V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE); + + return v3d_mmu_flush_all(v3d); +} + +void v3d_mmu_insert_ptes(struct v3d_bo *bo) +{ + struct v3d_dev *v3d = to_v3d_dev(bo->base.dev); + u32 page = bo->node.start; + u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID; + unsigned int count; + struct scatterlist *sgl; + + for_each_sg(bo->sgt->sgl, sgl, bo->sgt->nents, count) { + u32 page_address = sg_dma_address(sgl) >> V3D_MMU_PAGE_SHIFT; + u32 pte = page_prot | page_address; + u32 i; + + BUG_ON(page_address + (sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT) >= + BIT(24)); + + for (i = 0; i < sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT; i++) + v3d->pt[page++] = pte + i; + } + + WARN_ON_ONCE(page - bo->node.start != + bo->base.size >> V3D_MMU_PAGE_SHIFT); + + if (v3d_mmu_flush_all(v3d)) + dev_err(v3d->dev, "MMU flush timeout\n"); +} + +void v3d_mmu_remove_ptes(struct v3d_bo *bo) +{ + struct v3d_dev *v3d = to_v3d_dev(bo->base.dev); + u32 npages = bo->base.size >> V3D_MMU_PAGE_SHIFT; + u32 page; + + for (page = bo->node.start; page < 
bo->node.start + npages; page++) + v3d->pt[page] = 0; + + if (v3d_mmu_flush_all(v3d)) + dev_err(v3d->dev, "MMU flush timeout\n"); +} diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h new file mode 100644 index 000000000000..fc13282dfc2f --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +#ifndef V3D_REGS_H +#define V3D_REGS_H + +#include <linux/bitops.h> + +#define V3D_MASK(high, low) ((u32)GENMASK(high, low)) +/* Using the GNU statement expression extension */ +#define V3D_SET_FIELD(value, field) \ + ({ \ + u32 fieldval = (value) << field##_SHIFT; \ + WARN_ON((fieldval & ~field##_MASK) != 0); \ + fieldval & field##_MASK; \ + }) + +#define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ + field##_SHIFT) + +/* Hub registers for shared hardware between V3D cores. */ + +#define V3D_HUB_AXICFG 0x00000 +# define V3D_HUB_AXICFG_MAX_LEN_MASK V3D_MASK(3, 0) +# define V3D_HUB_AXICFG_MAX_LEN_SHIFT 0 +#define V3D_HUB_UIFCFG 0x00004 +#define V3D_HUB_IDENT0 0x00008 + +#define V3D_HUB_IDENT1 0x0000c +# define V3D_HUB_IDENT1_WITH_MSO BIT(19) +# define V3D_HUB_IDENT1_WITH_TSY BIT(18) +# define V3D_HUB_IDENT1_WITH_TFU BIT(17) +# define V3D_HUB_IDENT1_WITH_L3C BIT(16) +# define V3D_HUB_IDENT1_NHOSTS_MASK V3D_MASK(15, 12) +# define V3D_HUB_IDENT1_NHOSTS_SHIFT 12 +# define V3D_HUB_IDENT1_NCORES_MASK V3D_MASK(11, 8) +# define V3D_HUB_IDENT1_NCORES_SHIFT 8 +# define V3D_HUB_IDENT1_REV_MASK V3D_MASK(7, 4) +# define V3D_HUB_IDENT1_REV_SHIFT 4 +# define V3D_HUB_IDENT1_TVER_MASK V3D_MASK(3, 0) +# define V3D_HUB_IDENT1_TVER_SHIFT 0 + +#define V3D_HUB_IDENT2 0x00010 +# define V3D_HUB_IDENT2_WITH_MMU BIT(8) +# define V3D_HUB_IDENT2_L3C_NKB_MASK V3D_MASK(7, 0) +# define V3D_HUB_IDENT2_L3C_NKB_SHIFT 0 + +#define V3D_HUB_IDENT3 0x00014 +# define V3D_HUB_IDENT3_IPREV_MASK V3D_MASK(15, 8) +# define V3D_HUB_IDENT3_IPREV_SHIFT 8 +# define V3D_HUB_IDENT3_IPIDX_MASK V3D_MASK(7, 0) +# define V3D_HUB_IDENT3_IPIDX_SHIFT 0 + +#define V3D_HUB_INT_STS 0x00050 +#define V3D_HUB_INT_SET 0x00054 +#define V3D_HUB_INT_CLR 0x00058 +#define V3D_HUB_INT_MSK_STS 0x0005c +#define V3D_HUB_INT_MSK_SET 0x00060 +#define V3D_HUB_INT_MSK_CLR 0x00064 +# define V3D_HUB_INT_MMU_WRV BIT(5) +# define V3D_HUB_INT_MMU_PTI BIT(4) +# define V3D_HUB_INT_MMU_CAP BIT(3) +# define V3D_HUB_INT_MSO BIT(2) +# define V3D_HUB_INT_TFUC BIT(1) +# define V3D_HUB_INT_TFUF BIT(0) + +#define V3D_GCA_CACHE_CTRL 0x0000c +# define V3D_GCA_CACHE_CTRL_FLUSH BIT(0) + +#define V3D_GCA_SAFE_SHUTDOWN 0x000b0 +# define V3D_GCA_SAFE_SHUTDOWN_EN BIT(0) + +#define V3D_GCA_SAFE_SHUTDOWN_ACK 0x000b4 +# define V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED 3 + +# define V3D_TOP_GR_BRIDGE_REVISION 0x00000 +# define V3D_TOP_GR_BRIDGE_MAJOR_MASK V3D_MASK(15, 8) +# define V3D_TOP_GR_BRIDGE_MAJOR_SHIFT 8 +# define V3D_TOP_GR_BRIDGE_MINOR_MASK V3D_MASK(7, 0) +# define V3D_TOP_GR_BRIDGE_MINOR_SHIFT 0 + +/* 7268 reset reg */ +# define V3D_TOP_GR_BRIDGE_SW_INIT_0 0x00008 +# define V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT BIT(0) +/* 7278 reset reg */ +# define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c +# define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0) + +/* Per-MMU registers. 
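+ *
+ * v3d_mmu.c (above) drives these in pairs: a TLB clear through
+ * V3D_MMU_CTL_TLB_CLEAR plus a cache flush through
+ * V3D_MMUC_CONTROL_FLUSH, each polled until its busy bit drops, e.g.
+ *
+ *   wait_for(!(V3D_READ(V3D_MMU_CTL) &
+ *              V3D_MMU_CTL_TLB_CLEARING), 100);
+ *
+ * (see v3d_mmu_flush_all()).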
*/ + +#define V3D_MMUC_CONTROL 0x01000 +# define V3D_MMUC_CONTROL_CLEAR BIT(3) +# define V3D_MMUC_CONTROL_FLUSHING BIT(2) +# define V3D_MMUC_CONTROL_FLUSH BIT(1) +# define V3D_MMUC_CONTROL_ENABLE BIT(0) + +#define V3D_MMU_CTL 0x01200 +# define V3D_MMU_CTL_CAP_EXCEEDED BIT(27) +# define V3D_MMU_CTL_CAP_EXCEEDED_ABORT BIT(26) +# define V3D_MMU_CTL_CAP_EXCEEDED_INT BIT(25) +# define V3D_MMU_CTL_CAP_EXCEEDED_EXCEPTION BIT(24) +# define V3D_MMU_CTL_PT_INVALID BIT(20) +# define V3D_MMU_CTL_PT_INVALID_ABORT BIT(19) +# define V3D_MMU_CTL_PT_INVALID_INT BIT(18) +# define V3D_MMU_CTL_PT_INVALID_EXCEPTION BIT(17) +# define V3D_MMU_CTL_WRITE_VIOLATION BIT(16) +# define V3D_MMU_CTL_WRITE_VIOLATION_ABORT BIT(11) +# define V3D_MMU_CTL_WRITE_VIOLATION_INT BIT(10) +# define V3D_MMU_CTL_WRITE_VIOLATION_EXCEPTION BIT(9) +# define V3D_MMU_CTL_TLB_CLEARING BIT(7) +# define V3D_MMU_CTL_TLB_STATS_CLEAR BIT(3) +# define V3D_MMU_CTL_TLB_CLEAR BIT(2) +# define V3D_MMU_CTL_TLB_STATS_ENABLE BIT(1) +# define V3D_MMU_CTL_ENABLE BIT(0) + +#define V3D_MMU_PT_PA_BASE 0x01204 +#define V3D_MMU_HIT 0x01208 +#define V3D_MMU_MISSES 0x0120c +#define V3D_MMU_STALLS 0x01210 + +#define V3D_MMU_ADDR_CAP 0x01214 +# define V3D_MMU_ADDR_CAP_ENABLE BIT(31) +# define V3D_MMU_ADDR_CAP_MPAGE_MASK V3D_MASK(11, 0) +# define V3D_MMU_ADDR_CAP_MPAGE_SHIFT 0 + +#define V3D_MMU_SHOOT_DOWN 0x01218 +# define V3D_MMU_SHOOT_DOWN_SHOOTING BIT(29) +# define V3D_MMU_SHOOT_DOWN_SHOOT BIT(28) +# define V3D_MMU_SHOOT_DOWN_PAGE_MASK V3D_MASK(27, 0) +# define V3D_MMU_SHOOT_DOWN_PAGE_SHIFT 0 + +#define V3D_MMU_BYPASS_START 0x0121c +#define V3D_MMU_BYPASS_END 0x01220 + +/* AXI ID of the access that faulted */ +#define V3D_MMU_VIO_ID 0x0122c + +/* Address for illegal PTEs to return */ +#define V3D_MMU_ILLEGAL_ADDR 0x01230 +# define V3D_MMU_ILLEGAL_ADDR_ENABLE BIT(31) + +/* Address that faulted */ +#define V3D_MMU_VIO_ADDR 0x01234 + +/* Per-V3D-core registers */ + +#define V3D_CTL_IDENT0 0x00000 +# define V3D_IDENT0_VER_MASK V3D_MASK(31, 24) +# define V3D_IDENT0_VER_SHIFT 24 + +#define V3D_CTL_IDENT1 0x00004 +/* Multiples of 1kb */ +# define V3D_IDENT1_VPM_SIZE_MASK V3D_MASK(31, 28) +# define V3D_IDENT1_VPM_SIZE_SHIFT 28 +# define V3D_IDENT1_NSEM_MASK V3D_MASK(23, 16) +# define V3D_IDENT1_NSEM_SHIFT 16 +# define V3D_IDENT1_NTMU_MASK V3D_MASK(15, 12) +# define V3D_IDENT1_NTMU_SHIFT 12 +# define V3D_IDENT1_QUPS_MASK V3D_MASK(11, 8) +# define V3D_IDENT1_QUPS_SHIFT 8 +# define V3D_IDENT1_NSLC_MASK V3D_MASK(7, 4) +# define V3D_IDENT1_NSLC_SHIFT 4 +# define V3D_IDENT1_REV_MASK V3D_MASK(3, 0) +# define V3D_IDENT1_REV_SHIFT 0 + +#define V3D_CTL_IDENT2 0x00008 +# define V3D_IDENT2_BCG_INT BIT(28) + +#define V3D_CTL_MISCCFG 0x00018 +# define V3D_MISCCFG_OVRTMUOUT BIT(0) + +#define V3D_CTL_L2CACTL 0x00020 +# define V3D_L2CACTL_L2CCLR BIT(2) +# define V3D_L2CACTL_L2CDIS BIT(1) +# define V3D_L2CACTL_L2CENA BIT(0) + +#define V3D_CTL_SLCACTL 0x00024 +# define V3D_SLCACTL_TVCCS_MASK V3D_MASK(27, 24) +# define V3D_SLCACTL_TVCCS_SHIFT 24 +# define V3D_SLCACTL_TDCCS_MASK V3D_MASK(19, 16) +# define V3D_SLCACTL_TDCCS_SHIFT 16 +# define V3D_SLCACTL_UCC_MASK V3D_MASK(11, 8) +# define V3D_SLCACTL_UCC_SHIFT 8 +# define V3D_SLCACTL_ICC_MASK V3D_MASK(3, 0) +# define V3D_SLCACTL_ICC_SHIFT 0 + +#define V3D_CTL_L2TCACTL 0x00030 +# define V3D_L2TCACTL_TMUWCF BIT(8) +# define V3D_L2TCACTL_L2T_NO_WM BIT(4) +# define V3D_L2TCACTL_FLM_FLUSH 0 +# define V3D_L2TCACTL_FLM_CLEAR 1 +# define V3D_L2TCACTL_FLM_CLEAN 2 +# define V3D_L2TCACTL_FLM_MASK V3D_MASK(2, 1) +# define V3D_L2TCACTL_FLM_SHIFT 1 +# 
define V3D_L2TCACTL_L2TFLS BIT(0) +#define V3D_CTL_L2TFLSTA 0x00034 +#define V3D_CTL_L2TFLEND 0x00038 + +#define V3D_CTL_INT_STS 0x00050 +#define V3D_CTL_INT_SET 0x00054 +#define V3D_CTL_INT_CLR 0x00058 +#define V3D_CTL_INT_MSK_STS 0x0005c +#define V3D_CTL_INT_MSK_SET 0x00060 +#define V3D_CTL_INT_MSK_CLR 0x00064 +# define V3D_INT_QPU_MASK V3D_MASK(27, 16) +# define V3D_INT_QPU_SHIFT 16 +# define V3D_INT_GMPV BIT(5) +# define V3D_INT_TRFB BIT(4) +# define V3D_INT_SPILLUSE BIT(3) +# define V3D_INT_OUTOMEM BIT(2) +# define V3D_INT_FLDONE BIT(1) +# define V3D_INT_FRDONE BIT(0) + +#define V3D_CLE_CT0CS 0x00100 +#define V3D_CLE_CT1CS 0x00104 +#define V3D_CLE_CTNCS(n) (V3D_CLE_CT0CS + 4 * n) +#define V3D_CLE_CT0EA 0x00108 +#define V3D_CLE_CT1EA 0x0010c +#define V3D_CLE_CTNEA(n) (V3D_CLE_CT0EA + 4 * n) +#define V3D_CLE_CT0CA 0x00110 +#define V3D_CLE_CT1CA 0x00114 +#define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n) +#define V3D_CLE_CT0RA 0x00118 +#define V3D_CLE_CT1RA 0x0011c +#define V3D_CLE_CT0LC 0x00120 +#define V3D_CLE_CT1LC 0x00124 +#define V3D_CLE_CT0PC 0x00128 +#define V3D_CLE_CT1PC 0x0012c +#define V3D_CLE_PCS 0x00130 +#define V3D_CLE_BFC 0x00134 +#define V3D_CLE_RFC 0x00138 +#define V3D_CLE_TFBC 0x0013c +#define V3D_CLE_TFIT 0x00140 +#define V3D_CLE_CT1CFG 0x00144 +#define V3D_CLE_CT1TILECT 0x00148 +#define V3D_CLE_CT1TSKIP 0x0014c +#define V3D_CLE_CT1PTCT 0x00150 +#define V3D_CLE_CT0SYNC 0x00154 +#define V3D_CLE_CT1SYNC 0x00158 +#define V3D_CLE_CT0QTS 0x0015c +# define V3D_CLE_CT0QTS_ENABLE BIT(1) +#define V3D_CLE_CT0QBA 0x00160 +#define V3D_CLE_CT1QBA 0x00164 +#define V3D_CLE_CTNQBA(n) (V3D_CLE_CT0QBA + 4 * n) +#define V3D_CLE_CT0QEA 0x00168 +#define V3D_CLE_CT1QEA 0x0016c +#define V3D_CLE_CTNQEA(n) (V3D_CLE_CT0QEA + 4 * n) +#define V3D_CLE_CT0QMA 0x00170 +#define V3D_CLE_CT0QMS 0x00174 +#define V3D_CLE_CT1QCFG 0x00178 +/* If set without ETPROC, entirely skip tiles with no primitives. */ +# define V3D_CLE_QCFG_ETFILT BIT(7) +/* If set with ETFILT, just write the clear color to tiles with no + * primitives. 
+ */ +# define V3D_CLE_QCFG_ETPROC BIT(6) +# define V3D_CLE_QCFG_ETSFLUSH BIT(1) +# define V3D_CLE_QCFG_MCDIS BIT(0) + +#define V3D_PTB_BPCA 0x00300 +#define V3D_PTB_BPCS 0x00304 +#define V3D_PTB_BPOA 0x00308 +#define V3D_PTB_BPOS 0x0030c + +#define V3D_PTB_BXCF 0x00310 +# define V3D_PTB_BXCF_RWORDERDISA BIT(1) +# define V3D_PTB_BXCF_CLIPDISA BIT(0) + +#define V3D_GMP_STATUS 0x00800 +# define V3D_GMP_STATUS_GMPRST BIT(31) +# define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24) +# define V3D_GMP_STATUS_WR_COUNT_SHIFT 24 +# define V3D_GMP_STATUS_RD_COUNT_MASK V3D_MASK(22, 16) +# define V3D_GMP_STATUS_RD_COUNT_SHIFT 16 +# define V3D_GMP_STATUS_WR_ACTIVE BIT(5) +# define V3D_GMP_STATUS_RD_ACTIVE BIT(4) +# define V3D_GMP_STATUS_CFG_BUSY BIT(3) +# define V3D_GMP_STATUS_CNTOVF BIT(2) +# define V3D_GMP_STATUS_INVPROT BIT(1) +# define V3D_GMP_STATUS_VIO BIT(0) + +#define V3D_GMP_CFG 0x00804 +# define V3D_GMP_CFG_LBURSTEN BIT(3) +# define V3D_GMP_CFG_PGCRSEN BIT() +# define V3D_GMP_CFG_STOP_REQ BIT(1) +# define V3D_GMP_CFG_PROT_ENABLE BIT(0) + +#define V3D_GMP_VIO_ADDR 0x00808 +#define V3D_GMP_VIO_TYPE 0x0080c +#define V3D_GMP_TABLE_ADDR 0x00810 +#define V3D_GMP_CLEAR_LOAD 0x00814 +#define V3D_GMP_PRESERVE_LOAD 0x00818 +#define V3D_GMP_VALID_LINES 0x00820 + +#endif /* V3D_REGS_H */ diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c new file mode 100644 index 000000000000..b07bece9417d --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2018 Broadcom */ + +/** + * DOC: Broadcom V3D scheduling + * + * The shared DRM GPU scheduler is used to coordinate submitting jobs + * to the hardware. Each DRM fd (roughly a client process) gets its + * own scheduler entity, which will process jobs in order. The GPU + * scheduler will round-robin between clients to submit the next job. + * + * For simplicity, and in order to keep latency low for interactive + * jobs when bulk background jobs are queued up, we submit a new job + * to the HW only when it has completed the last one, instead of + * filling up the CT[01]Q FIFOs with jobs. Similarly, we use + * v3d_job_dependency() to manage the dependency between bin and + * render, instead of having the clients submit jobs with using the + * HW's semaphores to interlock between them. + */ + +#include <linux/kthread.h> + +#include "v3d_drv.h" +#include "v3d_regs.h" +#include "v3d_trace.h" + +static struct v3d_job * +to_v3d_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct v3d_job, base); +} + +static void +v3d_job_free(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + + v3d_exec_put(job->exec); +} + +/** + * Returns the fences that the bin job depends on, one by one. + * v3d_job_run() won't be called until all of them have been signaled. + */ +static struct dma_fence * +v3d_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; + struct dma_fence *fence; + + fence = job->in_fence; + if (fence) { + job->in_fence = NULL; + return fence; + } + + if (q == V3D_RENDER) { + /* If we had a bin job, the render job definitely depends on + * it. We first have to wait for bin to be scheduled, so that + * its done_fence is created. 
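+		 *
+		 * Schematically, the ordering for one submission is:
+		 *
+		 *   in_sync_bcl --> bin job --> bin_done_fence --> render job
+		 *   in_sync_rcl -----------------------------------^
+		 *
+		 * bin_done_fence is consumed (set to NULL) the first
+		 * time it is handed back, so each dependency is
+		 * reported exactly once.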
+ */ + fence = exec->bin_done_fence; + if (fence) { + exec->bin_done_fence = NULL; + return fence; + } + } + + /* XXX: Wait on a fence for switching the GMP if necessary, + * and then do so. + */ + + return fence; +} + +static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; + struct v3d_dev *v3d = exec->v3d; + struct drm_device *dev = &v3d->drm; + struct dma_fence *fence; + unsigned long irqflags; + + if (unlikely(job->base.s_fence->finished.error)) + return NULL; + + /* Lock required around bin_job update vs + * v3d_overflow_mem_work(). + */ + spin_lock_irqsave(&v3d->job_lock, irqflags); + if (q == V3D_BIN) { + v3d->bin_job = job->exec; + + /* Clear out the overflow allocation, so we don't + * reuse the overflow attached to a previous job. + */ + V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); + } else { + v3d->render_job = job->exec; + } + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + + /* Can we avoid this flush when q==RENDER? We need to be + * careful of scheduling, though -- imagine job0 rendering to + * texture and job1 reading, and them being executed as bin0, + * bin1, render0, render1, so that render1's flush at bin time + * wasn't enough. + */ + v3d_invalidate_caches(v3d); + + fence = v3d_fence_create(v3d, q); + if (!fence) + return fence; + + if (job->done_fence) + dma_fence_put(job->done_fence); + job->done_fence = dma_fence_get(fence); + + trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno, + job->start, job->end); + + if (q == V3D_BIN) { + if (exec->qma) { + V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma); + V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms); + } + if (exec->qts) { + V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, + V3D_CLE_CT0QTS_ENABLE | + exec->qts); + } + } else { + /* XXX: Set the QCFG */ + } + + /* Set the current and end address of the control list. + * Writing the end register is what starts the job. + */ + V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start); + V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end); + + return fence; +} + +static void +v3d_job_timedout(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + struct v3d_dev *v3d = exec->v3d; + enum v3d_queue q; + + mutex_lock(&v3d->reset_lock); + + /* block scheduler */ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + struct drm_gpu_scheduler *sched = &v3d->queue[q].sched; + + kthread_park(sched->thread); + drm_sched_hw_job_reset(sched, (sched_job->sched == sched ? + sched_job : NULL)); + } + + /* get the GPU back into the init state */ + v3d_reset(v3d); + + /* Unblock schedulers and restart their jobs. 
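+	 *
+	 * Pairing with the park/reset block above, the per-queue
+	 * sequence is:
+	 *
+	 *   kthread_park(sched->thread);
+	 *   drm_sched_hw_job_reset(sched, bad_job);
+	 *       ... v3d_reset(v3d) ...
+	 *   drm_sched_job_recovery(sched);
+	 *   kthread_unpark(sched->thread);
+	 *
+	 * Recovery resubmits the jobs whose hardware fences were
+	 * removed, now against the freshly reset core.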
*/ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + drm_sched_job_recovery(&v3d->queue[q].sched); + kthread_unpark(v3d->queue[q].sched.thread); + } + + mutex_unlock(&v3d->reset_lock); +} + +static const struct drm_sched_backend_ops v3d_sched_ops = { + .dependency = v3d_job_dependency, + .run_job = v3d_job_run, + .timedout_job = v3d_job_timedout, + .free_job = v3d_job_free +}; + +int +v3d_sched_init(struct v3d_dev *v3d) +{ + int hw_jobs_limit = 1; + int job_hang_limit = 0; + int hang_limit_ms = 500; + int ret; + + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, + &v3d_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_bin"); + if (ret) { + dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret); + return ret; + } + + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, + &v3d_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_render"); + if (ret) { + dev_err(v3d->dev, "Failed to create render scheduler: %d.", + ret); + drm_sched_fini(&v3d->queue[V3D_BIN].sched); + return ret; + } + + return 0; +} + +void +v3d_sched_fini(struct v3d_dev *v3d) +{ + enum v3d_queue q; + + for (q = 0; q < V3D_MAX_QUEUES; q++) + drm_sched_fini(&v3d->queue[q].sched); +} diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h new file mode 100644 index 000000000000..85dd351e1e09 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_trace.h @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +#if !defined(_V3D_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _V3D_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM v3d +#define TRACE_INCLUDE_FILE v3d_trace + +TRACE_EVENT(v3d_submit_cl, + TP_PROTO(struct drm_device *dev, bool is_render, + uint64_t seqno, + u32 ctnqba, u32 ctnqea), + TP_ARGS(dev, is_render, seqno, ctnqba, ctnqea), + + TP_STRUCT__entry( + __field(u32, dev) + __field(bool, is_render) + __field(u64, seqno) + __field(u32, ctnqba) + __field(u32, ctnqea) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->is_render = is_render; + __entry->seqno = seqno; + __entry->ctnqba = ctnqba; + __entry->ctnqea = ctnqea; + ), + + TP_printk("dev=%u, %s, seqno=%llu, 0x%08x..0x%08x", + __entry->dev, + __entry->is_render ? "RCL" : "BCL", + __entry->seqno, + __entry->ctnqba, + __entry->ctnqea) +); + +TRACE_EVENT(v3d_reset_begin, + TP_PROTO(struct drm_device *dev), + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(u32, dev) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + ), + + TP_printk("dev=%u", + __entry->dev) +); + +TRACE_EVENT(v3d_reset_end, + TP_PROTO(struct drm_device *dev), + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(u32, dev) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + ), + + TP_printk("dev=%u", + __entry->dev) +); + +#endif /* _V3D_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
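+
+/*
+ * Usage sketch: callers just invoke the generated helpers, as
+ * v3d_gem.c does around a reset:
+ *
+ *   trace_v3d_reset_begin(dev);
+ *   ...
+ *   trace_v3d_reset_end(dev);
+ *
+ * The tracepoint definitions themselves are emitted exactly once,
+ * from v3d_trace_points.c, which defines CREATE_TRACE_POINTS before
+ * including this header.
+ */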
+#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/v3d/v3d_trace_points.c b/drivers/gpu/drm/v3d/v3d_trace_points.c new file mode 100644 index 000000000000..482922d7c7e1 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_trace_points.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015 Broadcom */ + +#include "v3d_drv.h" + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "v3d_trace.h" +#endif diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index c61dff594195..c8650bbcbcb3 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -42,51 +42,18 @@ #include "vc4_drv.h" #include "vc4_regs.h" -struct vc4_crtc { - struct drm_crtc base; - const struct vc4_crtc_data *data; - void __iomem *regs; - - /* Timestamp at start of vblank irq - unaffected by lock delays. */ - ktime_t t_vblank; - - /* Which HVS channel we're using for our CRTC. */ - int channel; - - u8 lut_r[256]; - u8 lut_g[256]; - u8 lut_b[256]; - /* Size in pixels of the COB memory allocated to this CRTC. */ - u32 cob_size; - - struct drm_pending_vblank_event *event; -}; - struct vc4_crtc_state { struct drm_crtc_state base; /* Dlist area for this CRTC configuration. */ struct drm_mm_node mm; }; -static inline struct vc4_crtc * -to_vc4_crtc(struct drm_crtc *crtc) -{ - return (struct vc4_crtc *)crtc; -} - static inline struct vc4_crtc_state * to_vc4_crtc_state(struct drm_crtc_state *crtc_state) { return (struct vc4_crtc_state *)crtc_state; } -struct vc4_crtc_data { - /* Which channel of the HVS this pixelvalve sources from. */ - int hvs_channel; - - enum vc4_encoder_type encoder_types[4]; -}; - #define CRTC_WRITE(offset, val) writel(val, vc4_crtc->regs + (offset)) #define CRTC_READ(offset) readl(vc4_crtc->regs + (offset)) @@ -298,23 +265,21 @@ vc4_crtc_lut_load(struct drm_crtc *crtc) HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]); } -static int -vc4_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, - uint32_t size, - struct drm_modeset_acquire_ctx *ctx) +static void +vc4_crtc_update_gamma_lut(struct drm_crtc *crtc) { struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct drm_color_lut *lut = crtc->state->gamma_lut->data; + u32 length = drm_color_lut_size(crtc->state->gamma_lut); u32 i; - for (i = 0; i < size; i++) { - vc4_crtc->lut_r[i] = r[i] >> 8; - vc4_crtc->lut_g[i] = g[i] >> 8; - vc4_crtc->lut_b[i] = b[i] >> 8; + for (i = 0; i < length; i++) { + vc4_crtc->lut_r[i] = drm_color_lut_extract(lut[i].red, 8); + vc4_crtc->lut_g[i] = drm_color_lut_extract(lut[i].green, 8); + vc4_crtc->lut_b[i] = drm_color_lut_extract(lut[i].blue, 8); } vc4_crtc_lut_load(crtc); - - return 0; } static u32 vc4_get_fifo_full_level(u32 format) @@ -699,6 +664,22 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, if (crtc->state->active && old_state->active) vc4_crtc_update_dlist(crtc); + if (crtc->state->color_mgmt_changed) { + u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(vc4_crtc->channel)); + + if (crtc->state->gamma_lut) { + vc4_crtc_update_gamma_lut(crtc); + dispbkgndx |= SCALER_DISPBKGND_GAMMA; + } else { + /* Unsetting DISPBKGND_GAMMA skips the gamma lut step + * in hardware, which is the same as a linear lut that + * DRM expects us to use in absence of a user lut. 
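+			 *
+			 * (Concretely, drm_color_lut_extract() above
+			 * keeps the top byte with rounding, e.g.
+			 * drm_color_lut_extract(0x0000, 8) == 0x00 and
+			 * drm_color_lut_extract(0xffff, 8) == 0xff, so
+			 * loading an essentially linear 256-entry ramp
+			 * and bypassing the block come to the same
+			 * thing.)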
+ */ + dispbkgndx &= ~SCALER_DISPBKGND_GAMMA; + } + HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel), dispbkgndx); + } + if (debug_dump_regs) { DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); vc4_hvs_dump_state(dev); @@ -953,7 +934,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .reset = vc4_crtc_reset, .atomic_duplicate_state = vc4_crtc_duplicate_state, .atomic_destroy_state = vc4_crtc_destroy_state, - .gamma_set = vc4_crtc_gamma_set, + .gamma_set = drm_atomic_helper_legacy_gamma_set, .enable_vblank = vc4_enable_vblank, .disable_vblank = vc4_disable_vblank, }; @@ -1079,6 +1060,12 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) primary_plane->crtc = crtc; vc4_crtc->channel = vc4_crtc->data->hvs_channel; drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r)); + drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size); + + /* We support CTM, but only for one CRTC at a time. It's therefore + * implemented as private driver state in vc4_kms, not here. + */ + drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size); /* Set up some arbitrary number of planes. We're not limited * by a set number of physical registers, just the space in diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 7c95ed5c5cac..466d0a27b415 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -176,7 +176,8 @@ static struct drm_driver vc4_drm_driver = { DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_RENDER | - DRIVER_PRIME), + DRIVER_PRIME | + DRIVER_SYNCOBJ), .lastclose = drm_fb_helper_lastclose, .open = vc4_open, .postclose = vc4_close, @@ -319,8 +320,8 @@ dev_unref: static void vc4_drm_unbind(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct drm_device *drm = platform_get_drvdata(pdev); + struct drm_device *drm = dev_get_drvdata(dev); + struct vc4_dev *vc4 = to_vc4_dev(drm); drm_dev_unregister(drm); @@ -328,6 +329,8 @@ static void vc4_drm_unbind(struct device *dev) drm_mode_config_cleanup(drm); + drm_atomic_private_obj_fini(&vc4->ctm_manager); + drm_dev_unref(drm); } diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 1b4cd1fabf56..554a4e810d5b 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -10,6 +10,8 @@ #include <drm/drmP.h> #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_atomic.h> +#include <drm/drm_syncobj.h> #include "uapi/drm/vc4_drm.h" @@ -193,6 +195,9 @@ struct vc4_dev { } hangcheck; struct semaphore async_modeset; + + struct drm_modeset_lock ctm_state_lock; + struct drm_private_obj ctm_manager; }; static inline struct vc4_dev * @@ -392,6 +397,39 @@ to_vc4_encoder(struct drm_encoder *encoder) return container_of(encoder, struct vc4_encoder, base); } +struct vc4_crtc_data { + /* Which channel of the HVS this pixelvalve sources from. */ + int hvs_channel; + + enum vc4_encoder_type encoder_types[4]; +}; + +struct vc4_crtc { + struct drm_crtc base; + const struct vc4_crtc_data *data; + void __iomem *regs; + + /* Timestamp at start of vblank irq - unaffected by lock delays. */ + ktime_t t_vblank; + + /* Which HVS channel we're using for our CRTC. */ + int channel; + + u8 lut_r[256]; + u8 lut_g[256]; + u8 lut_b[256]; + /* Size in pixels of the COB memory allocated to this CRTC. 
*/ + u32 cob_size; + + struct drm_pending_vblank_event *event; +}; + +static inline struct vc4_crtc * +to_vc4_crtc(struct drm_crtc *crtc) +{ + return (struct vc4_crtc *)crtc; +} + #define V3D_READ(offset) readl(vc4->v3d->regs + offset) #define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset) #define HVS_READ(offset) readl(vc4->hvs->regs + offset) diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 94085f8bcd68..8aa897835118 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -753,6 +753,11 @@ static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps) (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) | (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_STOP : 0)); int ret; + bool ulps_currently_enabled = (DSI_PORT_READ(PHY_AFEC0) & + DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS)); + + if (ulps == ulps_currently_enabled) + return; DSI_PORT_WRITE(STAT, stat_ulps); DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 2107b0daf8ef..7910b9acedd6 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -27,6 +27,7 @@ #include <linux/device.h> #include <linux/io.h> #include <linux/sched/signal.h> +#include <linux/dma-fence-array.h> #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" @@ -655,7 +656,8 @@ retry: */ static int vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) + struct ww_acquire_ctx *acquire_ctx, + struct drm_syncobj *out_sync) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *renderjob; @@ -678,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, fence->seqno = exec->seqno; exec->fence = &fence->base; + if (out_sync) + drm_syncobj_replace_fence(out_sync, exec->fence); + vc4_update_bo_seqnos(exec, seqno); vc4_unlock_bo_reservations(dev, exec, acquire_ctx); @@ -1113,8 +1118,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; + struct drm_syncobj *out_sync = NULL; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; + struct dma_fence *in_fence; int ret = 0; if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | @@ -1126,7 +1133,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } if (args->pad2 != 0) { - DRM_DEBUG("->pad2 must be set to zero\n"); + DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2); return -EINVAL; } @@ -1164,6 +1171,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } } + if (args->in_sync) { + ret = drm_syncobj_find_fence(file_priv, args->in_sync, + &in_fence); + if (ret) + goto fail; + + /* When the fence (or fence array) is exclusively from our + * context we can skip the wait since jobs are executed in + * order of their submission through this ioctl and this can + * only have fences from a prior job. 
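+		 *
+		 * (dma_fence_match_context() also copes with fence
+		 * arrays: for an array it returns true only when every
+		 * member fence is from the given context, which is why
+		 * dma-fence-array.h is now included above.)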
+ */ + if (!dma_fence_match_context(in_fence, + vc4->dma_fence_context)) { + ret = dma_fence_wait(in_fence, true); + if (ret) { + dma_fence_put(in_fence); + goto fail; + } + } + + dma_fence_put(in_fence); + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) @@ -1181,12 +1211,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->out_sync) { + out_sync = drm_syncobj_find(file_priv, args->out_sync); + if (!out_sync) { + ret = -EINVAL; + goto fail; + } + + /* We replace the fence in out_sync in vc4_queue_submit since + * the render job could execute immediately after that call. + * If it finishes before our ioctl processing resumes the + * render job fence could already have been freed. + */ + } + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - ret = vc4_queue_submit(dev, exec, &acquire_ctx); + ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); + + /* The syncobj isn't part of the exec data and we need to free our + * reference even if job submission failed. + */ + if (out_sync) + drm_syncobj_put(out_sync); + if (ret) goto fail; diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 2b62fc5b8d85..5d8c749c9749 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -58,6 +58,10 @@ static const struct { HVS_REG(SCALER_DISPSTAT2), HVS_REG(SCALER_DISPBASE2), HVS_REG(SCALER_DISPALPHA2), + HVS_REG(SCALER_OLEDOFFS), + HVS_REG(SCALER_OLEDCOEF0), + HVS_REG(SCALER_OLEDCOEF1), + HVS_REG(SCALER_OLEDCOEF2), }; void vc4_hvs_dump_state(struct drm_device *dev) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index ba60153dddb5..8a411e5f8776 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -23,6 +23,117 @@ #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include "vc4_drv.h" +#include "vc4_regs.h" + +struct vc4_ctm_state { + struct drm_private_state base; + struct drm_color_ctm *ctm; + int fifo; +}; + +static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv) +{ + return container_of(priv, struct vc4_ctm_state, base); +} + +static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state, + struct drm_private_obj *manager) +{ + struct drm_device *dev = state->dev; + struct vc4_dev *vc4 = dev->dev_private; + struct drm_private_state *priv_state; + int ret; + + ret = drm_modeset_lock(&vc4->ctm_state_lock, state->acquire_ctx); + if (ret) + return ERR_PTR(ret); + + priv_state = drm_atomic_get_private_obj_state(state, manager); + if (IS_ERR(priv_state)) + return ERR_CAST(priv_state); + + return to_vc4_ctm_state(priv_state); +} + +static struct drm_private_state * +vc4_ctm_duplicate_state(struct drm_private_obj *obj) +{ + struct vc4_ctm_state *state; + + state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base); + + return &state->base; +} + +static void vc4_ctm_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(state); + + kfree(ctm_state); +} + +static const struct drm_private_state_funcs vc4_ctm_state_funcs = { + .atomic_duplicate_state = vc4_ctm_duplicate_state, + .atomic_destroy_state = vc4_ctm_destroy_state, +}; + +/* Converts a DRM S31.32 value to the HW S0.9 format. 
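+ *
+ * Worked example: 0.5 in S31.32 is 0x0000000080000000.  The sign bit
+ * (63) is clear and the integer bits [62:32] are zero, so we keep the
+ * top nine fraction bits: (0x80000000 >> 23) & 0x1ff = 0x100, i.e.
+ * 256/512 = 0.5 in S0.9.  Any magnitude with a non-zero integer part
+ * saturates to 0x1ff (511/512, just under 1.0).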
*/ +static u16 vc4_ctm_s31_32_to_s0_9(u64 in) +{ + u16 r; + + /* Sign bit. */ + r = in & BIT_ULL(63) ? BIT(9) : 0; + + if ((in & GENMASK_ULL(62, 32)) > 0) { + /* We have zero integer bits so we can only saturate here. */ + r |= GENMASK(8, 0); + } else { + /* Otherwise take the 9 most important fractional bits. */ + r |= (in >> 23) & GENMASK(8, 0); + } + + return r; +} + +static void +vc4_ctm_commit(struct vc4_dev *vc4, struct drm_atomic_state *state) +{ + struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(vc4->ctm_manager.state); + struct drm_color_ctm *ctm = ctm_state->ctm; + + if (ctm_state->fifo) { + HVS_WRITE(SCALER_OLEDCOEF2, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[0]), + SCALER_OLEDCOEF2_R_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[3]), + SCALER_OLEDCOEF2_R_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[6]), + SCALER_OLEDCOEF2_R_TO_B)); + HVS_WRITE(SCALER_OLEDCOEF1, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[1]), + SCALER_OLEDCOEF1_G_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[4]), + SCALER_OLEDCOEF1_G_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[7]), + SCALER_OLEDCOEF1_G_TO_B)); + HVS_WRITE(SCALER_OLEDCOEF0, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[2]), + SCALER_OLEDCOEF0_B_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[5]), + SCALER_OLEDCOEF0_B_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[8]), + SCALER_OLEDCOEF0_B_TO_B)); + } + + HVS_WRITE(SCALER_OLEDOFFS, + VC4_SET_FIELD(ctm_state->fifo, SCALER_OLEDOFFS_DISPFIFO)); +} static void vc4_atomic_complete_commit(struct drm_atomic_state *state) @@ -36,6 +147,8 @@ vc4_atomic_complete_commit(struct drm_atomic_state *state) drm_atomic_helper_commit_modeset_disables(dev, state); + vc4_ctm_commit(vc4, state); + drm_atomic_helper_commit_planes(dev, state, 0); drm_atomic_helper_commit_modeset_enables(dev, state); @@ -90,6 +203,26 @@ static int vc4_atomic_commit(struct drm_device *dev, struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; + if (state->async_update) { + ret = down_interruptible(&vc4->async_modeset); + if (ret) + return ret; + + ret = drm_atomic_helper_prepare_planes(dev, state); + if (ret) { + up(&vc4->async_modeset); + return ret; + } + + drm_atomic_helper_async_commit(dev, state); + + drm_atomic_helper_cleanup_planes(dev, state); + + up(&vc4->async_modeset); + + return 0; + } + ret = drm_atomic_helper_setup_commit(state, nonblock); if (ret) return ret; @@ -187,9 +320,89 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, return drm_gem_fb_create(dev, file_priv, mode_cmd); } +/* Our CTM has some peculiar limitations: we can only enable it for one CRTC + * at a time and the HW only supports S0.9 scalars. To account for the latter, + * we don't allow userland to set a CTM that we have no hope of approximating. + */ +static int +vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_ctm_state *ctm_state = NULL; + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_color_ctm *ctm; + int i; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + /* CTM is being disabled. 
*/ + if (!new_crtc_state->ctm && old_crtc_state->ctm) { + ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager); + if (IS_ERR(ctm_state)) + return PTR_ERR(ctm_state); + ctm_state->fifo = 0; + } + } + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (new_crtc_state->ctm == old_crtc_state->ctm) + continue; + + if (!ctm_state) { + ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager); + if (IS_ERR(ctm_state)) + return PTR_ERR(ctm_state); + } + + /* CTM is being enabled or the matrix changed. */ + if (new_crtc_state->ctm) { + /* fifo is 1-based since 0 disables CTM. */ + int fifo = to_vc4_crtc(crtc)->channel + 1; + + /* Check userland isn't trying to turn on CTM for more + * than one CRTC at a time. + */ + if (ctm_state->fifo && ctm_state->fifo != fifo) { + DRM_DEBUG_DRIVER("Too many CTM configured\n"); + return -EINVAL; + } + + /* Check we can approximate the specified CTM. + * We disallow scalars |c| > 1.0 since the HW has + * no integer bits. + */ + ctm = new_crtc_state->ctm->data; + for (i = 0; i < ARRAY_SIZE(ctm->matrix); i++) { + u64 val = ctm->matrix[i]; + + val &= ~BIT_ULL(63); + if (val > BIT_ULL(32)) + return -EINVAL; + } + + ctm_state->fifo = fifo; + ctm_state->ctm = ctm; + } + } + + return 0; +} + +static int +vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + int ret; + + ret = vc4_ctm_atomic_check(dev, state); + if (ret < 0) + return ret; + + return drm_atomic_helper_check(dev, state); +} + static const struct drm_mode_config_funcs vc4_mode_funcs = { .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = vc4_atomic_check, .atomic_commit = vc4_atomic_commit, .fb_create = vc4_fb_create, }; @@ -197,6 +410,7 @@ static const struct drm_mode_config_funcs vc4_mode_funcs = { int vc4_kms_load(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_ctm_state *ctm_state; int ret; sema_init(&vc4->async_modeset, 1); @@ -217,6 +431,14 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.async_page_flip = true; dev->mode_config.allow_fb_modifiers = true; + drm_modeset_lock_init(&vc4->ctm_state_lock); + + ctm_state = kzalloc(sizeof(*ctm_state), GFP_KERNEL); + if (!ctm_state) + return -ENOMEM; + drm_atomic_private_obj_init(&vc4->ctm_manager, &ctm_state->base, + &vc4_ctm_state_funcs); + drm_mode_config_reset(dev); if (dev->mode_config.num_connector) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 13dcaad06798..71d44c357d35 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -201,6 +201,7 @@ static void vc4_plane_reset(struct drm_plane *plane) return; plane->state = &vc4_state->base; + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; vc4_state->base.plane = plane; } @@ -467,6 +468,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); int num_planes = drm_format_num_planes(format->drm); + bool mix_plane_alpha; bool covers_screen; u32 scl0, scl1, pitch0; u32 lbm_size, tiling; @@ -552,7 +554,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 0: Image Positions and Alpha Value */ vc4_state->pos0_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, - VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) | + VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | 
VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); @@ -565,6 +567,13 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS1_SCL_HEIGHT)); } + /* Don't waste cycles mixing with plane alpha if the set alpha + * is opaque or there is no per-pixel alpha information. + * In any case we use the alpha property value as the fixed alpha. + */ + mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && + fb->format->has_alpha; + /* Position Word 2: Source Image Size, Alpha */ vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, @@ -572,6 +581,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | + (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | (fb->format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0) | VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) | VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT)); @@ -653,10 +663,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_state->crtc_w == state->crtc->mode.hdisplay && vc4_state->crtc_h == state->crtc->mode.vdisplay; /* Background fill might be necessary when the plane has per-pixel - * alpha content and blends from the background or does not cover - * the entire screen. + * alpha content or a non-opaque plane alpha and could blend from the + * background or does not cover the entire screen. */ - vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen; + vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || + state->alpha != DRM_BLEND_ALPHA_OPAQUE; return 0; } @@ -741,6 +752,57 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) vc4_state->dlist[vc4_state->ptr0_offset] = addr; } +static void vc4_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); + + if (plane->state->fb != state->fb) { + vc4_plane_async_set_fb(plane, state->fb); + drm_atomic_set_fb_for_plane(plane->state, state->fb); + } + + /* Set the cursor's position on the screen. This is the + * expected change from the drm_mode_cursor_universal() + * helper. + */ + plane->state->crtc_x = state->crtc_x; + plane->state->crtc_y = state->crtc_y; + + /* Allow changing the start position within the cursor BO, if + * that matters. + */ + plane->state->src_x = state->src_x; + plane->state->src_y = state->src_y; + + /* Update the display list based on the new crtc_x/y. */ + vc4_plane_atomic_check(plane, plane->state); + + /* Note that we can't just call vc4_plane_write_dlist() + * because that would smash the context data that the HVS is + * currently using. + */ + writel(vc4_state->dlist[vc4_state->pos0_offset], + &vc4_state->hw_dlist[vc4_state->pos0_offset]); + writel(vc4_state->dlist[vc4_state->pos2_offset], + &vc4_state->hw_dlist[vc4_state->pos2_offset]); + writel(vc4_state->dlist[vc4_state->ptr0_offset], + &vc4_state->hw_dlist[vc4_state->ptr0_offset]); +} + +static int vc4_plane_atomic_async_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + /* No configuring new scaling in the fast path. 
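+	 *
+	 * The async update above only rewrites the POS0, POS2 and PTR0
+	 * words of the current dlist entry in place, e.g.:
+	 *
+	 *   writel(vc4_state->dlist[vc4_state->pos0_offset],
+	 *          &vc4_state->hw_dlist[vc4_state->pos0_offset]);
+	 *
+	 * A size change would alter the dlist layout (scaling kernels,
+	 * LBM allocation), which can't be patched behind the HVS's back.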
*/ + if (plane->state->crtc_w != state->crtc_w || + plane->state->crtc_h != state->crtc_h || + plane->state->src_w != state->src_w || + plane->state->src_h != state->src_h) + return -EINVAL; + + return 0; +} + static int vc4_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { @@ -780,6 +842,8 @@ static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .atomic_update = vc4_plane_atomic_update, .prepare_fb = vc4_prepare_fb, .cleanup_fb = vc4_cleanup_fb, + .atomic_async_check = vc4_plane_atomic_async_check, + .atomic_async_update = vc4_plane_atomic_async_update, }; static void vc4_plane_destroy(struct drm_plane *plane) @@ -788,82 +852,6 @@ static void vc4_plane_destroy(struct drm_plane *plane) drm_plane_cleanup(plane); } -/* Implements immediate (non-vblank-synced) updates of the cursor - * position, or falls back to the atomic helper otherwise. - */ -static int -vc4_update_plane(struct drm_plane *plane, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) -{ - struct drm_plane_state *plane_state; - struct vc4_plane_state *vc4_state; - - if (plane != crtc->cursor) - goto out; - - plane_state = plane->state; - vc4_state = to_vc4_plane_state(plane_state); - - if (!plane_state) - goto out; - - /* No configuring new scaling in the fast path. */ - if (crtc_w != plane_state->crtc_w || - crtc_h != plane_state->crtc_h || - src_w != plane_state->src_w || - src_h != plane_state->src_h) { - goto out; - } - - if (fb != plane_state->fb) { - drm_atomic_set_fb_for_plane(plane->state, fb); - vc4_plane_async_set_fb(plane, fb); - } - - /* Set the cursor's position on the screen. This is the - * expected change from the drm_mode_cursor_universal() - * helper. - */ - plane_state->crtc_x = crtc_x; - plane_state->crtc_y = crtc_y; - - /* Allow changing the start position within the cursor BO, if - * that matters. - */ - plane_state->src_x = src_x; - plane_state->src_y = src_y; - - /* Update the display list based on the new crtc_x/y. */ - vc4_plane_atomic_check(plane, plane_state); - - /* Note that we can't just call vc4_plane_write_dlist() - * because that would smash the context data that the HVS is - * currently using. 
- */ - writel(vc4_state->dlist[vc4_state->pos0_offset], - &vc4_state->hw_dlist[vc4_state->pos0_offset]); - writel(vc4_state->dlist[vc4_state->pos2_offset], - &vc4_state->hw_dlist[vc4_state->pos2_offset]); - writel(vc4_state->dlist[vc4_state->ptr0_offset], - &vc4_state->hw_dlist[vc4_state->ptr0_offset]); - - return 0; - -out: - return drm_atomic_helper_update_plane(plane, crtc, fb, - crtc_x, crtc_y, - crtc_w, crtc_h, - src_x, src_y, - src_w, src_h, - ctx); -} - static bool vc4_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) @@ -891,7 +879,7 @@ static bool vc4_format_mod_supported(struct drm_plane *plane, } static const struct drm_plane_funcs vc4_plane_funcs = { - .update_plane = vc4_update_plane, + .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = vc4_plane_destroy, .set_property = NULL, @@ -939,5 +927,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, drm_plane_helper_add(plane, &vc4_plane_helper_funcs); + drm_plane_create_alpha_property(plane); + return plane; } diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index a141496104a6..d1fb6fec46eb 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -330,6 +330,21 @@ #define SCALER_DISPCTRL0 0x00000040 # define SCALER_DISPCTRLX_ENABLE BIT(31) # define SCALER_DISPCTRLX_RESET BIT(30) +/* Generates a single frame when VSTART is seen and stops at the last + * pixel read from the FIFO. + */ +# define SCALER_DISPCTRLX_ONESHOT BIT(29) +/* Processes a single context in the dlist and then task switch, + * instead of an entire line. + */ +# define SCALER_DISPCTRLX_ONECTX BIT(28) +/* Set to have DISPSLAVE return 2 16bpp pixels and no status data. */ +# define SCALER_DISPCTRLX_FIFO32 BIT(27) +/* Turns on output to the DISPSLAVE register instead of the normal + * FIFO. + */ +# define SCALER_DISPCTRLX_FIFOREG BIT(26) + # define SCALER_DISPCTRLX_WIDTH_MASK VC4_MASK(23, 12) # define SCALER_DISPCTRLX_WIDTH_SHIFT 12 # define SCALER_DISPCTRLX_HEIGHT_MASK VC4_MASK(11, 0) @@ -402,6 +417,68 @@ */ # define SCALER_GAMADDR_SRAMENB BIT(30) +#define SCALER_OLEDOFFS 0x00000080 +/* Clamps R to [16,235] and G/B to [16,240]. */ +# define SCALER_OLEDOFFS_YUVCLAMP BIT(31) + +/* Chooses which display FIFO the matrix applies to. */ +# define SCALER_OLEDOFFS_DISPFIFO_MASK VC4_MASK(25, 24) +# define SCALER_OLEDOFFS_DISPFIFO_SHIFT 24 +# define SCALER_OLEDOFFS_DISPFIFO_DISABLED 0 +# define SCALER_OLEDOFFS_DISPFIFO_0 1 +# define SCALER_OLEDOFFS_DISPFIFO_1 2 +# define SCALER_OLEDOFFS_DISPFIFO_2 3 + +/* Offsets are 8-bit 2s-complement. */ +# define SCALER_OLEDOFFS_RED_MASK VC4_MASK(23, 16) +# define SCALER_OLEDOFFS_RED_SHIFT 16 +# define SCALER_OLEDOFFS_GREEN_MASK VC4_MASK(15, 8) +# define SCALER_OLEDOFFS_GREEN_SHIFT 8 +# define SCALER_OLEDOFFS_BLUE_MASK VC4_MASK(7, 0) +# define SCALER_OLEDOFFS_BLUE_SHIFT 0 + +/* The coefficients are S0.9 fractions. 
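The coefficient format matters here: S0.9 means one sign bit and nine fractional bits with no integer bits, which is consistent with vc4_ctm_atomic_check above rejecting CTM scalars with magnitude above 1.0. A sketch of how a userspace S31.32 CTM coefficient could be reduced to S0.9, assuming saturation at magnitude 1.0 (the helper name is illustrative, not necessarily the driver's):

    static u16 sketch_s31_32_to_s0_9(u64 coeff)
    {
            u16 r = coeff & BIT_ULL(63) ? BIT(9) : 0;       /* sign bit */

            if (coeff & GENMASK_ULL(62, 32))
                    r |= GENMASK(8, 0);     /* |coeff| >= 1.0: saturate */
            else
                    r |= (coeff >> 23) & GENMASK(8, 0);     /* top 9 fractional bits */

            return r;
    }
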
*/ +#define SCALER_OLEDCOEF0 0x00000084 +# define SCALER_OLEDCOEF0_B_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF0_B_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF0_B_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF0_B_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF0_B_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF0_B_TO_B_SHIFT 0 + +#define SCALER_OLEDCOEF1 0x00000088 +# define SCALER_OLEDCOEF1_G_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF1_G_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF1_G_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF1_G_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF1_G_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF1_G_TO_B_SHIFT 0 + +#define SCALER_OLEDCOEF2 0x0000008c +# define SCALER_OLEDCOEF2_R_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF2_R_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF2_R_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF2_R_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF2_R_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF2_R_TO_B_SHIFT 0 + +/* Slave addresses for DMAing from HVS composition output to other + * devices. The top bits are valid only in !FIFO32 mode. + */ +#define SCALER_DISPSLAVE0 0x000000c0 +#define SCALER_DISPSLAVE1 0x000000c9 +#define SCALER_DISPSLAVE2 0x000000d0 +# define SCALER_DISPSLAVE_ISSUE_VSTART BIT(31) +# define SCALER_DISPSLAVE_ISSUE_HSTART BIT(30) +/* Set when the current line has been read and an HSTART is required. */ +# define SCALER_DISPSLAVE_EOL BIT(26) +/* Set when the display FIFO is empty. */ +# define SCALER_DISPSLAVE_EMPTY BIT(25) +/* Set when there is RGB data ready to read. */ +# define SCALER_DISPSLAVE_VALID BIT(24) +# define SCALER_DISPSLAVE_RGB_MASK VC4_MASK(23, 0) +# define SCALER_DISPSLAVE_RGB_SHIFT 0 + #define SCALER_GAMDATA 0x000000e0 #define SCALER_DLIST_START 0x00002000 #define SCALER_DLIST_SIZE 0x00004000 @@ -767,6 +844,10 @@ enum hvs_pixel_format { HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9, HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10, HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11, + HVS_PIXEL_FORMAT_H264 = 12, + HVS_PIXEL_FORMAT_PALETTE = 13, + HVS_PIXEL_FORMAT_YUV444_RGB = 14, + HVS_PIXEL_FORMAT_AYUV444_RGB = 15, }; /* Note: the LSB is the rightmost character shown. 
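For the DISPSLAVE interface defined above, a short sketch of how composition output could be polled through it once FIFOREG routing is enabled (hypothetical helper; HVS_READ and VC4_GET_FIELD are the register accessors this driver already uses elsewhere):

    static int sketch_dispslave_read_pixel(struct vc4_dev *vc4, u32 *rgb)
    {
            u32 val = HVS_READ(SCALER_DISPSLAVE0);

            if (!(val & SCALER_DISPSLAVE_VALID))
                    return -EAGAIN;         /* no RGB data ready yet */

            *rgb = VC4_GET_FIELD(val, SCALER_DISPSLAVE_RGB);
            return 0;
    }
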
Only valid for @@ -800,12 +881,27 @@ enum hvs_pixel_format { #define SCALER_CTL0_TILING_128B 2 #define SCALER_CTL0_TILING_256B_OR_T 3 +#define SCALER_CTL0_ALPHA_MASK BIT(19) #define SCALER_CTL0_HFLIP BIT(16) #define SCALER_CTL0_VFLIP BIT(15) +#define SCALER_CTL0_KEY_MODE_MASK VC4_MASK(18, 17) +#define SCALER_CTL0_KEY_MODE_SHIFT 17 +#define SCALER_CTL0_KEY_DISABLED 0 +#define SCALER_CTL0_KEY_LUMA_OR_COMMON_RGB 1 +#define SCALER_CTL0_KEY_MATCH 2 /* turn transparent */ +#define SCALER_CTL0_KEY_REPLACE 3 /* replace with value from key mask word 2 */ + #define SCALER_CTL0_ORDER_MASK VC4_MASK(14, 13) #define SCALER_CTL0_ORDER_SHIFT 13 +#define SCALER_CTL0_RGBA_EXPAND_MASK VC4_MASK(12, 11) +#define SCALER_CTL0_RGBA_EXPAND_SHIFT 11 +#define SCALER_CTL0_RGBA_EXPAND_ZERO 0 +#define SCALER_CTL0_RGBA_EXPAND_LSB 1 +#define SCALER_CTL0_RGBA_EXPAND_MSB 2 +#define SCALER_CTL0_RGBA_EXPAND_ROUND 3 + #define SCALER_CTL0_SCL1_MASK VC4_MASK(10, 8) #define SCALER_CTL0_SCL1_SHIFT 8 @@ -849,6 +945,7 @@ enum hvs_pixel_format { #define SCALER_POS2_ALPHA_MODE_FIXED_NONZERO 2 #define SCALER_POS2_ALPHA_MODE_FIXED_OVER_0x07 3 #define SCALER_POS2_ALPHA_PREMULT BIT(29) +#define SCALER_POS2_ALPHA_MIX BIT(28) #define SCALER_POS2_HEIGHT_MASK VC4_MASK(27, 16) #define SCALER_POS2_HEIGHT_SHIFT 16 diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index bfc2fa73d2ae..e47e29426078 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -218,8 +218,7 @@ try_again: * overall CMA pool before they make scenes complicated enough to run * out of bin space. */ -int -vc4_allocate_bin_bo(struct drm_device *drm) +static int vc4_allocate_bin_bo(struct drm_device *drm) { struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_v3d *v3d = vc4->v3d; diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 8cc8c34d67f5..a5edd86603d9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -208,7 +208,7 @@ static int virtio_gpu_conn_get_modes(struct drm_connector *connector) return count; } -static int virtio_gpu_conn_mode_valid(struct drm_connector *connector, +static enum drm_mode_status virtio_gpu_conn_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct virtio_gpu_output *output = diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 70e1a8820a7c..97f37c3c16f2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -159,14 +159,14 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CURSOR_BYPASS, vmw_kms_cursor_bypass_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 96fd7a03d2f8..01f2dc9e6f52 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -384,9 +384,9 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x = du->hotspot_x; hotspot_y = du->hotspot_y; - if (plane->fb) { - hotspot_x += plane->fb->hot_x; - 
hotspot_y += plane->fb->hot_y; + if (plane->state->fb) { + hotspot_x += plane->state->fb->hot_x; + hotspot_y += plane->state->fb->hot_y; } du->cursor_surface = vps->surf; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 3824595fece1..4a5907e3f560 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -281,39 +281,6 @@ drm_connector_helper_funcs vmw_ldu_connector_helper_funcs = { * Legacy Display Plane Functions */ -/** - * vmw_ldu_primary_plane_cleanup_fb - Noop - * - * @plane: display plane - * @old_state: Contains the FB to clean up - * - * Unpins the display surface - * - * Returns 0 on success - */ -static void -vmw_ldu_primary_plane_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ -} - - -/** - * vmw_ldu_primary_plane_prepare_fb - Noop - * - * @plane: display plane - * @new_state: info on the new plane state, including the FB - * - * Returns 0 on success - */ -static int -vmw_ldu_primary_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) -{ - return 0; -} - - static void vmw_ldu_primary_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) @@ -373,8 +340,6 @@ static const struct drm_plane_helper_funcs vmw_ldu_primary_plane_helper_funcs = { .atomic_check = vmw_du_primary_plane_atomic_check, .atomic_update = vmw_ldu_primary_plane_atomic_update, - .prepare_fb = vmw_ldu_primary_plane_prepare_fb, - .cleanup_fb = vmw_ldu_primary_plane_cleanup_fb, }; static const struct drm_crtc_helper_funcs vmw_ldu_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/xen/Kconfig b/drivers/gpu/drm/xen/Kconfig new file mode 100644 index 000000000000..4cca160782ab --- /dev/null +++ b/drivers/gpu/drm/xen/Kconfig @@ -0,0 +1,17 @@ +config DRM_XEN + bool "DRM Support for Xen guest OS" + depends on XEN + help + Choose this option if you want to enable DRM support + for Xen. + +config DRM_XEN_FRONTEND + tristate "Para-virtualized frontend driver for Xen guest OS" + depends on DRM_XEN + depends on DRM + select DRM_KMS_HELPER + select VIDEOMODE_HELPERS + select XEN_XENBUS_FRONTEND + help + Choose this option if you want to enable a para-virtualized + frontend DRM/KMS driver for Xen guest OSes. diff --git a/drivers/gpu/drm/xen/Makefile b/drivers/gpu/drm/xen/Makefile new file mode 100644 index 000000000000..712afff5ffc3 --- /dev/null +++ b/drivers/gpu/drm/xen/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 OR MIT + +drm_xen_front-objs := xen_drm_front.o \ + xen_drm_front_kms.o \ + xen_drm_front_conn.o \ + xen_drm_front_evtchnl.o \ + xen_drm_front_shbuf.o \ + xen_drm_front_cfg.o \ + xen_drm_front_gem.o + +obj-$(CONFIG_DRM_XEN_FRONTEND) += drm_xen_front.o diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c new file mode 100644 index 000000000000..b3786c1a4e80 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -0,0 +1,839 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. 
+ * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_gem.h> + +#include <linux/of_device.h> + +#include <xen/platform_pci.h> +#include <xen/xen.h> +#include <xen/xenbus.h> + +#include <xen/interface/io/displif.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_cfg.h" +#include "xen_drm_front_evtchnl.h" +#include "xen_drm_front_gem.h" +#include "xen_drm_front_kms.h" +#include "xen_drm_front_shbuf.h" + +struct xen_drm_front_dbuf { + struct list_head list; + u64 dbuf_cookie; + u64 fb_cookie; + struct xen_drm_front_shbuf *shbuf; +}; + +static int dbuf_add_to_list(struct xen_drm_front_info *front_info, + struct xen_drm_front_shbuf *shbuf, u64 dbuf_cookie) +{ + struct xen_drm_front_dbuf *dbuf; + + dbuf = kzalloc(sizeof(*dbuf), GFP_KERNEL); + if (!dbuf) + return -ENOMEM; + + dbuf->dbuf_cookie = dbuf_cookie; + dbuf->shbuf = shbuf; + list_add(&dbuf->list, &front_info->dbuf_list); + return 0; +} + +static struct xen_drm_front_dbuf *dbuf_get(struct list_head *dbuf_list, + u64 dbuf_cookie) +{ + struct xen_drm_front_dbuf *buf, *q; + + list_for_each_entry_safe(buf, q, dbuf_list, list) + if (buf->dbuf_cookie == dbuf_cookie) + return buf; + + return NULL; +} + +static void dbuf_flush_fb(struct list_head *dbuf_list, u64 fb_cookie) +{ + struct xen_drm_front_dbuf *buf, *q; + + list_for_each_entry_safe(buf, q, dbuf_list, list) + if (buf->fb_cookie == fb_cookie) + xen_drm_front_shbuf_flush(buf->shbuf); +} + +static void dbuf_free(struct list_head *dbuf_list, u64 dbuf_cookie) +{ + struct xen_drm_front_dbuf *buf, *q; + + list_for_each_entry_safe(buf, q, dbuf_list, list) + if (buf->dbuf_cookie == dbuf_cookie) { + list_del(&buf->list); + xen_drm_front_shbuf_unmap(buf->shbuf); + xen_drm_front_shbuf_free(buf->shbuf); + kfree(buf); + break; + } +} + +static void dbuf_free_all(struct list_head *dbuf_list) +{ + struct xen_drm_front_dbuf *buf, *q; + + list_for_each_entry_safe(buf, q, dbuf_list, list) { + list_del(&buf->list); + xen_drm_front_shbuf_unmap(buf->shbuf); + xen_drm_front_shbuf_free(buf->shbuf); + kfree(buf); + } +} + +static struct xendispl_req * +be_prepare_req(struct xen_drm_front_evtchnl *evtchnl, u8 operation) +{ + struct xendispl_req *req; + + req = RING_GET_REQUEST(&evtchnl->u.req.ring, + evtchnl->u.req.ring.req_prod_pvt); + req->operation = operation; + req->id = evtchnl->evt_next_id++; + evtchnl->evt_id = req->id; + return req; +} + +static int be_stream_do_io(struct xen_drm_front_evtchnl *evtchnl, + struct xendispl_req *req) +{ + reinit_completion(&evtchnl->u.req.completion); + if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED)) + return -EIO; + + xen_drm_front_evtchnl_flush(evtchnl); + return 0; +} + +static int be_stream_wait_io(struct xen_drm_front_evtchnl *evtchnl) +{ + if (wait_for_completion_timeout(&evtchnl->u.req.completion, + msecs_to_jiffies(XEN_DRM_FRONT_WAIT_BACK_MS)) <= 0) + return -ETIMEDOUT; + + return evtchnl->u.req.resp_status; +} + +int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline, + u32 x, u32 y, u32 width, u32 height, + u32 bpp, u64 fb_cookie) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xen_drm_front_info *front_info; + struct xendispl_req *req; + unsigned long flags; + int ret; + + front_info = pipeline->drm_info->front_info; + evtchnl = &front_info->evt_pairs[pipeline->index].req; + if (unlikely(!evtchnl)) + return -EIO; + + mutex_lock(&evtchnl->u.req.req_io_lock); + + 
spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_SET_CONFIG); + req->op.set_config.x = x; + req->op.set_config.y = y; + req->op.set_config.width = width; + req->op.set_config.height = height; + req->op.set_config.bpp = bpp; + req->op.set_config.fb_cookie = fb_cookie; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, + u64 dbuf_cookie, u32 width, u32 height, + u32 bpp, u64 size, struct page **pages) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xen_drm_front_shbuf *shbuf; + struct xendispl_req *req; + struct xen_drm_front_shbuf_cfg buf_cfg; + unsigned long flags; + int ret; + + evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req; + if (unlikely(!evtchnl)) + return -EIO; + + memset(&buf_cfg, 0, sizeof(buf_cfg)); + buf_cfg.xb_dev = front_info->xb_dev; + buf_cfg.pages = pages; + buf_cfg.size = size; + buf_cfg.be_alloc = front_info->cfg.be_alloc; + + shbuf = xen_drm_front_shbuf_alloc(&buf_cfg); + if (IS_ERR(shbuf)) + return PTR_ERR(shbuf); + + ret = dbuf_add_to_list(front_info, shbuf, dbuf_cookie); + if (ret < 0) { + xen_drm_front_shbuf_free(shbuf); + return ret; + } + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_DBUF_CREATE); + req->op.dbuf_create.gref_directory = + xen_drm_front_shbuf_get_dir_start(shbuf); + req->op.dbuf_create.buffer_sz = size; + req->op.dbuf_create.dbuf_cookie = dbuf_cookie; + req->op.dbuf_create.width = width; + req->op.dbuf_create.height = height; + req->op.dbuf_create.bpp = bpp; + if (buf_cfg.be_alloc) + req->op.dbuf_create.flags |= XENDISPL_DBUF_FLG_REQ_ALLOC; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret < 0) + goto fail; + + ret = be_stream_wait_io(evtchnl); + if (ret < 0) + goto fail; + + ret = xen_drm_front_shbuf_map(shbuf); + if (ret < 0) + goto fail; + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return 0; + +fail: + mutex_unlock(&evtchnl->u.req.req_io_lock); + dbuf_free(&front_info->dbuf_list, dbuf_cookie); + return ret; +} + +static int xen_drm_front_dbuf_destroy(struct xen_drm_front_info *front_info, + u64 dbuf_cookie) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xendispl_req *req; + unsigned long flags; + bool be_alloc; + int ret; + + evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req; + if (unlikely(!evtchnl)) + return -EIO; + + be_alloc = front_info->cfg.be_alloc; + + /* + * For a backend-allocated buffer, release references now, so the + * backend can free the buffer. + */ + if (be_alloc) + dbuf_free(&front_info->dbuf_list, dbuf_cookie); + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_DBUF_DESTROY); + req->op.dbuf_destroy.dbuf_cookie = dbuf_cookie; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + /* + * Do this regardless of communication status with the backend: + * if we cannot remove remote resources, remove what we can locally. 
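Every request helper in this file follows the same locking and completion shape; condensed into a single hypothetical wrapper (not part of the patch), the pattern is:

    static int sketch_be_request(struct xen_drm_front_info *front_info,
                                 struct xen_drm_front_evtchnl *evtchnl,
                                 u8 op, void (*fill)(struct xendispl_req *req))
    {
            struct xendispl_req *req;
            unsigned long flags;
            int ret;

            /* serialize: one outstanding request/response per channel */
            mutex_lock(&evtchnl->u.req.req_io_lock);

            /* the ring itself is shared with the interrupt handler */
            spin_lock_irqsave(&front_info->io_lock, flags);
            req = be_prepare_req(evtchnl, op);
            fill(req);
            ret = be_stream_do_io(evtchnl, req);    /* push and notify backend */
            spin_unlock_irqrestore(&front_info->io_lock, flags);

            if (ret == 0)
                    ret = be_stream_wait_io(evtchnl);  /* resp_status or -ETIMEDOUT */

            mutex_unlock(&evtchnl->u.req.req_io_lock);
            return ret;
    }
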
+ */ + if (!be_alloc) + dbuf_free(&front_info->dbuf_list, dbuf_cookie); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info, + u64 dbuf_cookie, u64 fb_cookie, u32 width, + u32 height, u32 pixel_format) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xen_drm_front_dbuf *buf; + struct xendispl_req *req; + unsigned long flags; + int ret; + + evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req; + if (unlikely(!evtchnl)) + return -EIO; + + buf = dbuf_get(&front_info->dbuf_list, dbuf_cookie); + if (!buf) + return -EINVAL; + + buf->fb_cookie = fb_cookie; + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_FB_ATTACH); + req->op.fb_attach.dbuf_cookie = dbuf_cookie; + req->op.fb_attach.fb_cookie = fb_cookie; + req->op.fb_attach.width = width; + req->op.fb_attach.height = height; + req->op.fb_attach.pixel_format = pixel_format; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +int xen_drm_front_fb_detach(struct xen_drm_front_info *front_info, + u64 fb_cookie) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xendispl_req *req; + unsigned long flags; + int ret; + + evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req; + if (unlikely(!evtchnl)) + return -EIO; + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_FB_DETACH); + req->op.fb_detach.fb_cookie = fb_cookie; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +int xen_drm_front_page_flip(struct xen_drm_front_info *front_info, + int conn_idx, u64 fb_cookie) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xendispl_req *req; + unsigned long flags; + int ret; + + if (unlikely(conn_idx >= front_info->num_evt_pairs)) + return -EINVAL; + + dbuf_flush_fb(&front_info->dbuf_list, fb_cookie); + evtchnl = &front_info->evt_pairs[conn_idx].req; + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_PG_FLIP); + req->op.pg_flip.fb_cookie = fb_cookie; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +void xen_drm_front_on_frame_done(struct xen_drm_front_info *front_info, + int conn_idx, u64 fb_cookie) +{ + struct xen_drm_front_drm_info *drm_info = front_info->drm_info; + + if (unlikely(conn_idx >= front_info->cfg.num_connectors)) + return; + + xen_drm_front_kms_on_frame_done(&drm_info->pipeline[conn_idx], + fb_cookie); +} + +static int xen_drm_drv_dumb_create(struct drm_file *filp, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + struct drm_gem_object *obj; + int ret; + + /* + * Dumb creation is a two stage process: first we create a fully + * constructed GEM object which is communicated to the backend, and + * only after that we can create GEM's handle. 
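The dbuf/fb cookies threaded through the helpers above are opaque 64-bit identifiers derived from kernel pointers (see the inline helpers in xen_drm_front.h further below). A sketch of how those calls chain together to get a framebuffer on screen, with error unwinding elided and a hypothetical wrapper name:

    static int sketch_show_fb(struct xen_drm_front_info *front_info,
                              int conn_idx, struct drm_framebuffer *fb,
                              struct drm_gem_object *gem_obj)
    {
            u64 dbuf_cookie = (u64)gem_obj; /* xen_drm_front_dbuf_to_cookie() */
            u64 fb_cookie = (u64)fb;        /* xen_drm_front_fb_to_cookie() */
            int ret;

            /* bind the framebuffer to an already created display buffer */
            ret = xen_drm_front_fb_attach(front_info, dbuf_cookie, fb_cookie,
                                          fb->width, fb->height,
                                          fb->format->format);
            if (ret < 0)
                    return ret;

            /* ask the backend to scan it out on this connector */
            return xen_drm_front_page_flip(front_info, conn_idx, fb_cookie);
    }
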
This is done this way + * because of possible races: once a handle is created it becomes + * immediately visible to user space, which could then try to access the + * object before its backing pages exist. + * For details also see drm_gem_handle_create() + */ + args->pitch = DIV_ROUND_UP(args->width * args->bpp, 8); + args->size = args->pitch * args->height; + + obj = xen_drm_front_gem_create(dev, args->size); + if (IS_ERR_OR_NULL(obj)) { + ret = PTR_ERR(obj); + goto fail; + } + + ret = xen_drm_front_dbuf_create(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(obj), + args->width, args->height, args->bpp, + args->size, + xen_drm_front_gem_get_pages(obj)); + if (ret) + goto fail_backend; + + /* This is the tail of GEM object creation */ + ret = drm_gem_handle_create(filp, obj, &args->handle); + if (ret) + goto fail_handle; + + /* Drop reference from allocate - handle holds it now */ + drm_gem_object_put_unlocked(obj); + return 0; + +fail_handle: + xen_drm_front_dbuf_destroy(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(obj)); +fail_backend: + /* drop reference from allocate */ + drm_gem_object_put_unlocked(obj); +fail: + DRM_ERROR("Failed to create dumb buffer: %d\n", ret); + return ret; +} + +static void xen_drm_drv_free_object_unlocked(struct drm_gem_object *obj) +{ + struct xen_drm_front_drm_info *drm_info = obj->dev->dev_private; + int idx; + + if (drm_dev_enter(obj->dev, &idx)) { + xen_drm_front_dbuf_destroy(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(obj)); + drm_dev_exit(idx); + } else { + dbuf_free(&drm_info->front_info->dbuf_list, + xen_drm_front_dbuf_to_cookie(obj)); + } + + xen_drm_front_gem_free_object_unlocked(obj); +} + +static void xen_drm_drv_release(struct drm_device *dev) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + struct xen_drm_front_info *front_info = drm_info->front_info; + + xen_drm_front_kms_fini(drm_info); + + drm_atomic_helper_shutdown(dev); + drm_mode_config_cleanup(dev); + + drm_dev_fini(dev); + kfree(dev); + + if (front_info->cfg.be_alloc) + xenbus_switch_state(front_info->xb_dev, + XenbusStateInitialising); + + kfree(drm_info); +} + +static const struct file_operations xen_drm_dev_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif + .poll = drm_poll, + .read = drm_read, + .llseek = no_llseek, + .mmap = xen_drm_front_gem_mmap, +}; + +static const struct vm_operations_struct xen_drm_drv_vm_ops = { + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static struct drm_driver xen_drm_driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | + DRIVER_PRIME | DRIVER_ATOMIC, + .release = xen_drm_drv_release, + .gem_vm_ops = &xen_drm_drv_vm_ops, + .gem_free_object_unlocked = xen_drm_drv_free_object_unlocked, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_import_sg_table = xen_drm_front_gem_import_sg_table, + .gem_prime_get_sg_table = xen_drm_front_gem_get_sg_table, + .gem_prime_vmap = xen_drm_front_gem_prime_vmap, + .gem_prime_vunmap = xen_drm_front_gem_prime_vunmap, + .gem_prime_mmap = xen_drm_front_gem_prime_mmap, + .dumb_create = xen_drm_drv_dumb_create, + .fops = &xen_drm_dev_fops, + .name = "xendrm-du", + .desc = "Xen PV DRM Display Unit", + .date = "20180221", + .major = 1, + .minor = 0, + +}; + +static int xen_drm_drv_init(struct 
xen_drm_front_info *front_info) +{ + struct device *dev = &front_info->xb_dev->dev; + struct xen_drm_front_drm_info *drm_info; + struct drm_device *drm_dev; + int ret; + + DRM_INFO("Creating %s\n", xen_drm_driver.desc); + + drm_info = kzalloc(sizeof(*drm_info), GFP_KERNEL); + if (!drm_info) { + ret = -ENOMEM; + goto fail; + } + + drm_info->front_info = front_info; + front_info->drm_info = drm_info; + + drm_dev = drm_dev_alloc(&xen_drm_driver, dev); + if (IS_ERR(drm_dev)) { + ret = PTR_ERR(drm_dev); + goto fail; + } + + drm_info->drm_dev = drm_dev; + + drm_dev->dev_private = drm_info; + + ret = xen_drm_front_kms_init(drm_info); + if (ret) { + DRM_ERROR("Failed to initialize DRM/KMS, ret %d\n", ret); + goto fail_modeset; + } + + ret = drm_dev_register(drm_dev, 0); + if (ret) + goto fail_register; + + DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", + xen_drm_driver.name, xen_drm_driver.major, + xen_drm_driver.minor, xen_drm_driver.patchlevel, + xen_drm_driver.date, drm_dev->primary->index); + + return 0; + +fail_register: + drm_dev_unregister(drm_dev); +fail_modeset: + drm_kms_helper_poll_fini(drm_dev); + drm_mode_config_cleanup(drm_dev); +fail: + kfree(drm_info); + return ret; +} + +static void xen_drm_drv_fini(struct xen_drm_front_info *front_info) +{ + struct xen_drm_front_drm_info *drm_info = front_info->drm_info; + struct drm_device *dev; + + if (!drm_info) + return; + + dev = drm_info->drm_dev; + if (!dev) + return; + + /* Nothing to do if device is already unplugged */ + if (drm_dev_is_unplugged(dev)) + return; + + drm_kms_helper_poll_fini(dev); + drm_dev_unplug(dev); + + front_info->drm_info = NULL; + + xen_drm_front_evtchnl_free_all(front_info); + dbuf_free_all(&front_info->dbuf_list); + + /* + * If we are not using backend allocated buffers, then tell the + * backend we are ready to (re)initialize. Otherwise, wait for + * drm_driver.release. + */ + if (!front_info->cfg.be_alloc) + xenbus_switch_state(front_info->xb_dev, + XenbusStateInitialising); +} + +static int displback_initwait(struct xen_drm_front_info *front_info) +{ + struct xen_drm_front_cfg *cfg = &front_info->cfg; + int ret; + + cfg->front_info = front_info; + ret = xen_drm_front_cfg_card(front_info, cfg); + if (ret < 0) + return ret; + + DRM_INFO("Have %d connector(s)\n", cfg->num_connectors); + /* Create event channels for all connectors and publish */ + ret = xen_drm_front_evtchnl_create_all(front_info); + if (ret < 0) + return ret; + + return xen_drm_front_evtchnl_publish_all(front_info); +} + +static int displback_connect(struct xen_drm_front_info *front_info) +{ + xen_drm_front_evtchnl_set_state(front_info, EVTCHNL_STATE_CONNECTED); + return xen_drm_drv_init(front_info); +} + +static void displback_disconnect(struct xen_drm_front_info *front_info) +{ + if (!front_info->drm_info) + return; + + /* Tell the backend to wait until we release the DRM driver. 
*/ + xenbus_switch_state(front_info->xb_dev, XenbusStateReconfiguring); + + xen_drm_drv_fini(front_info); +} + +static void displback_changed(struct xenbus_device *xb_dev, + enum xenbus_state backend_state) +{ + struct xen_drm_front_info *front_info = dev_get_drvdata(&xb_dev->dev); + int ret; + + DRM_DEBUG("Backend state is %s, front is %s\n", + xenbus_strstate(backend_state), + xenbus_strstate(xb_dev->state)); + + switch (backend_state) { + case XenbusStateReconfiguring: + /* fall through */ + case XenbusStateReconfigured: + /* fall through */ + case XenbusStateInitialised: + break; + + case XenbusStateInitialising: + if (xb_dev->state == XenbusStateReconfiguring) + break; + + /* recovering after an unexpected backend closure */ + displback_disconnect(front_info); + break; + + case XenbusStateInitWait: + if (xb_dev->state == XenbusStateReconfiguring) + break; + + /* recovering after an unexpected backend closure */ + displback_disconnect(front_info); + if (xb_dev->state != XenbusStateInitialising) + break; + + ret = displback_initwait(front_info); + if (ret < 0) + xenbus_dev_fatal(xb_dev, ret, "initializing frontend"); + else + xenbus_switch_state(xb_dev, XenbusStateInitialised); + break; + + case XenbusStateConnected: + if (xb_dev->state != XenbusStateInitialised) + break; + + ret = displback_connect(front_info); + if (ret < 0) { + displback_disconnect(front_info); + xenbus_dev_fatal(xb_dev, ret, "connecting backend"); + } else { + xenbus_switch_state(xb_dev, XenbusStateConnected); + } + break; + + case XenbusStateClosing: + /* + * in this state the backend starts freeing resources, + * so let it go into the closed state, so that we can + * also remove ours + */ + break; + + case XenbusStateUnknown: + /* fall through */ + case XenbusStateClosed: + if (xb_dev->state == XenbusStateClosed) + break; + + displback_disconnect(front_info); + break; + } +} + +static int xen_drv_probe(struct xenbus_device *xb_dev, + const struct xenbus_device_id *id) +{ + struct xen_drm_front_info *front_info; + struct device *dev = &xb_dev->dev; + int ret; + + /* + * The device is not spawned from a device tree, so arch_setup_dma_ops + * is not called, thus leaving the device with dummy DMA ops. + * This makes the device return error on PRIME buffer import, which + * is not correct: to fix this call of_dma_configure() with a NULL + * node to set default DMA ops. + */ + dev->coherent_dma_mask = DMA_BIT_MASK(32); + ret = of_dma_configure(dev, NULL, true); + if (ret < 0) { + DRM_ERROR("Cannot set up DMA ops, ret %d\n", ret); + return ret; + } + + front_info = devm_kzalloc(&xb_dev->dev, + sizeof(*front_info), GFP_KERNEL); + if (!front_info) + return -ENOMEM; + + front_info->xb_dev = xb_dev; + spin_lock_init(&front_info->io_lock); + INIT_LIST_HEAD(&front_info->dbuf_list); + dev_set_drvdata(&xb_dev->dev, front_info); + + return xenbus_switch_state(xb_dev, XenbusStateInitialising); +} + +static int xen_drv_remove(struct xenbus_device *dev) +{ + struct xen_drm_front_info *front_info = dev_get_drvdata(&dev->dev); + int to = 100; + + xenbus_switch_state(dev, XenbusStateClosing); + + /* + * On driver removal it is disconnected from XenBus, + * so no backend state change events come via .otherend_changed + * callback. This prevents us from exiting gracefully, e.g. + * signaling the backend to free event channels, waiting for its + * state to change to XenbusStateClosed and cleaning up at our end. + * Normally, when the front driver is removed, the backend will + * finally go into the XenbusStateInitWait state. 
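The bounded wait described here is implemented by the loop that follows; factored into a helper it amounts to the sketch below (hypothetical name, same 10 ms polling interval as the code):

    static bool sketch_wait_backend_state(struct xenbus_device *xb_dev,
                                          enum xenbus_state expected, int tries)
    {
            while (xenbus_read_unsigned(xb_dev->otherend, "state",
                                        XenbusStateUnknown) != expected &&
                   --tries)
                    msleep(10);

            return tries > 0;       /* false: backend never reached 'expected' */
    }
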
+ * + * Workaround: read the backend's state manually and wait with a time-out. + */ + while ((xenbus_read_unsigned(front_info->xb_dev->otherend, "state", + XenbusStateUnknown) != XenbusStateInitWait) && + --to) + msleep(10); + + if (!to) { + unsigned int state; + + state = xenbus_read_unsigned(front_info->xb_dev->otherend, + "state", XenbusStateUnknown); + DRM_ERROR("Backend state is %s while removing driver\n", + xenbus_strstate(state)); + } + + xen_drm_drv_fini(front_info); + xenbus_frontend_closed(dev); + return 0; +} + +static const struct xenbus_device_id xen_driver_ids[] = { + { XENDISPL_DRIVER_NAME }, + { "" } +}; + +static struct xenbus_driver xen_driver = { + .ids = xen_driver_ids, + .probe = xen_drv_probe, + .remove = xen_drv_remove, + .otherend_changed = displback_changed, +}; + +static int __init xen_drv_init(void) +{ + /* At the moment we only support case with XEN_PAGE_SIZE == PAGE_SIZE */ + if (XEN_PAGE_SIZE != PAGE_SIZE) { + DRM_ERROR(XENDISPL_DRIVER_NAME ": different kernel and Xen page sizes are not supported: XEN_PAGE_SIZE (%lu) != PAGE_SIZE (%lu)\n", + XEN_PAGE_SIZE, PAGE_SIZE); + return -ENODEV; + } + + if (!xen_domain()) + return -ENODEV; + + if (!xen_has_pv_devices()) + return -ENODEV; + + DRM_INFO("Registering XEN PV " XENDISPL_DRIVER_NAME "\n"); + return xenbus_register_frontend(&xen_driver); +} + +static void __exit xen_drv_fini(void) +{ + DRM_INFO("Unregistering XEN PV " XENDISPL_DRIVER_NAME "\n"); + xenbus_unregister_driver(&xen_driver); +} + +module_init(xen_drv_init); +module_exit(xen_drv_fini); + +MODULE_DESCRIPTION("Xen para-virtualized display device frontend"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:" XENDISPL_DRIVER_NAME); diff --git a/drivers/gpu/drm/xen/xen_drm_front.h b/drivers/gpu/drm/xen/xen_drm_front.h new file mode 100644 index 000000000000..2c2479b571ae --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_H_ +#define __XEN_DRM_FRONT_H_ + +#include <drm/drmP.h> +#include <drm/drm_simple_kms_helper.h> + +#include <linux/scatterlist.h> + +#include "xen_drm_front_cfg.h" + +/** + * DOC: Driver modes of operation in terms of display buffers used + * + * Depending on the requirements for the para-virtualized environment, namely + * requirements dictated by the accompanying DRM/(v)GPU drivers running in both + * host and guest environments, display buffers can be allocated by either the + * frontend driver or the backend. + */ + +/** + * DOC: Buffers allocated by the frontend driver + * + * In this mode of operation the driver allocates buffers from system memory. + * + * Note! If used with accompanying DRM/(v)GPU drivers, this mode of operation + * may require IOMMU support on the platform, so the accompanying DRM/vGPU + * hardware can still reach the display buffer memory while importing PRIME + * buffers from the frontend driver. + */ + +/** + * DOC: Buffers allocated by the backend + * + * This mode of operation is run-time configured via guest domain configuration + * through XenStore entries. + * + * For systems which do not provide IOMMU support but have specific + * requirements for display buffers, it is possible to allocate such buffers + * at the backend side and share those with the frontend. 
+ * For example, if the host domain is 1:1 mapped and has DRM/GPU hardware expecting + * physically contiguous memory, this allows implementing zero-copy + * use-cases. + * + * Note: while using this scenario, the following should be considered: + * + * #. If the guest domain dies, pages/grants received from the backend + * cannot be claimed back + * + * #. A misbehaving guest may send too many requests to the + * backend, exhausting its grant references and memory + * (consider this from a security POV) + */ + +/** + * DOC: Driver limitations + * + * #. Only a primary plane without additional properties is supported. + * + * #. Only one video mode per connector is supported, which is configured + * via XenStore. + * + * #. All CRTCs operate at a fixed frequency of 60 Hz. + */ + +/* timeout in ms to wait for backend to respond */ +#define XEN_DRM_FRONT_WAIT_BACK_MS 3000 + +#ifndef GRANT_INVALID_REF +/* + * Note on usage of grant reference 0 as invalid grant reference: + * grant reference 0 is valid, but never exposed to a PV driver, + * because it is already in use/reserved by the PV console. + */ +#define GRANT_INVALID_REF 0 +#endif + +struct xen_drm_front_info { + struct xenbus_device *xb_dev; + struct xen_drm_front_drm_info *drm_info; + + /* to protect data between backend IO code and interrupt handler */ + spinlock_t io_lock; + + int num_evt_pairs; + struct xen_drm_front_evtchnl_pair *evt_pairs; + struct xen_drm_front_cfg cfg; + + /* display buffers */ + struct list_head dbuf_list; +}; + +struct xen_drm_front_drm_pipeline { + struct xen_drm_front_drm_info *drm_info; + + int index; + + struct drm_simple_display_pipe pipe; + + struct drm_connector conn; + /* These are only for connector mode checking */ + int width, height; + + struct drm_pending_vblank_event *pending_event; + + struct delayed_work pflip_to_worker; + + bool conn_connected; +}; + +struct xen_drm_front_drm_info { + struct xen_drm_front_info *front_info; + struct drm_device *drm_dev; + + struct xen_drm_front_drm_pipeline pipeline[XEN_DRM_FRONT_MAX_CRTCS]; +}; + +static inline u64 xen_drm_front_fb_to_cookie(struct drm_framebuffer *fb) +{ + return (u64)fb; +} + +static inline u64 xen_drm_front_dbuf_to_cookie(struct drm_gem_object *gem_obj) +{ + return (u64)gem_obj; +} + +int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline, + u32 x, u32 y, u32 width, u32 height, + u32 bpp, u64 fb_cookie); + +int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, + u64 dbuf_cookie, u32 width, u32 height, + u32 bpp, u64 size, struct page **pages); + +int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info, + u64 dbuf_cookie, u64 fb_cookie, u32 width, + u32 height, u32 pixel_format); + +int xen_drm_front_fb_detach(struct xen_drm_front_info *front_info, + u64 fb_cookie); + +int xen_drm_front_page_flip(struct xen_drm_front_info *front_info, + int conn_idx, u64 fb_cookie); + +void xen_drm_front_on_frame_done(struct xen_drm_front_info *front_info, + int conn_idx, u64 fb_cookie); + +#endif /* __XEN_DRM_FRONT_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_cfg.c b/drivers/gpu/drm/xen/xen_drm_front_cfg.c new file mode 100644 index 000000000000..5baf2b9de93c --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_cfg.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. 
+ * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drmP.h> + +#include <linux/device.h> + +#include <xen/interface/io/displif.h> +#include <xen/xenbus.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_cfg.h" + +static int cfg_connector(struct xen_drm_front_info *front_info, + struct xen_drm_front_cfg_connector *connector, + const char *path, int index) +{ + char *connector_path; + + connector_path = devm_kasprintf(&front_info->xb_dev->dev, + GFP_KERNEL, "%s/%d", path, index); + if (!connector_path) + return -ENOMEM; + + if (xenbus_scanf(XBT_NIL, connector_path, XENDISPL_FIELD_RESOLUTION, + "%d" XENDISPL_RESOLUTION_SEPARATOR "%d", + &connector->width, &connector->height) < 0) { + /* either no entry configured or wrong resolution set */ + connector->width = 0; + connector->height = 0; + return -EINVAL; + } + + connector->xenstore_path = connector_path; + + DRM_INFO("Connector %s: resolution %dx%d\n", + connector_path, connector->width, connector->height); + return 0; +} + +int xen_drm_front_cfg_card(struct xen_drm_front_info *front_info, + struct xen_drm_front_cfg *cfg) +{ + struct xenbus_device *xb_dev = front_info->xb_dev; + int ret, i; + + if (xenbus_read_unsigned(front_info->xb_dev->nodename, + XENDISPL_FIELD_BE_ALLOC, 0)) { + DRM_INFO("Backend can provide display buffers\n"); + cfg->be_alloc = true; + } + + cfg->num_connectors = 0; + for (i = 0; i < ARRAY_SIZE(cfg->connectors); i++) { + ret = cfg_connector(front_info, &cfg->connectors[i], + xb_dev->nodename, i); + if (ret < 0) + break; + cfg->num_connectors++; + } + + if (!cfg->num_connectors) { + DRM_ERROR("No connector(s) configured at %s\n", + xb_dev->nodename); + return -ENODEV; + } + + return 0; +} + diff --git a/drivers/gpu/drm/xen/xen_drm_front_cfg.h b/drivers/gpu/drm/xen/xen_drm_front_cfg.h new file mode 100644 index 000000000000..aa8490ba9146 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_cfg.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_CFG_H_ +#define __XEN_DRM_FRONT_CFG_H_ + +#include <linux/types.h> + +#define XEN_DRM_FRONT_MAX_CRTCS 4 + +struct xen_drm_front_cfg_connector { + int width; + int height; + char *xenstore_path; +}; + +struct xen_drm_front_cfg { + struct xen_drm_front_info *front_info; + /* number of connectors in this configuration */ + int num_connectors; + /* connector configurations */ + struct xen_drm_front_cfg_connector connectors[XEN_DRM_FRONT_MAX_CRTCS]; + /* set if dumb buffers are allocated externally on backend side */ + bool be_alloc; +}; + +int xen_drm_front_cfg_card(struct xen_drm_front_info *front_info, + struct xen_drm_front_cfg *cfg); + +#endif /* __XEN_DRM_FRONT_CFG_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_conn.c b/drivers/gpu/drm/xen/xen_drm_front_conn.c new file mode 100644 index 000000000000..c91ae532fa55 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_conn.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. 
+ * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> + +#include <video/videomode.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_conn.h" +#include "xen_drm_front_kms.h" + +static struct xen_drm_front_drm_pipeline * +to_xen_drm_pipeline(struct drm_connector *connector) +{ + return container_of(connector, struct xen_drm_front_drm_pipeline, conn); +} + +static const u32 plane_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB4444, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ARGB1555, +}; + +const u32 *xen_drm_front_conn_get_formats(int *format_count) +{ + *format_count = ARRAY_SIZE(plane_formats); + return plane_formats; +} + +static int connector_detect(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(connector); + + if (drm_dev_is_unplugged(connector->dev)) + pipeline->conn_connected = false; + + return pipeline->conn_connected ? connector_status_connected : + connector_status_disconnected; +} + +#define XEN_DRM_CRTC_VREFRESH_HZ 60 + +static int connector_get_modes(struct drm_connector *connector) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(connector); + struct drm_display_mode *mode; + struct videomode videomode; + int width, height; + + mode = drm_mode_create(connector->dev); + if (!mode) + return 0; + + memset(&videomode, 0, sizeof(videomode)); + videomode.hactive = pipeline->width; + videomode.vactive = pipeline->height; + width = videomode.hactive + videomode.hfront_porch + + videomode.hback_porch + videomode.hsync_len; + height = videomode.vactive + videomode.vfront_porch + + videomode.vback_porch + videomode.vsync_len; + videomode.pixelclock = width * height * XEN_DRM_CRTC_VREFRESH_HZ; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; + + drm_display_mode_from_videomode(&videomode, mode); + drm_mode_probed_add(connector, mode); + return 1; +} + +static const struct drm_connector_helper_funcs connector_helper_funcs = { + .get_modes = connector_get_modes, + .detect_ctx = connector_detect, +}; + +static const struct drm_connector_funcs connector_funcs = { + .dpms = drm_helper_connector_dpms, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +int xen_drm_front_conn_init(struct xen_drm_front_drm_info *drm_info, + struct drm_connector *connector) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(connector); + + drm_connector_helper_add(connector, &connector_helper_funcs); + + pipeline->conn_connected = true; + + connector->polled = DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; + + return drm_connector_init(drm_info->drm_dev, connector, + &connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_conn.h b/drivers/gpu/drm/xen/xen_drm_front_conn.h new file mode 100644 index 000000000000..39de7cf5adbe --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_conn.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. 
+ * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_CONN_H_ +#define __XEN_DRM_FRONT_CONN_H_ + +#include <drm/drmP.h> +#include <drm/drm_crtc.h> +#include <drm/drm_encoder.h> + +#include <linux/wait.h> + +struct xen_drm_front_drm_info; + +int xen_drm_front_conn_init(struct xen_drm_front_drm_info *drm_info, + struct drm_connector *connector); + +const u32 *xen_drm_front_conn_get_formats(int *format_count); + +#endif /* __XEN_DRM_FRONT_CONN_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c new file mode 100644 index 000000000000..945226a95e9b --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drmP.h> + +#include <linux/errno.h> +#include <linux/irq.h> + +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/grant_table.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_evtchnl.h" + +static irqreturn_t evtchnl_interrupt_ctrl(int irq, void *dev_id) +{ + struct xen_drm_front_evtchnl *evtchnl = dev_id; + struct xen_drm_front_info *front_info = evtchnl->front_info; + struct xendispl_resp *resp; + RING_IDX i, rp; + unsigned long flags; + + if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED)) + return IRQ_HANDLED; + + spin_lock_irqsave(&front_info->io_lock, flags); + +again: + rp = evtchnl->u.req.ring.sring->rsp_prod; + /* ensure we see queued responses up to rp */ + virt_rmb(); + + for (i = evtchnl->u.req.ring.rsp_cons; i != rp; i++) { + resp = RING_GET_RESPONSE(&evtchnl->u.req.ring, i); + if (unlikely(resp->id != evtchnl->evt_id)) + continue; + + switch (resp->operation) { + case XENDISPL_OP_PG_FLIP: + case XENDISPL_OP_FB_ATTACH: + case XENDISPL_OP_FB_DETACH: + case XENDISPL_OP_DBUF_CREATE: + case XENDISPL_OP_DBUF_DESTROY: + case XENDISPL_OP_SET_CONFIG: + evtchnl->u.req.resp_status = resp->status; + complete(&evtchnl->u.req.completion); + break; + + default: + DRM_ERROR("Operation %d is not supported\n", + resp->operation); + break; + } + } + + evtchnl->u.req.ring.rsp_cons = i; + + if (i != evtchnl->u.req.ring.req_prod_pvt) { + int more_to_do; + + RING_FINAL_CHECK_FOR_RESPONSES(&evtchnl->u.req.ring, + more_to_do); + if (more_to_do) + goto again; + } else { + evtchnl->u.req.ring.sring->rsp_event = i + 1; + } + + spin_unlock_irqrestore(&front_info->io_lock, flags); + return IRQ_HANDLED; +} + +static irqreturn_t evtchnl_interrupt_evt(int irq, void *dev_id) +{ + struct xen_drm_front_evtchnl *evtchnl = dev_id; + struct xen_drm_front_info *front_info = evtchnl->front_info; + struct xendispl_event_page *page = evtchnl->u.evt.page; + u32 cons, prod; + unsigned long flags; + + if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED)) + return IRQ_HANDLED; + + spin_lock_irqsave(&front_info->io_lock, flags); + + prod = page->in_prod; + /* ensure we see ring contents up to prod */ + virt_rmb(); + if (prod == page->in_cons) + goto out; + + for (cons = page->in_cons; cons != prod; cons++) { + struct xendispl_evt *event; + + event = &XENDISPL_IN_RING_REF(page, cons); + if (unlikely(event->id != evtchnl->evt_id++)) + continue; + + switch (event->type) { + case XENDISPL_EVT_PG_FLIP: + xen_drm_front_on_frame_done(front_info, evtchnl->index, + event->op.pg_flip.fb_cookie); + break; + } + } + page->in_cons = cons; + /* ensure 
ring contents */ + virt_wmb(); + +out: + spin_unlock_irqrestore(&front_info->io_lock, flags); + return IRQ_HANDLED; +} + +static void evtchnl_free(struct xen_drm_front_info *front_info, + struct xen_drm_front_evtchnl *evtchnl) +{ + unsigned long page = 0; + + if (evtchnl->type == EVTCHNL_TYPE_REQ) + page = (unsigned long)evtchnl->u.req.ring.sring; + else if (evtchnl->type == EVTCHNL_TYPE_EVT) + page = (unsigned long)evtchnl->u.evt.page; + if (!page) + return; + + evtchnl->state = EVTCHNL_STATE_DISCONNECTED; + + if (evtchnl->type == EVTCHNL_TYPE_REQ) { + /* release all who still waits for response if any */ + evtchnl->u.req.resp_status = -EIO; + complete_all(&evtchnl->u.req.completion); + } + + if (evtchnl->irq) + unbind_from_irqhandler(evtchnl->irq, evtchnl); + + if (evtchnl->port) + xenbus_free_evtchn(front_info->xb_dev, evtchnl->port); + + /* end access and free the page */ + if (evtchnl->gref != GRANT_INVALID_REF) + gnttab_end_foreign_access(evtchnl->gref, 0, page); + + memset(evtchnl, 0, sizeof(*evtchnl)); +} + +static int evtchnl_alloc(struct xen_drm_front_info *front_info, int index, + struct xen_drm_front_evtchnl *evtchnl, + enum xen_drm_front_evtchnl_type type) +{ + struct xenbus_device *xb_dev = front_info->xb_dev; + unsigned long page; + grant_ref_t gref; + irq_handler_t handler; + int ret; + + memset(evtchnl, 0, sizeof(*evtchnl)); + evtchnl->type = type; + evtchnl->index = index; + evtchnl->front_info = front_info; + evtchnl->state = EVTCHNL_STATE_DISCONNECTED; + evtchnl->gref = GRANT_INVALID_REF; + + page = get_zeroed_page(GFP_NOIO | __GFP_HIGH); + if (!page) { + ret = -ENOMEM; + goto fail; + } + + if (type == EVTCHNL_TYPE_REQ) { + struct xen_displif_sring *sring; + + init_completion(&evtchnl->u.req.completion); + mutex_init(&evtchnl->u.req.req_io_lock); + sring = (struct xen_displif_sring *)page; + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&evtchnl->u.req.ring, sring, XEN_PAGE_SIZE); + + ret = xenbus_grant_ring(xb_dev, sring, 1, &gref); + if (ret < 0) { + evtchnl->u.req.ring.sring = NULL; + free_page(page); + goto fail; + } + + handler = evtchnl_interrupt_ctrl; + } else { + ret = gnttab_grant_foreign_access(xb_dev->otherend_id, + virt_to_gfn((void *)page), 0); + if (ret < 0) { + free_page(page); + goto fail; + } + + evtchnl->u.evt.page = (struct xendispl_event_page *)page; + gref = ret; + handler = evtchnl_interrupt_evt; + } + evtchnl->gref = gref; + + ret = xenbus_alloc_evtchn(xb_dev, &evtchnl->port); + if (ret < 0) + goto fail; + + ret = bind_evtchn_to_irqhandler(evtchnl->port, + handler, 0, xb_dev->devicetype, + evtchnl); + if (ret < 0) + goto fail; + + evtchnl->irq = ret; + return 0; + +fail: + DRM_ERROR("Failed to allocate ring: %d\n", ret); + return ret; +} + +int xen_drm_front_evtchnl_create_all(struct xen_drm_front_info *front_info) +{ + struct xen_drm_front_cfg *cfg; + int ret, conn; + + cfg = &front_info->cfg; + + front_info->evt_pairs = + kcalloc(cfg->num_connectors, + sizeof(struct xen_drm_front_evtchnl_pair), + GFP_KERNEL); + if (!front_info->evt_pairs) { + ret = -ENOMEM; + goto fail; + } + + for (conn = 0; conn < cfg->num_connectors; conn++) { + ret = evtchnl_alloc(front_info, conn, + &front_info->evt_pairs[conn].req, + EVTCHNL_TYPE_REQ); + if (ret < 0) { + DRM_ERROR("Error allocating control channel\n"); + goto fail; + } + + ret = evtchnl_alloc(front_info, conn, + &front_info->evt_pairs[conn].evt, + EVTCHNL_TYPE_EVT); + if (ret < 0) { + DRM_ERROR("Error allocating in-event channel\n"); + goto fail; + } + } + front_info->num_evt_pairs = cfg->num_connectors; + 
return 0; + +fail: + xen_drm_front_evtchnl_free_all(front_info); + return ret; +} + +static int evtchnl_publish(struct xenbus_transaction xbt, + struct xen_drm_front_evtchnl *evtchnl, + const char *path, const char *node_ring, + const char *node_chnl) +{ + struct xenbus_device *xb_dev = evtchnl->front_info->xb_dev; + int ret; + + /* write the channel's ring reference */ + ret = xenbus_printf(xbt, path, node_ring, "%u", evtchnl->gref); + if (ret < 0) { + xenbus_dev_error(xb_dev, ret, "writing ring-ref"); + return ret; + } + + /* write the channel's event channel number */ + ret = xenbus_printf(xbt, path, node_chnl, "%u", evtchnl->port); + if (ret < 0) { + xenbus_dev_error(xb_dev, ret, "writing event channel"); + return ret; + } + + return 0; +} + +int xen_drm_front_evtchnl_publish_all(struct xen_drm_front_info *front_info) +{ + struct xenbus_transaction xbt; + struct xen_drm_front_cfg *plat_data; + int ret, conn; + + plat_data = &front_info->cfg; + +again: + ret = xenbus_transaction_start(&xbt); + if (ret < 0) { + xenbus_dev_fatal(front_info->xb_dev, ret, + "starting transaction"); + return ret; + } + + for (conn = 0; conn < plat_data->num_connectors; conn++) { + ret = evtchnl_publish(xbt, &front_info->evt_pairs[conn].req, + plat_data->connectors[conn].xenstore_path, + XENDISPL_FIELD_REQ_RING_REF, + XENDISPL_FIELD_REQ_CHANNEL); + if (ret < 0) + goto fail; + + ret = evtchnl_publish(xbt, &front_info->evt_pairs[conn].evt, + plat_data->connectors[conn].xenstore_path, + XENDISPL_FIELD_EVT_RING_REF, + XENDISPL_FIELD_EVT_CHANNEL); + if (ret < 0) + goto fail; + } + + ret = xenbus_transaction_end(xbt, 0); + if (ret < 0) { + if (ret == -EAGAIN) + goto again; + + xenbus_dev_fatal(front_info->xb_dev, ret, + "completing transaction"); + goto fail_to_end; + } + + return 0; + +fail: + xenbus_transaction_end(xbt, 1); + +fail_to_end: + xenbus_dev_fatal(front_info->xb_dev, ret, "writing Xen store"); + return ret; +} + +void xen_drm_front_evtchnl_flush(struct xen_drm_front_evtchnl *evtchnl) +{ + int notify; + + evtchnl->u.req.ring.req_prod_pvt++; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&evtchnl->u.req.ring, notify); + if (notify) + notify_remote_via_irq(evtchnl->irq); +} + +void xen_drm_front_evtchnl_set_state(struct xen_drm_front_info *front_info, + enum xen_drm_front_evtchnl_state state) +{ + unsigned long flags; + int i; + + if (!front_info->evt_pairs) + return; + + spin_lock_irqsave(&front_info->io_lock, flags); + for (i = 0; i < front_info->num_evt_pairs; i++) { + front_info->evt_pairs[i].req.state = state; + front_info->evt_pairs[i].evt.state = state; + } + spin_unlock_irqrestore(&front_info->io_lock, flags); +} + +void xen_drm_front_evtchnl_free_all(struct xen_drm_front_info *front_info) +{ + int i; + + if (!front_info->evt_pairs) + return; + + for (i = 0; i < front_info->num_evt_pairs; i++) { + evtchnl_free(front_info, &front_info->evt_pairs[i].req); + evtchnl_free(front_info, &front_info->evt_pairs[i].evt); + } + + kfree(front_info->evt_pairs); + front_info->evt_pairs = NULL; +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h new file mode 100644 index 000000000000..b0af6994332b --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. 
+ * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_EVTCHNL_H_ +#define __XEN_DRM_FRONT_EVTCHNL_H_ + +#include <linux/completion.h> +#include <linux/types.h> + +#include <xen/interface/io/ring.h> +#include <xen/interface/io/displif.h> + +/* + * All operations which are not connector oriented use this ctrl event channel, + * e.g. fb_attach/destroy which belong to a DRM device, not to a CRTC. + */ +#define GENERIC_OP_EVT_CHNL 0 + +enum xen_drm_front_evtchnl_state { + EVTCHNL_STATE_DISCONNECTED, + EVTCHNL_STATE_CONNECTED, +}; + +enum xen_drm_front_evtchnl_type { + EVTCHNL_TYPE_REQ, + EVTCHNL_TYPE_EVT, +}; + +struct xen_drm_front_drm_info; + +struct xen_drm_front_evtchnl { + struct xen_drm_front_info *front_info; + int gref; + int port; + int irq; + int index; + enum xen_drm_front_evtchnl_state state; + enum xen_drm_front_evtchnl_type type; + /* either response id or incoming event id */ + u16 evt_id; + /* next request id or next expected event id */ + u16 evt_next_id; + union { + struct { + struct xen_displif_front_ring ring; + struct completion completion; + /* latest response status */ + int resp_status; + /* serializer for backend IO: request/response */ + struct mutex req_io_lock; + } req; + struct { + struct xendispl_event_page *page; + } evt; + } u; +}; + +struct xen_drm_front_evtchnl_pair { + struct xen_drm_front_evtchnl req; + struct xen_drm_front_evtchnl evt; +}; + +int xen_drm_front_evtchnl_create_all(struct xen_drm_front_info *front_info); + +int xen_drm_front_evtchnl_publish_all(struct xen_drm_front_info *front_info); + +void xen_drm_front_evtchnl_flush(struct xen_drm_front_evtchnl *evtchnl); + +void xen_drm_front_evtchnl_set_state(struct xen_drm_front_info *front_info, + enum xen_drm_front_evtchnl_state state); + +void xen_drm_front_evtchnl_free_all(struct xen_drm_front_info *front_info); + +#endif /* __XEN_DRM_FRONT_EVTCHNL_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c new file mode 100644 index 000000000000..c85bfe7571cb --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include "xen_drm_front_gem.h" + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_helper.h> +#include <drm/drm_gem.h> + +#include <linux/dma-buf.h> +#include <linux/scatterlist.h> +#include <linux/shmem_fs.h> + +#include <xen/balloon.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_shbuf.h" + +struct xen_gem_object { + struct drm_gem_object base; + + size_t num_pages; + struct page **pages; + + /* set for buffers allocated by the backend */ + bool be_alloc; + + /* this is for imported PRIME buffer */ + struct sg_table *sgt_imported; +}; + +static inline struct xen_gem_object * +to_xen_gem_obj(struct drm_gem_object *gem_obj) +{ + return container_of(gem_obj, struct xen_gem_object, base); +} + +static int gem_alloc_pages_array(struct xen_gem_object *xen_obj, + size_t buf_size) +{ + xen_obj->num_pages = DIV_ROUND_UP(buf_size, PAGE_SIZE); + xen_obj->pages = kvmalloc_array(xen_obj->num_pages, + sizeof(struct page *), GFP_KERNEL); + return !xen_obj->pages ? 
-ENOMEM : 0; +} + +static void gem_free_pages_array(struct xen_gem_object *xen_obj) +{ + kvfree(xen_obj->pages); + xen_obj->pages = NULL; +} + +static struct xen_gem_object *gem_create_obj(struct drm_device *dev, + size_t size) +{ + struct xen_gem_object *xen_obj; + int ret; + + xen_obj = kzalloc(sizeof(*xen_obj), GFP_KERNEL); + if (!xen_obj) + return ERR_PTR(-ENOMEM); + + ret = drm_gem_object_init(dev, &xen_obj->base, size); + if (ret < 0) { + kfree(xen_obj); + return ERR_PTR(ret); + } + + return xen_obj; +} + +static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + struct xen_gem_object *xen_obj; + int ret; + + size = round_up(size, PAGE_SIZE); + xen_obj = gem_create_obj(dev, size); + if (IS_ERR_OR_NULL(xen_obj)) + return xen_obj; + + if (drm_info->front_info->cfg.be_alloc) { + /* + * backend will allocate space for this buffer, so + * only allocate array of pointers to pages + */ + ret = gem_alloc_pages_array(xen_obj, size); + if (ret < 0) + goto fail; + + /* + * allocate ballooned pages which will be used to map + * grant references provided by the backend + */ + ret = alloc_xenballooned_pages(xen_obj->num_pages, + xen_obj->pages); + if (ret < 0) { + DRM_ERROR("Cannot allocate %zu ballooned pages: %d\n", + xen_obj->num_pages, ret); + gem_free_pages_array(xen_obj); + goto fail; + } + + xen_obj->be_alloc = true; + return xen_obj; + } + /* + * need to allocate backing pages now, so we can share those + * with the backend + */ + xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE); + xen_obj->pages = drm_gem_get_pages(&xen_obj->base); + if (IS_ERR_OR_NULL(xen_obj->pages)) { + ret = PTR_ERR(xen_obj->pages); + xen_obj->pages = NULL; + goto fail; + } + + return xen_obj; + +fail: + DRM_ERROR("Failed to allocate buffer with size %zu\n", size); + return ERR_PTR(ret); +} + +struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev, + size_t size) +{ + struct xen_gem_object *xen_obj; + + xen_obj = gem_create(dev, size); + if (IS_ERR_OR_NULL(xen_obj)) + return ERR_CAST(xen_obj); + + return &xen_obj->base; +} + +void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + + if (xen_obj->base.import_attach) { + drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt_imported); + gem_free_pages_array(xen_obj); + } else { + if (xen_obj->pages) { + if (xen_obj->be_alloc) { + free_xenballooned_pages(xen_obj->num_pages, + xen_obj->pages); + gem_free_pages_array(xen_obj); + } else { + drm_gem_put_pages(&xen_obj->base, + xen_obj->pages, true, false); + } + } + } + drm_gem_object_release(gem_obj); + kfree(xen_obj); +} + +struct page **xen_drm_front_gem_get_pages(struct drm_gem_object *gem_obj) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + + return xen_obj->pages; +} + +struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + + if (!xen_obj->pages) + return NULL; + + return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages); +} + +struct drm_gem_object * +xen_drm_front_gem_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + struct xen_gem_object *xen_obj; + size_t size; + int ret; + + size = attach->dmabuf->size; + xen_obj = gem_create_obj(dev, size); + if (IS_ERR_OR_NULL(xen_obj)) + return ERR_CAST(xen_obj); + 
+ ret = gem_alloc_pages_array(xen_obj, size); + if (ret < 0) + return ERR_PTR(ret); + + xen_obj->sgt_imported = sgt; + + ret = drm_prime_sg_to_page_addr_arrays(sgt, xen_obj->pages, + NULL, xen_obj->num_pages); + if (ret < 0) + return ERR_PTR(ret); + + ret = xen_drm_front_dbuf_create(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(&xen_obj->base), + 0, 0, 0, size, xen_obj->pages); + if (ret < 0) + return ERR_PTR(ret); + + DRM_DEBUG("Imported buffer of size %zu with nents %u\n", + size, sgt->nents); + + return &xen_obj->base; +} + +static int gem_mmap_obj(struct xen_gem_object *xen_obj, + struct vm_area_struct *vma) +{ + unsigned long addr = vma->vm_start; + int i; + + /* + * clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map + * the whole buffer. + */ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_MIXEDMAP; + vma->vm_pgoff = 0; + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + + /* + * vm_operations_struct.fault handler will be called if CPU access + * to VM is here. For GPUs this isn't the case, because CPU + * doesn't touch the memory. Insert pages now, so both CPU and GPU are + * happy. + * FIXME: as we insert all the pages now then no .fault handler must + * be called, so don't provide one + */ + for (i = 0; i < xen_obj->num_pages; i++) { + int ret; + + ret = vm_insert_page(vma, addr, xen_obj->pages[i]); + if (ret < 0) { + DRM_ERROR("Failed to insert pages into vma: %d\n", ret); + return ret; + } + + addr += PAGE_SIZE; + } + return 0; +} + +int xen_drm_front_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct xen_gem_object *xen_obj; + struct drm_gem_object *gem_obj; + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret < 0) + return ret; + + gem_obj = vma->vm_private_data; + xen_obj = to_xen_gem_obj(gem_obj); + return gem_mmap_obj(xen_obj, vma); +} + +void *xen_drm_front_gem_prime_vmap(struct drm_gem_object *gem_obj) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + + if (!xen_obj->pages) + return NULL; + + return vmap(xen_obj->pages, xen_obj->num_pages, + VM_MAP, pgprot_writecombine(PAGE_KERNEL)); +} + +void xen_drm_front_gem_prime_vunmap(struct drm_gem_object *gem_obj, + void *vaddr) +{ + vunmap(vaddr); +} + +int xen_drm_front_gem_prime_mmap(struct drm_gem_object *gem_obj, + struct vm_area_struct *vma) +{ + struct xen_gem_object *xen_obj; + int ret; + + ret = drm_gem_mmap_obj(gem_obj, gem_obj->size, vma); + if (ret < 0) + return ret; + + xen_obj = to_xen_gem_obj(gem_obj); + return gem_mmap_obj(xen_obj, vma); +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.h b/drivers/gpu/drm/xen/xen_drm_front_gem.h new file mode 100644 index 000000000000..d5ab734fdafe --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. 
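
gem_mmap_obj() above premaps every backing page instead of relying on a fault handler, because the shared pages must be usable by both CPU and backend immediately. vm_insert_page() wants a VM_MIXEDMAP mapping, which is why the VM_PFNMAP flag set by drm_gem_mmap() is cleared first. The same pattern in isolation (assuming pages[] holds num_pages entries):

static int ex_premap(struct vm_area_struct *vma, struct page **pages,
                     size_t num_pages)
{
        unsigned long addr = vma->vm_start;
        size_t i;
        int ret;

        vma->vm_flags &= ~VM_PFNMAP;    /* set by drm_gem_mmap() */
        vma->vm_flags |= VM_MIXEDMAP;   /* struct-page based mapping */
        vma->vm_page_prot =
                pgprot_writecombine(vm_get_page_prot(vma->vm_flags));

        for (i = 0; i < num_pages; i++, addr += PAGE_SIZE) {
                ret = vm_insert_page(vma, addr, pages[i]);
                if (ret < 0)
                        return ret;
        }
        return 0;
}
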
+ * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_GEM_H +#define __XEN_DRM_FRONT_GEM_H + +#include <drm/drmP.h> + +struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev, + size_t size); + +struct drm_gem_object * +xen_drm_front_gem_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); + +struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj); + +struct page **xen_drm_front_gem_get_pages(struct drm_gem_object *obj); + +void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj); + +int xen_drm_front_gem_mmap(struct file *filp, struct vm_area_struct *vma); + +void *xen_drm_front_gem_prime_vmap(struct drm_gem_object *gem_obj); + +void xen_drm_front_gem_prime_vunmap(struct drm_gem_object *gem_obj, + void *vaddr); + +int xen_drm_front_gem_prime_mmap(struct drm_gem_object *gem_obj, + struct vm_area_struct *vma); + +#endif /* __XEN_DRM_FRONT_GEM_H */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c new file mode 100644 index 000000000000..a3479eb72d79 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include "xen_drm_front_kms.h" + +#include <drm/drmP.h> +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_framebuffer_helper.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_conn.h" + +/* + * Timeout in ms to wait for frame done event from the backend: + * must be a bit more than IO time-out + */ +#define FRAME_DONE_TO_MS (XEN_DRM_FRONT_WAIT_BACK_MS + 100) + +static struct xen_drm_front_drm_pipeline * +to_xen_drm_pipeline(struct drm_simple_display_pipe *pipe) +{ + return container_of(pipe, struct xen_drm_front_drm_pipeline, pipe); +} + +static void fb_destroy(struct drm_framebuffer *fb) +{ + struct xen_drm_front_drm_info *drm_info = fb->dev->dev_private; + int idx; + + if (drm_dev_enter(fb->dev, &idx)) { + xen_drm_front_fb_detach(drm_info->front_info, + xen_drm_front_fb_to_cookie(fb)); + drm_dev_exit(idx); + } + drm_gem_fb_destroy(fb); +} + +static struct drm_framebuffer_funcs fb_funcs = { + .destroy = fb_destroy, +}; + +static struct drm_framebuffer * +fb_create(struct drm_device *dev, struct drm_file *filp, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + static struct drm_framebuffer *fb; + struct drm_gem_object *gem_obj; + int ret; + + fb = drm_gem_fb_create_with_funcs(dev, filp, mode_cmd, &fb_funcs); + if (IS_ERR_OR_NULL(fb)) + return fb; + + gem_obj = drm_gem_object_lookup(filp, mode_cmd->handles[0]); + if (!gem_obj) { + DRM_ERROR("Failed to lookup GEM object\n"); + ret = -ENOENT; + goto fail; + } + + drm_gem_object_put_unlocked(gem_obj); + + ret = xen_drm_front_fb_attach(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(gem_obj), + xen_drm_front_fb_to_cookie(fb), + fb->width, fb->height, + fb->format->format); + if (ret < 0) { + DRM_ERROR("Back failed to attach FB %p: %d\n", fb, ret); + goto fail; + } + + return fb; + +fail: + drm_gem_fb_destroy(fb); + return ERR_PTR(ret); +} + +static const struct drm_mode_config_funcs mode_config_funcs = { + .fb_create = fb_create, + .atomic_check 
= drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static void send_pending_event(struct xen_drm_front_drm_pipeline *pipeline) +{ + struct drm_crtc *crtc = &pipeline->pipe.crtc; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + if (pipeline->pending_event) + drm_crtc_send_vblank_event(crtc, pipeline->pending_event); + pipeline->pending_event = NULL; + spin_unlock_irqrestore(&dev->event_lock, flags); +} + +static void display_enable(struct drm_simple_display_pipe *pipe, + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(pipe); + struct drm_crtc *crtc = &pipe->crtc; + struct drm_framebuffer *fb = plane_state->fb; + int ret, idx; + + if (!drm_dev_enter(pipe->crtc.dev, &idx)) + return; + + ret = xen_drm_front_mode_set(pipeline, crtc->x, crtc->y, + fb->width, fb->height, + fb->format->cpp[0] * 8, + xen_drm_front_fb_to_cookie(fb)); + + if (ret) { + DRM_ERROR("Failed to enable display: %d\n", ret); + pipeline->conn_connected = false; + } + + drm_dev_exit(idx); +} + +static void display_disable(struct drm_simple_display_pipe *pipe) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(pipe); + int ret = 0, idx; + + if (drm_dev_enter(pipe->crtc.dev, &idx)) { + ret = xen_drm_front_mode_set(pipeline, 0, 0, 0, 0, 0, + xen_drm_front_fb_to_cookie(NULL)); + drm_dev_exit(idx); + } + if (ret) + DRM_ERROR("Failed to disable display: %d\n", ret); + + /* Make sure we can restart with enabled connector next time */ + pipeline->conn_connected = true; + + /* release stalled event if any */ + send_pending_event(pipeline); +} + +void xen_drm_front_kms_on_frame_done(struct xen_drm_front_drm_pipeline *pipeline, + u64 fb_cookie) +{ + /* + * This runs in interrupt context, e.g. under + * drm_info->front_info->io_lock, so we cannot call _sync version + * to cancel the work + */ + cancel_delayed_work(&pipeline->pflip_to_worker); + + send_pending_event(pipeline); +} + +static void pflip_to_worker(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct xen_drm_front_drm_pipeline *pipeline = + container_of(delayed_work, + struct xen_drm_front_drm_pipeline, + pflip_to_worker); + + DRM_ERROR("Frame done timed-out, releasing"); + send_pending_event(pipeline); +} + +static bool display_send_page_flip(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *old_plane_state) +{ + struct drm_plane_state *plane_state = + drm_atomic_get_new_plane_state(old_plane_state->state, + &pipe->plane); + + /* + * If old_plane_state->fb is NULL and plane_state->fb is not, + * then this is an atomic commit which will enable display. + * If old_plane_state->fb is not NULL and plane_state->fb is, + * then this is an atomic commit which will disable display. + * Ignore these and do not send page flip as this framebuffer will be + * sent to the backend as a part of display_set_config call. 
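
The page-flip path here is guarded by a watchdog: a delayed work item is armed just before the flip request goes to the backend, and either the frame-done event or the expired worker releases the pending vblank event, so user space can never block forever on a dead backend. The shape of that pattern (ex_ names are placeholders):

struct ex_pipeline {
        struct delayed_work pflip_to_worker;
};

static void ex_arm_flip_watchdog(struct ex_pipeline *p, unsigned int to_ms)
{
        schedule_delayed_work(&p->pflip_to_worker, msecs_to_jiffies(to_ms));
}

static void ex_on_frame_done(struct ex_pipeline *p)
{
        /*
         * Interrupt context: use cancel_delayed_work(), not the _sync
         * variant, so this never sleeps under the io_lock.
         */
        cancel_delayed_work(&p->pflip_to_worker);
}
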
+ */ + if (old_plane_state->fb && plane_state->fb) { + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(pipe); + struct xen_drm_front_drm_info *drm_info = pipeline->drm_info; + int ret; + + schedule_delayed_work(&pipeline->pflip_to_worker, + msecs_to_jiffies(FRAME_DONE_TO_MS)); + + ret = xen_drm_front_page_flip(drm_info->front_info, + pipeline->index, + xen_drm_front_fb_to_cookie(plane_state->fb)); + if (ret) { + DRM_ERROR("Failed to send page flip request to backend: %d\n", ret); + + pipeline->conn_connected = false; + /* + * Report the flip not handled, so pending event is + * sent, unblocking user-space. + */ + return false; + } + /* + * Signal that page flip was handled, pending event will be sent + * on frame done event from the backend. + */ + return true; + } + + return false; +} + +static void display_update(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *old_plane_state) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(pipe); + struct drm_crtc *crtc = &pipe->crtc; + struct drm_pending_vblank_event *event; + int idx; + + event = crtc->state->event; + if (event) { + struct drm_device *dev = crtc->dev; + unsigned long flags; + + WARN_ON(pipeline->pending_event); + + spin_lock_irqsave(&dev->event_lock, flags); + crtc->state->event = NULL; + + pipeline->pending_event = event; + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + if (!drm_dev_enter(pipe->crtc.dev, &idx)) { + send_pending_event(pipeline); + return; + } + + /* + * Send page flip request to the backend *after* we have event cached + * above, so on page flip done event from the backend we can + * deliver it and there is no race condition between this code and + * event from the backend. + * If this is not a page flip, e.g. no flip done event from the backend + * is expected, then send now. 
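
Note how every hardware-touching hook in this file brackets its work with drm_dev_enter()/drm_dev_exit(), the unplug-safe critical section this merge introduces (see the include/drm/drm_drv.h hunk further down). A minimal usage sketch:

static void ex_touch_hw(struct drm_device *dev)
{
        int idx;

        if (!drm_dev_enter(dev, &idx))
                return;         /* device unplugged, skip the access */

        /* ... program registers / message the backend ... */

        drm_dev_exit(idx);
}
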
+ */ + if (!display_send_page_flip(pipe, old_plane_state)) + send_pending_event(pipeline); + + drm_dev_exit(idx); +} + +static enum drm_mode_status +display_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode) +{ + struct xen_drm_front_drm_pipeline *pipeline = + container_of(crtc, struct xen_drm_front_drm_pipeline, + pipe.crtc); + + if (mode->hdisplay != pipeline->width) + return MODE_ERROR; + + if (mode->vdisplay != pipeline->height) + return MODE_ERROR; + + return MODE_OK; +} + +static const struct drm_simple_display_pipe_funcs display_funcs = { + .mode_valid = display_mode_valid, + .enable = display_enable, + .disable = display_disable, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, + .update = display_update, +}; + +static int display_pipe_init(struct xen_drm_front_drm_info *drm_info, + int index, struct xen_drm_front_cfg_connector *cfg, + struct xen_drm_front_drm_pipeline *pipeline) +{ + struct drm_device *dev = drm_info->drm_dev; + const u32 *formats; + int format_count; + int ret; + + pipeline->drm_info = drm_info; + pipeline->index = index; + pipeline->height = cfg->height; + pipeline->width = cfg->width; + + INIT_DELAYED_WORK(&pipeline->pflip_to_worker, pflip_to_worker); + + ret = xen_drm_front_conn_init(drm_info, &pipeline->conn); + if (ret) + return ret; + + formats = xen_drm_front_conn_get_formats(&format_count); + + return drm_simple_display_pipe_init(dev, &pipeline->pipe, + &display_funcs, formats, + format_count, NULL, + &pipeline->conn); +} + +int xen_drm_front_kms_init(struct xen_drm_front_drm_info *drm_info) +{ + struct drm_device *dev = drm_info->drm_dev; + int i, ret; + + drm_mode_config_init(dev); + + dev->mode_config.min_width = 0; + dev->mode_config.min_height = 0; + dev->mode_config.max_width = 4095; + dev->mode_config.max_height = 2047; + dev->mode_config.funcs = &mode_config_funcs; + + for (i = 0; i < drm_info->front_info->cfg.num_connectors; i++) { + struct xen_drm_front_cfg_connector *cfg = + &drm_info->front_info->cfg.connectors[i]; + struct xen_drm_front_drm_pipeline *pipeline = + &drm_info->pipeline[i]; + + ret = display_pipe_init(drm_info, i, cfg, pipeline); + if (ret) { + drm_mode_config_cleanup(dev); + return ret; + } + } + + drm_mode_config_reset(dev); + drm_kms_helper_poll_init(dev); + return 0; +} + +void xen_drm_front_kms_fini(struct xen_drm_front_drm_info *drm_info) +{ + int i; + + for (i = 0; i < drm_info->front_info->cfg.num_connectors; i++) { + struct xen_drm_front_drm_pipeline *pipeline = + &drm_info->pipeline[i]; + + cancel_delayed_work_sync(&pipeline->pflip_to_worker); + + send_pending_event(pipeline); + } +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.h b/drivers/gpu/drm/xen/xen_drm_front_kms.h new file mode 100644 index 000000000000..ab2fbad4fbbf --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. 
+ * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_KMS_H_ +#define __XEN_DRM_FRONT_KMS_H_ + +#include <linux/types.h> + +struct xen_drm_front_drm_info; +struct xen_drm_front_drm_pipeline; + +int xen_drm_front_kms_init(struct xen_drm_front_drm_info *drm_info); + +void xen_drm_front_kms_fini(struct xen_drm_front_drm_info *drm_info); + +void xen_drm_front_kms_on_frame_done(struct xen_drm_front_drm_pipeline *pipeline, + u64 fb_cookie); + +#endif /* __XEN_DRM_FRONT_KMS_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c new file mode 100644 index 000000000000..8099cb343ae3 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drmP.h> + +#if defined(CONFIG_X86) +#include <drm/drm_cache.h> +#endif +#include <linux/errno.h> +#include <linux/mm.h> + +#include <asm/xen/hypervisor.h> +#include <xen/balloon.h> +#include <xen/xen.h> +#include <xen/xenbus.h> +#include <xen/interface/io/ring.h> +#include <xen/interface/io/displif.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_shbuf.h" + +struct xen_drm_front_shbuf_ops { + /* + * Calculate number of grefs required to handle this buffer, + * e.g. if grefs are required for page directory only or the buffer + * pages as well. + */ + void (*calc_num_grefs)(struct xen_drm_front_shbuf *buf); + /* Fill page directory according to para-virtual display protocol. */ + void (*fill_page_dir)(struct xen_drm_front_shbuf *buf); + /* Claim grant references for the pages of the buffer. */ + int (*grant_refs_for_buffer)(struct xen_drm_front_shbuf *buf, + grant_ref_t *priv_gref_head, int gref_idx); + /* Map grant references of the buffer. */ + int (*map)(struct xen_drm_front_shbuf *buf); + /* Unmap grant references of the buffer. 
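
The XEN_DRM_NUM_GREFS_PER_PAGE macro defined just below subtracts the struct xendispl_page_directory header before dividing, so each directory page carries slightly fewer than PAGE_SIZE / sizeof(grant_ref_t) references. A worked example, assuming 4 KiB pages, a 4-byte grant_ref_t and a single-gref directory header:

/*
 * grefs_per_page = (4096 - offsetof(struct xendispl_page_directory, gref)) / 4
 *                = (4096 - 4) / 4 = 1023
 *
 * An 8 MiB buffer spans 2048 pages, so a locally allocated buffer needs
 *   DIV_ROUND_UP(2048, 1023) = 3 directory pages
 * and therefore 3 + 2048 grant references in total; a backend-allocated
 * buffer needs only the 3 directory grefs.
 */
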
*/ + int (*unmap)(struct xen_drm_front_shbuf *buf); +}; + +grant_ref_t xen_drm_front_shbuf_get_dir_start(struct xen_drm_front_shbuf *buf) +{ + if (!buf->grefs) + return GRANT_INVALID_REF; + + return buf->grefs[0]; +} + +int xen_drm_front_shbuf_map(struct xen_drm_front_shbuf *buf) +{ + if (buf->ops->map) + return buf->ops->map(buf); + + /* no need to map own grant references */ + return 0; +} + +int xen_drm_front_shbuf_unmap(struct xen_drm_front_shbuf *buf) +{ + if (buf->ops->unmap) + return buf->ops->unmap(buf); + + /* no need to unmap own grant references */ + return 0; +} + +void xen_drm_front_shbuf_flush(struct xen_drm_front_shbuf *buf) +{ +#if defined(CONFIG_X86) + drm_clflush_pages(buf->pages, buf->num_pages); +#endif +} + +void xen_drm_front_shbuf_free(struct xen_drm_front_shbuf *buf) +{ + if (buf->grefs) { + int i; + + for (i = 0; i < buf->num_grefs; i++) + if (buf->grefs[i] != GRANT_INVALID_REF) + gnttab_end_foreign_access(buf->grefs[i], + 0, 0UL); + } + kfree(buf->grefs); + kfree(buf->directory); + kfree(buf); +} + +/* + * number of grefs a page can hold with respect to the + * struct xendispl_page_directory header + */ +#define XEN_DRM_NUM_GREFS_PER_PAGE ((PAGE_SIZE - \ + offsetof(struct xendispl_page_directory, gref)) / \ + sizeof(grant_ref_t)) + +static int get_num_pages_dir(struct xen_drm_front_shbuf *buf) +{ + /* number of pages the page directory consumes itself */ + return DIV_ROUND_UP(buf->num_pages, XEN_DRM_NUM_GREFS_PER_PAGE); +} + +static void backend_calc_num_grefs(struct xen_drm_front_shbuf *buf) +{ + /* only for pages the page directory consumes itself */ + buf->num_grefs = get_num_pages_dir(buf); +} + +static void guest_calc_num_grefs(struct xen_drm_front_shbuf *buf) +{ + /* + * number of pages the page directory consumes itself + * plus grefs for the buffer pages + */ + buf->num_grefs = get_num_pages_dir(buf) + buf->num_pages; +} + +#define xen_page_to_vaddr(page) \ + ((phys_addr_t)pfn_to_kaddr(page_to_xen_pfn(page))) + +static int backend_unmap(struct xen_drm_front_shbuf *buf) +{ + struct gnttab_unmap_grant_ref *unmap_ops; + int i, ret; + + if (!buf->pages || !buf->backend_map_handles || !buf->grefs) + return 0; + + unmap_ops = kcalloc(buf->num_pages, sizeof(*unmap_ops), + GFP_KERNEL); + if (!unmap_ops) { + DRM_ERROR("Failed to get memory while unmapping\n"); + return -ENOMEM; + } + + for (i = 0; i < buf->num_pages; i++) { + phys_addr_t addr; + + addr = xen_page_to_vaddr(buf->pages[i]); + gnttab_set_unmap_op(&unmap_ops[i], addr, GNTMAP_host_map, + buf->backend_map_handles[i]); + } + + ret = gnttab_unmap_refs(unmap_ops, NULL, buf->pages, + buf->num_pages); + + for (i = 0; i < buf->num_pages; i++) { + if (unlikely(unmap_ops[i].status != GNTST_okay)) + DRM_ERROR("Failed to unmap page %d: %d\n", + i, unmap_ops[i].status); + } + + if (ret) + DRM_ERROR("Failed to unmap grant references, ret %d", ret); + + kfree(unmap_ops); + kfree(buf->backend_map_handles); + buf->backend_map_handles = NULL; + return ret; +} + +static int backend_map(struct xen_drm_front_shbuf *buf) +{ + struct gnttab_map_grant_ref *map_ops = NULL; + unsigned char *ptr; + int ret, cur_gref, cur_dir_page, cur_page, grefs_left; + + map_ops = kcalloc(buf->num_pages, sizeof(*map_ops), GFP_KERNEL); + if (!map_ops) + return -ENOMEM; + + buf->backend_map_handles = kcalloc(buf->num_pages, + sizeof(*buf->backend_map_handles), + GFP_KERNEL); + if (!buf->backend_map_handles) { + kfree(map_ops); + return -ENOMEM; + } + + /* + * read page directory to get grefs from the backend: for external + * buffer we only 
allocate buf->grefs for the page directory, + * so buf->num_grefs has number of pages in the page directory itself + */ + ptr = buf->directory; + grefs_left = buf->num_pages; + cur_page = 0; + for (cur_dir_page = 0; cur_dir_page < buf->num_grefs; cur_dir_page++) { + struct xendispl_page_directory *page_dir = + (struct xendispl_page_directory *)ptr; + int to_copy = XEN_DRM_NUM_GREFS_PER_PAGE; + + if (to_copy > grefs_left) + to_copy = grefs_left; + + for (cur_gref = 0; cur_gref < to_copy; cur_gref++) { + phys_addr_t addr; + + addr = xen_page_to_vaddr(buf->pages[cur_page]); + gnttab_set_map_op(&map_ops[cur_page], addr, + GNTMAP_host_map, + page_dir->gref[cur_gref], + buf->xb_dev->otherend_id); + cur_page++; + } + + grefs_left -= to_copy; + ptr += PAGE_SIZE; + } + ret = gnttab_map_refs(map_ops, NULL, buf->pages, buf->num_pages); + + /* save handles even if error, so we can unmap */ + for (cur_page = 0; cur_page < buf->num_pages; cur_page++) { + buf->backend_map_handles[cur_page] = map_ops[cur_page].handle; + if (unlikely(map_ops[cur_page].status != GNTST_okay)) + DRM_ERROR("Failed to map page %d: %d\n", + cur_page, map_ops[cur_page].status); + } + + if (ret) { + DRM_ERROR("Failed to map grant references, ret %d", ret); + backend_unmap(buf); + } + + kfree(map_ops); + return ret; +} + +static void backend_fill_page_dir(struct xen_drm_front_shbuf *buf) +{ + struct xendispl_page_directory *page_dir; + unsigned char *ptr; + int i, num_pages_dir; + + ptr = buf->directory; + num_pages_dir = get_num_pages_dir(buf); + + /* fill only grefs for the page directory itself */ + for (i = 0; i < num_pages_dir - 1; i++) { + page_dir = (struct xendispl_page_directory *)ptr; + + page_dir->gref_dir_next_page = buf->grefs[i + 1]; + ptr += PAGE_SIZE; + } + /* last page must say there is no more pages */ + page_dir = (struct xendispl_page_directory *)ptr; + page_dir->gref_dir_next_page = GRANT_INVALID_REF; +} + +static void guest_fill_page_dir(struct xen_drm_front_shbuf *buf) +{ + unsigned char *ptr; + int cur_gref, grefs_left, to_copy, i, num_pages_dir; + + ptr = buf->directory; + num_pages_dir = get_num_pages_dir(buf); + + /* + * while copying, skip grefs at start, they are for pages + * granted for the page directory itself + */ + cur_gref = num_pages_dir; + grefs_left = buf->num_pages; + for (i = 0; i < num_pages_dir; i++) { + struct xendispl_page_directory *page_dir = + (struct xendispl_page_directory *)ptr; + + if (grefs_left <= XEN_DRM_NUM_GREFS_PER_PAGE) { + to_copy = grefs_left; + page_dir->gref_dir_next_page = GRANT_INVALID_REF; + } else { + to_copy = XEN_DRM_NUM_GREFS_PER_PAGE; + page_dir->gref_dir_next_page = buf->grefs[i + 1]; + } + memcpy(&page_dir->gref, &buf->grefs[cur_gref], + to_copy * sizeof(grant_ref_t)); + ptr += PAGE_SIZE; + grefs_left -= to_copy; + cur_gref += to_copy; + } +} + +static int guest_grant_refs_for_buffer(struct xen_drm_front_shbuf *buf, + grant_ref_t *priv_gref_head, + int gref_idx) +{ + int i, cur_ref, otherend_id; + + otherend_id = buf->xb_dev->otherend_id; + for (i = 0; i < buf->num_pages; i++) { + cur_ref = gnttab_claim_grant_reference(priv_gref_head); + if (cur_ref < 0) + return cur_ref; + + gnttab_grant_foreign_access_ref(cur_ref, otherend_id, + xen_page_to_gfn(buf->pages[i]), + 0); + buf->grefs[gref_idx++] = cur_ref; + } + return 0; +} + +static int grant_references(struct xen_drm_front_shbuf *buf) +{ + grant_ref_t priv_gref_head; + int ret, i, j, cur_ref; + int otherend_id, num_pages_dir; + + ret = gnttab_alloc_grant_references(buf->num_grefs, &priv_gref_head); + if (ret < 
0) { + DRM_ERROR("Cannot allocate grant references\n"); + return ret; + } + + otherend_id = buf->xb_dev->otherend_id; + j = 0; + num_pages_dir = get_num_pages_dir(buf); + for (i = 0; i < num_pages_dir; i++) { + unsigned long frame; + + cur_ref = gnttab_claim_grant_reference(&priv_gref_head); + if (cur_ref < 0) + return cur_ref; + + frame = xen_page_to_gfn(virt_to_page(buf->directory + + PAGE_SIZE * i)); + gnttab_grant_foreign_access_ref(cur_ref, otherend_id, frame, 0); + buf->grefs[j++] = cur_ref; + } + + if (buf->ops->grant_refs_for_buffer) { + ret = buf->ops->grant_refs_for_buffer(buf, &priv_gref_head, j); + if (ret) + return ret; + } + + gnttab_free_grant_references(priv_gref_head); + return 0; +} + +static int alloc_storage(struct xen_drm_front_shbuf *buf) +{ + buf->grefs = kcalloc(buf->num_grefs, sizeof(*buf->grefs), GFP_KERNEL); + if (!buf->grefs) + return -ENOMEM; + + buf->directory = kcalloc(get_num_pages_dir(buf), PAGE_SIZE, GFP_KERNEL); + if (!buf->directory) + return -ENOMEM; + + return 0; +} + +/* + * For be allocated buffers we don't need grant_refs_for_buffer as those + * grant references are allocated at backend side + */ +static const struct xen_drm_front_shbuf_ops backend_ops = { + .calc_num_grefs = backend_calc_num_grefs, + .fill_page_dir = backend_fill_page_dir, + .map = backend_map, + .unmap = backend_unmap +}; + +/* For locally granted references we do not need to map/unmap the references */ +static const struct xen_drm_front_shbuf_ops local_ops = { + .calc_num_grefs = guest_calc_num_grefs, + .fill_page_dir = guest_fill_page_dir, + .grant_refs_for_buffer = guest_grant_refs_for_buffer, +}; + +struct xen_drm_front_shbuf * +xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg) +{ + struct xen_drm_front_shbuf *buf; + int ret; + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + if (cfg->be_alloc) + buf->ops = &backend_ops; + else + buf->ops = &local_ops; + + buf->xb_dev = cfg->xb_dev; + buf->num_pages = DIV_ROUND_UP(cfg->size, PAGE_SIZE); + buf->pages = cfg->pages; + + buf->ops->calc_num_grefs(buf); + + ret = alloc_storage(buf); + if (ret) + goto fail; + + ret = grant_references(buf); + if (ret) + goto fail; + + buf->ops->fill_page_dir(buf); + + return buf; + +fail: + xen_drm_front_shbuf_free(buf); + return ERR_PTR(ret); +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.h b/drivers/gpu/drm/xen/xen_drm_front_shbuf.h new file mode 100644 index 000000000000..7545c692539e --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_shbuf.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. 
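
Putting the allocator together, a caller shares a buffer in two steps: build a config, then hand the first directory gref to the backend. A usage sketch based on the API above (variable names are illustrative):

struct xen_drm_front_shbuf_cfg cfg = {
        .xb_dev   = xb_dev,             /* the frontend's xenbus device */
        .size     = size,
        .pages    = pages,              /* e.g. from drm_gem_get_pages() */
        .be_alloc = be_alloc,           /* selects backend_ops vs local_ops */
};
struct xen_drm_front_shbuf *shbuf;
grant_ref_t dir_start;

shbuf = xen_drm_front_shbuf_alloc(&cfg);
if (IS_ERR(shbuf))
        return PTR_ERR(shbuf);

/* this gref is what a dbuf-create request would send to the backend */
dir_start = xen_drm_front_shbuf_get_dir_start(shbuf);
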
+ * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_SHBUF_H_ +#define __XEN_DRM_FRONT_SHBUF_H_ + +#include <linux/kernel.h> +#include <linux/scatterlist.h> + +#include <xen/grant_table.h> + +struct xen_drm_front_shbuf { + /* + * number of references granted for the backend use: + * - for allocated/imported dma-buf's this holds number of grant + * references for the page directory and pages of the buffer + * - for the buffer provided by the backend this holds number of + * grant references for the page directory as grant references for + * the buffer will be provided by the backend + */ + int num_grefs; + grant_ref_t *grefs; + unsigned char *directory; + + int num_pages; + struct page **pages; + + struct xenbus_device *xb_dev; + + /* these are the ops used internally depending on be_alloc mode */ + const struct xen_drm_front_shbuf_ops *ops; + + /* Xen map handles for the buffer allocated by the backend */ + grant_handle_t *backend_map_handles; +}; + +struct xen_drm_front_shbuf_cfg { + struct xenbus_device *xb_dev; + size_t size; + struct page **pages; + bool be_alloc; +}; + +struct xen_drm_front_shbuf * +xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg); + +grant_ref_t xen_drm_front_shbuf_get_dir_start(struct xen_drm_front_shbuf *buf); + +int xen_drm_front_shbuf_map(struct xen_drm_front_shbuf *buf); + +int xen_drm_front_shbuf_unmap(struct xen_drm_front_shbuf *buf); + +void xen_drm_front_shbuf_flush(struct xen_drm_front_shbuf *buf); + +void xen_drm_front_shbuf_free(struct xen_drm_front_shbuf *buf); + +#endif /* __XEN_DRM_FRONT_SHBUF_H_ */ diff --git a/drivers/gpu/drm/zte/zx_plane.c b/drivers/gpu/drm/zte/zx_plane.c index 94545adac50d..d1931f5ea0b2 100644 --- a/drivers/gpu/drm/zte/zx_plane.c +++ b/drivers/gpu/drm/zte/zx_plane.c @@ -268,7 +268,7 @@ static void zx_plane_atomic_disable(struct drm_plane *plane, struct zx_plane *zplane = to_zx_plane(plane); void __iomem *hbsc = zplane->hbsc; - zx_vou_layer_disable(plane); + zx_vou_layer_disable(plane, old_state); /* Disable HBSC block */ zx_writel_mask(hbsc + HBSC_CTRL0, HBSC_CTRL_EN, 0); diff --git a/drivers/gpu/drm/zte/zx_vou.c b/drivers/gpu/drm/zte/zx_vou.c index 7491813131f3..442311d31110 100644 --- a/drivers/gpu/drm/zte/zx_vou.c +++ b/drivers/gpu/drm/zte/zx_vou.c @@ -627,9 +627,10 @@ void zx_vou_layer_enable(struct drm_plane *plane) zx_writel_mask(vou->osd + OSD_CTRL0, bits->enable, bits->enable); } -void zx_vou_layer_disable(struct drm_plane *plane) +void zx_vou_layer_disable(struct drm_plane *plane, + struct drm_plane_state *old_state) { - struct zx_crtc *zcrtc = to_zx_crtc(plane->crtc); + struct zx_crtc *zcrtc = to_zx_crtc(old_state->crtc); struct zx_vou_hw *vou = zcrtc->vou; struct zx_plane *zplane = to_zx_plane(plane); const struct vou_layer_bits *bits = zplane->bits; diff --git a/drivers/gpu/drm/zte/zx_vou.h b/drivers/gpu/drm/zte/zx_vou.h index 97d72bfce982..5b7f84fbb112 100644 --- a/drivers/gpu/drm/zte/zx_vou.h +++ b/drivers/gpu/drm/zte/zx_vou.h @@ -62,6 +62,7 @@ void zx_vou_config_dividers(struct drm_crtc *crtc, struct vou_div_config *configs, int num); void zx_vou_layer_enable(struct drm_plane *plane); -void zx_vou_layer_disable(struct drm_plane *plane); +void zx_vou_layer_disable(struct drm_plane *plane, + struct drm_plane_state *old_state); #endif /* __ZX_VOU_H__ */ diff --git a/drivers/staging/vboxvideo/vbox_drv.c b/drivers/staging/vboxvideo/vbox_drv.c index e18642e5027e..f6d26beffa54 100644 --- a/drivers/staging/vboxvideo/vbox_drv.c +++ 
b/drivers/staging/vboxvideo/vbox_drv.c @@ -242,7 +242,7 @@ static struct drm_driver driver = { .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, - .gem_free_object = vbox_gem_free_object, + .gem_free_object_unlocked = vbox_gem_free_object, .dumb_create = vbox_dumb_create, .dumb_map_offset = vbox_dumb_mmap_offset, .dumb_destroy = drm_gem_dumb_destroy, diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 111a0ab6280a..38716eb50408 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -93,6 +93,9 @@ ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, if (size < length) return -ENOSPC; + if (frame->picture_aspect > HDMI_PICTURE_ASPECT_16_9) + return -EINVAL; + memset(buffer, 0, size); ptr[0] = frame->type; diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index 6c731c52c071..695bde7eb055 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -44,6 +44,7 @@ enum amd_asic_type { CHIP_POLARIS10, CHIP_POLARIS11, CHIP_POLARIS12, + CHIP_VEGAM, CHIP_VEGA10, CHIP_VEGA12, CHIP_RAVEN, diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h index e9a1116d2f8e..475b706b49de 100644 --- a/include/drm/bridge/analogix_dp.h +++ b/include/drm/bridge/analogix_dp.h @@ -33,7 +33,8 @@ struct analogix_dp_plat_data { struct drm_connector *connector; bool skip_connector; - int (*power_on)(struct analogix_dp_plat_data *); + int (*power_on_start)(struct analogix_dp_plat_data *); + int (*power_on_end)(struct analogix_dp_plat_data *); int (*power_off)(struct analogix_dp_plat_data *); int (*attach)(struct analogix_dp_plat_data *, struct drm_bridge *, struct drm_connector *); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index c6666cd09347..f5099c12c6a6 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -95,14 +95,6 @@ struct dma_buf_attachment; struct pci_dev; struct pci_controller; -/***********************************************************************/ -/** \name DRM template customization defaults */ -/*@{*/ - -/***********************************************************************/ -/** \name Internal types and structures */ -/*@{*/ - #define DRM_IF_VERSION(maj, min) (maj << 16 | min) /** @@ -123,27 +115,13 @@ static inline bool drm_drv_uses_atomic_modeset(struct drm_device *dev) #define DRM_SWITCH_POWER_CHANGING 2 #define DRM_SWITCH_POWER_DYNAMIC_OFF 3 -static __inline__ int drm_core_check_feature(struct drm_device *dev, - int feature) +static inline bool drm_core_check_feature(struct drm_device *dev, int feature) { - return ((dev->driver->driver_features & feature) ? 1 : 0); + return dev->driver->driver_features & feature; } -/******************************************************************/ -/** \name Internal function definitions */ -/*@{*/ - - /* Driver support (drm_drv.h) */ - -/* - * These are exported to drivers so that they can implement fencing using - * DMA quiscent + idle. DMA quiescent usually requires the hardware lock. 
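
The drivers/video/hdmi.c hunk above is a range check: the AVI InfoFrame encodes picture aspect in a 2-bit field, so values beyond HDMI_PICTURE_ASPECT_16_9 cannot be signalled and packing now fails with -EINVAL instead of truncating silently. A sketch of a caller coping with that (the fallback policy is illustrative, not from the patch):

static ssize_t ex_pack_avi(struct hdmi_avi_infoframe *frame,
                           void *buf, size_t size)
{
        ssize_t len = hdmi_avi_infoframe_pack(frame, buf, size);

        if (len == -EINVAL) {
                /* aspect not expressible in the frame: drop to NONE */
                frame->picture_aspect = HDMI_PICTURE_ASPECT_NONE;
                len = hdmi_avi_infoframe_pack(frame, buf, size);
        }
        return len;
}
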
- */ - -/*@}*/ - /* returns true if currently okay to sleep */ -static __inline__ bool drm_can_sleep(void) +static inline bool drm_can_sleep(void) { if (in_atomic() || in_dbg_master() || irqs_disabled()) return false; diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index 17606026590b..330c561c4c11 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -36,6 +36,9 @@ static inline bool drm_rotation_90_or_270(unsigned int rotation) return rotation & (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270); } +#define DRM_BLEND_ALPHA_OPAQUE 0xffff + +int drm_plane_create_alpha_property(struct drm_plane *plane); int drm_plane_create_rotation_property(struct drm_plane *plane, unsigned int rotation, unsigned int supported_rotations); diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 7c4fa32f3fc6..858ba19a3e29 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -38,7 +38,6 @@ struct drm_device { struct device *dev; /**< Device structure of bus-device */ struct drm_driver *driver; /**< DRM driver managing the device */ void *dev_private; /**< DRM driver private data */ - struct drm_minor *control; /**< Control node */ struct drm_minor *primary; /**< Primary node */ struct drm_minor *render; /**< Render node */ bool registered; @@ -46,7 +45,14 @@ struct drm_device { /* currently active master for this device. Protected by master_mutex */ struct drm_master *master; - atomic_t unplugged; /**< Flag whether dev is dead */ + /** + * @unplugged: + * + * Flag to tell if the device has been unplugged. + * See drm_dev_enter() and drm_dev_is_unplugged(). + */ + bool unplugged; + struct inode *anon_inode; /**< inode for private address-space */ char *unique; /**< unique name of the device */ /*@} */ diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 62903bae0221..c01564991a9f 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -64,6 +64,11 @@ /* AUX CH addresses */ /* DPCD */ #define DP_DPCD_REV 0x000 +# define DP_DPCD_REV_10 0x10 +# define DP_DPCD_REV_11 0x11 +# define DP_DPCD_REV_12 0x12 +# define DP_DPCD_REV_13 0x13 +# define DP_DPCD_REV_14 0x14 #define DP_MAX_LINK_RATE 0x001 @@ -119,6 +124,7 @@ # define DP_DPCD_DISPLAY_CONTROL_CAPABLE (1 << 3) /* edp v1.2 or higher */ #define DP_TRAINING_AUX_RD_INTERVAL 0x00e /* XXX 1.2? */ +# define DP_TRAINING_AUX_RD_MASK 0x7F /* XXX 1.2? 
*/ #define DP_ADAPTER_CAP 0x00f /* 1.2 */ # define DP_FORCE_LOAD_SENSE_CAP (1 << 0) @@ -478,6 +484,7 @@ # define DP_PSR_FRAME_CAPTURE (1 << 3) # define DP_PSR_SELECTIVE_UPDATE (1 << 4) # define DP_PSR_IRQ_HPD_WITH_CRC_ERRORS (1 << 5) +# define DP_PSR_ENABLE_PSR2 (1 << 6) /* eDP 1.4a */ #define DP_ADAPTER_CTRL 0x1a0 # define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE (1 << 0) @@ -794,6 +801,15 @@ # define DP_LAST_ACTUAL_SYNCHRONIZATION_LATENCY_MASK (0xf << 4) # define DP_LAST_ACTUAL_SYNCHRONIZATION_LATENCY_SHIFT 4 +#define DP_LAST_RECEIVED_PSR_SDP 0x200a /* eDP 1.2 */ +# define DP_PSR_STATE_BIT (1 << 0) /* eDP 1.2 */ +# define DP_UPDATE_RFB_BIT (1 << 1) /* eDP 1.2 */ +# define DP_CRC_VALID_BIT (1 << 2) /* eDP 1.2 */ +# define DP_SU_VALID (1 << 3) /* eDP 1.4 */ +# define DP_FIRST_SCAN_LINE_SU_REGION (1 << 4) /* eDP 1.4 */ +# define DP_LAST_SCAN_LINE_SU_REGION (1 << 5) /* eDP 1.4 */ +# define DP_Y_COORDINATE_VALID (1 << 6) /* eDP 1.4a */ + #define DP_RECEIVER_ALPM_STATUS 0x200b /* eDP 1.4 */ # define DP_ALPM_LOCK_TIMEOUT_ERROR (1 << 0) @@ -967,18 +983,18 @@ int drm_dp_bw_code_to_link_rate(u8 link_bw); #define DP_SDP_VSC_EXT_CEA 0x21 /* DP 1.4 */ /* 0x80+ CEA-861 infoframe types */ -struct edp_sdp_header { +struct dp_sdp_header { u8 HB0; /* Secondary Data Packet ID */ u8 HB1; /* Secondary Data Packet Type */ - u8 HB2; /* 7:5 reserved, 4:0 revision number */ - u8 HB3; /* 7:5 reserved, 4:0 number of valid data bytes */ + u8 HB2; /* Secondary Data Packet Specific header, Byte 0 */ + u8 HB3; /* Secondary Data packet Specific header, Byte 1 */ } __packed; #define EDP_SDP_HEADER_REVISION_MASK 0x1F #define EDP_SDP_HEADER_VALID_PAYLOAD_BYTES 0x1F struct edp_vsc_psr { - struct edp_sdp_header sdp_header; + struct dp_sdp_header sdp_header; u8 DB0; /* Stereo Interface */ u8 DB1; /* 0 - PSR State; 1 - Update RFB; 2 - CRC Valid */ u8 DB2; /* CRC value bits 7:0 of the R or Cr component */ diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index d23dcdd1bd95..7e545f5f94d3 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -624,6 +624,8 @@ void drm_dev_get(struct drm_device *dev); void drm_dev_put(struct drm_device *dev); void drm_dev_unref(struct drm_device *dev); void drm_put_dev(struct drm_device *dev); +bool drm_dev_enter(struct drm_device *dev, int *idx); +void drm_dev_exit(int idx); void drm_dev_unplug(struct drm_device *dev); /** @@ -635,11 +637,16 @@ void drm_dev_unplug(struct drm_device *dev); * unplugged, these two functions guarantee that any store before calling * drm_dev_unplug() is visible to callers of this function after it completes */ -static inline int drm_dev_is_unplugged(struct drm_device *dev) +static inline bool drm_dev_is_unplugged(struct drm_device *dev) { - int ret = atomic_read(&dev->unplugged); - smp_rmb(); - return ret; + int idx; + + if (drm_dev_enter(dev, &idx)) { + drm_dev_exit(idx); + return false; + } + + return true; } diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 8d89a9c3748d..b25d12ef120a 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -465,8 +465,6 @@ struct edid *drm_get_edid(struct drm_connector *connector, struct edid *drm_get_edid_switcheroo(struct drm_connector *connector, struct i2c_adapter *adapter); struct edid *drm_edid_duplicate(const struct edid *edid); -void drm_reset_display_info(struct drm_connector *connector); -u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid); int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid); u8 drm_match_cea_mode(const struct 
drm_display_mode *to_match); diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5176c3797680..027ac16da3d1 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -47,6 +47,9 @@ struct device; * header include loops we need it here for now. */ +/* Note that the order of this enum is ABI (it determines + * /dev/dri/renderD* numbers). + */ enum drm_minor_type { DRM_MINOR_PRIMARY, DRM_MINOR_CONTROL, @@ -182,6 +185,14 @@ struct drm_file { unsigned atomic:1; /** + * @aspect_ratio_allowed: + * + * True, if client can handle picture aspect ratios, and has requested + * to pass this information along with the mode. + */ + unsigned aspect_ratio_allowed:1; + + /** * @is_master: * * This client is the creator of @master. Protected by struct @@ -348,18 +359,6 @@ static inline bool drm_is_render_client(const struct drm_file *file_priv) return file_priv->minor->type == DRM_MINOR_RENDER; } -/** - * drm_is_control_client - is this an open file of the control node - * @file_priv: DRM file - * - * Control nodes are deprecated and in the process of getting removed from the - * DRM userspace API. Do not ever use! - */ -static inline bool drm_is_control_client(const struct drm_file *file_priv) -{ - return file_priv->minor->type == DRM_MINOR_CONTROL; -} - int drm_open(struct inode *inode, struct file *filp); ssize_t drm_read(struct file *filp, char __user *buffer, size_t count, loff_t *offset); diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index 5ca7cdc3f527..a38de7eb55b4 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -10,6 +10,7 @@ struct drm_gem_object; struct drm_mode_fb_cmd2; struct drm_plane; struct drm_plane_state; +struct drm_simple_display_pipe; struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb, unsigned int plane); @@ -27,6 +28,8 @@ drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, int drm_gem_fb_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state); +int drm_gem_fb_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *plane_state); struct drm_framebuffer * drm_gem_fbdev_fb_create(struct drm_device *dev, diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h index add42809642a..fafb6f592c4b 100644 --- a/include/drm/drm_ioctl.h +++ b/include/drm/drm_ioctl.h @@ -109,13 +109,6 @@ enum drm_ioctl_flags { */ DRM_ROOT_ONLY = BIT(2), /** - * @DRM_CONTROL_ALLOW: - * - * Deprecated, do not use. Control nodes are in the process of getting - * removed. 
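
drm_gem_fb_simple_display_pipe_prepare_fb(), declared in the hunk above, lets simple-pipe drivers whose buffers are always pinned reuse the GEM helper to populate drm_plane_state.fence. Wiring it up is a one-liner in the pipe funcs (the ex_ callbacks are hypothetical):

static const struct drm_simple_display_pipe_funcs ex_pipe_funcs = {
        .enable     = ex_enable,
        .disable    = ex_disable,
        .update     = ex_update,
        /* stores the GEM object's implicit fence in the plane state */
        .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb,
};
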
- */ - DRM_CONTROL_ALLOW = BIT(3), - /** * @DRM_UNLOCKED: * * Whether &drm_ioctl_desc.func should be called with the DRM BKL held diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h index cf0e7d89bcdf..8fad66f88e4f 100644 --- a/include/drm/drm_legacy.h +++ b/include/drm/drm_legacy.h @@ -194,8 +194,8 @@ void drm_legacy_ioremap(struct drm_local_map *map, struct drm_device *dev); void drm_legacy_ioremap_wc(struct drm_local_map *map, struct drm_device *dev); void drm_legacy_ioremapfree(struct drm_local_map *map, struct drm_device *dev); -static __inline__ struct drm_local_map *drm_legacy_findmap(struct drm_device *dev, - unsigned int token) +static inline struct drm_local_map *drm_legacy_findmap(struct drm_device *dev, + unsigned int token) { struct drm_map_list *_entry; list_for_each_entry(_entry, &dev->maplist, head) diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 7569f22ffef6..33b3a96d66d0 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -796,6 +796,14 @@ struct drm_mode_config { bool allow_fb_modifiers; /** + * @normalize_zpos: + * + * If true the drm core will call drm_atomic_normalize_zpos() as part of + * atomic mode checking from drm_atomic_helper_check() + */ + bool normalize_zpos; + + /** * @modifiers_property: Plane property to list support modifier/format * combination. */ diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h index 0d310beae6af..b159fe07fcf9 100644 --- a/include/drm/drm_modes.h +++ b/include/drm/drm_modes.h @@ -147,6 +147,12 @@ enum drm_mode_status { #define DRM_MODE_FLAG_3D_MAX DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF +#define DRM_MODE_MATCH_TIMINGS (1 << 0) +#define DRM_MODE_MATCH_CLOCK (1 << 1) +#define DRM_MODE_MATCH_FLAGS (1 << 2) +#define DRM_MODE_MATCH_3D_FLAGS (1 << 3) +#define DRM_MODE_MATCH_ASPECT_RATIO (1 << 4) + /** * struct drm_display_mode - DRM kernel-internal display mode structure * @hdisplay: horizontal display size @@ -405,6 +411,19 @@ struct drm_display_mode { * Field for setting the HDMI picture aspect ratio of a mode. */ enum hdmi_picture_aspect picture_aspect_ratio; + + /** + * @export_head: + * + * struct list_head for modes to be exposed to the userspace. + * This is to maintain a list of exposed modes while preparing + * user-mode's list in drm_mode_getconnector ioctl. The purpose of this + * list_head only lies in the ioctl function, and is not expected to be + * used outside the function. + * Once used, the stale pointers are not reset, but left as it is, to + * avoid overhead of protecting it by mode_config.mutex. 
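
The new DRM_MODE_MATCH_* flags above turn mode comparison into a pick-and-choose operation via drm_mode_match(), declared just below. For example, to treat two modes as equal while ignoring their picture aspect ratio:

bool same = drm_mode_match(mode1, mode2,
                           DRM_MODE_MATCH_TIMINGS |
                           DRM_MODE_MATCH_CLOCK |
                           DRM_MODE_MATCH_FLAGS |
                           DRM_MODE_MATCH_3D_FLAGS);
/* drm_mode_equal() is effectively the same set plus
 * DRM_MODE_MATCH_ASPECT_RATIO */
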
+ */ + struct list_head export_head; }; /** @@ -490,6 +509,9 @@ void drm_mode_copy(struct drm_display_mode *dst, const struct drm_display_mode *src); struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev, const struct drm_display_mode *mode); +bool drm_mode_match(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2, + unsigned int match_flags); bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2); bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index 3e76ca805b0f..35e2a3a79fc5 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -1004,11 +1004,14 @@ struct drm_plane_helper_funcs { * This function must not block for outstanding rendering, since it is * called in the context of the atomic IOCTL even for async commits to * be able to return any errors to userspace. Instead the recommended - * way is to fill out the fence member of the passed-in + * way is to fill out the &drm_plane_state.fence of the passed-in * &drm_plane_state. If the driver doesn't support native fences then * equivalent functionality should be implemented through private * members in the plane structure. * + * Drivers which always have their buffers pinned should use + * drm_gem_fb_prepare_fb() for this hook. + * * The helpers will call @cleanup_fb with matching arguments for every * successful call to this hook. * diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index f7bf4a48b1c3..26fa50c2a50e 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -43,6 +43,7 @@ struct drm_modeset_acquire_ctx; * plane (in 16.16) * @src_w: width of visible portion of plane (in 16.16) * @src_h: height of visible portion of plane (in 16.16) + * @alpha: opacity of the plane * @rotation: rotation of the plane * @zpos: priority of the given plane on crtc (optional) * Note that multiple active planes on the same crtc can have an identical @@ -51,8 +52,8 @@ struct drm_modeset_acquire_ctx; * plane with a lower ID. * @normalized_zpos: normalized value of zpos: unique, range from 0 to N-1 * where N is the number of active planes for given crtc. Note that - * the driver must call drm_atomic_normalize_zpos() to update this before - * it can be trusted. + * the driver must set drm_mode_config.normalize_zpos or call + * drm_atomic_normalize_zpos() to update this before it can be trusted. * @src: clipped source coordinates of the plane (in 16.16) * @dst: clipped destination coordinates of the plane * @state: backpointer to global drm_atomic_state @@ -79,8 +80,15 @@ struct drm_plane_state { /** * @fence: * - * Optional fence to wait for before scanning out @fb. Do not write this - * directly, use drm_atomic_set_fence_for_plane() + * Optional fence to wait for before scanning out @fb. The core atomic + * code will set this when userspace is using explicit fencing. Do not + * write this directly for a driver's implicit fence, use + * drm_atomic_set_fence_for_plane() to ensure that an explicit fence is + * preserved. + * + * Drivers should store any implicit fence in this from their + * &drm_plane_helper.prepare_fb callback. See drm_gem_fb_prepare_fb() + * and drm_gem_fb_simple_display_pipe_prepare_fb() for suitable helpers. 
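
With drm_plane_create_alpha_property() and the new drm_plane_state.alpha field, per-plane opacity becomes a core property: DRM_BLEND_ALPHA_OPAQUE (0xffff) is the fully opaque value, and drivers scale the 16-bit state down to whatever width their blender takes. A driver-side sketch (register programming elided):

static int ex_plane_init(struct drm_plane *plane)
{
        /* exposes the "alpha" property on this plane */
        return drm_plane_create_alpha_property(plane);
}

static void ex_plane_update(struct drm_plane *plane,
                            struct drm_plane_state *state)
{
        u8 hw_alpha = state->alpha >> 8;        /* 16-bit -> 8-bit HW field */

        /* ... write hw_alpha into the blending register ... */
}
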
*/ struct dma_fence *fence; @@ -106,6 +114,9 @@ struct drm_plane_state { uint32_t src_x, src_y; uint32_t src_h, src_w; + /* Plane opacity */ + u16 alpha; + /* Plane rotation */ unsigned int rotation; @@ -496,6 +507,7 @@ enum drm_plane_type { * @funcs: helper functions * @properties: property tracking for this plane * @type: type of plane (overlay, primary, cursor) + * @alpha_property: alpha property for this plane * @zpos_property: zpos property for this plane * @rotation_property: rotation property for this plane * @helper_private: mid-layer private data @@ -571,6 +583,7 @@ struct drm_plane { */ struct drm_plane_state *state; + struct drm_property *alpha_property; struct drm_property *zpos_property; struct drm_property *rotation_property; diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h index d1423c7f3c73..1d5c0b2a8956 100644 --- a/include/drm/drm_property.h +++ b/include/drm/drm_property.h @@ -260,7 +260,7 @@ struct drm_property *drm_property_create_object(struct drm_device *dev, uint32_t type); struct drm_property *drm_property_create_bool(struct drm_device *dev, u32 flags, const char *name); -int drm_property_add_enum(struct drm_property *property, int index, +int drm_property_add_enum(struct drm_property *property, uint64_t value, const char *name); void drm_property_destroy(struct drm_device *dev, struct drm_property *property); @@ -281,32 +281,6 @@ struct drm_property_blob *drm_property_blob_get(struct drm_property_blob *blob); void drm_property_blob_put(struct drm_property_blob *blob); /** - * drm_property_reference_blob - acquire a blob property reference - * @blob: DRM blob property - * - * This is a compatibility alias for drm_property_blob_get() and should not be - * used by new code. - */ -static inline struct drm_property_blob * -drm_property_reference_blob(struct drm_property_blob *blob) -{ - return drm_property_blob_get(blob); -} - -/** - * drm_property_unreference_blob - release a blob property reference - * @blob: DRM blob property - * - * This is a compatibility alias for drm_property_blob_put() and should not be - * used by new code. - */ -static inline void -drm_property_unreference_blob(struct drm_property_blob *blob) -{ - drm_property_blob_put(blob); -} - -/** * drm_property_find - find property object * @dev: DRM device * @file_priv: drm file to check for lease against. diff --git a/include/drm/drm_rect.h b/include/drm/drm_rect.h index 44bc122b9ee0..6c54544a4be7 100644 --- a/include/drm/drm_rect.h +++ b/include/drm/drm_rect.h @@ -175,8 +175,7 @@ static inline bool drm_rect_equals(const struct drm_rect *r1, bool drm_rect_intersect(struct drm_rect *r, const struct drm_rect *clip); bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, - const struct drm_rect *clip, - int hscale, int vscale); + const struct drm_rect *clip); int drm_rect_calc_hscale(const struct drm_rect *src, const struct drm_rect *dst, int min_hscale, int max_hscale); diff --git a/include/drm/drm_simple_kms_helper.h b/include/drm/drm_simple_kms_helper.h index 1b4e352143fd..451960438a29 100644 --- a/include/drm/drm_simple_kms_helper.h +++ b/include/drm/drm_simple_kms_helper.h @@ -64,7 +64,8 @@ struct drm_simple_display_pipe_funcs { * This hook is optional. */ void (*enable)(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state); + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state); /** * @disable: * @@ -115,6 +116,9 @@ struct drm_simple_display_pipe_funcs { * Optional, called by &drm_plane_helper_funcs.prepare_fb. 
Please read * the documentation for the &drm_plane_helper_funcs.prepare_fb hook for * more details. + * + * Drivers which always have their buffers pinned should use + * drm_gem_fb_simple_display_pipe_prepare_fb() for this hook. */ int (*prepare_fb)(struct drm_simple_display_pipe *pipe, struct drm_plane_state *plane_state); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index dfd54fb94e10..52380067a43f 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -43,10 +43,12 @@ enum drm_sched_priority { }; /** - * A scheduler entity is a wrapper around a job queue or a group - * of other entities. Entities take turns emitting jobs from their - * job queues to corresponding hardware ring based on scheduling - * policy. + * drm_sched_entity - A wrapper around a job queue (typically attached + * to the DRM file_priv). + * + * Entities will emit jobs in order to their corresponding hardware + * ring, and the scheduler will alternate between entities based on + * scheduling policy. */ struct drm_sched_entity { struct list_head list; @@ -63,6 +65,8 @@ struct drm_sched_entity { struct dma_fence *dependency; struct dma_fence_cb cb; atomic_t *guilty; /* points to ctx's guilty */ + int fini_status; + struct dma_fence *last_scheduled; }; /** @@ -78,7 +82,18 @@ struct drm_sched_rq { struct drm_sched_fence { struct dma_fence scheduled; + + /* This fence is what will be signaled by the scheduler when + * the job is completed. + * + * When setting up an out fence for the job, you should use + * this, since it's available immediately upon + * drm_sched_job_init(), and the fence returned by the driver + * from run_job() won't be created until the dependencies have + * resolved. + */ struct dma_fence finished; + struct dma_fence_cb cb; struct dma_fence *parent; struct drm_gpu_scheduler *sched; @@ -88,6 +103,13 @@ struct drm_sched_fence { struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); +/** + * drm_sched_job - A job to be run by an entity. + * + * A job is created by the driver using drm_sched_job_init(); the driver + * should then call drm_sched_entity_push_job() once it wants the scheduler + * to schedule the job. + */ struct drm_sched_job { struct spsc_node queue_node; struct drm_gpu_scheduler *sched; @@ -99,6 +121,7 @@ struct drm_sched_job { uint64_t id; atomic_t karma; enum drm_sched_priority s_priority; + struct drm_sched_entity *entity; }; static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, @@ -112,10 +135,28 @@ static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, * these functions should be implemented in driver side */ struct drm_sched_backend_ops { + /* Called when the scheduler is considering scheduling this + * job next, to get another struct dma_fence for this job to + * block on. Once it returns NULL, run_job() may be called. + */ struct dma_fence *(*dependency)(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity); + + /* Called to execute the job once all of the dependencies have + * been resolved. This may be called multiple times, if + * timedout_job() has happened and drm_sched_job_recovery() + * decides to try it again. + */ struct dma_fence *(*run_job)(struct drm_sched_job *sched_job); + + /* Called when a job has taken too long to execute, to trigger + * GPU recovery. + */ void (*timedout_job)(struct drm_sched_job *sched_job); + + /* Called once the job's finished fence has been signaled and + * it's time to clean it up.
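To make the job lifecycle documented above concrete, here is a hedged submission sketch. struct foo_job and its out_fence member are hypothetical, and it assumes the usual &drm_sched_job.s_fence member (not shown in the hunk above) holding the scheduled/finished fence pair:

    struct foo_job {
            struct drm_sched_job base;
            struct dma_fence *out_fence;
    };

    static int foo_submit(struct foo_job *job, struct drm_gpu_scheduler *sched,
                          struct drm_sched_entity *entity, void *owner)
    {
            int ret;

            ret = drm_sched_job_init(&job->base, sched, entity, owner);
            if (ret)
                    return ret;

            /* The finished fence exists as soon as drm_sched_job_init()
             * returns, unlike the fence run_job() will produce later, so
             * it is the one to hand out as the job's out-fence.
             */
            job->out_fence = dma_fence_get(&job->base.s_fence->finished);

            drm_sched_entity_push_job(&job->base, entity);
            return 0;
    }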
+ */ void (*free_job)(struct drm_sched_job *sched_job); }; @@ -147,7 +188,11 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched); int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, - uint32_t jobs, atomic_t *guilty); + atomic_t *guilty); +void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity); +void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity); void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); void drm_sched_entity_push_job(struct drm_sched_job *sched_job, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 70f0c2535b87..bab70ff6e78b 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -349,6 +349,7 @@ #define INTEL_KBL_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \ INTEL_VGA_DEVICE(0x5917, info), /* Mobile GT2 */ \ + INTEL_VGA_DEVICE(0x591C, info), /* ULX GT2 */ \ INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \ INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \ INTEL_VGA_DEVICE(0x5912, info), /* DT GT2 */ \ diff --git a/include/drm/tinydrm/mipi-dbi.h b/include/drm/tinydrm/mipi-dbi.h index 44e824af2ef6..b8ba58861986 100644 --- a/include/drm/tinydrm/mipi-dbi.h +++ b/include/drm/tinydrm/mipi-dbi.h @@ -67,7 +67,9 @@ int mipi_dbi_init(struct device *dev, struct mipi_dbi *mipi, const struct drm_simple_display_pipe_funcs *pipe_funcs, struct drm_driver *driver, const struct drm_display_mode *mode, unsigned int rotation); -void mipi_dbi_enable_flush(struct mipi_dbi *mipi); +void mipi_dbi_enable_flush(struct mipi_dbi *mipi, + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state); void mipi_dbi_pipe_disable(struct drm_simple_display_pipe *pipe); void mipi_dbi_hw_reset(struct mipi_dbi *mipi); bool mipi_dbi_display_is_on(struct mipi_dbi *mipi); diff --git a/include/drm/tinydrm/tinydrm-helpers.h b/include/drm/tinydrm/tinydrm-helpers.h index 0a4ddbc04c60..5b96f0b12c8c 100644 --- a/include/drm/tinydrm/tinydrm-helpers.h +++ b/include/drm/tinydrm/tinydrm-helpers.h @@ -36,6 +36,11 @@ static inline bool tinydrm_machine_little_endian(void) bool tinydrm_merge_clips(struct drm_clip_rect *dst, struct drm_clip_rect *src, unsigned int num_clips, unsigned int flags, u32 max_width, u32 max_height); +int tinydrm_fb_dirty(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int flags, unsigned int color, + struct drm_clip_rect *clips, + unsigned int num_clips); void tinydrm_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_clip_rect *clip); void tinydrm_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb, diff --git a/include/drm/tinydrm/tinydrm.h b/include/drm/tinydrm/tinydrm.h index 07a9a11fe19d..56e4a916b5e8 100644 --- a/include/drm/tinydrm/tinydrm.h +++ b/include/drm/tinydrm/tinydrm.h @@ -26,6 +26,10 @@ struct tinydrm_device { struct drm_simple_display_pipe pipe; struct mutex dirty_lock; const struct drm_framebuffer_funcs *fb_funcs; + int (*fb_dirty)(struct drm_framebuffer *framebuffer, + struct drm_file *file_priv, unsigned flags, + unsigned color, struct drm_clip_rect *clips, + unsigned num_clips); }; static inline struct tinydrm_device * @@ -41,7 +45,7 @@ pipe_to_tinydrm(struct drm_simple_display_pipe *pipe) * the &drm_driver structure.
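With the widened mipi_dbi_enable_flush() prototype above, a tinydrm panel driver's enable hook simply forwards both state pointers. A sketch: foo_pipe_enable and the panel init sequence are hypothetical, and it assumes the usual mipi_dbi_from_tinydrm() container_of helper from mipi-dbi.h:

    static void foo_pipe_enable(struct drm_simple_display_pipe *pipe,
                                struct drm_crtc_state *crtc_state,
                                struct drm_plane_state *plane_state)
    {
            struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
            struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);

            /* ... hypothetical panel init command sequence ... */

            mipi_dbi_enable_flush(mipi, crtc_state, plane_state);
    }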
*/ #define TINYDRM_GEM_DRIVER_OPS \ - .gem_free_object = tinydrm_gem_cma_free_object, \ + .gem_free_object_unlocked = tinydrm_gem_cma_free_object, \ .gem_print_info = drm_gem_cma_print_info, \ .gem_vm_ops = &drm_gem_cma_vm_ops, \ .prime_handle_to_fd = drm_gem_prime_handle_to_fd, \ @@ -91,8 +95,6 @@ void tinydrm_shutdown(struct tinydrm_device *tdev); void tinydrm_display_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_plane_state *old_state); -int tinydrm_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state); int tinydrm_display_pipe_init(struct tinydrm_device *tdev, const struct drm_simple_display_pipe_funcs *funcs, diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 4c008170fe65..eb9b05aa5aea 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -94,11 +94,11 @@ typedef void (*dma_fence_func_t)(struct dma_fence *fence, struct dma_fence_cb *cb); /** - * struct dma_fence_cb - callback for dma_fence_add_callback - * @node: used by dma_fence_add_callback to append this struct to fence::cb_list + * struct dma_fence_cb - callback for dma_fence_add_callback() + * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list * @func: dma_fence_func_t to call * - * This struct will be initialized by dma_fence_add_callback, additional + * This struct will be initialized by dma_fence_add_callback(), additional * data can be passed along by embedding dma_fence_cb in another struct. */ struct dma_fence_cb { @@ -108,75 +108,143 @@ struct dma_fence_cb { /** * struct dma_fence_ops - operations implemented for fence - * @get_driver_name: returns the driver name. - * @get_timeline_name: return the name of the context this fence belongs to. - * @enable_signaling: enable software signaling of fence. - * @signaled: [optional] peek whether the fence is signaled, can be null. - * @wait: custom wait implementation, or dma_fence_default_wait. - * @release: [optional] called on destruction of fence, can be null - * @fill_driver_data: [optional] callback to fill in free-form debug info - * Returns amount of bytes filled, or -errno. - * @fence_value_str: [optional] fills in the value of the fence as a string - * @timeline_value_str: [optional] fills in the current value of the timeline - * as a string * - * Notes on enable_signaling: - * For fence implementations that have the capability for hw->hw - * signaling, they can implement this op to enable the necessary - * irqs, or insert commands into cmdstream, etc. This is called - * in the first wait() or add_callback() path to let the fence - * implementation know that there is another driver waiting on - * the signal (ie. hw->sw case). - * - * This function can be called from atomic context, but not - * from irq context, so normal spinlocks can be used. - * - * A return value of false indicates the fence already passed, - * or some failure occurred that made it impossible to enable - * signaling. True indicates successful enabling. - * - * fence->error may be set in enable_signaling, but only when false is - * returned. - * - * Calling dma_fence_signal before enable_signaling is called allows - * for a tiny race window in which enable_signaling is called during, - * before, or after dma_fence_signal. To fight this, it is recommended - * that before enable_signaling returns true an extra reference is - * taken on the fence, to be released when the fence is signaled. 
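The reference dance recommended above looks roughly like this in a fence implementation; everything prefixed foo_ is a hypothetical driver construct:

    struct foo_fence {
            struct dma_fence base;
            /* ... hypothetical hw bookkeeping ... */
    };

    static bool foo_fence_enable_signaling(struct dma_fence *fence)
    {
            struct foo_fence *f = container_of(fence, struct foo_fence, base);

            if (foo_hw_seqno_passed(f))     /* hypothetical hw query */
                    return false;           /* already past, nothing to arm */

            dma_fence_get(fence);           /* dropped in foo_fence_irq() */
            foo_hw_enable_irq(f);           /* hypothetical */
            return true;
    }

    static void foo_fence_irq(struct foo_fence *f)
    {
            dma_fence_signal(&f->base);
            dma_fence_put(&f->base);        /* ref taken in enable_signaling */
    }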
- * This will mean dma_fence_signal will still be called twice, but - * the second time will be a noop since it was already signaled. - * - * Notes on signaled: - * May set fence->error if returning true. - * - * Notes on wait: - * Must not be NULL, set to dma_fence_default_wait for default implementation. - * the dma_fence_default_wait implementation should work for any fence, as long - * as enable_signaling works correctly. - * - * Must return -ERESTARTSYS if the wait is intr = true and the wait was - * interrupted, and remaining jiffies if fence has signaled, or 0 if wait - * timed out. Can also return other error values on custom implementations, - * which should be treated as if the fence is signaled. For example a hardware - * lockup could be reported like that. - * - * Notes on release: - * Can be NULL, this function allows additional commands to run on - * destruction of the fence. Can be called from irq context. - * If pointer is set to NULL, kfree will get called instead. */ - struct dma_fence_ops { + /** + * @get_driver_name: + * + * Returns the driver name. This is a callback to allow drivers to + * compute the name at runtime, without having to store it permanently + * for each fence, or build a cache of some sort. + * + * This callback is mandatory. + */ const char * (*get_driver_name)(struct dma_fence *fence); + + /** + * @get_timeline_name: + * + * Return the name of the context this fence belongs to. This is a + * callback to allow drivers to compute the name at runtime, without + * having to store it permanently for each fence, or build a cache of + * some sort. + * + * This callback is mandatory. + */ const char * (*get_timeline_name)(struct dma_fence *fence); + + /** + * @enable_signaling: + * + * Enable software signaling of fence. + * + * For fence implementations that have the capability for hw->hw + * signaling, they can implement this op to enable the necessary + * interrupts, or insert commands into cmdstream, etc., to avoid these + * costly operations for the common case where only hw->hw + * synchronization is required. This is called in the first + * dma_fence_wait() or dma_fence_add_callback() path to let the fence + * implementation know that there is another driver waiting on the + * signal (ie. hw->sw case). + * + * This function can be called from atomic context, but not + * from irq context, so normal spinlocks can be used. + * + * A return value of false indicates the fence already passed, + * or some failure occurred that made it impossible to enable + * signaling. True indicates successful enabling. + * + * &dma_fence.error may be set in enable_signaling, but only when false + * is returned. + * + * Since many implementations can call dma_fence_signal() even before + * @enable_signaling has been called, there's a race window where + * dma_fence_signal() might result in the final fence reference being + * released and its memory freed. To avoid this, implementations of this + * callback should grab their own reference using dma_fence_get(), to be + * released when the fence is signalled (through e.g. the interrupt + * handler). + * + * This callback is mandatory. + */ bool (*enable_signaling)(struct dma_fence *fence); + + /** + * @signaled: + * + * Peek whether the fence is signaled, as a fastpath optimization for + * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this + * callback does not need to make any guarantees beyond the rule that a + * fence once reported as signalled must always return true from this + * callback afterwards.
This callback may return false even if the fence has + * completed already; in this case the information hasn't propagated through + * the system yet. See also dma_fence_is_signaled(). + * + * May set &dma_fence.error if returning true. + * + * This callback is optional. + */ bool (*signaled)(struct dma_fence *fence); + + /** + * @wait: + * + * Custom wait implementation, or dma_fence_default_wait. + * + * Must not be NULL; set it to dma_fence_default_wait for the default + * implementation. The dma_fence_default_wait implementation should work for + * any fence, as long as @enable_signaling works correctly. + * + * Must return -ERESTARTSYS if @intr is true and the wait was + * interrupted, the remaining jiffies if the fence has signaled, or 0 if the + * wait timed out. Can also return other error values on custom implementations, + * which should be treated as if the fence is signaled. For example a hardware + * lockup could be reported like that. + * + * This callback is mandatory. + */ signed long (*wait)(struct dma_fence *fence, bool intr, signed long timeout); + + /** + * @release: + * + * Called on destruction of fence to release additional resources. + * Can be called from irq context. This callback is optional. If it is + * NULL, then dma_fence_free() is instead called as the default + * implementation. + */ void (*release)(struct dma_fence *fence); + /** + * @fill_driver_data: + * + * Callback to fill in free-form debug info. + * + * Returns the number of bytes filled, or a negative error on failure. + * + * This callback is optional. + */ int (*fill_driver_data)(struct dma_fence *fence, void *data, int size); + + /** + * @fence_value_str: + * + * Callback to fill in free-form debug info specific to this fence, like + * the sequence number. + * + * This callback is optional. + */ void (*fence_value_str)(struct dma_fence *fence, char *str, int size); + + /** + * @timeline_value_str: + * + * Fills in the current value of the timeline as a string, like the + * sequence number. This should match what @fence_value_str prints for + * the most recently signalled fence (assuming no delayed signalling). + * + * This callback is optional. + */ void (*timeline_value_str)(struct dma_fence *fence, char *str, int size); }; @@ -189,7 +257,7 @@ void dma_fence_free(struct dma_fence *fence); /** * dma_fence_put - decreases refcount of the fence - * @fence: [in] fence to reduce refcount of + * @fence: fence to reduce refcount of */ static inline void dma_fence_put(struct dma_fence *fence) { @@ -199,7 +267,7 @@ static inline void dma_fence_put(struct dma_fence *fence) /** * dma_fence_get - increases refcount of the fence - * @fence: [in] fence to increase refcount of + * @fence: fence to increase refcount of * * Returns the same fence, with refcount increased by 1. */ static inline struct dma_fence *dma_fence_get(struct dma_fence *fence) { @@ -213,7 +281,7 @@ static inline struct dma_fence *dma_fence_get(struct dma_fence *fence) /** * dma_fence_get_rcu - get a fence from a reservation_object_list with * rcu read lock - * @fence: [in] fence to increase refcount of + * @fence: fence to increase refcount of * * Function returns NULL if no refcount could be obtained, or the fence. */ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) { @@ -227,7 +295,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) /** * dma_fence_get_rcu_safe - acquire a reference to an RCU tracked fence - * @fencep: [in] pointer to fence to increase refcount of + * @fencep: pointer to fence to increase refcount of * * Function returns NULL if no refcount could be obtained, or the fence.
* This function handles acquiring a reference to a fence that may be @@ -289,14 +357,16 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence); /** * dma_fence_is_signaled_locked - Return an indication if the fence * is signaled yet. - * @fence: [in] the fence to check + * @fence: the fence to check * * Returns true if the fence was already signaled, false if not. Since this * function doesn't enable signaling, it is not guaranteed to ever return - * true if dma_fence_add_callback, dma_fence_wait or - * dma_fence_enable_sw_signaling haven't been called before. + * true if dma_fence_add_callback(), dma_fence_wait() or + * dma_fence_enable_sw_signaling() haven't been called before. * - * This function requires fence->lock to be held. + * This function requires &dma_fence.lock to be held. + * + * See also dma_fence_is_signaled(). */ static inline bool dma_fence_is_signaled_locked(struct dma_fence *fence) @@ -314,17 +384,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence) /** * dma_fence_is_signaled - Return an indication if the fence is signaled yet. - * @fence: [in] the fence to check + * @fence: the fence to check * * Returns true if the fence was already signaled, false if not. Since this * function doesn't enable signaling, it is not guaranteed to ever return - * true if dma_fence_add_callback, dma_fence_wait or - * dma_fence_enable_sw_signaling haven't been called before. + * true if dma_fence_add_callback(), dma_fence_wait() or + * dma_fence_enable_sw_signaling() haven't been called before. * * It's recommended for seqno fences to call dma_fence_signal() when the * operation is complete; this makes it possible to prevent issues from * wraparound between time of issue and time of use by checking the return * value of this function before calling hardware-specific wait instructions. + * + * See also dma_fence_is_signaled_locked(). */ static inline bool dma_fence_is_signaled(struct dma_fence *fence) @@ -342,8 +414,8 @@ dma_fence_is_signaled(struct dma_fence *fence) /** * __dma_fence_is_later - return if f1 is chronologically later than f2 - * @f1: [in] the first fence's seqno - * @f2: [in] the second fence's seqno from the same context + * @f1: the first fence's seqno + * @f2: the second fence's seqno from the same context * * Returns true if f1 is chronologically later than f2. Both fences must be * from the same context, since a seqno is not common across contexts. @@ -355,8 +427,8 @@ static inline bool __dma_fence_is_later(u32 f1, u32 f2) /** * dma_fence_is_later - return if f1 is chronologically later than f2 - * @f1: [in] the first fence from the same context - * @f2: [in] the second fence from the same context + * @f1: the first fence from the same context + * @f2: the second fence from the same context * * Returns true if f1 is chronologically later than f2. Both fences must be * from the same context, since a seqno is not re-used across contexts. @@ -372,8 +444,8 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, /** * dma_fence_later - return the chronologically later fence - * @f1: [in] the first fence from the same context - * @f2: [in] the second fence from the same context + * @f1: the first fence from the same context + * @f2: the second fence from the same context * * Returns NULL if both fences are signaled, otherwise the fence that would be * signaled last.
Both fences must be from the same context, since a seqno is @@ -398,7 +470,7 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1, /** * dma_fence_get_status_locked - returns the status upon completion - * @fence: [in] the dma_fence to query + * @fence: the dma_fence to query * * Drivers can supply an optional error status condition before they signal * the fence (to indicate whether the fence was completed due to an error @@ -422,8 +494,8 @@ int dma_fence_get_status(struct dma_fence *fence); /** * dma_fence_set_error - flag an error condition on the fence - * @fence: [in] the dma_fence - * @error: [in] the error to store + * @fence: the dma_fence + * @error: the error to store * * Drivers can supply an optional error status condition before they signal * the fence, to indicate that the fence was completed due to an error @@ -449,8 +521,8 @@ signed long dma_fence_wait_any_timeout(struct dma_fence **fences, /** * dma_fence_wait - sleep until the fence gets signaled - * @fence: [in] the fence to wait on - * @intr: [in] if true, do an interruptible wait + * @fence: the fence to wait on + * @intr: if true, do an interruptible wait * * This function will return -ERESTARTSYS if interrupted by a signal, * or 0 if the fence was signaled. Other error values may be @@ -459,6 +531,8 @@ signed long dma_fence_wait_any_timeout(struct dma_fence **fences, * Performs a synchronous wait on this fence. It is assumed the caller * directly or indirectly holds a reference to the fence, otherwise the * fence might be freed before return, resulting in undefined behavior. + * + * See also dma_fence_wait_timeout() and dma_fence_wait_any_timeout(). */ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr) { diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c363b67f2d0a..78b4dd89fcb4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -78,6 +78,12 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GDS 0x8 #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ + AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | \ + AMDGPU_GEM_DOMAIN_GDS | \ + AMDGPU_GEM_DOMAIN_GWS | \ + AMDGPU_GEM_DOMAIN_OA) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) @@ -95,6 +101,10 @@ extern "C" { #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) /* Flag that BO sharing will be explicitly synchronized */ #define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that indicates allocating MQD gart on GFX9, where the mtype + * for the second page onward should be set to NC. + */ +#define AMDGPU_GEM_CREATE_MQD_GFX9 (1 << 8) struct drm_amdgpu_gem_create_in { /** the requested memory size */ @@ -520,6 +530,10 @@ union drm_amdgpu_cs { /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ #define AMDGPU_IB_FLAG_PREEMPT (1<<2) +/* The IB fence should do the L2 writeback but not invalidate any shader + * caches (L2/vL1/sL1/I$). 
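Tying the error plumbing above together: a driver flags the error first and then signals, so waiters observe the status via dma_fence_get_status(). A hedged fragment; the timeout condition and job variable are hypothetical:

    if (foo_job_timed_out(job))             /* hypothetical condition */
            dma_fence_set_error(fence, -ETIMEDOUT);
    dma_fence_signal(fence);                /* error must be set before this */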
*/ +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ @@ -620,6 +634,12 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_ASD 0x0d /* Subquery id: Query VCN firmware version */ #define AMDGPU_INFO_FW_VCN 0x0e + /* Subquery id: Query GFX RLC SRLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f + /* Subquery id: Query GFX RLC SRLG firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 + /* Subquery id: Query GFX RLC SRLS firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 6fdff5945c8a..9c660e1688ab 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -680,6 +680,13 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_ATOMIC 3 +/** + * DRM_CLIENT_CAP_ASPECT_RATIO + * + * If set to 1, the DRM core will provide aspect ratio information in modes. + */ +#define DRM_CLIENT_CAP_ASPECT_RATIO 4 + /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 50bcf4214ff9..4b3a1bb58e68 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -93,6 +93,8 @@ extern "C" { #define DRM_MODE_PICTURE_ASPECT_NONE 0 #define DRM_MODE_PICTURE_ASPECT_4_3 1 #define DRM_MODE_PICTURE_ASPECT_16_9 2 +#define DRM_MODE_PICTURE_ASPECT_64_27 3 +#define DRM_MODE_PICTURE_ASPECT_256_135 4 /* Aspect ratio flag bitmask (4 bits 22:19) */ #define DRM_MODE_FLAG_PIC_AR_MASK (0x0F<<19) @@ -102,6 +104,10 @@ extern "C" { (DRM_MODE_PICTURE_ASPECT_4_3<<19) #define DRM_MODE_FLAG_PIC_AR_16_9 \ (DRM_MODE_PICTURE_ASPECT_16_9<<19) +#define DRM_MODE_FLAG_PIC_AR_64_27 \ + (DRM_MODE_PICTURE_ASPECT_64_27<<19) +#define DRM_MODE_FLAG_PIC_AR_256_135 \ + (DRM_MODE_PICTURE_ASPECT_256_135<<19) #define DRM_MODE_FLAG_ALL (DRM_MODE_FLAG_PHSYNC | \ DRM_MODE_FLAG_NHSYNC | \ diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index 4a54305120e0..3e59b8382dd8 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -135,6 +135,219 @@ struct drm_exynos_g2d_exec { __u64 async; }; +/* Exynos DRM IPP v2 API */ + +/** + * Enumerate available IPP hardware modules. + * + * @count_ipps: size of ipp_id array / number of ipp modules (set by driver) + * @reserved: padding + * @ipp_id_ptr: pointer to ipp_id array or NULL + */ +struct drm_exynos_ioctl_ipp_get_res { + __u32 count_ipps; + __u32 reserved; + __u64 ipp_id_ptr; +}; + +enum drm_exynos_ipp_format_type { + DRM_EXYNOS_IPP_FORMAT_SOURCE = 0x01, + DRM_EXYNOS_IPP_FORMAT_DESTINATION = 0x02, +}; + +struct drm_exynos_ipp_format { + __u32 fourcc; + __u32 type; + __u64 modifier; +}; + +enum drm_exynos_ipp_capability { + DRM_EXYNOS_IPP_CAP_CROP = 0x01, + DRM_EXYNOS_IPP_CAP_ROTATE = 0x02, + DRM_EXYNOS_IPP_CAP_SCALE = 0x04, + DRM_EXYNOS_IPP_CAP_CONVERT = 0x08, +}; + +/** + * Get IPP hardware capabilities and supported image formats. 
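On the userspace side, the new DRM_CLIENT_CAP_ASPECT_RATIO from the drm.h hunk above is opted into like any other client cap. A sketch using the raw ioctl (libdrm's drmSetClientCap() wraps the same thing); fd is assumed to be an open DRM device node:

    #include <sys/ioctl.h>
    #include <drm/drm.h>

    struct drm_set_client_cap cap = {
            .capability = DRM_CLIENT_CAP_ASPECT_RATIO,
            .value      = 1,    /* request aspect-ratio bits in mode flags */
    };

    if (ioctl(fd, DRM_IOCTL_SET_CLIENT_CAP, &cap))
            /* older kernel: aspect-ratio flags stay filtered out */;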
+ * + * @ipp_id: id of IPP module to query + * @capabilities: bitmask of drm_exynos_ipp_capability (set by driver) + * @reserved: padding + * @formats_count: size of formats array (in entries) / number of filled + * formats (set by driver) + * @formats_ptr: pointer to formats array or NULL + */ +struct drm_exynos_ioctl_ipp_get_caps { + __u32 ipp_id; + __u32 capabilities; + __u32 reserved; + __u32 formats_count; + __u64 formats_ptr; +}; + +enum drm_exynos_ipp_limit_type { + /* size (horizontal/vertical) limits, in pixels (min, max, alignment) */ + DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE = 0x0001, + /* scale ratio (horizontal/vertical), 16.16 fixed point (min, max) */ + DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE = 0x0002, + + /* image buffer area */ + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER = 0x0001 << 16, + /* src/dst rectangle area */ + DRM_EXYNOS_IPP_LIMIT_SIZE_AREA = 0x0002 << 16, + /* src/dst rectangle area when rotation enabled */ + DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED = 0x0003 << 16, + + DRM_EXYNOS_IPP_LIMIT_TYPE_MASK = 0x000f, + DRM_EXYNOS_IPP_LIMIT_SIZE_MASK = 0x000f << 16, +}; + +struct drm_exynos_ipp_limit_val { + __u32 min; + __u32 max; + __u32 align; + __u32 reserved; +}; + +/** + * IPP module limitation. + * + * @type: limit type (see drm_exynos_ipp_limit_type enum) + * @reserved: padding + * @h: horizontal limits + * @v: vertical limits + */ +struct drm_exynos_ipp_limit { + __u32 type; + __u32 reserved; + struct drm_exynos_ipp_limit_val h; + struct drm_exynos_ipp_limit_val v; +}; + +/** + * Get IPP limits for given image format. + * + * @ipp_id: id of IPP module to query + * @fourcc: image format code (see DRM_FORMAT_* in drm_fourcc.h) + * @modifier: image format modifier (see DRM_FORMAT_MOD_* in drm_fourcc.h) + * @type: source/destination identifier (drm_exynos_ipp_format_type enum) + * @limits_count: size of limits array (in entries) / number of filled entries + * (set by driver) + * @limits_ptr: pointer to limits array or NULL + */ +struct drm_exynos_ioctl_ipp_get_limits { + __u32 ipp_id; + __u32 fourcc; + __u64 modifier; + __u32 type; + __u32 limits_count; + __u64 limits_ptr; +}; + +enum drm_exynos_ipp_task_id { + /* buffer described by struct drm_exynos_ipp_task_buffer */ + DRM_EXYNOS_IPP_TASK_BUFFER = 0x0001, + /* rectangle described by struct drm_exynos_ipp_task_rect */ + DRM_EXYNOS_IPP_TASK_RECTANGLE = 0x0002, + /* transformation described by struct drm_exynos_ipp_task_transform */ + DRM_EXYNOS_IPP_TASK_TRANSFORM = 0x0003, + /* alpha configuration described by struct drm_exynos_ipp_task_alpha */ + DRM_EXYNOS_IPP_TASK_ALPHA = 0x0004, + + /* source image data (for buffer and rectangle chunks) */ + DRM_EXYNOS_IPP_TASK_TYPE_SOURCE = 0x0001 << 16, + /* destination image data (for buffer and rectangle chunks) */ + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION = 0x0002 << 16, +}; + +/** + * Memory buffer with image data. + * + * @id: must be DRM_EXYNOS_IPP_TASK_BUFFER + * other parameters are same as for AddFB2 generic DRM ioctl + */ +struct drm_exynos_ipp_task_buffer { + __u32 id; + __u32 fourcc; + __u32 width, height; + __u32 gem_id[4]; + __u32 offset[4]; + __u32 pitch[4]; + __u64 modifier; +}; + +/** + * Rectangle for processing. + * + * @id: must be DRM_EXYNOS_IPP_TASK_RECTANGLE + * @reserved: padding + * @x,@y: top-left corner in pixels + * @w,@h: width/height in pixels + */ +struct drm_exynos_ipp_task_rect { + __u32 id; + __u32 reserved; + __u32 x; + __u32 y; + __u32 w; + __u32 h; +}; + +/** + * Image transformation description.
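Putting the IPP v2 structures above together, a hedged userspace sketch of a scaling commit follows. The OR of a task id with a TASK_TYPE flag follows the enum comments above; ipp_id, src_handle and dst_handle are assumed to come from earlier GET_RESOURCES/GET_CAPS calls and GEM allocation, and includes/error handling are elided:

    struct {
            struct drm_exynos_ipp_task_buffer src, dst;
            struct drm_exynos_ipp_task_rect src_rect, dst_rect;
    } params = {
            .src = {
                    .id     = DRM_EXYNOS_IPP_TASK_BUFFER |
                              DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
                    .fourcc = DRM_FORMAT_XRGB8888,
                    .width  = 1920, .height = 1080,
                    .gem_id = { src_handle },
                    .pitch  = { 1920 * 4 },
            },
            .dst = {
                    .id     = DRM_EXYNOS_IPP_TASK_BUFFER |
                              DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION,
                    .fourcc = DRM_FORMAT_XRGB8888,
                    .width  = 1280, .height = 720,
                    .gem_id = { dst_handle },
                    .pitch  = { 1280 * 4 },
            },
            .src_rect = {
                    .id = DRM_EXYNOS_IPP_TASK_RECTANGLE |
                          DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
                    .w = 1920, .h = 1080,
            },
            .dst_rect = {
                    .id = DRM_EXYNOS_IPP_TASK_RECTANGLE |
                          DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION,
                    .w = 1280, .h = 720,
            },
    };
    struct drm_exynos_ioctl_ipp_commit arg = {
            .ipp_id      = ipp_id,
            .flags       = DRM_EXYNOS_IPP_FLAG_EVENT,
            .params_size = sizeof(params),
            .params_ptr  = (__u64)(uintptr_t)&params,
    };

    ioctl(fd, DRM_IOCTL_EXYNOS_IPP_COMMIT, &arg);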
+ * + * @id: must be DRM_EXYNOS_IPP_TASK_TRANSFORM + * @rotation: DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* values + */ +struct drm_exynos_ipp_task_transform { + __u32 id; + __u32 rotation; +}; + +/** + * Image global alpha configuration for formats without alpha values. + * + * @id: must be DRM_EXYNOS_IPP_TASK_ALPHA + * @value: global alpha value (0-255) + */ +struct drm_exynos_ipp_task_alpha { + __u32 id; + __u32 value; +}; + +enum drm_exynos_ipp_flag { + /* generate DRM event after processing */ + DRM_EXYNOS_IPP_FLAG_EVENT = 0x01, + /* dry run, only check task parameters */ + DRM_EXYNOS_IPP_FLAG_TEST_ONLY = 0x02, + /* non-blocking processing */ + DRM_EXYNOS_IPP_FLAG_NONBLOCK = 0x04, +}; + +#define DRM_EXYNOS_IPP_FLAGS (DRM_EXYNOS_IPP_FLAG_EVENT |\ + DRM_EXYNOS_IPP_FLAG_TEST_ONLY | DRM_EXYNOS_IPP_FLAG_NONBLOCK) + +/** + * Perform image processing described by array of drm_exynos_ipp_task_* + * structures (parameters array). + * + * @ipp_id: id of IPP module to run the task + * @flags: bitmask of drm_exynos_ipp_flag values + * @reserved: padding + * @params_size: size of parameters array (in bytes) + * @params_ptr: pointer to parameters array or NULL + * @user_data: (optional) data for drm event + */ +struct drm_exynos_ioctl_ipp_commit { + __u32 ipp_id; + __u32 flags; + __u32 reserved; + __u32 params_size; + __u64 params_ptr; + __u64 user_data; +}; + #define DRM_EXYNOS_GEM_CREATE 0x00 #define DRM_EXYNOS_GEM_MAP 0x01 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */ @@ -147,6 +360,11 @@ struct drm_exynos_g2d_exec { #define DRM_EXYNOS_G2D_EXEC 0x22 /* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */ +/* IPP - Image Post Processing */ +#define DRM_EXYNOS_IPP_GET_RESOURCES 0x40 +#define DRM_EXYNOS_IPP_GET_CAPS 0x41 +#define DRM_EXYNOS_IPP_GET_LIMITS 0x42 +#define DRM_EXYNOS_IPP_COMMIT 0x43 #define DRM_IOCTL_EXYNOS_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create) @@ -165,8 +383,20 @@ struct drm_exynos_g2d_exec { #define DRM_IOCTL_EXYNOS_G2D_EXEC DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec) +#define DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_EXYNOS_IPP_GET_RESOURCES, \ + struct drm_exynos_ioctl_ipp_get_res) +#define DRM_IOCTL_EXYNOS_IPP_GET_CAPS DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_EXYNOS_IPP_GET_CAPS, struct drm_exynos_ioctl_ipp_get_caps) +#define DRM_IOCTL_EXYNOS_IPP_GET_LIMITS DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_EXYNOS_IPP_GET_LIMITS, \ + struct drm_exynos_ioctl_ipp_get_limits) +#define DRM_IOCTL_EXYNOS_IPP_COMMIT DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_EXYNOS_IPP_COMMIT, struct drm_exynos_ioctl_ipp_commit) + /* EXYNOS specific events */ #define DRM_EXYNOS_G2D_EVENT 0x80000000 +#define DRM_EXYNOS_IPP_EVENT 0x80000002 struct drm_exynos_g2d_event { struct drm_event base; @@ -177,6 +407,16 @@ struct drm_exynos_g2d_event { __u32 reserved; }; +struct drm_exynos_ipp_event { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 ipp_id; + __u32 sequence; + __u64 reserved; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h new file mode 100644 index 000000000000..7b6627783608 --- /dev/null +++ b/include/uapi/drm/v3d_drm.h @@ -0,0 +1,194 @@ +/* + * Copyright © 2014-2018 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _V3D_DRM_H_ +#define _V3D_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_V3D_SUBMIT_CL 0x00 +#define DRM_V3D_WAIT_BO 0x01 +#define DRM_V3D_CREATE_BO 0x02 +#define DRM_V3D_MMAP_BO 0x03 +#define DRM_V3D_GET_PARAM 0x04 +#define DRM_V3D_GET_BO_OFFSET 0x05 + +#define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) +#define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) +#define DRM_IOCTL_V3D_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo) +#define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo) +#define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param) +#define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) + +/** + * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D + * engine. + * + * This asks the kernel to have the GPU execute an optional binner + * command list, and a render command list. + */ +struct drm_v3d_submit_cl { + /* Pointer to the binner command list. + * + * This is the first set of commands executed, which runs the + * coordinate shader to determine where primitives land on the screen, + * then writes out the state updates and draw calls necessary per tile + * to the tile allocation BO. + */ + __u32 bcl_start; + + /** End address of the BCL (first byte after the BCL) */ + __u32 bcl_end; + + /* Offset of the render command list. + * + * This is the second set of commands executed, which will either + * execute the tiles that have been set up by the BCL, or a fixed set + * of tiles (in the case of RCL-only blits). + */ + __u32 rcl_start; + + /** End address of the RCL (first byte after the RCL) */ + __u32 rcl_end; + + /** An optional sync object to wait on before starting the BCL. */ + __u32 in_sync_bcl; + /** An optional sync object to wait on before starting the RCL. */ + __u32 in_sync_rcl; + /** An optional sync object to place the completion fence in. */ + __u32 out_sync; + + /* Offset of the tile alloc memory + * + * This is optional on V3D 3.3 (where the CL can set the value) but + * required on V3D 4.1. + */ + __u32 qma; + + /** Size of the tile alloc memory. */ + __u32 qms; + + /** Offset of the tile state data array. */ + __u32 qts; + + /* Pointer to a u32 array of the BOs that are referenced by the job. + */ + __u64 bo_handles; + + /* Number of BO handles passed in (size is that times 4). 
*/ + __u32 bo_handle_count; + + /* Pad, must be zero-filled. */ + __u32 pad; +}; + +/** + * struct drm_v3d_wait_bo - ioctl argument for waiting for + * completion of the last DRM_V3D_SUBMIT_CL on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_v3d_wait_bo { + __u32 handle; + __u32 pad; + __u64 timeout_ns; +}; + +/** + * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_v3d_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + /** + * Returned offset for the BO in the V3D address space. This offset + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + * + * This offset value will always be nonzero, since various HW + * units treat 0 specially. + */ + __u32 offset; +}; + +/** + * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_v3d_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +enum drm_v3d_param { + DRM_V3D_PARAM_V3D_UIFCFG, + DRM_V3D_PARAM_V3D_HUB_IDENT1, + DRM_V3D_PARAM_V3D_HUB_IDENT2, + DRM_V3D_PARAM_V3D_HUB_IDENT3, + DRM_V3D_PARAM_V3D_CORE0_IDENT0, + DRM_V3D_PARAM_V3D_CORE0_IDENT1, + DRM_V3D_PARAM_V3D_CORE0_IDENT2, +}; + +struct drm_v3d_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +/** + * Returns the offset for the BO in the V3D address space for this DRM fd. + * This is the same value returned by drm_v3d_create_bo, if that was called + * from this DRM fd. + */ +struct drm_v3d_get_bo_offset { + __u32 handle; + __u32 offset; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _V3D_DRM_H_ */ diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index b95a0e11cb07..2cac6277a1d7 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -183,10 +183,17 @@ struct drm_vc4_submit_cl { /* ID of the perfmon to attach to this job. 0 means no perfmon. */ __u32 perfmonid; - /* Unused field to align this struct on 64 bits. Must be set to 0. - * If one ever needs to add an u32 field to this struct, this field - * can be used. + /* Syncobj handle to wait on. If set, processing of this render job + * will not start until the syncobj is signaled. 0 means ignore. */ + __u32 in_sync; + + /* Syncobj handle to export fence to. If set, the fence in the syncobj + * will be replaced with a fence that signals upon completion of this + * render job. 0 means ignore. 
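Following the drm_v3d_mmap_bo description above: the ioctl only hands back a fake offset, which then feeds a regular mmap() on the DRM fd. A sketch; handle and size are assumed to come from an earlier DRM_IOCTL_V3D_CREATE_BO, and includes are elided:

    struct drm_v3d_mmap_bo map = { .handle = handle };
    void *ptr;

    if (ioctl(fd, DRM_IOCTL_V3D_MMAP_BO, &map))
            return -errno;

    ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
               fd, map.offset);
    if (ptr == MAP_FAILED)
            return -errno;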
+ */ + __u32 out_sync; + __u32 pad2; }; diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 4b04ead26cd9..f43c3c6171ff 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -260,6 +260,7 @@ struct virtio_gpu_cmd_submit { }; #define VIRTIO_GPU_CAPSET_VIRGL 1 +#define VIRTIO_GPU_CAPSET_VIRGL2 2 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { diff --git a/scripts/coccinelle/api/drm-get-put.cocci b/scripts/coccinelle/api/drm-get-put.cocci index ceb71ea7f61c..3a09c97ad87d 100644 --- a/scripts/coccinelle/api/drm-get-put.cocci +++ b/scripts/coccinelle/api/drm-get-put.cocci @@ -40,12 +40,6 @@ expression object; - drm_gem_object_unreference_unlocked(object) + drm_gem_object_put_unlocked(object) | -- drm_property_reference_blob(object) -+ drm_property_blob_get(object) -| -- drm_property_unreference_blob(object) -+ drm_property_blob_put(object) -| - drm_dev_unref(object) + drm_dev_put(object) ) @@ -72,10 +66,6 @@ __drm_gem_object_unreference(object) | drm_gem_object_unreference_unlocked(object) | -drm_property_unreference_blob@p(object) -| -drm_property_reference_blob@p(object) -| drm_dev_unref@p(object) ) |
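For reference, the effect of the remaining drm-get-put.cocci rules on a hypothetical driver snippet; the blob-property rules are dropped above because the drm_property_{reference,unreference}_blob compat aliases are removed from drm_property.h in this same series:

    /* before spatch */
    drm_gem_object_unreference_unlocked(&bo->gem);
    drm_dev_unref(ddev);

    /* after spatch */
    drm_gem_object_put_unlocked(&bo->gem);
    drm_dev_put(ddev);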