diff options
50 files changed, 1728 insertions, 359 deletions
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml index 0f1e556dc8ef..b659d79393a8 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml @@ -27,6 +27,7 @@ properties: - mediatek,mt8188-dp-intf - mediatek,mt8192-dpi - mediatek,mt8195-dp-intf + - mediatek,mt8195-dpi - items: - enum: - mediatek,mt6795-dpi @@ -35,6 +36,10 @@ properties: - enum: - mediatek,mt8365-dpi - const: mediatek,mt8192-dpi + - items: + - enum: + - mediatek,mt8188-dpi + - const: mediatek,mt8195-dpi reg: maxItems: 1 @@ -116,11 +121,13 @@ examples: - | #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/clock/mt8173-clk.h> + #include <dt-bindings/power/mt8173-power.h> dpi: dpi@1401d000 { compatible = "mediatek,mt8173-dpi"; reg = <0x1401d000 0x1000>; interrupts = <GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>; + power-domains = <&spm MT8173_POWER_DOMAIN_MM>; clocks = <&mmsys CLK_MM_DPI_PIXEL>, <&mmsys CLK_MM_DPI_ENGINE>, <&apmixedsys CLK_APMIXED_TVDPLL>; diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml index 846de6c17d93..a5b88eb97e3b 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml @@ -22,6 +22,9 @@ properties: oneOf: - enum: - mediatek,mt8195-disp-dsc + - items: + - const: mediatek,mt8188-disp-dsc + - const: mediatek,mt8195-disp-dsc reg: maxItems: 1 diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index 74fc2cbf1b6f..78b80be17f21 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -25,6 +25,7 @@ GPU Driver Documentation panfrost panthor zynqmp + nova/index .. 
only:: subproject and html diff --git a/Documentation/gpu/nova/core/guidelines.rst b/Documentation/gpu/nova/core/guidelines.rst new file mode 100644 index 000000000000..a389d65d7982 --- /dev/null +++ b/Documentation/gpu/nova/core/guidelines.rst @@ -0,0 +1,24 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========== +Guidelines +========== + +This document contains the guidelines for nova-core. Additionally, all common +guidelines of the Nova project do apply. + +Driver API +========== + +One main purpose of nova-core is to implement the abstraction around the +firmware interface of GSP and provide a firmware (version) independent API for +2nd level drivers, such as nova-drm or the vGPU manager VFIO driver. + +Therefore, it is not permitted to leak firmware (version) specifics, through the +driver API, to 2nd level drivers. + +Acceptance Criteria +=================== + +- To the extent possible, patches submitted to nova-core must be tested for + regressions with all 2nd level drivers. diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst new file mode 100644 index 000000000000..ca08377d3b73 --- /dev/null +++ b/Documentation/gpu/nova/core/todo.rst @@ -0,0 +1,446 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========= +Task List +========= + +Tasks may have the following fields: + +- ``Complexity``: Describes the required familiarity with Rust and / or the + corresponding kernel APIs or subsystems. There are four different complexities, + ``Beginner``, ``Intermediate``, ``Advanced`` and ``Expert``. +- ``Reference``: References to other tasks. +- ``Link``: Links to external resources. +- ``Contact``: The person that can be contacted for further information about + the task. + +Enablement (Rust) +================= + +Tasks that are not directly related to nova-core, but are preconditions in terms +of required APIs. 
+ +FromPrimitive API +----------------- + +Sometimes the need arises to convert a number to a value of an enum or a +structure. + +A good example from nova-core would be the ``Chipset`` enum type, which defines +the value ``AD102``. When probing the GPU the value ``0x192`` can be read from a +certain register indicating the chipset AD102. Hence, the enum value ``AD102`` +should be derived from the number ``0x192``. Currently, nova-core uses a custom +implementation (``Chipset::from_u32``) for this. + +Instead, it would be desirable to have something like the ``FromPrimitive`` +trait [1] from the num crate. + +Having this generalization also helps with implementing a generic macro that +automatically generates the corresponding mappings between a value and a number. + +| Complexity: Beginner +| Link: https://docs.rs/num/latest/num/trait.FromPrimitive.html [1] + +Generic register abstraction +---------------------------- + +Work out how register constants and structures can be automatically generated +through generalized macros. + +Example: + +.. code-block:: rust + + register!(BOOT0, 0x0, u32, pci::Bar<SIZE>, Fields [ + MINOR_REVISION(3:0, RO), + MAJOR_REVISION(7:4, RO), + REVISION(7:0, RO), // Virtual register combining major and minor rev. + ]) + +This could expand to something like: + +.. 
code-block:: rust + + const BOOT0_OFFSET: usize = 0x00000000; + const BOOT0_MINOR_REVISION_SHIFT: u8 = 0; + const BOOT0_MINOR_REVISION_MASK: u32 = 0x0000000f; + const BOOT0_MAJOR_REVISION_SHIFT: u8 = 4; + const BOOT0_MAJOR_REVISION_MASK: u32 = 0x000000f0; + const BOOT0_REVISION_SHIFT: u8 = BOOT0_MINOR_REVISION_SHIFT; + const BOOT0_REVISION_MASK: u32 = BOOT0_MINOR_REVISION_MASK | BOOT0_MAJOR_REVISION_MASK; + + struct Boot0(u32); + + impl Boot0 { + #[inline] + fn read(bar: &RevocableGuard<'_, pci::Bar<SIZE>>) -> Self { + Self(bar.readl(BOOT0_OFFSET)) + } + + #[inline] + fn minor_revision(&self) -> u32 { + (self.0 & BOOT0_MINOR_REVISION_MASK) >> BOOT0_MINOR_REVISION_SHIFT + } + + #[inline] + fn major_revision(&self) -> u32 { + (self.0 & BOOT0_MAJOR_REVISION_MASK) >> BOOT0_MAJOR_REVISION_SHIFT + } + + #[inline] + fn revision(&self) -> u32 { + (self.0 & BOOT0_REVISION_MASK) >> BOOT0_REVISION_SHIFT + } + } + +Usage: + +.. code-block:: rust + + let bar = bar.try_access().ok_or(ENXIO)?; + + let boot0 = Boot0::read(&bar); + pr_info!("Revision: {}\n", boot0.revision()); + +| Complexity: Advanced + +Delay / Sleep abstractions +-------------------------- + +Rust abstractions for the kernel's delay() and sleep() functions. + +FUJITA Tomonori plans to work on abstractions for read_poll_timeout_atomic() +(and friends) [1]. + +| Complexity: Beginner +| Link: https://lore.kernel.org/netdev/20250228.080550.354359820929821928.fujita.tomonori@gmail.com/ [1] + +IRQ abstractions +---------------- + +Rust abstractions for IRQ handling. + +There is active ongoing work from Daniel Almeida [1] for the "core" abstractions +to request IRQs. + +Besides optional review and testing work, the required ``pci::Device`` code +around those core abstractions needs to be worked out. 
+ +| Complexity: Intermediate +| Link: https://lore.kernel.org/lkml/20250122163932.46697-1-daniel.almeida@collabora.com/ [1] +| Contact: Daniel Almeida + +Page abstraction for foreign pages +---------------------------------- + +Rust abstractions for pages not created by the Rust page abstraction without +direct ownership. + +There is active ongoing work from Abdiel Janulgue [1] and Lina [2]. + +| Complexity: Advanced +| Link: https://lore.kernel.org/linux-mm/20241119112408.779243-1-abdiel.janulgue@gmail.com/ [1] +| Link: https://lore.kernel.org/rust-for-linux/20250202-rust-page-v1-0-e3170d7fe55e@asahilina.net/ [2] + +Scatterlist / sg_table abstractions +----------------------------------- + +Rust abstractions for scatterlist / sg_table. + +There is preceding work from Abdiel Janulgue, which hasn't made it to the +mailing list yet. + +| Complexity: Intermediate +| Contact: Abdiel Janulgue + +ELF utils +--------- + +Rust implementation of ELF header representation to retrieve section header +tables, names, and data from ELF-formatted images. + +There is preceding work from Abdiel Janulgue, which hasn't made it to the +mailing list yet. + +| Complexity: Beginner +| Contact: Abdiel Janulgue + +PCI MISC APIs +------------- + +Extend the existing PCI device / driver abstractions by SR-IOV, config space, +capability, MSI API abstractions. + +| Complexity: Beginner + +Auxiliary bus abstractions +-------------------------- + +Rust abstraction for the auxiliary bus APIs. + +This is needed to connect nova-core to the nova-drm driver. + +| Complexity: Intermediate + +Debugfs abstractions +-------------------- + +Rust abstraction for debugfs APIs. + +| Reference: Export GSP log buffers +| Complexity: Intermediate + +Vec extensions +-------------- + +Implement ``Vec::truncate`` and ``Vec::resize``. + +Currently this is used for some experimental code to parse the vBIOS. 
+ +| Reference: vBIOS support +| Complexity: Beginner + +GPU (general) +============= + +Parse firmware headers +---------------------- + +Parse ELF headers from the firmware files loaded from the filesystem. + +| Reference: ELF utils +| Complexity: Beginner +| Contact: Abdiel Janulgue + +Build radix3 page table +----------------------- + +Build the radix3 page table to map the firmware. + +| Complexity: Intermediate +| Contact: Abdiel Janulgue + +vBIOS support +------------- + +Parse the vBIOS and probe the structures required for driver initialization. + +| Contact: Dave Airlie +| Reference: Vec extensions +| Complexity: Intermediate + +Initial Devinit support +----------------------- + +Implement BIOS Device Initialization, i.e. memory sizing, waiting, PLL +configuration. + +| Contact: Dave Airlie +| Complexity: Beginner + +Boot Falcon controller +---------------------- + +Infrastructure to load and execute falcon (sec2) firmware images; handle the +GSP falcon processor and fwsec loading. + +| Complexity: Advanced +| Contact: Dave Airlie + +GPU Timer support +----------------- + +Support for the GPU's internal timer peripheral. + +| Complexity: Beginner +| Contact: Dave Airlie + +MMU / PT management +------------------- + +Work out the architecture for MMU / page table management. + +We need to consider that nova-drm will need rather fine-grained control, +especially in terms of locking, in order to be able to implement asynchronous +Vulkan queues. + +While generally sharing the corresponding code is desirable, it needs to be +evaluated how (and if at all) sharing the corresponding code is expedient. + +| Complexity: Expert + +VRAM memory allocator +--------------------- + +Investigate options for a VRAM memory allocator. 
+ +Some possible options: + - Rust abstractions for + - RB tree (interval tree) / drm_mm + - maple_tree + - native Rust collections + +| Complexity: Advanced + +Instance Memory +--------------- + +Implement support for instmem (bar2) used to store page tables. + +| Complexity: Intermediate +| Contact: Dave Airlie + +GPU System Processor (GSP) +========================== + +Export GSP log buffers +---------------------- + +Recent patches from Timur Tabi [1] added support to expose GSP-RM log buffers +(even after failure to probe the driver) through debugfs. + +This is also an interesting feature for nova-core, especially in the early days. + +| Link: https://lore.kernel.org/nouveau/20241030202952.694055-2-ttabi@nvidia.com/ [1] +| Reference: Debugfs abstractions +| Complexity: Intermediate + +GSP firmware abstraction +------------------------ + +The GSP-RM firmware API is unstable and may incompatibly change from version to +version, in terms of data structures and semantics. + +This problem is one of the big motivations for using Rust for nova-core, since +it turns out that Rust's procedural macro feature provides a rather elegant way +to address this issue: + +1. generate Rust structures from the C headers in a separate namespace per version +2. build abstraction structures (within a generic namespace) that implement the + firmware interfaces; annotate the differences in implementation with version + identifiers +3. use a procedural macro to generate the actual per version implementation out + of this abstraction +4. instantiate the correct version type at runtime (can be sure that all + have the same interface because it's defined by a common trait) + +There is a PoC implementation of this pattern, in the context of the nova-core +PoC driver. + +This task aims at refining the feature and ideally generalizing it, to be usable +by other drivers as well. 
+ +| Complexity: Expert + +GSP message queue +----------------- + +Implement low level GSP message queue (command, status) for communication +between the kernel driver and GSP. + +| Complexity: Advanced +| Contact: Dave Airlie + +Bootstrap GSP +------------- + +Call the boot firmware to boot the GSP processor; execute initial control +messages. + +| Complexity: Intermediate +| Contact: Dave Airlie + +Client / Device APIs +-------------------- + +Implement the GSP message interface for client / device allocation and the +corresponding client and device allocation APIs. + +| Complexity: Intermediate +| Contact: Dave Airlie + +Bar PDE handling +---------------- + +Synchronize page table handling for BARs between the kernel driver and GSP. + +| Complexity: Beginner +| Contact: Dave Airlie + +FIFO engine +----------- + +Implement support for the FIFO engine, i.e. the corresponding GSP message +interface and provide an API for chid allocation and channel handling. + +| Complexity: Advanced +| Contact: Dave Airlie + +GR engine +--------- + +Implement support for the graphics engine, i.e. the corresponding GSP message +interface and provide an API for (golden) context creation and promotion. + +| Complexity: Advanced +| Contact: Dave Airlie + +CE engine +--------- + +Implement support for the copy engine, i.e. the corresponding GSP message +interface. + +| Complexity: Intermediate +| Contact: Dave Airlie + +VFN IRQ controller +------------------ + +Support for the VFN interrupt controller. + +| Complexity: Intermediate +| Contact: Dave Airlie + +External APIs +============= + +nova-core base API +------------------ + +Work out the common pieces of the API to connect 2nd level drivers, i.e. vGPU +manager and nova-drm. + +| Complexity: Advanced + +vGPU manager API +---------------- + +Work out the API parts required by the vGPU manager, which are not covered by +the base API. 
+ +| Complexity: Advanced + +nova-core C API +--------------- + +Implement a C wrapper for the APIs required by the vGPU manager driver. + +| Complexity: Intermediate + +Testing +======= + +CI pipeline +----------- + +Investigate options for continuous integration testing. + +This can go from as simple as running KUnit tests over running (graphics) CTS to +booting up (multiple) guest VMs to test VFIO use-cases. + +It might also be worth considering the introduction of a new test suite directly +sitting on top of the uAPI for more targeted testing and debugging. There may be +options for collaboration / shared code with the Mesa project. + +| Complexity: Advanced diff --git a/Documentation/gpu/nova/guidelines.rst b/Documentation/gpu/nova/guidelines.rst new file mode 100644 index 000000000000..13ab13984a18 --- /dev/null +++ b/Documentation/gpu/nova/guidelines.rst @@ -0,0 +1,69 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========== +Guidelines +========== + +This document describes the general project guidelines that apply to nova-core +and nova-drm. + +Language +======== + +The Nova project uses the Rust programming language. In this context, all rules +of the Rust for Linux project as documented in +:doc:`../../rust/general-information` apply. Additionally, the following rules +apply. + +- Unless technically necessary otherwise (e.g. uAPI), any driver code is written + in Rust. + +- Unless technically necessary, unsafe Rust code must be avoided. In case of + technical necessity, unsafe code should be isolated in a separate component + providing a safe API for other driver code to use. + +Style +----- + +All rules of the Rust for Linux project as documented in +:doc:`../../rust/coding-guidelines` apply. + +For a submit checklist, please also see the `Rust for Linux Submit checklist +addendum <https://rust-for-linux.com/contributing#submit-checklist-addendum>`_. 
+ +Documentation +============= + +The availability of proper documentation is essential in terms of scalability, +accessibility for new contributors and maintainability of a project in general, +but especially for a driver running as complex hardware as Nova is targeting. + +Hence, adding documentation of any kind is very much encouraged by the project. + +Besides that, there are some minimum requirements. + +- Every non-private structure needs at least a brief doc comment explaining the + semantical sense of the structure, as well as potential locking and lifetime + requirements. It is encouraged to have the same minimum documentation for + non-trivial private structures. + +- uAPIs must be fully documented with kernel-doc comments; additionally, the + semantical behavior must be explained including potential special or corner + cases. + +- The APIs connecting the 1st level driver (nova-core) with 2nd level drivers + must be fully documented. This includes doc comments, potential locking and + lifetime requirements, as well as example code if applicable. + +- Abbreviations must be explained when introduced; terminology must be uniquely + defined. + +- Register addresses, layouts, shift values and masks must be defined properly; + unless obvious, the semantical sense must be documented. This only applies if + the author is able to obtain the corresponding information. + +Acceptance Criteria +=================== + +- Patches must only be applied if reviewed by at least one other person on the + mailing list; this also applies for maintainers. diff --git a/Documentation/gpu/nova/index.rst b/Documentation/gpu/nova/index.rst new file mode 100644 index 000000000000..2701b3f4af35 --- /dev/null +++ b/Documentation/gpu/nova/index.rst @@ -0,0 +1,30 @@ +.. 
SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======================= +nova NVIDIA GPU drivers +======================= + +The nova driver project consists of two separate drivers, nova-core and +nova-drm, and intends to supersede the nouveau driver for NVIDIA GPUs based on +the GPU System Processor (GSP). + +The following documents apply to both nova-core and nova-drm. + +.. toctree:: + :titlesonly: + + guidelines + +nova-core +========= + +The nova-core driver is the core driver for NVIDIA GPUs based on GSP. nova-core, +as the 1st level driver, provides an abstraction around the GPU's hard- and +firmware interfaces providing a common base for 2nd level drivers, such as the +vGPU manager VFIO driver and the nova-drm driver. + +.. toctree:: + :titlesonly: + + core/guidelines + core/todo diff --git a/MAINTAINERS b/MAINTAINERS index 640c784b59bf..1554908c94cc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7457,6 +7457,17 @@ T: git https://gitlab.freedesktop.org/drm/nouveau.git F: drivers/gpu/drm/nouveau/ F: include/uapi/drm/nouveau_drm.h +CORE DRIVER FOR NVIDIA GPUS [RUST] +M: Danilo Krummrich <dakr@kernel.org> +L: nouveau@lists.freedesktop.org +S: Supported +Q: https://patchwork.freedesktop.org/project/nouveau/ +B: https://gitlab.freedesktop.org/drm/nova/-/issues +C: irc://irc.oftc.net/nouveau +T: git https://gitlab.freedesktop.org/drm/nova.git nova-next +F: Documentation/gpu/nova/ +F: drivers/gpu/nova-core/ + DRM DRIVER FOR OLIMEX LCD-OLINUXINO PANELS M: Stefan Mavrodiev <stefan@olimex.com> S: Maintained diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index 8997f0096545..36a54d456630 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -5,3 +5,4 @@ obj-y += host1x/ drm/ vga/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ obj-$(CONFIG_TRACE_GPU_MEM) += trace/ +obj-$(CONFIG_NOVA_CORE) += nova-core/ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 2b0327cc47c2..fd8babb513e5 100644 
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -108,44 +108,6 @@ err_ctx: } struct i915_gem_context * -live_context_for_engine(struct intel_engine_cs *engine, struct file *file) -{ - struct i915_gem_engines *engines; - struct i915_gem_context *ctx; - struct intel_sseu null_sseu = {}; - struct intel_context *ce; - - engines = alloc_engines(1); - if (!engines) - return ERR_PTR(-ENOMEM); - - ctx = live_context(engine->i915, file); - if (IS_ERR(ctx)) { - __free_engines(engines, 0); - return ctx; - } - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - __free_engines(engines, 0); - return ERR_CAST(ce); - } - - intel_context_set_gem(ce, ctx, null_sseu); - engines->engines[0] = ce; - engines->num_engines = 1; - - mutex_lock(&ctx->engines_mutex); - i915_gem_context_set_user_engines(ctx); - engines = rcu_replace_pointer(ctx->engines, engines, 1); - mutex_unlock(&ctx->engines_mutex); - - engines_idle_release(ctx, engines); - - return ctx; -} - -struct i915_gem_context * kernel_context(struct drm_i915_private *i915, struct i915_address_space *vm) { diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h index 7a02fd9b5866..bc8fb37d2d24 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -23,9 +23,6 @@ void mock_context_close(struct i915_gem_context *ctx); struct i915_gem_context * live_context(struct drm_i915_private *i915, struct file *file); -struct i915_gem_context * -live_context_for_engine(struct intel_engine_cs *engine, struct file *file); - struct i915_gem_context *kernel_context(struct drm_i915_private *i915, struct i915_address_space *vm); void kernel_context_close(struct i915_gem_context *ctx); diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c index 075657018739..5cd58e0f0dcf 100644 --- 
a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c @@ -103,8 +103,7 @@ static struct dma_buf *mock_dmabuf(int npages) struct dma_buf *dmabuf; int i; - mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), - GFP_KERNEL); + mock = kmalloc(struct_size(mock, pages, npages), GFP_KERNEL); if (!mock) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index f6b780f893f5..0c723e7c71a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -750,7 +750,7 @@ static void swizzle_page(struct page *page) char *vaddr; int i; - vaddr = kmap(page); + vaddr = kmap_local_page(page); for (i = 0; i < PAGE_SIZE; i += 128) { memcpy(temp, &vaddr[i], 64); @@ -758,7 +758,7 @@ static void swizzle_page(struct page *page) memcpy(&vaddr[i + 64], temp, 64); } - kunmap(page); + kunmap_local(vaddr); } /** diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 6dba65e54cdb..a6e50af44b46 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -409,6 +409,9 @@ #define GEN7_SO_PRIM_STORAGE_NEEDED(n) _MMIO(0x5240 + (n) * 8) #define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n) _MMIO(0x5240 + (n) * 8 + 4) +#define GEN8_WM_CHICKEN2 MCR_REG(0x5584) +#define WAIT_ON_DEPTH_STALL_DONE_DISABLE REG_BIT(5) + #define GEN9_WM_CHICKEN3 _MMIO(0x5588) #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index d7784650e4d9..1154cd2b7c34 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -464,6 +464,45 @@ static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj, return err ?: count; } +static ssize_t slpc_power_profile_show(struct kobject *kobj, + struct kobj_attribute *attr, + char 
*buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_guc_slpc *slpc = &gt->uc.guc.slpc; + + switch (slpc->power_profile) { + case SLPC_POWER_PROFILES_BASE: + return sysfs_emit(buff, "[%s] %s\n", "base", "power_saving"); + case SLPC_POWER_PROFILES_POWER_SAVING: + return sysfs_emit(buff, "%s [%s]\n", "base", "power_saving"); + } + + return sysfs_emit(buff, "%u\n", slpc->power_profile); +} + +static ssize_t slpc_power_profile_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buff, size_t count) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_guc_slpc *slpc = &gt->uc.guc.slpc; + char power_saving[] = "power_saving"; + char base[] = "base"; + int err; + u32 val; + + if (!strncmp(buff, power_saving, sizeof(power_saving) - 1)) + val = SLPC_POWER_PROFILES_POWER_SAVING; + else if (!strncmp(buff, base, sizeof(base) - 1)) + val = SLPC_POWER_PROFILES_BASE; + else + return -EINVAL; + + err = intel_guc_slpc_set_power_profile(slpc, val); + return err ?: count; +} + struct intel_gt_bool_throttle_attr { struct attribute attr; ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, @@ -668,6 +707,7 @@ INTEL_GT_ATTR_RO(media_RP0_freq_mhz); INTEL_GT_ATTR_RO(media_RPn_freq_mhz); INTEL_GT_ATTR_RW(slpc_ignore_eff_freq); +INTEL_GT_ATTR_RW(slpc_power_profile); static const struct attribute *media_perf_power_attrs[] = { &attr_media_freq_factor.attr, @@ -864,6 +904,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) gt_warn(gt, "failed to create ignore_eff_freq sysfs (%pe)", ERR_PTR(ret)); } + if (intel_uc_uses_guc_slpc(&gt->uc)) { + ret = sysfs_create_file(kobj, &attr_slpc_power_profile.attr); + if (ret) + gt_warn(gt, "failed to create slpc_power_profile sysfs (%pe)", + ERR_PTR(ret)); + } + if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) { ret = sysfs_create_files(kobj, throttle_reason_attrs); if (ret) diff --git 
a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 3ee544e7c203..dbdcfe130ad4 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1113,7 +1113,6 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * Warn CI about the unrecoverable wedged condition. * Time for a reboot. */ - gt_err(gt, "Unrecoverable wedged condition\n"); add_taint_for_CI(gt->i915, TAINT_WARN); return false; } @@ -1272,10 +1271,8 @@ void intel_gt_reset(struct intel_gt *gt, } ret = resume(gt); - if (ret) { - gt_err(gt, "Failed to resume (%d)\n", ret); + if (ret) goto taint; - } finish: reset_finish(gt, awake); @@ -1641,7 +1638,6 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags); /* Wedged on init is non-recoverable */ - gt_err(gt, "Non-recoverable wedged on init\n"); add_taint_for_CI(gt->i915, TAINT_WARN); } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fa304ea088e4..2cfaedb04876 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1025,6 +1025,10 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Waitboost should not be done with power saving profile */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + if (slpc->min_freq_softlimit >= slpc->boost_freq) return; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3ea9b06de1be..116683ebe074 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -691,16 +691,17 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, struct drm_i915_private *i915 = engine->i915; /* - * Wa_1409142259:tgl,dg1,adl-p + * Wa_1409142259:tgl,dg1,adl-p,adl-n * Wa_1409347922:tgl,dg1,adl-p * Wa_1409252684:tgl,dg1,adl-p * Wa_1409217633:tgl,dg1,adl-p * 
Wa_1409207793:tgl,dg1,adl-p - * Wa_1409178076:tgl,dg1,adl-p - * Wa_1408979724:tgl,dg1,adl-p - * Wa_14010443199:tgl,rkl,dg1,adl-p - * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p - * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p + * Wa_1409178076:tgl,dg1,adl-p,adl-n + * Wa_1408979724:tgl,dg1,adl-p,adl-n + * Wa_14010443199:tgl,rkl,dg1,adl-p,adl-n + * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p,adl-n + * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p,adl-n + * Wa_22010465259:tgl,rkl,dg1,adl-s,adl-p,adl-n */ wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); @@ -741,6 +742,12 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1606376872 */ wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC); } + + /* + * This bit must be set to enable performance optimization for fast + * clears. + */ + wa_mcr_write_or(wal, GEN8_WM_CHICKEN2, WAIT_ON_DEPTH_STALL_DONE_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 78c03e6c0861..73bc91c6ea07 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -477,12 +477,13 @@ int live_rps_control(void *arg) limit, intel_gpu_freq(rps, limit), min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); - if (limit == rps->min_freq) { - pr_err("%s: GPU throttled to minimum!\n", - engine->name); + if (limit != rps->max_freq) { + u32 throttle = intel_uncore_read(gt->uncore, + intel_gt_perf_limit_reasons_reg(gt)); + + pr_warn("%s: GPU throttled with reasons 0x%08x\n", + engine->name, throttle & GT0_PERF_LIMIT_REASONS_MASK); show_pstate_limits(rps); - err = -ENODEV; - break; } if (igt_flush_test(gt->i915)) { @@ -1115,7 +1116,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq) for (i = 0; i < 5; i++) x[i] = __measure_power(5); - *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; + *freq = (*freq + read_cagf(rps)) / 
2; /* A simple triangle filter for better result stability */ sort(x, 5, sizeof(*x), cmp_u64, NULL); diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index e218b229681f..e61bb0bad12c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -95,6 +95,21 @@ static int slpc_restore_freq(struct intel_guc_slpc *slpc, u32 min, u32 max) return 0; } +static u64 slpc_measure_power(struct intel_rps *rps, int *freq) +{ + u64 x[5]; + int i; + + for (i = 0; i < 5; i++) + x[i] = __measure_power(5); + + *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; + + /* A simple triangle filter for better result stability */ + sort(x, 5, sizeof(*x), cmp_u64, NULL); + return div_u64(x[1] + 2 * x[2] + x[3], 4); +} + static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) { int err = 0; @@ -103,7 +118,7 @@ static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) if (err) return err; *freq = intel_rps_read_actual_frequency(&gt->rps); - *power = measure_power(&gt->rps, freq); + *power = slpc_measure_power(&gt->rps, freq); return err; } diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index bb696b29ee2c..365c4b8b04f4 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -108,7 +108,7 @@ static int __shmem_rw(struct file *file, loff_t off, if (IS_ERR(page)) return PTR_ERR(page); - vaddr = kmap(page); + vaddr = kmap_local_page(page); if (write) { memcpy(vaddr + offset_in_page(off), ptr, this); set_page_dirty(page); @@ -116,7 +116,7 @@ static int __shmem_rw(struct file *file, loff_t off, memcpy(ptr, vaddr + offset_in_page(off), this); } mark_page_accessed(page); - kunmap(page); + kunmap_local(vaddr); put_page(page); len -= this; @@ -143,11 +143,11 @@ int shmem_read_to_iosys_map(struct file *file, loff_t off, if (IS_ERR(page)) return PTR_ERR(page); - vaddr = kmap(page); + vaddr = 
kmap_local_page(page); iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off), this); mark_page_accessed(page); - kunmap(page); + kunmap_local(vaddr); put_page(page); len -= this; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h index c34674e797c6..6de87ae5669e 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -228,6 +228,11 @@ struct slpc_optimized_strategies { #define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0) +enum slpc_power_profiles { + SLPC_POWER_PROFILES_BASE = 0x0, + SLPC_POWER_PROFILES_POWER_SAVING = 0x1 +}; + /** * DOC: SLPC H2G MESSAGE FORMAT * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index fe53e8eccf4b..e7ccfa520df3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -259,13 +259,14 @@ static int guc_wait_ucode(struct intel_guc *guc) } else if (delta_ms > 200) { guc_warn(guc, "excessive init time: %lldms! 
[status = 0x%08X, count = %d, ret = %d]\n", delta_ms, status, count, ret); - guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", - intel_rps_read_actual_frequency(&gt->rps), before_freq, + guc_warn(guc, "excessive init time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n", + before_freq, intel_rps_read_actual_frequency(&gt->rps), + intel_rps_get_requested_frequency(&gt->rps), intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); } else { - guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", - delta_ms, intel_rps_read_actual_frequency(&gt->rps), - before_freq, status, count, ret); + guc_dbg(guc, "init took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps), + intel_rps_get_requested_frequency(&gt->rps), status, count, ret); } return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index b67a15f74276..868195c33f5b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -7,6 +7,7 @@ #include "gt/intel_hwconfig.h" #include "i915_drv.h" #include "i915_memcpy.h" +#include "intel_guc_print.h" /* * GuC has a blob containing hardware configuration information (HWConfig). 
@@ -42,6 +43,8 @@ static int __guc_action_get_hwconfig(struct intel_guc *guc, }; int ret; + guc_dbg(guc, "Querying HW config table: size = %d, offset = 0x%08X\n", + ggtt_size, ggtt_offset); ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (ret == -ENXIO) return -ENOENT; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 1a0e1a412fdb..d5ee6e5e1443 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -15,6 +15,34 @@ #include "gt/intel_gt_regs.h" #include "gt/intel_rps.h" +/** + * DOC: SLPC - Dynamic Frequency management + * + * Single Loop Power Control (SLPC) is a GuC algorithm that manages + * GT frequency based on busyness and how KMD initializes it. SLPC is + * almost completely in control after initialization except for a few + * scenarios mentioned below. + * + * KMD uses the concept of waitboost to ramp frequency to RP0 when there + * are pending submissions for a context. It achieves this by sending GuC a + * request to update the min frequency to RP0. Waitboost is disabled + * when the request retires. + * + * Another form of frequency control happens through per-context hints. + * A context can be marked as low latency during creation. That will ensure + * that SLPC uses an aggressive frequency ramp when that context is active. + * + * Power profiles add another level of control to these mechanisms. + * When power saving profile is chosen, SLPC will use conservative + * thresholds to ramp frequency, thus saving power. KMD will disable + * waitboosts as well, which achieves further power savings. Base profile + * is default and ensures balanced performance for any workload. + * + * Lastly, users have some level of control through sysfs, where min/max + * frequency values can be altered and the use of efficient freq + * can be toggled. 
+ */ + static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) { return container_of(slpc, struct intel_guc, slpc); @@ -265,6 +293,8 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->num_boosts = 0; slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL; + slpc->power_profile = SLPC_POWER_PROFILES_BASE; + mutex_init(&slpc->lock); INIT_WORK(&slpc->boost_work, slpc_boost_work); @@ -575,6 +605,34 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val) return ret; } +int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + if (val > SLPC_POWER_PROFILES_POWER_SAVING) + return -EINVAL; + + mutex_lock(&slpc->lock); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + ret = slpc_set_param(slpc, + SLPC_PARAM_POWER_PROFILE, + val); + if (ret) + guc_err(slpc_to_guc(slpc), + "Failed to set power profile to %d: %pe\n", + val, ERR_PTR(ret)); + else + slpc->power_profile = val; + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&slpc->lock); + + return ret; +} + void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) { u32 pm_intrmsk_mbz = 0; @@ -736,6 +794,13 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Enable SLPC Optimized Strategy for compute */ intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); + /* Set cached value of power_profile */ + ret = intel_guc_slpc_set_power_profile(slpc, slpc->power_profile); + if (unlikely(ret)) { + guc_probe_error(guc, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); + return ret; + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 1cb5fd44f05c..fc9f761b4372 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -46,5 +46,6 @@ void intel_guc_slpc_boost(struct intel_guc_slpc 
*slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_power_profile(struct intel_guc_slpc *slpc, u32 val); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index a88651331497..83673b10ac4e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -33,6 +33,9 @@ struct intel_guc_slpc { u32 max_freq_softlimit; bool ignore_eff_freq; + /* Base or power saving */ + u32 power_profile; + /* cached media ratio mode */ u32 media_ratio_mode; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index b3cbf85c00cb..f30c90650b7e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -489,13 +489,15 @@ int intel_huc_wait_for_auth_complete(struct intel_huc *huc, if (delta_ms > 50) { huc_warn(huc, "excessive auth time: %lldms! 
[status = 0x%08X, count = %d, ret = %d]\n", delta_ms, huc->status[type].reg.reg, count, ret); - huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n", - intel_rps_read_actual_frequency(>->rps), before_freq, + huc_warn(huc, "excessive auth time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n", + before_freq, intel_rps_read_actual_frequency(>->rps), + intel_rps_get_requested_frequency(>->rps), intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt))); } else { - huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", - delta_ms, intel_rps_read_actual_frequency(>->rps), - before_freq, huc->status[type].reg.reg, count, ret); + huc_dbg(huc, "auth took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, before_freq, intel_rps_read_actual_frequency(>->rps), + intel_rps_get_requested_frequency(>->rps), + huc->status[type].reg.reg, count, ret); } /* mark the load process as complete even if the wait failed */ diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index 2f88970cc0a9..5862754c662c 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -71,8 +71,6 @@ static const struct { { .init = i915_vma_resource_module_init, .exit = i915_vma_resource_module_exit }, { .init = i915_mock_selftests }, - { .init = i915_pmu_init, - .exit = i915_pmu_exit }, { .init = i915_pci_register_driver, .exit = i915_pci_unregister_driver }, { .init = i915_perf_sysctl_register, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e55db036be1b..69a109d02116 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -28,9 +28,6 @@ BIT(I915_SAMPLE_WAIT) | \ BIT(I915_SAMPLE_SEMA)) -static cpumask_t i915_pmu_cpumask; -static unsigned int i915_pmu_target_cpu = -1; - static struct i915_pmu *event_to_pmu(struct 
perf_event *event) { return container_of(event->pmu, struct i915_pmu, base); @@ -642,10 +639,6 @@ static int i915_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; - /* only allow running on one cpu at a time */ - if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask)) - return -EINVAL; - if (is_engine_event(event)) ret = engine_event_init(event); else @@ -891,11 +884,6 @@ static void i915_pmu_event_del(struct perf_event *event, int flags) i915_pmu_event_stop(event, PERF_EF_UPDATE); } -static int i915_pmu_event_event_idx(struct perf_event *event) -{ - return 0; -} - struct i915_str_attribute { struct device_attribute attr; const char *str; @@ -940,23 +928,6 @@ static ssize_t i915_pmu_event_show(struct device *dev, return sprintf(buf, "config=0x%lx\n", eattr->val); } -static ssize_t cpumask_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask); -} - -static DEVICE_ATTR_RO(cpumask); - -static struct attribute *i915_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group i915_pmu_cpumask_attr_group = { - .attrs = i915_cpumask_attrs, -}; - #define __event(__counter, __name, __unit) \ { \ .counter = (__counter), \ @@ -1173,92 +1144,12 @@ static void free_event_attributes(struct i915_pmu *pmu) pmu->pmu_attr = NULL; } -static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); - - /* Select the first online CPU as a designated reader. 
*/ - if (cpumask_empty(&i915_pmu_cpumask)) - cpumask_set_cpu(cpu, &i915_pmu_cpumask); - - return 0; -} - -static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) -{ - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); - unsigned int target = i915_pmu_target_cpu; - - /* - * Unregistering an instance generates a CPU offline event which we must - * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask. - */ - if (!pmu->registered) - return 0; - - if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { - target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); - - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &i915_pmu_cpumask); - i915_pmu_target_cpu = target; - } - } - - if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) { - perf_pmu_migrate_context(&pmu->base, cpu, target); - pmu->cpuhp.cpu = target; - } - - return 0; -} - -static enum cpuhp_state cpuhp_state = CPUHP_INVALID; - -int i915_pmu_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "perf/x86/intel/i915:online", - i915_pmu_cpu_online, - i915_pmu_cpu_offline); - if (ret < 0) - pr_notice("Failed to setup cpuhp state for i915 PMU! 
(%d)\n", - ret); - else - cpuhp_state = ret; - - return 0; -} - -void i915_pmu_exit(void) -{ - if (cpuhp_state != CPUHP_INVALID) - cpuhp_remove_multi_state(cpuhp_state); -} - -static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) -{ - if (cpuhp_state == CPUHP_INVALID) - return -EINVAL; - - return cpuhp_state_add_instance(cpuhp_state, &pmu->cpuhp.node); -} - -static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) -{ - cpuhp_state_remove_instance(cpuhp_state, &pmu->cpuhp.node); -} - void i915_pmu_register(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; const struct attribute_group *attr_groups[] = { &i915_pmu_format_attr_group, &pmu->events_attr_group, - &i915_pmu_cpumask_attr_group, NULL }; int ret = -ENOMEM; @@ -1266,7 +1157,6 @@ void i915_pmu_register(struct drm_i915_private *i915) spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; - pmu->cpuhp.cpu = -1; init_rc6(pmu); if (IS_DGFX(i915)) { @@ -1295,28 +1185,22 @@ void i915_pmu_register(struct drm_i915_private *i915) pmu->base.module = THIS_MODULE; pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.scope = PERF_PMU_SCOPE_SYS_WIDE; pmu->base.event_init = i915_pmu_event_init; pmu->base.add = i915_pmu_event_add; pmu->base.del = i915_pmu_event_del; pmu->base.start = i915_pmu_event_start; pmu->base.stop = i915_pmu_event_stop; pmu->base.read = i915_pmu_event_read; - pmu->base.event_idx = i915_pmu_event_event_idx; ret = perf_pmu_register(&pmu->base, pmu->name, -1); if (ret) goto err_groups; - ret = i915_pmu_register_cpuhp_state(pmu); - if (ret) - goto err_unreg; - pmu->registered = true; return; -err_unreg: - perf_pmu_unregister(&pmu->base); err_groups: kfree(pmu->base.attr_groups); err_attr: @@ -1340,8 +1224,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915) hrtimer_cancel(&pmu->timer); - i915_pmu_unregister_cpuhp_state(pmu); - perf_pmu_unregister(&pmu->base); kfree(pmu->base.attr_groups); if 
(IS_DGFX(i915)) diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 0ec78c2b4f20..5826cc81858c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -57,13 +57,6 @@ struct i915_pmu_sample { struct i915_pmu { /** - * @cpuhp: Struct used for CPU hotplug handling. - */ - struct { - struct hlist_node node; - unsigned int cpu; - } cpuhp; - /** * @base: PMU base. */ struct pmu base; @@ -155,15 +148,11 @@ struct i915_pmu { }; #ifdef CONFIG_PERF_EVENTS -int i915_pmu_init(void); -void i915_pmu_exit(void); void i915_pmu_register(struct drm_i915_private *i915); void i915_pmu_unregister(struct drm_i915_private *i915); void i915_pmu_gt_parked(struct intel_gt *gt); void i915_pmu_gt_unparked(struct intel_gt *gt); #else -static inline int i915_pmu_init(void) { return 0; } -static inline void i915_pmu_exit(void) {} static inline void i915_pmu_register(struct drm_i915_private *i915) {} static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} static inline void i915_pmu_gt_parked(struct intel_gt *gt) {} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index e817d233df61..ad650f67114a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -45,13 +45,15 @@ static void trash_stolen(struct drm_i915_private *i915) struct i915_ggtt *ggtt = to_gt(i915)->ggtt; const u64 slot = ggtt->error_capture.start; const resource_size_t size = resource_size(&i915->dsm.stolen); + struct rnd_state prng; unsigned long page; - u32 prng = 0x12345678; /* XXX: fsck. needs some more thought... 
*/ if (!i915_ggtt_has_aperture(ggtt)) return; + prandom_seed_state(&prng, 0x12345678); + for (page = 0; page < size; page += PAGE_SIZE) { const dma_addr_t dma = i915->dsm.stolen.start + page; u32 __iomem *s; @@ -64,8 +66,7 @@ static void trash_stolen(struct drm_i915_private *i915) s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); for (x = 0; x < PAGE_SIZE / sizeof(u32); x++) { - prng = next_pseudo_random32(prng); - iowrite32(prng, &s[x]); + iowrite32(prandom_u32_state(&prng), &s[x]); } io_mapping_unmap_atomic(s); } diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c index 5674f5707cca..8f6fba4217ec 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_crtc.c @@ -620,13 +620,16 @@ static void mtk_crtc_update_config(struct mtk_crtc *mtk_crtc, bool needs_vblank) mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); + goto update_config_out; } -#else +#endif spin_lock_irqsave(&mtk_crtc->config_lock, flags); mtk_crtc->config_updating = false; spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); -#endif +#if IS_REACHABLE(CONFIG_MTK_CMDQ) +update_config_out: +#endif mutex_unlock(&mtk_crtc->hw_lock); } diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index 3d4648d2e15f..ccdc57cef3ea 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -1766,7 +1766,7 @@ static int mtk_dp_parse_capabilities(struct mtk_dp *mtk_dp) ret = drm_dp_dpcd_readb(&mtk_dp->aux, DP_MSTM_CAP, &val); if (ret < 1) { - drm_err(mtk_dp->drm_dev, "Read mstm cap failed\n"); + dev_err(mtk_dp->dev, "Read mstm cap failed: %zd\n", ret); return ret == 0 ? 
-EIO : ret; } @@ -1776,7 +1776,7 @@ static int mtk_dp_parse_capabilities(struct mtk_dp *mtk_dp) DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0, &val); if (ret < 1) { - drm_err(mtk_dp->drm_dev, "Read irq vector failed\n"); + dev_err(mtk_dp->dev, "Read irq vector failed: %zd\n", ret); return ret == 0 ? -EIO : ret; } @@ -2059,7 +2059,7 @@ static int mtk_dp_wait_hpd_asserted(struct drm_dp_aux *mtk_aux, unsigned long wa ret = mtk_dp_parse_capabilities(mtk_dp); if (ret) { - drm_err(mtk_dp->drm_dev, "Can't parse capabilities\n"); + dev_err(mtk_dp->dev, "Can't parse capabilities: %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 1864eb02dbf5..0fd13e6dd3f1 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -4,8 +4,10 @@ * Author: Jie Qiu <jie.qiu@mediatek.com> */ +#include <linux/bitfield.h> #include <linux/clk.h> #include <linux/component.h> +#include <linux/debugfs.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/media-bus-format.h> @@ -116,9 +118,15 @@ struct mtk_dpi_yc_limit { u16 c_bottom; }; +struct mtk_dpi_factor { + u32 clock; + u8 factor; +}; + /** * struct mtk_dpi_conf - Configuration of mediatek dpi. - * @cal_factor: Callback function to calculate factor value. + * @dpi_factor: SoC-specific pixel clock PLL factor values. + * @num_dpi_factor: Number of pixel clock PLL factor values. * @reg_h_fre_con: Register address of frequency control. * @max_clock_khz: Max clock frequency supported for this SoCs in khz units. * @edge_sel_en: Enable of edge selection. @@ -127,19 +135,24 @@ struct mtk_dpi_yc_limit { * @is_ck_de_pol: Support CK/DE polarity. * @swap_input_support: Support input swap function. * @support_direct_pin: IP supports direct connection to dpi panels. - * @input_2pixel: Input pixel of dp_intf is 2 pixel per round, so enable this - * config to enable this feature. 
* @dimension_mask: Mask used for HWIDTH, HPORCH, VSYNC_WIDTH and VSYNC_PORCH * (no shift). * @hvsize_mask: Mask of HSIZE and VSIZE mask (no shift). * @channel_swap_shift: Shift value of channel swap. * @yuv422_en_bit: Enable bit of yuv422. * @csc_enable_bit: Enable bit of CSC. + * @input_2p_en_bit: Enable bit for input two pixel per round feature. + * If present, implies that the feature must be enabled. * @pixels_per_iter: Quantity of transferred pixels per iteration. * @edge_cfg_in_mmsys: If the edge configuration for DPI's output needs to be set in MMSYS. + * @clocked_by_hdmi: HDMI IP outputs clock to dpi_pixel_clk input clock, needed + * for DPI registers access. + * @output_1pixel: Enable outputting one pixel per round; if the input is two pixel per + * round, the DPI hardware will internally transform it to 1T1P. */ struct mtk_dpi_conf { - unsigned int (*cal_factor)(int clock); + const struct mtk_dpi_factor *dpi_factor; + const u8 num_dpi_factor; u32 reg_h_fre_con; u32 max_clock_khz; bool edge_sel_en; @@ -148,14 +161,16 @@ struct mtk_dpi_conf { bool is_ck_de_pol; bool swap_input_support; bool support_direct_pin; - bool input_2pixel; u32 dimension_mask; u32 hvsize_mask; u32 channel_swap_shift; u32 yuv422_en_bit; u32 csc_enable_bit; + u32 input_2p_en_bit; u32 pixels_per_iter; bool edge_cfg_in_mmsys; + bool clocked_by_hdmi; + bool output_1pixel; }; static void mtk_dpi_mask(struct mtk_dpi *dpi, u32 offset, u32 val, u32 mask) @@ -166,6 +181,18 @@ static void mtk_dpi_mask(struct mtk_dpi *dpi, u32 offset, u32 val, u32 mask) writel(tmp, dpi->regs + offset); } +static void mtk_dpi_test_pattern_en(struct mtk_dpi *dpi, u8 type, bool enable) +{ + u32 val; + + if (enable) + val = FIELD_PREP(DPI_PAT_SEL, type) | DPI_PAT_EN; + else + val = 0; + + mtk_dpi_mask(dpi, DPI_PATTERN0, val, DPI_PAT_SEL | DPI_PAT_EN); +} + static void mtk_dpi_sw_reset(struct mtk_dpi *dpi, bool reset) { mtk_dpi_mask(dpi, DPI_RET, reset ? 
RST : 0, RST); @@ -410,12 +437,13 @@ static void mtk_dpi_config_swap_input(struct mtk_dpi *dpi, bool enable) static void mtk_dpi_config_2n_h_fre(struct mtk_dpi *dpi) { - mtk_dpi_mask(dpi, dpi->conf->reg_h_fre_con, H_FRE_2N, H_FRE_2N); + if (dpi->conf->reg_h_fre_con) + mtk_dpi_mask(dpi, dpi->conf->reg_h_fre_con, H_FRE_2N, H_FRE_2N); } static void mtk_dpi_config_disable_edge(struct mtk_dpi *dpi) { - if (dpi->conf->edge_sel_en) + if (dpi->conf->edge_sel_en && dpi->conf->reg_h_fre_con) mtk_dpi_mask(dpi, dpi->conf->reg_h_fre_con, 0, EDGE_SEL_EN); } @@ -471,6 +499,7 @@ static void mtk_dpi_power_off(struct mtk_dpi *dpi) mtk_dpi_disable(dpi); clk_disable_unprepare(dpi->pixel_clk); + clk_disable_unprepare(dpi->tvd_clk); clk_disable_unprepare(dpi->engine_clk); } @@ -487,6 +516,12 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi) goto err_refcount; } + ret = clk_prepare_enable(dpi->tvd_clk); + if (ret) { + dev_err(dpi->dev, "Failed to enable tvd pll: %d\n", ret); + goto err_engine; + } + ret = clk_prepare_enable(dpi->pixel_clk); if (ret) { dev_err(dpi->dev, "Failed to enable pixel clock: %d\n", ret); @@ -496,32 +531,39 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi) return 0; err_pixel: + clk_disable_unprepare(dpi->tvd_clk); +err_engine: clk_disable_unprepare(dpi->engine_clk); err_refcount: dpi->refcount--; return ret; } -static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, - struct drm_display_mode *mode) +static unsigned int mtk_dpi_calculate_factor(struct mtk_dpi *dpi, int mode_clk) +{ + const struct mtk_dpi_factor *dpi_factor = dpi->conf->dpi_factor; + int i; + + for (i = 0; i < dpi->conf->num_dpi_factor; i++) { + if (mode_clk <= dpi_factor[i].clock) + return dpi_factor[i].factor; + } + + /* If no match try the lowest possible factor */ + return dpi_factor[dpi->conf->num_dpi_factor - 1].factor; +} + +static void mtk_dpi_set_pixel_clk(struct mtk_dpi *dpi, struct videomode *vm, int mode_clk) { - struct mtk_dpi_polarities dpi_pol; - struct mtk_dpi_sync_param hsync; 
- struct mtk_dpi_sync_param vsync_lodd = { 0 }; - struct mtk_dpi_sync_param vsync_leven = { 0 }; - struct mtk_dpi_sync_param vsync_rodd = { 0 }; - struct mtk_dpi_sync_param vsync_reven = { 0 }; - struct videomode vm = { 0 }; unsigned long pll_rate; unsigned int factor; /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */ - factor = dpi->conf->cal_factor(mode->clock); - drm_display_mode_to_videomode(mode, &vm); - pll_rate = vm.pixelclock * factor; + factor = mtk_dpi_calculate_factor(dpi, mode_clk); + pll_rate = vm->pixelclock * factor; dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, vm.pixelclock); + pll_rate, vm->pixelclock); clk_set_rate(dpi->tvd_clk, pll_rate); pll_rate = clk_get_rate(dpi->tvd_clk); @@ -531,20 +573,36 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, * pixels for each iteration: divide the clock by this number and * adjust the display porches accordingly. */ - vm.pixelclock = pll_rate / factor; - vm.pixelclock /= dpi->conf->pixels_per_iter; + vm->pixelclock = pll_rate / factor; + vm->pixelclock /= dpi->conf->pixels_per_iter; if ((dpi->output_fmt == MEDIA_BUS_FMT_RGB888_2X12_LE) || (dpi->output_fmt == MEDIA_BUS_FMT_RGB888_2X12_BE)) - clk_set_rate(dpi->pixel_clk, vm.pixelclock * 2); + clk_set_rate(dpi->pixel_clk, vm->pixelclock * 2); else - clk_set_rate(dpi->pixel_clk, vm.pixelclock); - + clk_set_rate(dpi->pixel_clk, vm->pixelclock); - vm.pixelclock = clk_get_rate(dpi->pixel_clk); + vm->pixelclock = clk_get_rate(dpi->pixel_clk); dev_dbg(dpi->dev, "Got PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, vm.pixelclock); + pll_rate, vm->pixelclock); +} + +static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, + struct drm_display_mode *mode) +{ + struct mtk_dpi_polarities dpi_pol; + struct mtk_dpi_sync_param hsync; + struct mtk_dpi_sync_param vsync_lodd = { 0 }; + struct mtk_dpi_sync_param vsync_leven = { 0 }; + struct mtk_dpi_sync_param vsync_rodd = { 0 }; + struct mtk_dpi_sync_param vsync_reven = { 0 }; + 
struct videomode vm = { 0 }; + + drm_display_mode_to_videomode(mode, &vm); + + if (!dpi->conf->clocked_by_hdmi) + mtk_dpi_set_pixel_clk(dpi, &vm, mode->clock); dpi_pol.ck_pol = MTK_DPI_POLARITY_FALLING; dpi_pol.de_pol = MTK_DPI_POLARITY_RISING; @@ -607,12 +665,18 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, if (dpi->conf->support_direct_pin) { mtk_dpi_config_yc_map(dpi, dpi->yc_map); mtk_dpi_config_2n_h_fre(dpi); - mtk_dpi_dual_edge(dpi); + + /* DPI can connect to either an external bridge or the internal HDMI encoder */ + if (dpi->conf->output_1pixel) + mtk_dpi_mask(dpi, DPI_CON, DPI_OUTPUT_1T1P_EN, DPI_OUTPUT_1T1P_EN); + else + mtk_dpi_dual_edge(dpi); + mtk_dpi_config_disable_edge(dpi); } - if (dpi->conf->input_2pixel) { - mtk_dpi_mask(dpi, DPI_CON, DPINTF_INPUT_2P_EN, - DPINTF_INPUT_2P_EN); + if (dpi->conf->input_2p_en_bit) { + mtk_dpi_mask(dpi, DPI_CON, dpi->conf->input_2p_en_bit, + dpi->conf->input_2p_en_bit); } mtk_dpi_sw_reset(dpi, false); @@ -767,6 +831,99 @@ mtk_dpi_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; } +static int mtk_dpi_debug_tp_show(struct seq_file *m, void *arg) +{ + struct mtk_dpi *dpi = m->private; + bool en; + u32 val; + + if (!dpi) + return -EINVAL; + + val = readl(dpi->regs + DPI_PATTERN0); + en = val & DPI_PAT_EN; + val = FIELD_GET(DPI_PAT_SEL, val); + + seq_printf(m, "DPI Test Pattern: %s\n", en ? 
"Enabled" : "Disabled"); + + if (en) { + seq_printf(m, "Internal pattern %d: ", val); + switch (val) { + case 0: + seq_puts(m, "256 Vertical Gray\n"); + break; + case 1: + seq_puts(m, "1024 Vertical Gray\n"); + break; + case 2: + seq_puts(m, "256 Horizontal Gray\n"); + break; + case 3: + seq_puts(m, "1024 Horizontal Gray\n"); + break; + case 4: + seq_puts(m, "Vertical Color bars\n"); + break; + case 6: + seq_puts(m, "Frame border\n"); + break; + case 7: + seq_puts(m, "Dot moire\n"); + break; + default: + seq_puts(m, "Invalid selection\n"); + break; + } + } + + return 0; +} + +static ssize_t mtk_dpi_debug_tp_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *m = file->private_data; + u32 en, type; + char buf[6]; + + if (!m || !m->private || *offp || len > sizeof(buf) - 1) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (copy_from_user(buf, ubuf, len)) + return -EFAULT; + + if (sscanf(buf, "%u %u", &en, &type) != 2) + return -EINVAL; + + if (en < 0 || en > 1 || type < 0 || type > 7) + return -EINVAL; + + mtk_dpi_test_pattern_en((struct mtk_dpi *)m->private, type, en); + return len; +} + +static int mtk_dpi_debug_tp_open(struct inode *inode, struct file *file) +{ + return single_open(file, mtk_dpi_debug_tp_show, inode->i_private); +} + +static const struct file_operations mtk_dpi_debug_tp_fops = { + .owner = THIS_MODULE, + .open = mtk_dpi_debug_tp_open, + .read = seq_read, + .write = mtk_dpi_debug_tp_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static void mtk_dpi_debugfs_init(struct drm_bridge *bridge, struct dentry *root) +{ + struct mtk_dpi *dpi = bridge_to_dpi(bridge); + + debugfs_create_file("dpi_test_pattern", 0640, root, dpi, &mtk_dpi_debug_tp_fops); +} + static const struct drm_bridge_funcs mtk_dpi_bridge_funcs = { .attach = mtk_dpi_bridge_attach, .mode_set = mtk_dpi_bridge_mode_set, @@ -779,20 +936,23 @@ static const struct drm_bridge_funcs mtk_dpi_bridge_funcs = { 
.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .atomic_reset = drm_atomic_helper_bridge_reset, + .debugfs_init = mtk_dpi_debugfs_init, }; void mtk_dpi_start(struct device *dev) { struct mtk_dpi *dpi = dev_get_drvdata(dev); - mtk_dpi_power_on(dpi); + if (!dpi->conf->clocked_by_hdmi) + mtk_dpi_power_on(dpi); } void mtk_dpi_stop(struct device *dev) { struct mtk_dpi *dpi = dev_get_drvdata(dev); - mtk_dpi_power_off(dpi); + if (!dpi->conf->clocked_by_hdmi) + mtk_dpi_power_off(dpi); } unsigned int mtk_dpi_encoder_index(struct device *dev) @@ -857,48 +1017,6 @@ static const struct component_ops mtk_dpi_component_ops = { .unbind = mtk_dpi_unbind, }; -static unsigned int mt8173_calculate_factor(int clock) -{ - if (clock <= 27000) - return 3 << 4; - else if (clock <= 84000) - return 3 << 3; - else if (clock <= 167000) - return 3 << 2; - else - return 3 << 1; -} - -static unsigned int mt2701_calculate_factor(int clock) -{ - if (clock <= 64000) - return 4; - else if (clock <= 128000) - return 2; - else - return 1; -} - -static unsigned int mt8183_calculate_factor(int clock) -{ - if (clock <= 27000) - return 8; - else if (clock <= 167000) - return 4; - else - return 2; -} - -static unsigned int mt8195_dpintf_calculate_factor(int clock) -{ - if (clock < 70000) - return 4; - else if (clock < 200000) - return 2; - else - return 1; -} - static const u32 mt8173_output_fmts[] = { MEDIA_BUS_FMT_RGB888_1X24, }; @@ -913,8 +1031,25 @@ static const u32 mt8195_output_fmts[] = { MEDIA_BUS_FMT_YUYV8_1X16, }; +static const struct mtk_dpi_factor dpi_factor_mt2701[] = { + { 64000, 4 }, { 128000, 2 }, { U32_MAX, 1 } +}; + +static const struct mtk_dpi_factor dpi_factor_mt8173[] = { + { 27000, 48 }, { 84000, 24 }, { 167000, 12 }, { U32_MAX, 6 } +}; + +static const struct mtk_dpi_factor dpi_factor_mt8183[] = { + { 27000, 8 }, { 167000, 4 }, { U32_MAX, 2 } +}; + +static const struct mtk_dpi_factor 
dpi_factor_mt8195_dp_intf[] = { + { 70000 - 1, 4 }, { 200000 - 1, 2 }, { U32_MAX, 1 } +}; + static const struct mtk_dpi_conf mt8173_conf = { - .cal_factor = mt8173_calculate_factor, + .dpi_factor = dpi_factor_mt8173, + .num_dpi_factor = ARRAY_SIZE(dpi_factor_mt8173), .reg_h_fre_con = 0xe0, .max_clock_khz = 300000, .output_fmts = mt8173_output_fmts, @@ -931,7 +1066,8 @@ static const struct mtk_dpi_conf mt8173_conf = { }; static const struct mtk_dpi_conf mt2701_conf = { - .cal_factor = mt2701_calculate_factor, + .dpi_factor = dpi_factor_mt2701, + .num_dpi_factor = ARRAY_SIZE(dpi_factor_mt2701), .reg_h_fre_con = 0xb0, .edge_sel_en = true, .max_clock_khz = 150000, @@ -949,7 +1085,8 @@ static const struct mtk_dpi_conf mt2701_conf = { }; static const struct mtk_dpi_conf mt8183_conf = { - .cal_factor = mt8183_calculate_factor, + .dpi_factor = dpi_factor_mt8183, + .num_dpi_factor = ARRAY_SIZE(dpi_factor_mt8183), .reg_h_fre_con = 0xe0, .max_clock_khz = 100000, .output_fmts = mt8183_output_fmts, @@ -966,7 +1103,8 @@ static const struct mtk_dpi_conf mt8183_conf = { }; static const struct mtk_dpi_conf mt8186_conf = { - .cal_factor = mt8183_calculate_factor, + .dpi_factor = dpi_factor_mt8183, + .num_dpi_factor = ARRAY_SIZE(dpi_factor_mt8183), .reg_h_fre_con = 0xe0, .max_clock_khz = 150000, .output_fmts = mt8183_output_fmts, @@ -984,7 +1122,8 @@ static const struct mtk_dpi_conf mt8186_conf = { }; static const struct mtk_dpi_conf mt8192_conf = { - .cal_factor = mt8183_calculate_factor, + .dpi_factor = dpi_factor_mt8183, + .num_dpi_factor = ARRAY_SIZE(dpi_factor_mt8183), .reg_h_fre_con = 0xe0, .max_clock_khz = 150000, .output_fmts = mt8183_output_fmts, @@ -1000,18 +1139,37 @@ static const struct mtk_dpi_conf mt8192_conf = { .csc_enable_bit = CSC_ENABLE, }; +static const struct mtk_dpi_conf mt8195_conf = { + .max_clock_khz = 594000, + .output_fmts = mt8183_output_fmts, + .num_output_fmts = ARRAY_SIZE(mt8183_output_fmts), + .pixels_per_iter = 1, + .is_ck_de_pol = true, + 
.swap_input_support = true, + .support_direct_pin = true, + .dimension_mask = HPW_MASK, + .hvsize_mask = HSIZE_MASK, + .channel_swap_shift = CH_SWAP, + .yuv422_en_bit = YUV422_EN, + .csc_enable_bit = CSC_ENABLE, + .input_2p_en_bit = DPI_INPUT_2P_EN, + .clocked_by_hdmi = true, + .output_1pixel = true, +}; + static const struct mtk_dpi_conf mt8195_dpintf_conf = { - .cal_factor = mt8195_dpintf_calculate_factor, + .dpi_factor = dpi_factor_mt8195_dp_intf, + .num_dpi_factor = ARRAY_SIZE(dpi_factor_mt8195_dp_intf), .max_clock_khz = 600000, .output_fmts = mt8195_output_fmts, .num_output_fmts = ARRAY_SIZE(mt8195_output_fmts), .pixels_per_iter = 4, - .input_2pixel = true, .dimension_mask = DPINTF_HPW_MASK, .hvsize_mask = DPINTF_HSIZE_MASK, .channel_swap_shift = DPINTF_CH_SWAP, .yuv422_en_bit = DPINTF_YUV422_EN, .csc_enable_bit = DPINTF_CSC_ENABLE, + .input_2p_en_bit = DPINTF_INPUT_2P_EN, }; static int mtk_dpi_probe(struct platform_device *pdev) @@ -1102,6 +1260,7 @@ static const struct of_device_id mtk_dpi_of_ids[] = { { .compatible = "mediatek,mt8188-dp-intf", .data = &mt8195_dpintf_conf }, { .compatible = "mediatek,mt8192-dpi", .data = &mt8192_conf }, { .compatible = "mediatek,mt8195-dp-intf", .data = &mt8195_dpintf_conf }, + { .compatible = "mediatek,mt8195-dpi", .data = &mt8195_conf }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, mtk_dpi_of_ids); diff --git a/drivers/gpu/drm/mediatek/mtk_dpi_regs.h b/drivers/gpu/drm/mediatek/mtk_dpi_regs.h index 62bd4931b344..23eeefce8fd2 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi_regs.h +++ b/drivers/gpu/drm/mediatek/mtk_dpi_regs.h @@ -40,6 +40,11 @@ #define FAKE_DE_LEVEN BIT(21) #define FAKE_DE_RODD BIT(22) #define FAKE_DE_REVEN BIT(23) + +/* DPI_CON: DPI instances */ +#define DPI_OUTPUT_1T1P_EN BIT(24) +#define DPI_INPUT_2P_EN BIT(25) +/* DPI_CON: DPINTF instances */ #define DPINTF_YUV422_EN BIT(24) #define DPINTF_CSC_ENABLE BIT(26) #define DPINTF_INPUT_2P_EN BIT(29) @@ -235,4 +240,8 @@ #define MATRIX_SEL_RGB_TO_JPEG 0 #define 
MATRIX_SEL_RGB_TO_BT601 2 +#define DPI_PATTERN0 0xf00 +#define DPI_PAT_EN BIT(0) +#define DPI_PAT_SEL GENMASK(6, 4) + #endif /* __MTK_DPI_REGS_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index f22ad2882697..74158b9d6503 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -327,6 +327,10 @@ static const struct mtk_mmsys_driver_data mt8195_vdosys1_driver_data = { .min_height = 1, }; +static const struct mtk_mmsys_driver_data mt8365_mmsys_driver_data = { + .mmsys_dev_num = 1, +}; + static const struct of_device_id mtk_drm_of_ids[] = { { .compatible = "mediatek,mt2701-mmsys", .data = &mt2701_mmsys_driver_data}, @@ -354,6 +358,8 @@ static const struct of_device_id mtk_drm_of_ids[] = { .data = &mt8195_vdosys0_driver_data}, { .compatible = "mediatek,mt8195-vdosys1", .data = &mt8195_vdosys1_driver_data}, + { .compatible = "mediatek,mt8365-mmsys", + .data = &mt8365_mmsys_driver_data}, { } }; MODULE_DEVICE_TABLE(of, mtk_drm_of_ids); @@ -754,6 +760,8 @@ static const struct of_device_id mtk_ddp_comp_dt_ids[] = { .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt8195-disp-mutex", .data = (void *)MTK_DISP_MUTEX }, + { .compatible = "mediatek,mt8365-disp-mutex", + .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt8173-disp-od", .data = (void *)MTK_DISP_OD }, { .compatible = "mediatek,mt2701-disp-ovl", @@ -810,6 +818,8 @@ static const struct of_device_id mtk_ddp_comp_dt_ids[] = { .data = (void *)MTK_DPI }, { .compatible = "mediatek,mt8195-dp-intf", .data = (void *)MTK_DP_INTF }, + { .compatible = "mediatek,mt8195-dpi", + .data = (void *)MTK_DPI }, { .compatible = "mediatek,mt2701-dsi", .data = (void *)MTK_DSI }, { .compatible = "mediatek,mt8173-dsi", diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 0683c2b3ca5b..d1f407fb7eb1 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ 
-1116,12 +1116,12 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, const struct mipi_dsi_msg *msg) { struct mtk_dsi *dsi = host_to_dsi(host); - u32 recv_cnt, i; + ssize_t recv_cnt; u8 read_data[16]; void *src_addr; u8 irq_flag = CMD_DONE_INT_FLAG; u32 dsi_mode; - int ret; + int ret, i; dsi_mode = readl(dsi->regs + DSI_MODE_CTRL); if (dsi_mode & MODE) { @@ -1170,7 +1170,7 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, if (recv_cnt) memcpy(msg->rx_buf, src_addr, recv_cnt); - DRM_INFO("dsi get %d byte data from the panel address(0x%x)\n", + DRM_INFO("dsi get %zd byte data from the panel address(0x%x)\n", recv_cnt, *((u8 *)(msg->tx_buf))); restore_dsi_mode: diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index d4ab098e1174..06e4fac152b7 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -137,7 +137,7 @@ enum hdmi_aud_channel_swap_type { struct hdmi_audio_param { enum hdmi_audio_coding_type aud_codec; - enum hdmi_audio_sample_size aud_sampe_size; + enum hdmi_audio_sample_size aud_sample_size; enum hdmi_aud_input_type aud_input_type; enum hdmi_aud_i2s_fmt aud_i2s_fmt; enum hdmi_aud_mclk aud_mclk; @@ -163,16 +163,10 @@ struct mtk_hdmi { struct clk *clk[MTK_HDMI_CLK_COUNT]; struct drm_display_mode mode; bool dvi_mode; - u32 min_clock; - u32 max_clock; - u32 max_hdisplay; - u32 max_vdisplay; - u32 ibias; - u32 ibias_up; struct regmap *sys_regmap; unsigned int sys_offset; void __iomem *regs; - enum hdmi_colorspace csp; + struct platform_device *audio_pdev; struct hdmi_audio_param aud_param; bool audio_enable; bool powered; @@ -987,15 +981,14 @@ static int mtk_hdmi_setup_avi_infoframe(struct mtk_hdmi *hdmi, return 0; } -static int mtk_hdmi_setup_spd_infoframe(struct mtk_hdmi *hdmi, - const char *vendor, - const char *product) +static int mtk_hdmi_setup_spd_infoframe(struct mtk_hdmi *hdmi) { + struct drm_bridge *bridge = &hdmi->bridge; struct 
hdmi_spd_infoframe frame; u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_SPD_INFOFRAME_SIZE]; ssize_t err; - err = hdmi_spd_infoframe_init(&frame, vendor, product); + err = hdmi_spd_infoframe_init(&frame, bridge->vendor, bridge->product); if (err < 0) { dev_err(hdmi->dev, "Failed to initialize SPD infoframe: %zd\n", err); @@ -1072,9 +1065,8 @@ static int mtk_hdmi_output_init(struct mtk_hdmi *hdmi) { struct hdmi_audio_param *aud_param = &hdmi->aud_param; - hdmi->csp = HDMI_COLORSPACE_RGB; aud_param->aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - aud_param->aud_sampe_size = HDMI_AUDIO_SAMPLE_SIZE_16; + aud_param->aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; aud_param->aud_input_type = HDMI_AUD_INPUT_I2S; aud_param->aud_i2s_fmt = HDMI_I2S_MODE_I2S_24BIT; aud_param->aud_mclk = HDMI_AUD_MCLK_128FS; @@ -1167,13 +1159,12 @@ static int mtk_hdmi_clk_enable_audio(struct mtk_hdmi *hdmi) return ret; ret = clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_AUD_SPDIF]); - if (ret) - goto err; + if (ret) { + clk_disable_unprepare(hdmi->clk[MTK_HDMI_CLK_AUD_BCLK]); + return ret; + } return 0; -err: - clk_disable_unprepare(hdmi->clk[MTK_HDMI_CLK_AUD_BCLK]); - return ret; } static void mtk_hdmi_clk_disable_audio(struct mtk_hdmi *hdmi) @@ -1377,7 +1368,7 @@ static void mtk_hdmi_send_infoframe(struct mtk_hdmi *hdmi, { mtk_hdmi_setup_audio_infoframe(hdmi); mtk_hdmi_setup_avi_infoframe(hdmi, mode); - mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI"); + mtk_hdmi_setup_spd_infoframe(hdmi); if (mode->flags & DRM_MODE_FLAG_3D_MASK) mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode); } @@ -1569,14 +1560,14 @@ static int mtk_hdmi_audio_hw_params(struct device *dev, void *data, switch (daifmt->fmt) { case HDMI_I2S: hdmi_params.aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - hdmi_params.aud_sampe_size = HDMI_AUDIO_SAMPLE_SIZE_16; + hdmi_params.aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; hdmi_params.aud_input_type = HDMI_AUD_INPUT_I2S; hdmi_params.aud_i2s_fmt = HDMI_I2S_MODE_I2S_24BIT; 
hdmi_params.aud_mclk = HDMI_AUD_MCLK_128FS; break; case HDMI_SPDIF: hdmi_params.aud_codec = HDMI_AUDIO_CODING_TYPE_PCM; - hdmi_params.aud_sampe_size = HDMI_AUDIO_SAMPLE_SIZE_16; + hdmi_params.aud_sample_size = HDMI_AUDIO_SAMPLE_SIZE_16; hdmi_params.aud_input_type = HDMI_AUD_INPUT_SPDIF; break; default: @@ -1659,6 +1650,11 @@ static const struct hdmi_codec_ops mtk_hdmi_audio_codec_ops = { .hook_plugged_cb = mtk_hdmi_audio_hook_plugged_cb, }; +static void mtk_hdmi_unregister_audio_driver(void *data) +{ + platform_device_unregister(data); +} + static int mtk_hdmi_register_audio_driver(struct device *dev) { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); @@ -1669,15 +1665,21 @@ static int mtk_hdmi_register_audio_driver(struct device *dev) .data = hdmi, .no_capture_mute = 1, }; - struct platform_device *pdev; + int ret; - pdev = platform_device_register_data(dev, HDMI_CODEC_DRV_NAME, - PLATFORM_DEVID_AUTO, &codec_data, - sizeof(codec_data)); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + hdmi->audio_pdev = platform_device_register_data(dev, + HDMI_CODEC_DRV_NAME, + PLATFORM_DEVID_AUTO, + &codec_data, + sizeof(codec_data)); + if (IS_ERR(hdmi->audio_pdev)) + return PTR_ERR(hdmi->audio_pdev); + + ret = devm_add_action_or_reset(dev, mtk_hdmi_unregister_audio_driver, + hdmi->audio_pdev); + if (ret) + return ret; - DRM_INFO("%s driver bound to HDMI\n", HDMI_CODEC_DRV_NAME); return 0; } @@ -1721,14 +1723,17 @@ static int mtk_hdmi_probe(struct platform_device *pdev) hdmi->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_HPD; hdmi->bridge.type = DRM_MODE_CONNECTOR_HDMIA; - drm_bridge_add(&hdmi->bridge); + hdmi->bridge.vendor = "MediaTek"; + hdmi->bridge.product = "On-Chip HDMI"; + + ret = devm_drm_bridge_add(dev, &hdmi->bridge); + if (ret) + return dev_err_probe(dev, ret, "Failed to add bridge\n"); ret = mtk_hdmi_clk_enable_audio(hdmi); - if (ret) { - drm_bridge_remove(&hdmi->bridge); + if (ret) return dev_err_probe(dev, ret, "Failed to enable audio 
clocks\n"); - } return 0; } @@ -1737,12 +1742,10 @@ static void mtk_hdmi_remove(struct platform_device *pdev) { struct mtk_hdmi *hdmi = platform_get_drvdata(pdev); - drm_bridge_remove(&hdmi->bridge); mtk_hdmi_clk_disable_audio(hdmi); } -#ifdef CONFIG_PM_SLEEP -static int mtk_hdmi_suspend(struct device *dev) +static __maybe_unused int mtk_hdmi_suspend(struct device *dev) { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); @@ -1751,22 +1754,14 @@ static int mtk_hdmi_suspend(struct device *dev) return 0; } -static int mtk_hdmi_resume(struct device *dev) +static __maybe_unused int mtk_hdmi_resume(struct device *dev) { struct mtk_hdmi *hdmi = dev_get_drvdata(dev); - int ret = 0; - - ret = mtk_hdmi_clk_enable_audio(hdmi); - if (ret) { - dev_err(dev, "hdmi resume failed!\n"); - return ret; - } - return 0; + return mtk_hdmi_clk_enable_audio(hdmi); } -#endif -static SIMPLE_DEV_PM_OPS(mtk_hdmi_pm_ops, - mtk_hdmi_suspend, mtk_hdmi_resume); + +static SIMPLE_DEV_PM_OPS(mtk_hdmi_pm_ops, mtk_hdmi_suspend, mtk_hdmi_resume); static const struct mtk_hdmi_conf mtk_hdmi_conf_mt2701 = { .tz_disabled = true, @@ -1778,15 +1773,10 @@ static const struct mtk_hdmi_conf mtk_hdmi_conf_mt8167 = { }; static const struct of_device_id mtk_hdmi_of_ids[] = { - { .compatible = "mediatek,mt2701-hdmi", - .data = &mtk_hdmi_conf_mt2701, - }, - { .compatible = "mediatek,mt8167-hdmi", - .data = &mtk_hdmi_conf_mt8167, - }, - { .compatible = "mediatek,mt8173-hdmi", - }, - {} + { .compatible = "mediatek,mt2701-hdmi", .data = &mtk_hdmi_conf_mt2701 }, + { .compatible = "mediatek,mt8167-hdmi", .data = &mtk_hdmi_conf_mt8167 }, + { .compatible = "mediatek,mt8173-hdmi" }, + { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, mtk_hdmi_of_ids); diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig new file mode 100644 index 000000000000..ad0c06756516 --- /dev/null +++ b/drivers/gpu/nova-core/Kconfig @@ -0,0 +1,14 @@ +config NOVA_CORE + tristate "Nova Core GPU driver" + depends on PCI + depends on RUST + 
depends on RUST_FW_LOADER_ABSTRACTIONS + default n + help + Choose this if you want to build the Nova Core driver for Nvidia + GPUs based on the GPU System Processor (GSP). This is true for Turing + and later GPUs. + + This driver is work in progress and may not be functional. + + If M is selected, the module will be called nova_core. diff --git a/drivers/gpu/nova-core/Makefile b/drivers/gpu/nova-core/Makefile new file mode 100644 index 000000000000..2d78c50126e1 --- /dev/null +++ b/drivers/gpu/nova-core/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_NOVA_CORE) += nova_core.o diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs new file mode 100644 index 000000000000..63c19f140fbd --- /dev/null +++ b/drivers/gpu/nova-core/driver.rs @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{bindings, c_str, pci, prelude::*}; + +use crate::gpu::Gpu; + +#[pin_data] +pub(crate) struct NovaCore { + #[pin] + pub(crate) gpu: Gpu, +} + +const BAR0_SIZE: usize = 8; +pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <NovaCore as pci::Driver>::IdInfo, + [( + pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as _), + () + )] +); + +impl pci::Driver for NovaCore { + type IdInfo = (); + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &mut pci::Device, _info: &Self::IdInfo) -> Result<Pin<KBox<Self>>> { + dev_dbg!(pdev.as_ref(), "Probe Nova Core GPU driver.\n"); + + pdev.enable_device_mem()?; + pdev.set_master(); + + let bar = pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0"))?; + + let this = KBox::pin_init( + try_pin_init!(Self { + gpu <- Gpu::new(pdev, bar)?, + }), + GFP_KERNEL, + )?; + + Ok(this) + } +} diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs new file mode 100644 index 000000000000..6e6361c59ca1 --- /dev/null +++ 
b/drivers/gpu/nova-core/firmware.rs @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::gpu; +use kernel::firmware; + +pub(crate) struct ModInfoBuilder<const N: usize>(firmware::ModInfoBuilder<N>); + +impl<const N: usize> ModInfoBuilder<N> { + const VERSION: &'static str = "535.113.01"; + + const fn make_entry_file(self, chipset: &str, fw: &str) -> Self { + ModInfoBuilder( + self.0 + .new_entry() + .push("nvidia/") + .push(chipset) + .push("/gsp/") + .push(fw) + .push("-") + .push(Self::VERSION) + .push(".bin"), + ) + } + + const fn make_entry_chipset(self, chipset: &str) -> Self { + self.make_entry_file(chipset, "booter_load") + .make_entry_file(chipset, "booter_unload") + .make_entry_file(chipset, "bootloader") + .make_entry_file(chipset, "gsp") + } + + pub(crate) const fn create( + module_name: &'static kernel::str::CStr, + ) -> firmware::ModInfoBuilder<N> { + let mut this = Self(firmware::ModInfoBuilder::new(module_name)); + let mut i = 0; + + while i < gpu::Chipset::NAMES.len() { + this = this.make_entry_chipset(gpu::Chipset::NAMES[i]); + i += 1; + } + + this.0 + } +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs new file mode 100644 index 000000000000..17c9660da450 --- /dev/null +++ b/drivers/gpu/nova-core/gpu.rs @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + device, devres::Devres, error::code::*, firmware, fmt, pci, prelude::*, str::CString, +}; + +use crate::driver::Bar0; +use crate::regs; +use crate::util; +use core::fmt; + +macro_rules! define_chipset { + ({ $($variant:ident = $value:expr),* $(,)* }) => + { + /// Enum representation of the GPU chipset. 
+ #[derive(fmt::Debug)] + pub(crate) enum Chipset { + $($variant = $value),*, + } + + impl Chipset { + pub(crate) const ALL: &'static [Chipset] = &[ + $( Chipset::$variant, )* + ]; + + pub(crate) const NAMES: [&'static str; Self::ALL.len()] = [ + $( util::const_bytes_to_str( + util::to_lowercase_bytes::<{ stringify!($variant).len() }>( + stringify!($variant) + ).as_slice() + ), )* + ]; + } + + // TODO replace with something like derive(FromPrimitive) + impl TryFrom<u32> for Chipset { + type Error = kernel::error::Error; + + fn try_from(value: u32) -> Result<Self, Self::Error> { + match value { + $( $value => Ok(Chipset::$variant), )* + _ => Err(ENODEV), + } + } + } + } +} + +define_chipset!({ + // Turing + TU102 = 0x162, + TU104 = 0x164, + TU106 = 0x166, + TU117 = 0x167, + TU116 = 0x168, + // Ampere + GA102 = 0x172, + GA103 = 0x173, + GA104 = 0x174, + GA106 = 0x176, + GA107 = 0x177, + // Ada + AD102 = 0x192, + AD103 = 0x193, + AD104 = 0x194, + AD106 = 0x196, + AD107 = 0x197, +}); + +impl Chipset { + pub(crate) fn arch(&self) -> Architecture { + match self { + Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => { + Architecture::Turing + } + Self::GA102 | Self::GA103 | Self::GA104 | Self::GA106 | Self::GA107 => { + Architecture::Ampere + } + Self::AD102 | Self::AD103 | Self::AD104 | Self::AD106 | Self::AD107 => { + Architecture::Ada + } + } + } +} + +// TODO +// +// The resulting strings are used to generate firmware paths, hence the +// generated strings have to be stable. +// +// Hence, replace with something like strum_macros derive(Display). +// +// For now, redirect to fmt::Debug for convenience. +impl fmt::Display for Chipset { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +/// Enum representation of the GPU generation. 
+#[derive(fmt::Debug)] +pub(crate) enum Architecture { + Turing, + Ampere, + Ada, +} + +pub(crate) struct Revision { + major: u8, + minor: u8, +} + +impl Revision { + fn from_boot0(boot0: regs::Boot0) -> Self { + Self { + major: boot0.major_rev(), + minor: boot0.minor_rev(), + } + } +} + +impl fmt::Display for Revision { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:x}.{:x}", self.major, self.minor) + } +} + +/// Structure holding the metadata of the GPU. +pub(crate) struct Spec { + chipset: Chipset, + /// The revision of the chipset. + revision: Revision, +} + +impl Spec { + fn new(bar: &Devres<Bar0>) -> Result<Spec> { + let bar = bar.try_access().ok_or(ENXIO)?; + let boot0 = regs::Boot0::read(&bar); + + Ok(Self { + chipset: boot0.chipset().try_into()?, + revision: Revision::from_boot0(boot0), + }) + } +} + +/// Structure encapsulating the firmware blobs required for the GPU to operate. +#[expect(dead_code)] +pub(crate) struct Firmware { + booter_load: firmware::Firmware, + booter_unload: firmware::Firmware, + bootloader: firmware::Firmware, + gsp: firmware::Firmware, +} + +impl Firmware { + fn new(dev: &device::Device, spec: &Spec, ver: &str) -> Result<Firmware> { + let mut chip_name = CString::try_from_fmt(fmt!("{}", spec.chipset))?; + chip_name.make_ascii_lowercase(); + + let request = |name_| { + CString::try_from_fmt(fmt!("nvidia/{}/gsp/{}-{}.bin", &*chip_name, name_, ver)) + .and_then(|path| firmware::Firmware::request(&path, dev)) + }; + + Ok(Firmware { + booter_load: request("booter_load")?, + booter_unload: request("booter_unload")?, + bootloader: request("bootloader")?, + gsp: request("gsp")?, + }) + } +} + +/// Structure holding the resources required to operate the GPU. 
+#[pin_data] +pub(crate) struct Gpu { + spec: Spec, + /// MMIO mapping of PCI BAR 0 + bar: Devres<Bar0>, + fw: Firmware, +} + +impl Gpu { + pub(crate) fn new(pdev: &pci::Device, bar: Devres<Bar0>) -> Result<impl PinInit<Self>> { + let spec = Spec::new(&bar)?; + let fw = Firmware::new(pdev.as_ref(), &spec, "535.113.01")?; + + dev_info!( + pdev.as_ref(), + "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", + spec.chipset, + spec.chipset.arch(), + spec.revision + ); + + Ok(pin_init!(Self { spec, bar, fw })) + } +} diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs new file mode 100644 index 000000000000..a91cd924054b --- /dev/null +++ b/drivers/gpu/nova-core/nova_core.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Nova Core GPU Driver + +mod driver; +mod firmware; +mod gpu; +mod regs; +mod util; + +kernel::module_pci_driver! { + type: driver::NovaCore, + name: "NovaCore", + author: "Danilo Krummrich", + description: "Nova Core GPU driver", + license: "GPL v2", + firmware: [], +} + +kernel::module_firmware!(firmware::ModInfoBuilder); diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs new file mode 100644 index 000000000000..50aefb150b0b --- /dev/null +++ b/drivers/gpu/nova-core/regs.rs @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::driver::Bar0; + +// TODO +// +// Create register definitions via generic macros. See task "Generic register +// abstraction" in Documentation/gpu/nova/core/todo.rst. 
+ +const BOOT0_OFFSET: usize = 0x00000000; + +// 3:0 - chipset minor revision +const BOOT0_MINOR_REV_SHIFT: u8 = 0; +const BOOT0_MINOR_REV_MASK: u32 = 0x0000000f; + +// 7:4 - chipset major revision +const BOOT0_MAJOR_REV_SHIFT: u8 = 4; +const BOOT0_MAJOR_REV_MASK: u32 = 0x000000f0; + +// 23:20 - chipset implementation Identifier (depends on architecture) +const BOOT0_IMPL_SHIFT: u8 = 20; +const BOOT0_IMPL_MASK: u32 = 0x00f00000; + +// 28:24 - chipset architecture identifier +const BOOT0_ARCH_MASK: u32 = 0x1f000000; + +// 28:20 - chipset identifier (virtual register field combining BOOT0_IMPL and +// BOOT0_ARCH) +const BOOT0_CHIPSET_SHIFT: u8 = BOOT0_IMPL_SHIFT; +const BOOT0_CHIPSET_MASK: u32 = BOOT0_IMPL_MASK | BOOT0_ARCH_MASK; + +#[derive(Copy, Clone)] +pub(crate) struct Boot0(u32); + +impl Boot0 { + #[inline] + pub(crate) fn read(bar: &Bar0) -> Self { + Self(bar.readl(BOOT0_OFFSET)) + } + + #[inline] + pub(crate) fn chipset(&self) -> u32 { + (self.0 & BOOT0_CHIPSET_MASK) >> BOOT0_CHIPSET_SHIFT + } + + #[inline] + pub(crate) fn minor_rev(&self) -> u8 { + ((self.0 & BOOT0_MINOR_REV_MASK) >> BOOT0_MINOR_REV_SHIFT) as u8 + } + + #[inline] + pub(crate) fn major_rev(&self) -> u8 { + ((self.0 & BOOT0_MAJOR_REV_MASK) >> BOOT0_MAJOR_REV_SHIFT) as u8 + } +} diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs new file mode 100644 index 000000000000..332a64cfc6a9 --- /dev/null +++ b/drivers/gpu/nova-core/util.rs @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +pub(crate) const fn to_lowercase_bytes<const N: usize>(s: &str) -> [u8; N] { + let src = s.as_bytes(); + let mut dst = [0; N]; + let mut i = 0; + + while i < src.len() && i < N { + dst[i] = (src[i] as char).to_ascii_lowercase() as u8; + i += 1; + } + + dst +} + +pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { + match core::str::from_utf8(bytes) { + Ok(string) => string, + Err(_) => kernel::build_error!("Bytes are not valid UTF-8."), + } +} diff --git a/drivers/video/Kconfig 
b/drivers/video/Kconfig index 44c9ef1435a2..5df981920a94 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -39,6 +39,7 @@ source "drivers/gpu/vga/Kconfig" source "drivers/gpu/host1x/Kconfig" source "drivers/gpu/ipu-v3/Kconfig" +source "drivers/gpu/nova-core/Kconfig" source "drivers/gpu/drm/Kconfig" diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index c5162fdc95ff..f04b058b09b2 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -115,3 +115,219 @@ unsafe impl Send for Firmware {} // SAFETY: `Firmware` only holds a pointer to a C `struct firmware`, references to which are safe to // be used from any thread. unsafe impl Sync for Firmware {} + +/// Create firmware .modinfo entries. +/// +/// This macro is the counterpart of the C macro `MODULE_FIRMWARE()`, but instead of taking +/// simple string literals, which are already covered by the `firmware` field of +/// [`crate::prelude::module!`], it allows the caller to pass a builder type, based on the +/// [`ModInfoBuilder`], which can create the firmware modinfo strings in a more flexible way. +/// +/// Drivers should extend the [`ModInfoBuilder`] with their own driver specific builder type. +/// +/// The `builder` argument must be a type which implements the following function. +/// +/// `const fn create(module_name: &'static CStr) -> ModInfoBuilder` +/// +/// `create` should pass the `module_name` to the [`ModInfoBuilder`] and, with its help, +/// construct the corresponding firmware modinfo. +/// +/// Typically, such contracts would be enforced by a trait, however traits do not (yet) support +/// const functions. 
+/// +/// # Example +/// +/// ``` +/// # mod module_firmware_test { +/// # use kernel::firmware; +/// # use kernel::prelude::*; +/// # +/// # struct MyModule; +/// # +/// # impl kernel::Module for MyModule { +/// # fn init(_module: &'static ThisModule) -> Result<Self> { +/// # Ok(Self) +/// # } +/// # } +/// # +/// # +/// struct Builder<const N: usize>; +/// +/// impl<const N: usize> Builder<N> { +/// const DIR: &'static str = "vendor/chip/"; +/// const FILES: [&'static str; 3] = [ "foo", "bar", "baz" ]; +/// +/// const fn create(module_name: &'static kernel::str::CStr) -> firmware::ModInfoBuilder<N> { +/// let mut builder = firmware::ModInfoBuilder::new(module_name); +/// +/// let mut i = 0; +/// while i < Self::FILES.len() { +/// builder = builder.new_entry() +/// .push(Self::DIR) +/// .push(Self::FILES[i]) +/// .push(".bin"); +/// +/// i += 1; +/// } +/// +/// builder +/// } +/// } +/// +/// module! { +/// type: MyModule, +/// name: "module_firmware_test", +/// author: "Rust for Linux", +/// description: "module_firmware! test module", +/// license: "GPL", +/// } +/// +/// kernel::module_firmware!(Builder); +/// # } +/// ``` +#[macro_export] +macro_rules! module_firmware { + // The argument is the builder type without the const generic, since it's deferred from within + // this macro. Hence, we can neither use `expr` nor `ty`. + ($($builder:tt)*) => { + const _: () = { + const __MODULE_FIRMWARE_PREFIX: &'static $crate::str::CStr = if cfg!(MODULE) { + $crate::c_str!("") + } else { + <LocalModule as $crate::ModuleMetadata>::NAME + }; + + #[link_section = ".modinfo"] + #[used] + static __MODULE_FIRMWARE: [u8; $($builder)*::create(__MODULE_FIRMWARE_PREFIX) + .build_length()] = $($builder)*::create(__MODULE_FIRMWARE_PREFIX).build(); + }; + }; +} + +/// Builder for firmware module info. +/// +/// [`ModInfoBuilder`] is a helper component to flexibly compose firmware path strings for the +/// .modinfo section in const context. 
+/// +/// Therefore the [`ModInfoBuilder`] provides the methods [`ModInfoBuilder::new_entry`] and +/// [`ModInfoBuilder::push`], where the latter is used to push path components and the former to +/// mark the beginning of a new path string. +/// +/// [`ModInfoBuilder`] is meant to be used in combination with [`kernel::module_firmware!`]. +/// +/// The const generic `N` as well as the `module_name` parameter of [`ModInfoBuilder::new`] are +/// internal implementation details and are supplied through the above macro. +pub struct ModInfoBuilder<const N: usize> { + buf: [u8; N], + n: usize, + module_name: &'static CStr, +} + +impl<const N: usize> ModInfoBuilder<N> { + /// Create an empty builder instance. + pub const fn new(module_name: &'static CStr) -> Self { + Self { + buf: [0; N], + n: 0, + module_name, + } + } + + const fn push_internal(mut self, bytes: &[u8]) -> Self { + let mut j = 0; + + if N == 0 { + self.n += bytes.len(); + return self; + } + + while j < bytes.len() { + if self.n < N { + self.buf[self.n] = bytes[j]; + } + self.n += 1; + j += 1; + } + self + } + + /// Push an additional path component. + /// + /// Append path components to the [`ModInfoBuilder`] instance. Paths need to be separated + /// with [`ModInfoBuilder::new_entry`]. + /// + /// # Example + /// + /// ``` + /// use kernel::firmware::ModInfoBuilder; + /// + /// # const DIR: &str = "vendor/chip/"; + /// # const fn no_run<const N: usize>(builder: ModInfoBuilder<N>) { + /// let builder = builder.new_entry() + /// .push(DIR) + /// .push("foo.bin") + /// .new_entry() + /// .push(DIR) + /// .push("bar.bin"); + /// # } + /// ``` + pub const fn push(self, s: &str) -> Self { + // Check whether there has been an initial call to `new_entry()`. 
+ if N != 0 && self.n == 0 { + crate::build_error!("Must call next_entry() before push()."); + } + + self.push_internal(s.as_bytes()) + } + + const fn push_module_name(self) -> Self { + let mut this = self; + let module_name = this.module_name; + + if !this.module_name.is_empty() { + this = this.push_internal(module_name.as_bytes_with_nul()); + + if N != 0 { + // Re-use the space taken by the NULL terminator and swap it with the '.' separator. + this.buf[this.n - 1] = b'.'; + } + } + + this + } + + /// Prepare the [`ModInfoBuilder`] for the next entry. + /// + /// This method acts as a separator between module firmware path entries. + /// + /// Must be called before constructing a new entry with subsequent calls to + /// [`ModInfoBuilder::push`]. + /// + /// See [`ModInfoBuilder::push`] for an example. + pub const fn new_entry(self) -> Self { + self.push_internal(b"\0") + .push_module_name() + .push_internal(b"firmware=") + } + + /// Build the byte array. + pub const fn build(self) -> [u8; N] { + // Add the final NULL terminator. + let this = self.push_internal(b"\0"); + + if this.n == N { + this.buf + } else { + crate::build_error!("Length mismatch."); + } + } +} + +impl ModInfoBuilder<0> { + /// Return the length of the byte array to build. + pub const fn build_length(self) -> usize { + // Compensate for the NULL terminator added by `build`. + self.n + 1 + } +} diff --git a/rust/macros/module.rs b/rust/macros/module.rs index cdf94f4982df..110e59c64197 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -228,6 +228,10 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { kernel::ThisModule::from_ptr(core::ptr::null_mut()) }}; + /// The `LocalModule` type is the type of the module created by `module!`, + /// `module_pci_driver!`, `module_platform_driver!`, etc. + type LocalModule = {type_}; + impl kernel::ModuleMetadata for {type_} {{ const NAME: &'static kernel::str::CStr = kernel::c_str!(\"{name}\"); }} |