diff options
Diffstat (limited to 'drivers/gpu')
66 files changed, 3371 insertions, 337 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 616dfd4a1398..908360584e4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -71,6 +71,12 @@ amdgpu-y += \ amdgpu_vce.o \ vce_v3_0.o +# add amdkfd interfaces +amdgpu-y += \ + amdgpu_amdkfd.o \ + amdgpu_amdkfd_gfx_v7.o \ + amdgpu_amdkfd_gfx_v8.o + amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 01657830b470..f3791e0d27d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2011,6 +2011,9 @@ struct amdgpu_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; + + /* amdkfd interface */ + struct kfd_dev *kfd; }; bool amdgpu_device_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c new file mode 100644 index 000000000000..bc763e0c8f4c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -0,0 +1,267 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu_amdkfd.h" +#include "amdgpu_family.h" +#include <drm/drmP.h> +#include "amdgpu.h" +#include <linux/module.h> + +const struct kfd2kgd_calls *kfd2kgd; +const struct kgd2kfd_calls *kgd2kfd; +bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); + +bool amdgpu_amdkfd_init(void) +{ +#if defined(CONFIG_HSA_AMD_MODULE) + bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); + + kgd2kfd_init_p = symbol_request(kgd2kfd_init); + + if (kgd2kfd_init_p == NULL) + return false; +#endif + return true; +} + +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev) +{ +#if defined(CONFIG_HSA_AMD_MODULE) + bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); +#endif + + switch (rdev->asic_type) { + case CHIP_KAVERI: + kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); + break; + case CHIP_CARRIZO: + kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); + break; + default: + return false; + } + +#if defined(CONFIG_HSA_AMD_MODULE) + kgd2kfd_init_p = symbol_request(kgd2kfd_init); + + if (kgd2kfd_init_p == NULL) { + kfd2kgd = NULL; + return false; + } + + if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) { + symbol_put(kgd2kfd_init); + kfd2kgd = NULL; + kgd2kfd = NULL; + + return false; + } + + return true; +#elif defined(CONFIG_HSA_AMD) + if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) { + kfd2kgd = NULL; + kgd2kfd = NULL; + return false; + } + + return true; +#else + kfd2kgd = NULL; + return false; +#endif +} + +void amdgpu_amdkfd_fini(void) +{ + if (kgd2kfd) { + kgd2kfd->exit(); + symbol_put(kgd2kfd_init); + } +} + +void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev) +{ + if (kgd2kfd) + rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, + rdev->pdev, kfd2kgd); +} + +void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev) +{ + if (rdev->kfd) { + struct kgd2kfd_shared_resources gpu_resources = { + .compute_vmid_bitmap = 0xFF00, + + .first_compute_pipe = 1, + .compute_pipe_count = 4 - 1, + }; + + amdgpu_doorbell_get_kfd_info(rdev, + &gpu_resources.doorbell_physical_address, + &gpu_resources.doorbell_aperture_size, + &gpu_resources.doorbell_start_offset); + + kgd2kfd->device_init(rdev->kfd, &gpu_resources); + } +} + +void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev) +{ + if (rdev->kfd) { + kgd2kfd->device_exit(rdev->kfd); + rdev->kfd = NULL; + } +} + +void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, + const void *ih_ring_entry) +{ + if (rdev->kfd) + kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); +} + +void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev) +{ + if (rdev->kfd) + kgd2kfd->suspend(rdev->kfd); +} + +int amdgpu_amdkfd_resume(struct amdgpu_device *rdev) +{ + int r = 0; + + if (rdev->kfd) + r = kgd2kfd->resume(rdev->kfd); + + return r; +} + +u32 pool_to_domain(enum kgd_memory_pool p) +{ + switch (p) { + case KGD_POOL_FRAMEBUFFER: return AMDGPU_GEM_DOMAIN_VRAM; + default: return AMDGPU_GEM_DOMAIN_GTT; + } +} + +int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr) +{ + struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct kgd_mem **mem = (struct kgd_mem **) mem_obj; + int r; + + BUG_ON(kgd == NULL); + BUG_ON(gpu_addr == NULL); + BUG_ON(cpu_ptr == NULL); + + *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if ((*mem) == NULL) + return -ENOMEM; + + r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo); + if (r) { + dev_err(rdev->dev, + "failed to allocate BO for amdkfd (%d)\n", r); + return r; + } + + /* map the buffer */ + r = amdgpu_bo_reserve((*mem)->bo, true); + if (r) { + dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); + goto allocate_mem_reserve_bo_failed; + } + + r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, + &(*mem)->gpu_addr); + if (r) { + dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); + goto allocate_mem_pin_bo_failed; + } + *gpu_addr = (*mem)->gpu_addr; + + r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); + if (r) { + dev_err(rdev->dev, + "(%d) failed to map bo to kernel for amdkfd\n", r); + goto allocate_mem_kmap_bo_failed; + } + *cpu_ptr = (*mem)->cpu_ptr; + + amdgpu_bo_unreserve((*mem)->bo); + + return 0; + +allocate_mem_kmap_bo_failed: + amdgpu_bo_unpin((*mem)->bo); +allocate_mem_pin_bo_failed: + amdgpu_bo_unreserve((*mem)->bo); +allocate_mem_reserve_bo_failed: + amdgpu_bo_unref(&(*mem)->bo); + + return r; +} + +void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) +{ + struct kgd_mem *mem = (struct kgd_mem *) mem_obj; + + BUG_ON(mem == NULL); + + amdgpu_bo_reserve(mem->bo, true); + amdgpu_bo_kunmap(mem->bo); + amdgpu_bo_unpin(mem->bo); + amdgpu_bo_unreserve(mem->bo); + amdgpu_bo_unref(&(mem->bo)); + kfree(mem); +} + +uint64_t get_vmem_size(struct kgd_dev *kgd) +{ + struct amdgpu_device *rdev = + (struct amdgpu_device *)kgd; + + BUG_ON(kgd == NULL); + + return rdev->mc.real_vram_size; +} + +uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) +{ + struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + + if (rdev->asic_funcs->get_gpu_clock_counter) + return rdev->asic_funcs->get_gpu_clock_counter(rdev); + return 0; +} + +uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) +{ + struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + + /* The sclk is in quantas of 10kHz */ + return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h new file mode 100644 index 000000000000..a8be765542e6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -0,0 +1,65 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */ + +#ifndef AMDGPU_AMDKFD_H_INCLUDED +#define AMDGPU_AMDKFD_H_INCLUDED + +#include <linux/types.h> +#include <kgd_kfd_interface.h> + +struct amdgpu_device; + +struct kgd_mem { + struct amdgpu_bo *bo; + uint64_t gpu_addr; + void *cpu_ptr; +}; + +bool amdgpu_amdkfd_init(void); +void amdgpu_amdkfd_fini(void); + +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev); + +void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev); +int amdgpu_amdkfd_resume(struct amdgpu_device *rdev); +void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, + const void *ih_ring_entry); +void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev); +void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev); +void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev); + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); + +/* Shared API */ +int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr); +void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +uint64_t get_vmem_size(struct kgd_dev *kgd); +uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); + +uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); + +#endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c new file mode 100644 index 000000000000..2daad335b809 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -0,0 +1,670 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "cikd.h" +#include "cik_sdma.h" +#include "amdgpu_ucode.h" +#include "gca/gfx_7_2_d.h" +#include "gca/gfx_7_2_enum.h" +#include "gca/gfx_7_2_sh_mask.h" +#include "oss/oss_2_0_d.h" +#include "oss/oss_2_0_sh_mask.h" +#include "gmc/gmc_7_1_d.h" +#include "gmc/gmc_7_1_sh_mask.h" +#include "cik_structs.h" + +#define CIK_PIPE_PER_MEC (4) + +enum { + MAX_TRAPID = 8, /* 3 bits in the bitfield. */ + MAX_WATCH_ADDRESSES = 4 +}; + +enum { + ADDRESS_WATCH_REG_ADDR_HI = 0, + ADDRESS_WATCH_REG_ADDR_LO, + ADDRESS_WATCH_REG_CNTL, + ADDRESS_WATCH_REG_MAX +}; + +/* not defined in the CI/KV reg file */ +enum { + ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, + ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, + /* extend the mask to 26 bits to match the low address field */ + ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, + ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF +}; + +static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { + mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL, + mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL, + mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL, + mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL +}; + +union TCP_WATCH_CNTL_BITS { + struct { + uint32_t mask:24; + uint32_t vmid:4; + uint32_t atc:1; + uint32_t mode:2; + uint32_t valid:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); + +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); + +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); + +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_vmem_size = get_vmem_size, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, + .get_fw_version = get_fw_version +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions() +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); + + mutex_lock(&adev->srbm_mutex); + WREG32(mmSRBM_GFX_CNTL, value); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + WREG32(mmSRBM_GFX_CNTL, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; + uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base); + WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished and the + * SW cleared it. So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping); + + while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid))) + cpu_relax(); + WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping); + + return 0; +} + +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; + uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, 0, 0); + WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8)); + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8)); + WREG32(mmCP_HPD_EOP_VMID, 0); + WREG32(mmCP_HPD_EOP_CONTROL, hpd_size); + unlock_srbm(kgd); + + return 0; +} + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; + pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +{ + uint32_t retval; + + retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; + + pr_debug("kfd: sdma base address: 0x%x\n", retval); + + return retval; +} + +static inline struct cik_mqd *get_mqd(void *mqd) +{ + return (struct cik_mqd *)mqd; +} + +static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + return (struct cik_sdma_rlc_registers *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t wptr_shadow, is_wptr_shadow_valid; + struct cik_mqd *m; + + m = get_mqd(mqd); + + is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); + + acquire_queue(kgd, pipe_id, queue_id); + WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); + WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); + WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); + + WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); + WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); + WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); + + WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); + WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); + WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); + + WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); + + WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); + WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); + WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type); + + WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo); + WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi); + WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo); + WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi); + + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + m->cp_hqd_pq_rptr_report_addr_hi); + + WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); + + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo); + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi); + + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); + + WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); + + WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); + + WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); + WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); + + WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); + + if (is_wptr_shadow_valid) + WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow); + + WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, + m->sdma_rlc_rb_base); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdma_rlc_rb_base_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdma_rlc_rb_rptr_addr_lo); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdma_rlc_rb_rptr_addr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, + m->sdma_rlc_doorbell); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl); + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(mmCP_HQD_ACTIVE); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(mmCP_HQD_PQ_BASE) && + high == RREG32(mmCP_HQD_PQ_BASE_HI)) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t temp; + + acquire_queue(kgd, pipe_id, queue_id); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + + while (true) { + temp = RREG32(mmCP_HQD_ACTIVE); + if (temp & CP_HQD_ACTIVE__ACTIVE__SHIFT) + break; + if (timeout == 0) { + pr_err("kfd: cp queue preemption time out (%dms)\n", + temp); + release_queue(kgd); + return -ETIME; + } + msleep(20); + timeout -= 20; + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t temp; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) + break; + if (timeout == 0) + return -ETIME; + msleep(20); + timeout -= 20; + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + union TCP_WATCH_CNTL_BITS cntl; + unsigned int i; + + cntl.u32All = 0; + + cntl.bitfields.valid = 0; + cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; + cntl.bitfields.atc = 1; + + /* Turning off this address until we set all the registers */ + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) + WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); + + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + union TCP_WATCH_CNTL_BITS cntl; + + cntl.u32All = cntl_val; + + /* Turning off this watch point until we set all the registers */ + cntl.bitfields.valid = 0; + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); + + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_HI], addr_hi); + + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_LO], addr_lo); + + /* Enable the watch point */ + cntl.bitfields.valid = 1; + + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); + + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(mmGRBM_GFX_INDEX, gfx_index_val); + WREG32(mmSQ_CMD, sq_cmd); + + /* Restore the GRBM_GFX_INDEX register */ + + data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | + GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | + GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK; + + WREG32(mmGRBM_GFX_INDEX, data); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); +} + +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + BUG_ON(kgd == NULL); + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *) + adev->sdma[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *) + adev->sdma[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c new file mode 100644 index 000000000000..dfd1d503bccf --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -0,0 +1,543 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "gca/gfx_8_0_sh_mask.h" +#include "gca/gfx_8_0_d.h" +#include "gca/gfx_8_0_enum.h" +#include "oss/oss_3_0_sh_mask.h" +#include "oss/oss_3_0_d.h" +#include "gmc/gmc_8_1_sh_mask.h" +#include "gmc/gmc_8_1_d.h" +#include "vi_structs.h" +#include "vid.h" + +#define VI_PIPE_PER_MEC (4) + +struct cik_sdma_rlc_registers; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout); +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_vmem_size = get_vmem_size, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, + .get_fw_version = get_fw_version +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions() +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); + + mutex_lock(&adev->srbm_mutex); + WREG32(mmSRBM_GFX_CNTL, value); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + WREG32(mmSRBM_GFX_CNTL, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; + uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base); + WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping); + + while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid))) + cpu_relax(); + WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping); + + return 0; +} + +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + return 0; +} + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; + pipe = (pipe_id % VI_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +{ + return 0; +} + +static inline struct vi_mqd *get_mqd(void *mqd) +{ + return (struct vi_mqd *)mqd; +} + +static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + return (struct cik_sdma_rlc_registers *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr) +{ + struct vi_mqd *m; + uint32_t shadow_wptr, valid_wptr; + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + m = get_mqd(mqd); + + valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); + acquire_queue(kgd, pipe_id, queue_id); + + WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); + WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); + WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); + + WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); + WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); + WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); + WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); + WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); + WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); + WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + m->cp_hqd_pq_rptr_report_addr_hi); + + if (valid_wptr > 0) + WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr); + + WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); + + WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo); + WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi); + WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control); + WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); + WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); + WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events); + + WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo); + WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi); + WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control); + WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset); + WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size); + WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset); + WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size); + + WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request); + WREG32(mmCP_HQD_ERROR, m->cp_hqd_error); + WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); + WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones); + + WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +{ + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(mmCP_HQD_ACTIVE); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(mmCP_HQD_PQ_BASE) && + high == RREG32(mmCP_HQD_PQ_BASE_HI)) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t temp; + + acquire_queue(kgd, pipe_id, queue_id); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + + while (true) { + temp = RREG32(mmCP_HQD_ACTIVE); + if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + break; + if (timeout == 0) { + pr_err("kfd: cp queue preemption time out (%dms)\n", + temp); + release_queue(kgd); + return -ETIME; + } + msleep(20); + timeout -= 20; + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t temp; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) + break; + if (timeout == 0) + return -ETIME; + msleep(20); + timeout -= 20; + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(mmGRBM_GFX_INDEX, gfx_index_val); + WREG32(mmSQ_CMD, sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(mmGRBM_GFX_INDEX, data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + BUG_ON(kgd == NULL); + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *) + adev->sdma[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *) + adev->sdma[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d63135bf29c0..1f040d85ac47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -669,6 +669,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, static int amdgpu_cs_dependencies(struct amdgpu_device *adev, struct amdgpu_cs_parser *p) { + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_ib *ib; int i, j, r; @@ -694,6 +695,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, for (j = 0; j < num_deps; ++j) { struct amdgpu_fence *fence; struct amdgpu_ring *ring; + struct amdgpu_ctx *ctx; r = amdgpu_cs_get_ring(adev, deps[j].ip_type, deps[j].ip_instance, @@ -701,14 +703,21 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, if (r) return r; + ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id); + if (ctx == NULL) + return -EINVAL; + r = amdgpu_fence_recreate(ring, p->filp, deps[j].handle, &fence); - if (r) + if (r) { + amdgpu_ctx_put(ctx); return r; + } amdgpu_sync_fence(&ib->sync, fence); amdgpu_fence_unref(&fence); + amdgpu_ctx_put(ctx); } } @@ -808,12 +817,16 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, wait->in.ring, &ring); - if (r) + if (r) { + amdgpu_ctx_put(ctx); return r; + } r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); - if (r) + if (r) { + amdgpu_ctx_put(ctx); return r; + } r = fence_wait_timeout(&fence->base, true, timeout); amdgpu_fence_unref(&fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ba46be361c9b..d79009b65867 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1207,10 +1207,15 @@ static int amdgpu_early_init(struct amdgpu_device *adev) } else { if (adev->ip_blocks[i].funcs->early_init) { r = adev->ip_blocks[i].funcs->early_init((void *)adev); - if (r) + if (r == -ENOENT) + adev->ip_block_enabled[i] = false; + else if (r) return r; + else + adev->ip_block_enabled[i] = true; + } else { + adev->ip_block_enabled[i] = true; } - adev->ip_block_enabled[i] = true; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 56da962231fc..115906f5fda0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -44,6 +44,8 @@ #include "amdgpu.h" #include "amdgpu_irq.h" +#include "amdgpu_amdkfd.h" + /* * KMS wrapper. * - 3.0.0 - initial driver @@ -527,12 +529,15 @@ static int __init amdgpu_init(void) driver->num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); + amdgpu_amdkfd_init(); + /* let modprobe override vga console setting */ return drm_pci_init(driver, pdriver); } static void __exit amdgpu_exit(void) { + amdgpu_amdkfd_fini(); drm_pci_exit(driver, pdriver); amdgpu_unregister_atpx_handler(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index db5422e65ec5..fb44dd2231b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -24,6 +24,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" +#include "amdgpu_amdkfd.h" /** * amdgpu_ih_ring_alloc - allocate memory for the IH ring @@ -199,6 +200,12 @@ restart_ih: rmb(); while (adev->irq.ih.rptr != wptr) { + u32 ring_index = adev->irq.ih.rptr >> 2; + + /* Before dispatching irq to IP blocks, send it to amdkfd */ + amdgpu_amdkfd_interrupt(adev, + (const void *) &adev->irq.ih.ring[ring_index]); + amdgpu_ih_decode_iv(adev, &entry); adev->irq.ih.rptr &= adev->irq.ih.ptr_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 5533434c7a8f..8c40a9671b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -34,6 +34,7 @@ #include <linux/vga_switcheroo.h> #include <linux/slab.h> #include <linux/pm_runtime.h> +#include "amdgpu_amdkfd.h" #if defined(CONFIG_VGA_SWITCHEROO) bool amdgpu_has_atpx(void); @@ -61,6 +62,8 @@ int amdgpu_driver_unload_kms(struct drm_device *dev) pm_runtime_get_sync(dev->dev); + amdgpu_amdkfd_device_fini(adev); + amdgpu_acpi_fini(adev); amdgpu_device_fini(adev); @@ -118,6 +121,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } + amdgpu_amdkfd_load_interface(adev); + amdgpu_amdkfd_device_probe(adev); + amdgpu_amdkfd_device_init(adev); + if (amdgpu_device_is_px(dev)) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 341c56681841..b3b66a0d5ff7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -64,6 +64,8 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#include "amdgpu_amdkfd.h" + /* * Indirect registers accessor */ @@ -2448,14 +2450,21 @@ static int cik_common_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_amdkfd_suspend(adev); + return cik_common_hw_fini(adev); } static int cik_common_resume(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return cik_common_hw_init(adev); + r = cik_common_hw_init(adev); + if (r) + return r; + + return amdgpu_amdkfd_resume(adev); } static bool cik_common_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index d19085a97064..a3e3dfaa01a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -552,6 +552,12 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 +/* if PTR32, these are the bases for scratch and lds */ +#define PRIVATE_BASE(x) ((x) << 0) /* scratch */ +#define SHARED_BASE(x) ((x) << 16) /* LDS */ + +#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 + /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ enum { MTYPE_CACHED = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index f75a31df30bd..1a2d419cbf16 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -1679,25 +1679,31 @@ static int cz_dpm_unforce_dpm_levels(struct amdgpu_device *adev) if (ret) return ret; - DRM_INFO("DPM unforce state min=%d, max=%d.\n", - pi->sclk_dpm.soft_min_clk, - pi->sclk_dpm.soft_max_clk); + DRM_DEBUG("DPM unforce state min=%d, max=%d.\n", + pi->sclk_dpm.soft_min_clk, + pi->sclk_dpm.soft_max_clk); return 0; } static int cz_dpm_force_dpm_level(struct amdgpu_device *adev, - enum amdgpu_dpm_forced_level level) + enum amdgpu_dpm_forced_level level) { int ret = 0; switch (level) { case AMDGPU_DPM_FORCED_LEVEL_HIGH: + ret = cz_dpm_unforce_dpm_levels(adev); + if (ret) + return ret; ret = cz_dpm_force_highest(adev); if (ret) return ret; break; case AMDGPU_DPM_FORCED_LEVEL_LOW: + ret = cz_dpm_unforce_dpm_levels(adev); + if (ret) + return ret; ret = cz_dpm_force_lowest(adev); if (ret) return ret; @@ -1711,6 +1717,8 @@ static int cz_dpm_force_dpm_level(struct amdgpu_device *adev, break; } + adev->pm.dpm.forced_level = level; + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 08387dfd98a7..cc050a329c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2566,6 +2566,7 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode) struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + unsigned type; switch (mode) { case DRM_MODE_DPMS_ON: @@ -2574,6 +2575,9 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode) dce_v8_0_vga_enable(crtc, true); amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); dce_v8_0_vga_enable(crtc, false); + /* Make sure VBLANK interrupt is still enabled */ + type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + amdgpu_irq_update(adev, &adev->crtc_irq, type); drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id); dce_v8_0_crtc_load_lut(crtc); break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 7b683fb2173c..1c7c992dea37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1813,10 +1813,7 @@ static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev, u32 data, mask; data = RREG32(mmCC_RB_BACKEND_DISABLE); - if (data & 1) - data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; - else - data = 0; + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index fa5a4448531d..68552da40287 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -122,6 +122,32 @@ static void vi_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } +/* smu_8_0_d.h */ +#define mmMP0PUB_IND_INDEX 0x180 +#define mmMP0PUB_IND_DATA 0x181 + +static u32 cz_smc_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->smc_idx_lock, flags); + WREG32(mmMP0PUB_IND_INDEX, (reg)); + r = RREG32(mmMP0PUB_IND_DATA); + spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + return r; +} + +static void cz_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&adev->smc_idx_lock, flags); + WREG32(mmMP0PUB_IND_INDEX, (reg)); + WREG32(mmMP0PUB_IND_DATA, (v)); + spin_unlock_irqrestore(&adev->smc_idx_lock, flags); +} + static u32 vi_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags; @@ -1222,8 +1248,13 @@ static int vi_common_early_init(void *handle) bool smc_enabled = false; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->smc_rreg = &vi_smc_rreg; - adev->smc_wreg = &vi_smc_wreg; + if (adev->flags & AMDGPU_IS_APU) { + adev->smc_rreg = &cz_smc_rreg; + adev->smc_wreg = &cz_smc_wreg; + } else { + adev->smc_rreg = &vi_smc_rreg; + adev->smc_wreg = &vi_smc_wreg; + } adev->pcie_rreg = &vi_pcie_rreg; adev->pcie_wreg = &vi_pcie_wreg; adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg; diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 31bb89452e12..d98aa9d82fa1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -66,6 +66,11 @@ #define AMDGPU_NUM_OF_VMIDS 8 +#define PIPEID(x) ((x) << 0) +#define MEID(x) ((x) << 2) +#define VMID(x) ((x) << 4) +#define QUEUEID(x) ((x) << 8) + #define RB_BITMAP_WIDTH_PER_SH 2 #define MC_SEQ_MISC0__MT__MASK 0xf0000000 diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index 8dfac37ff327..e13c67c8d2c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,6 +4,6 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" - depends on DRM_RADEON && AMD_IOMMU_V2 && X86_64 + depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64 help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 28551153ec6d..7fc9b0f444cb 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -2,7 +2,8 @@ # Makefile for Heterogenous System Architecture support for AMD GPU devices # -ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ +ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ \ + -Idrivers/gpu/drm/amd/include/asic_reg amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 183be5b8414f..48769d12dd7b 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -65,17 +65,6 @@ #define AQL_ENABLE 1 -#define SDMA_RB_VMID(x) (x << 24) -#define SDMA_RB_ENABLE (1 << 0) -#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */ -#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12) -#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ -#define SDMA_OFFSET(x) (x << 0) -#define SDMA_DB_ENABLE (1 << 28) -#define SDMA_ATC (1 << 0) -#define SDMA_VA_PTR32 (1 << 4) -#define SDMA_VA_SHARED_BASE(x) (x << 8) - #define GRBM_GFX_INDEX 0x30800 #define ATC_VMID_PASID_MAPPING_VALID (1U << 31) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 75312c82969f..3f95f7cb4019 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -80,7 +80,12 @@ static const struct kfd_deviceid supported_devices[] = { { 0x1318, &kaveri_device_info }, /* Kaveri */ { 0x131B, &kaveri_device_info }, /* Kaveri */ { 0x131C, &kaveri_device_info }, /* Kaveri */ - { 0x131D, &kaveri_device_info } /* Kaveri */ + { 0x131D, &kaveri_device_info }, /* Kaveri */ + { 0x9870, &carrizo_device_info }, /* Carrizo */ + { 0x9874, &carrizo_device_info }, /* Carrizo */ + { 0x9875, &carrizo_device_info }, /* Carrizo */ + { 0x9876, &carrizo_device_info }, /* Carrizo */ + { 0x9877, &carrizo_device_info } /* Carrizo */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 9ce8a20a7aff..23ce774ff09d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -23,6 +23,7 @@ #include "kfd_device_queue_manager.h" #include "cik_regs.h" +#include "oss/oss_2_4_sh_mask.h" static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -135,13 +136,16 @@ static int register_process_cik(struct device_queue_manager *dqm, static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { - uint32_t value = SDMA_ATC; + uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT); if (q->process->is_32bit_user_mode) - value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); + value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) | + get_sh_mem_bases_32(qpd_to_pdd(qpd)); else - value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64( - qpd_to_pdd(qpd))); + value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) && + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; + q->properties.sdma_vm_addr = value; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 4c15212a3899..44c38e8e54d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -22,6 +22,10 @@ */ #include "kfd_device_queue_manager.h" +#include "gca/gfx_8_0_enum.h" +#include "gca/gfx_8_0_sh_mask.h" +#include "gca/gfx_8_0_enum.h" +#include "oss/oss_3_0_sh_mask.h" static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -37,14 +41,40 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) { - pr_warn("amdkfd: VI DQM is not currently supported\n"); - ops->set_cache_memory_policy = set_cache_memory_policy_vi; ops->register_process = register_process_vi; ops->initialize = initialize_cpsch_vi; ops->init_sdma_vm = init_sdma_vm; } +static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) +{ + /* In 64-bit mode, we can only control the top 3 bits of the LDS, + * scratch and GPUVM apertures. + * The hardware fills in the remaining 59 bits according to the + * following pattern: + * LDS: X0000000'00000000 - X0000001'00000000 (4GB) + * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) + * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) + * + * (where X/Y is the configurable nybble with the low-bit 0) + * + * LDS and scratch will have the same top nybble programmed in the + * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. + * GPUVM can have a different top nybble programmed in the + * top 3 bits of SH_MEM_BASES.SHARED_BASE. + * We don't bother to support different top nybbles + * for LDS/Scratch and GPUVM. + */ + + BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + top_address_nybble == 0); + + return top_address_nybble << 12 | + (top_address_nybble << 12) << + SH_MEM_BASES__SHARED_BASE__SHIFT; +} + static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, @@ -52,18 +82,83 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - return false; + uint32_t default_mtype; + uint32_t ape1_mtype; + + default_mtype = (default_policy == cache_policy_coherent) ? + MTYPE_CC : + MTYPE_NC; + + ape1_mtype = (alternate_policy == cache_policy_coherent) ? + MTYPE_CC : + MTYPE_NC; + + qpd->sh_mem_config = (qpd->sh_mem_config & + SH_MEM_CONFIG__ADDRESS_MODE_MASK) | + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT | + SH_MEM_CONFIG__PRIVATE_ATC_MASK; + + return true; } static int register_process_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { - return -1; + struct kfd_process_device *pdd; + unsigned int temp; + + BUG_ON(!dqm || !qpd); + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT | + SH_MEM_CONFIG__PRIVATE_ATC_MASK; + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + if (qpd->pqm->process->is_32bit_user_mode) { + temp = get_sh_mem_bases_32(pdd); + qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT; + qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 << + SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; + } else { + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << + SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; + } + + pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; } static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { + uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT); + + if (q->process->is_32bit_user_mode) + value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) | + get_sh_mem_bases_32(qpd_to_pdd(qpd)); + else + value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) && + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; + + q->properties.sdma_vm_addr = value; } static int initialize_cpsch_vi(struct device_queue_manager *dqm) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 434979428fc0..d83de985e88c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -27,6 +27,7 @@ #include "kfd_mqd_manager.h" #include "cik_regs.h" #include "cik_structs.h" +#include "oss/oss_2_4_sh_mask.h" static inline struct cik_mqd *get_mqd(void *mqd) { @@ -214,17 +215,20 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, BUG_ON(!mm || !mqd || !q); m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = - SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) | - SDMA_RB_VMID(q->vmid) | - SDMA_RPTR_WRITEBACK_ENABLE | - SDMA_RPTR_WRITEBACK_TIMER(6); + m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << + SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | + 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8); m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE; + m->sdma_rlc_doorbell = q->doorbell_off << + SDMA0_RLC0_DOORBELL__OFFSET__SHIFT | + 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT; + m->sdma_rlc_virtual_addr = q->sdma_vm_addr; m->sdma_engine_id = q->sdma_engine_id; @@ -234,7 +238,9 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, if (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0) { - m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE; + m->sdma_rlc_rb_cntl |= + 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT; + q->is_active = true; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index b3a7e3ba1e38..fa32c32fa1c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -22,12 +22,255 @@ */ #include <linux/printk.h> +#include <linux/slab.h> #include "kfd_priv.h" #include "kfd_mqd_manager.h" +#include "vi_structs.h" +#include "gca/gfx_8_0_sh_mask.h" +#include "gca/gfx_8_0_enum.h" + +#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8 + +static inline struct vi_mqd *get_mqd(void *mqd) +{ + return (struct vi_mqd *)mqd; +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + uint64_t addr; + struct vi_mqd *m; + + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd), + mqd_mem_obj); + if (retval != 0) + return -ENOMEM; + + m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, sizeof(struct vi_mqd)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | + 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT | + MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT; + + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | + 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | + 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT; + + if (q->format == KFD_QUEUE_FORMAT_AQL) + m->cp_hqd_iq_rptr = 1; + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr) +{ + return mm->dev->kfd2kgd->hqd_load + (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); +} + +static int __update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, unsigned int mtype, + unsigned int atc_bit) +{ + struct vi_mqd *m; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + m = get_mqd(mqd); + + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT | + atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT | + mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT; + m->cp_hqd_pq_control |= + ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + + m->cp_hqd_pq_doorbell_control = + 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT | + q->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT | + mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT; + + m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT | + 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | + mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT; + + m->cp_hqd_eop_control |= + ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_eop_base_addr_lo = + lower_32_bits(q->eop_ring_buffer_address >> 8); + m->cp_hqd_eop_base_addr_hi = + upper_32_bits(q->eop_ring_buffer_address >> 8); + + m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT | + mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT; + + m->cp_hqd_vmid = q->vmid; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | + 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; + } + + m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->cp_hqd_active = 1; + q->is_active = true; + } + + return 0; +} + + +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, MTYPE_CC, 1); +} + +static int destroy_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_destroy + (mm->dev->kgd, type, timeout, + pipe_id, queue_id); +} + +static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + BUG_ON(!mm || !mqd); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static bool is_occupied(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_is_occupied( + mm->dev->kgd, queue_address, + pipe_id, queue_id); +} + +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + struct vi_mqd *m; + int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q); + + if (retval != 0) + return retval; + + m = get_mqd(*mqd); + + m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT | + 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; + + return retval; +} + +static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct vi_mqd *m; + int retval = __update_mqd(mm, mqd, q, MTYPE_UC, 0); + + if (retval != 0) + return retval; + + m = get_mqd(mqd); + m->cp_hqd_vmid = q->vmid; + return retval; +} struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, - struct kfd_dev *dev) + struct kfd_dev *dev) { - pr_warn("amdkfd: VI MQD is not currently supported\n"); - return NULL; + struct mqd_manager *mqd; + + BUG_ON(!dev); + BUG_ON(type >= KFD_MQD_TYPE_MAX); + + pr_debug("kfd: In func %s\n", __func__); + + mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); + if (!mqd) + return NULL; + + mqd->dev = dev; + + switch (type) { + case KFD_MQD_TYPE_CP: + case KFD_MQD_TYPE_COMPUTE: + mqd->init_mqd = init_mqd; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + break; + case KFD_MQD_TYPE_HIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + break; + case KFD_MQD_TYPE_SDMA: + break; + default: + kfree(mqd); + return NULL; + } + + return mqd; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 99b6d28a11c3..90f391434fa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -27,6 +27,7 @@ #include "kfd_kernel_queue.h" #include "kfd_priv.h" #include "kfd_pm4_headers.h" +#include "kfd_pm4_headers_vi.h" #include "kfd_pm4_opcodes.h" static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, @@ -55,6 +56,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, bool *over_subscription) { unsigned int process_count, queue_count; + unsigned int map_queue_size; BUG_ON(!pm || !rlib_size || !over_subscription); @@ -69,9 +71,13 @@ static void pm_calc_rlib_size(struct packet_manager *pm, pr_debug("kfd: over subscribed runlist\n"); } + map_queue_size = + (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ? + sizeof(struct pm4_mes_map_queues) : + sizeof(struct pm4_map_queues); /* calculate run list ib allocation size */ *rlib_size = process_count * sizeof(struct pm4_map_process) + - queue_count * sizeof(struct pm4_map_queues); + queue_count * map_queue_size; /* * Increase the allocation size in case we need a chained run list @@ -176,6 +182,71 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, return 0; } +static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + BUG_ON(!pm || !buffer || !q); + + pr_debug("kfd: In func %s\n", __func__); + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_map_queues)); + + packet->header.u32all = build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + pr_err("kfd: in %s queue type %d\n", __func__, + q->properties.type); + BUG(); + break; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, struct queue *q, bool is_static) { @@ -292,8 +363,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", kq->queue->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], - kq->queue, qpd->is_debug); + if (pm->dqm->dev->device_info->asic_family == + CHIP_CARRIZO) + retval = pm_create_map_queue_vi(pm, + &rl_buffer[rl_wptr], + kq->queue, + qpd->is_debug); + else + retval = pm_create_map_queue(pm, + &rl_buffer[rl_wptr], + kq->queue, + qpd->is_debug); if (retval != 0) return retval; @@ -309,8 +389,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", q->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], - q, qpd->is_debug); + if (pm->dqm->dev->device_info->asic_family == + CHIP_CARRIZO) + retval = pm_create_map_queue_vi(pm, + &rl_buffer[rl_wptr], + q, + qpd->is_debug); + else + retval = pm_create_map_queue(pm, + &rl_buffer[rl_wptr], + q, + qpd->is_debug); if (retval != 0) return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h new file mode 100644 index 000000000000..08c721922812 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h @@ -0,0 +1,398 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef F32_MES_PM4_PACKETS_H +#define F32_MES_PM4_PACKETS_H + +#ifndef PM4_MES_HEADER_DEFINED +#define PM4_MES_HEADER_DEFINED +union PM4_MES_TYPE_3_HEADER { + struct { + uint32_t reserved1 : 8; /* < reserved */ + uint32_t opcode : 8; /* < IT opcode */ + uint32_t count : 14;/* < number of DWORDs - 1 in the + information body. */ + uint32_t type : 2; /* < packet identifier. + It should be 3 for type 3 packets */ + }; + uint32_t u32All; +}; +#endif /* PM4_MES_HEADER_DEFINED */ + +/*--------------------MES_SET_RESOURCES--------------------*/ + +#ifndef PM4_MES_SET_RESOURCES_DEFINED +#define PM4_MES_SET_RESOURCES_DEFINED +enum mes_set_resources_queue_type_enum { + queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, + queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, + queue_type__mes_set_resources__hsa_debug_interface_queue = 4 +}; + + +struct pm4_mes_set_resources { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t vmid_mask:16; + uint32_t unmap_latency:8; + uint32_t reserved1:5; + enum mes_set_resources_queue_type_enum queue_type:3; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t queue_mask_lo; + uint32_t queue_mask_hi; + uint32_t gws_mask_lo; + uint32_t gws_mask_hi; + + union { + struct { + uint32_t oac_mask:16; + uint32_t reserved2:16; + } bitfields7; + uint32_t ordinal7; + }; + + union { + struct { + uint32_t gds_heap_base:6; + uint32_t reserved3:5; + uint32_t gds_heap_size:6; + uint32_t reserved4:15; + } bitfields8; + uint32_t ordinal8; + }; + +}; +#endif + +/*--------------------MES_RUN_LIST--------------------*/ + +#ifndef PM4_MES_RUN_LIST_DEFINED +#define PM4_MES_RUN_LIST_DEFINED + +struct pm4_mes_runlist { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:2; + uint32_t ib_base_lo:30; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t ib_base_hi:16; + uint32_t reserved2:16; + } bitfields3; + uint32_t ordinal3; + }; + + union { + struct { + uint32_t ib_size:20; + uint32_t chain:1; + uint32_t offload_polling:1; + uint32_t reserved3:1; + uint32_t valid:1; + uint32_t reserved4:8; + } bitfields4; + uint32_t ordinal4; + }; + +}; +#endif + +/*--------------------MES_MAP_PROCESS--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_DEFINED +#define PM4_MES_MAP_PROCESS_DEFINED + +struct pm4_mes_map_process { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:8; + uint32_t diq_enable:1; + uint32_t process_quantum:7; + } bitfields2; + uint32_t ordinal2; +}; + + union { + struct { + uint32_t page_table_base:28; + uint32_t reserved2:4; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t sh_mem_bases; + uint32_t sh_mem_ape1_base; + uint32_t sh_mem_ape1_limit; + uint32_t sh_mem_config; + uint32_t gds_addr_lo; + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:6; + uint32_t reserved3:2; + uint32_t num_oac:4; + uint32_t reserved4:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields10; + uint32_t ordinal10; + }; + +}; +#endif + +/*--------------------MES_MAP_QUEUES--------------------*/ + +#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED +#define PM4_MES_MAP_QUEUES_VI_DEFINED +enum mes_map_queues_queue_sel_vi_enum { + queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0, +queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1 +}; + +enum mes_map_queues_queue_type_vi_enum { + queue_type__mes_map_queues__normal_compute_vi = 0, + queue_type__mes_map_queues__debug_interface_queue_vi = 1, + queue_type__mes_map_queues__normal_latency_static_queue_vi = 2, +queue_type__mes_map_queues__low_latency_static_queue_vi = 3 +}; + +enum mes_map_queues_alloc_format_vi_enum { + alloc_format__mes_map_queues__one_per_pipe_vi = 0, +alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 +}; + +enum mes_map_queues_engine_sel_vi_enum { + engine_sel__mes_map_queues__compute_vi = 0, + engine_sel__mes_map_queues__sdma0_vi = 2, + engine_sel__mes_map_queues__sdma1_vi = 3 +}; + + +struct pm4_mes_map_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:4; + enum mes_map_queues_queue_sel_vi_enum queue_sel:2; + uint32_t reserved2:15; + enum mes_map_queues_queue_type_vi_enum queue_type:3; + enum mes_map_queues_alloc_format_vi_enum alloc_format:2; + enum mes_map_queues_engine_sel_vi_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t reserved3:1; + uint32_t check_disable:1; + uint32_t doorbell_offset:21; + uint32_t reserved4:3; + uint32_t queue:6; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t mqd_addr_lo; + uint32_t mqd_addr_hi; + uint32_t wptr_addr_lo; + uint32_t wptr_addr_hi; +}; +#endif + +/*--------------------MES_QUERY_STATUS--------------------*/ + +#ifndef PM4_MES_QUERY_STATUS_DEFINED +#define PM4_MES_QUERY_STATUS_DEFINED +enum mes_query_status_interrupt_sel_enum { + interrupt_sel__mes_query_status__completion_status = 0, + interrupt_sel__mes_query_status__process_status = 1, + interrupt_sel__mes_query_status__queue_status = 2 +}; + +enum mes_query_status_command_enum { + command__mes_query_status__interrupt_only = 0, + command__mes_query_status__fence_only_immediate = 1, + command__mes_query_status__fence_only_after_write_ack = 2, + command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 +}; + +enum mes_query_status_engine_sel_enum { + engine_sel__mes_query_status__compute = 0, + engine_sel__mes_query_status__sdma0_queue = 2, + engine_sel__mes_query_status__sdma1_queue = 3 +}; + +struct pm4_mes_query_status { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t context_id:28; + enum mes_query_status_interrupt_sel_enum + interrupt_sel:2; + enum mes_query_status_command_enum command:2; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:16; + } bitfields3a; + struct { + uint32_t reserved2:2; + uint32_t doorbell_offset:21; + uint32_t reserved3:2; + enum mes_query_status_engine_sel_enum engine_sel:3; + uint32_t reserved4:4; + } bitfields3b; + uint32_t ordinal3; + }; + + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t data_lo; + uint32_t data_hi; +}; +#endif + +/*--------------------MES_UNMAP_QUEUES--------------------*/ + +#ifndef PM4_MES_UNMAP_QUEUES_DEFINED +#define PM4_MES_UNMAP_QUEUES_DEFINED +enum mes_unmap_queues_action_enum { + action__mes_unmap_queues__preempt_queues = 0, + action__mes_unmap_queues__reset_queues = 1, + action__mes_unmap_queues__disable_process_queues = 2, + action__mes_unmap_queues__reserved = 3 +}; + +enum mes_unmap_queues_queue_sel_enum { + queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, + queue_sel__mes_unmap_queues__unmap_all_queues = 2, + queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3 +}; + +enum mes_unmap_queues_engine_sel_enum { + engine_sel__mes_unmap_queues__compute = 0, + engine_sel__mes_unmap_queues__sdma0 = 2, + engine_sel__mes_unmap_queues__sdmal = 3 +}; + +struct PM4_MES_UNMAP_QUEUES { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + enum mes_unmap_queues_action_enum action:2; + uint32_t reserved1:2; + enum mes_unmap_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:20; + enum mes_unmap_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved3:16; + } bitfields3a; + struct { + uint32_t reserved4:2; + uint32_t doorbell_offset0:21; + uint32_t reserved5:9; + } bitfields3b; + uint32_t ordinal3; + }; + + union { + struct { + uint32_t reserved6:2; + uint32_t doorbell_offset1:21; + uint32_t reserved7:9; + } bitfields4; + uint32_t ordinal4; + }; + + union { + struct { + uint32_t reserved8:2; + uint32_t doorbell_offset2:21; + uint32_t reserved9:9; + } bitfields5; + uint32_t ordinal5; + }; + + union { + struct { + uint32_t reserved10:2; + uint32_t doorbell_offset3:21; + uint32_t reserved11:9; + } bitfields6; + uint32_t ordinal6; + }; +}; +#endif + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c25728bc388a..74909e72a009 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1186,6 +1186,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * TODO: Retrieve max engine clock values from KGD */ + if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { + dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE; + pr_info("amdkfd: adding doorbell packet type capability\n"); + } + res = 0; err: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 989624b3cd14..c3ddb9b95ff8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -40,6 +40,7 @@ #define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 #define HSA_CAP_RESERVED 0xfffff000 +#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000 struct kfd_node_properties { uint32_t cpu_cores_count; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 9080daa116b6..888250b33ea8 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -52,7 +52,8 @@ enum kgd_engine_type { KGD_ENGINE_MEC1, KGD_ENGINE_MEC2, KGD_ENGINE_RLC, - KGD_ENGINE_SDMA, + KGD_ENGINE_SDMA1, + KGD_ENGINE_SDMA2, KGD_ENGINE_MAX }; diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h new file mode 100644 index 000000000000..65cfacd7a66c --- /dev/null +++ b/drivers/gpu/drm/amd/include/vi_structs.h @@ -0,0 +1,417 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef VI_STRUCTS_H_ +#define VI_STRUCTS_H_ + +struct vi_sdma_mqd { + uint32_t sdmax_rlcx_rb_cntl; + uint32_t sdmax_rlcx_rb_base; + uint32_t sdmax_rlcx_rb_base_hi; + uint32_t sdmax_rlcx_rb_rptr; + uint32_t sdmax_rlcx_rb_wptr; + uint32_t sdmax_rlcx_rb_wptr_poll_cntl; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; + uint32_t sdmax_rlcx_rb_rptr_addr_hi; + uint32_t sdmax_rlcx_rb_rptr_addr_lo; + uint32_t sdmax_rlcx_ib_cntl; + uint32_t sdmax_rlcx_ib_rptr; + uint32_t sdmax_rlcx_ib_offset; + uint32_t sdmax_rlcx_ib_base_lo; + uint32_t sdmax_rlcx_ib_base_hi; + uint32_t sdmax_rlcx_ib_size; + uint32_t sdmax_rlcx_skip_cntl; + uint32_t sdmax_rlcx_context_status; + uint32_t sdmax_rlcx_doorbell; + uint32_t sdmax_rlcx_virtual_addr; + uint32_t sdmax_rlcx_ape1_cntl; + uint32_t sdmax_rlcx_doorbell_log; + uint32_t reserved_22; + uint32_t reserved_23; + uint32_t reserved_24; + uint32_t reserved_25; + uint32_t reserved_26; + uint32_t reserved_27; + uint32_t reserved_28; + uint32_t reserved_29; + uint32_t reserved_30; + uint32_t reserved_31; + uint32_t reserved_32; + uint32_t reserved_33; + uint32_t reserved_34; + uint32_t reserved_35; + uint32_t reserved_36; + uint32_t reserved_37; + uint32_t reserved_38; + uint32_t reserved_39; + uint32_t reserved_40; + uint32_t reserved_41; + uint32_t reserved_42; + uint32_t reserved_43; + uint32_t reserved_44; + uint32_t reserved_45; + uint32_t reserved_46; + uint32_t reserved_47; + uint32_t reserved_48; + uint32_t reserved_49; + uint32_t reserved_50; + uint32_t reserved_51; + uint32_t reserved_52; + uint32_t reserved_53; + uint32_t reserved_54; + uint32_t reserved_55; + uint32_t reserved_56; + uint32_t reserved_57; + uint32_t reserved_58; + uint32_t reserved_59; + uint32_t reserved_60; + uint32_t reserved_61; + uint32_t reserved_62; + uint32_t reserved_63; + uint32_t reserved_64; + uint32_t reserved_65; + uint32_t reserved_66; + uint32_t reserved_67; + uint32_t reserved_68; + uint32_t reserved_69; + uint32_t reserved_70; + uint32_t reserved_71; + uint32_t reserved_72; + uint32_t reserved_73; + uint32_t reserved_74; + uint32_t reserved_75; + uint32_t reserved_76; + uint32_t reserved_77; + uint32_t reserved_78; + uint32_t reserved_79; + uint32_t reserved_80; + uint32_t reserved_81; + uint32_t reserved_82; + uint32_t reserved_83; + uint32_t reserved_84; + uint32_t reserved_85; + uint32_t reserved_86; + uint32_t reserved_87; + uint32_t reserved_88; + uint32_t reserved_89; + uint32_t reserved_90; + uint32_t reserved_91; + uint32_t reserved_92; + uint32_t reserved_93; + uint32_t reserved_94; + uint32_t reserved_95; + uint32_t reserved_96; + uint32_t reserved_97; + uint32_t reserved_98; + uint32_t reserved_99; + uint32_t reserved_100; + uint32_t reserved_101; + uint32_t reserved_102; + uint32_t reserved_103; + uint32_t reserved_104; + uint32_t reserved_105; + uint32_t reserved_106; + uint32_t reserved_107; + uint32_t reserved_108; + uint32_t reserved_109; + uint32_t reserved_110; + uint32_t reserved_111; + uint32_t reserved_112; + uint32_t reserved_113; + uint32_t reserved_114; + uint32_t reserved_115; + uint32_t reserved_116; + uint32_t reserved_117; + uint32_t reserved_118; + uint32_t reserved_119; + uint32_t reserved_120; + uint32_t reserved_121; + uint32_t reserved_122; + uint32_t reserved_123; + uint32_t reserved_124; + uint32_t reserved_125; + uint32_t reserved_126; + uint32_t reserved_127; +}; + +struct vi_mqd { + uint32_t header; + uint32_t compute_dispatch_initiator; + uint32_t compute_dim_x; + uint32_t compute_dim_y; + uint32_t compute_dim_z; + uint32_t compute_start_x; + uint32_t compute_start_y; + uint32_t compute_start_z; + uint32_t compute_num_thread_x; + uint32_t compute_num_thread_y; + uint32_t compute_num_thread_z; + uint32_t compute_pipelinestat_enable; + uint32_t compute_perfcount_enable; + uint32_t compute_pgm_lo; + uint32_t compute_pgm_hi; + uint32_t compute_tba_lo; + uint32_t compute_tba_hi; + uint32_t compute_tma_lo; + uint32_t compute_tma_hi; + uint32_t compute_pgm_rsrc1; + uint32_t compute_pgm_rsrc2; + uint32_t compute_vmid; + uint32_t compute_resource_limits; + uint32_t compute_static_thread_mgmt_se0; + uint32_t compute_static_thread_mgmt_se1; + uint32_t compute_tmpring_size; + uint32_t compute_static_thread_mgmt_se2; + uint32_t compute_static_thread_mgmt_se3; + uint32_t compute_restart_x; + uint32_t compute_restart_y; + uint32_t compute_restart_z; + uint32_t compute_thread_trace_enable; + uint32_t compute_misc_reserved; + uint32_t compute_dispatch_id; + uint32_t compute_threadgroup_id; + uint32_t compute_relaunch; + uint32_t compute_wave_restore_addr_lo; + uint32_t compute_wave_restore_addr_hi; + uint32_t compute_wave_restore_control; + uint32_t reserved_39; + uint32_t reserved_40; + uint32_t reserved_41; + uint32_t reserved_42; + uint32_t reserved_43; + uint32_t reserved_44; + uint32_t reserved_45; + uint32_t reserved_46; + uint32_t reserved_47; + uint32_t reserved_48; + uint32_t reserved_49; + uint32_t reserved_50; + uint32_t reserved_51; + uint32_t reserved_52; + uint32_t reserved_53; + uint32_t reserved_54; + uint32_t reserved_55; + uint32_t reserved_56; + uint32_t reserved_57; + uint32_t reserved_58; + uint32_t reserved_59; + uint32_t reserved_60; + uint32_t reserved_61; + uint32_t reserved_62; + uint32_t reserved_63; + uint32_t reserved_64; + uint32_t compute_user_data_0; + uint32_t compute_user_data_1; + uint32_t compute_user_data_2; + uint32_t compute_user_data_3; + uint32_t compute_user_data_4; + uint32_t compute_user_data_5; + uint32_t compute_user_data_6; + uint32_t compute_user_data_7; + uint32_t compute_user_data_8; + uint32_t compute_user_data_9; + uint32_t compute_user_data_10; + uint32_t compute_user_data_11; + uint32_t compute_user_data_12; + uint32_t compute_user_data_13; + uint32_t compute_user_data_14; + uint32_t compute_user_data_15; + uint32_t cp_compute_csinvoc_count_lo; + uint32_t cp_compute_csinvoc_count_hi; + uint32_t reserved_83; + uint32_t reserved_84; + uint32_t reserved_85; + uint32_t cp_mqd_query_time_lo; + uint32_t cp_mqd_query_time_hi; + uint32_t cp_mqd_connect_start_time_lo; + uint32_t cp_mqd_connect_start_time_hi; + uint32_t cp_mqd_connect_end_time_lo; + uint32_t cp_mqd_connect_end_time_hi; + uint32_t cp_mqd_connect_end_wf_count; + uint32_t cp_mqd_connect_end_pq_rptr; + uint32_t cp_mqd_connect_end_pq_wptr; + uint32_t cp_mqd_connect_end_ib_rptr; + uint32_t reserved_96; + uint32_t reserved_97; + uint32_t cp_mqd_save_start_time_lo; + uint32_t cp_mqd_save_start_time_hi; + uint32_t cp_mqd_save_end_time_lo; + uint32_t cp_mqd_save_end_time_hi; + uint32_t cp_mqd_restore_start_time_lo; + uint32_t cp_mqd_restore_start_time_hi; + uint32_t cp_mqd_restore_end_time_lo; + uint32_t cp_mqd_restore_end_time_hi; + uint32_t reserved_106; + uint32_t reserved_107; + uint32_t gds_cs_ctxsw_cnt0; + uint32_t gds_cs_ctxsw_cnt1; + uint32_t gds_cs_ctxsw_cnt2; + uint32_t gds_cs_ctxsw_cnt3; + uint32_t reserved_112; + uint32_t reserved_113; + uint32_t cp_pq_exe_status_lo; + uint32_t cp_pq_exe_status_hi; + uint32_t cp_packet_id_lo; + uint32_t cp_packet_id_hi; + uint32_t cp_packet_exe_status_lo; + uint32_t cp_packet_exe_status_hi; + uint32_t gds_save_base_addr_lo; + uint32_t gds_save_base_addr_hi; + uint32_t gds_save_mask_lo; + uint32_t gds_save_mask_hi; + uint32_t ctx_save_base_addr_lo; + uint32_t ctx_save_base_addr_hi; + uint32_t reserved_126; + uint32_t reserved_127; + uint32_t cp_mqd_base_addr_lo; + uint32_t cp_mqd_base_addr_hi; + uint32_t cp_hqd_active; + uint32_t cp_hqd_vmid; + uint32_t cp_hqd_persistent_state; + uint32_t cp_hqd_pipe_priority; + uint32_t cp_hqd_queue_priority; + uint32_t cp_hqd_quantum; + uint32_t cp_hqd_pq_base_lo; + uint32_t cp_hqd_pq_base_hi; + uint32_t cp_hqd_pq_rptr; + uint32_t cp_hqd_pq_rptr_report_addr_lo; + uint32_t cp_hqd_pq_rptr_report_addr_hi; + uint32_t cp_hqd_pq_wptr_poll_addr_lo; + uint32_t cp_hqd_pq_wptr_poll_addr_hi; + uint32_t cp_hqd_pq_doorbell_control; + uint32_t cp_hqd_pq_wptr; + uint32_t cp_hqd_pq_control; + uint32_t cp_hqd_ib_base_addr_lo; + uint32_t cp_hqd_ib_base_addr_hi; + uint32_t cp_hqd_ib_rptr; + uint32_t cp_hqd_ib_control; + uint32_t cp_hqd_iq_timer; + uint32_t cp_hqd_iq_rptr; + uint32_t cp_hqd_dequeue_request; + uint32_t cp_hqd_dma_offload; + uint32_t cp_hqd_sema_cmd; + uint32_t cp_hqd_msg_type; + uint32_t cp_hqd_atomic0_preop_lo; + uint32_t cp_hqd_atomic0_preop_hi; + uint32_t cp_hqd_atomic1_preop_lo; + uint32_t cp_hqd_atomic1_preop_hi; + uint32_t cp_hqd_hq_status0; + uint32_t cp_hqd_hq_control0; + uint32_t cp_mqd_control; + uint32_t cp_hqd_hq_status1; + uint32_t cp_hqd_hq_control1; + uint32_t cp_hqd_eop_base_addr_lo; + uint32_t cp_hqd_eop_base_addr_hi; + uint32_t cp_hqd_eop_control; + uint32_t cp_hqd_eop_rptr; + uint32_t cp_hqd_eop_wptr; + uint32_t cp_hqd_eop_done_events; + uint32_t cp_hqd_ctx_save_base_addr_lo; + uint32_t cp_hqd_ctx_save_base_addr_hi; + uint32_t cp_hqd_ctx_save_control; + uint32_t cp_hqd_cntl_stack_offset; + uint32_t cp_hqd_cntl_stack_size; + uint32_t cp_hqd_wg_state_offset; + uint32_t cp_hqd_ctx_save_size; + uint32_t cp_hqd_gds_resource_state; + uint32_t cp_hqd_error; + uint32_t cp_hqd_eop_wptr_mem; + uint32_t cp_hqd_eop_dones; + uint32_t reserved_182; + uint32_t reserved_183; + uint32_t reserved_184; + uint32_t reserved_185; + uint32_t reserved_186; + uint32_t reserved_187; + uint32_t reserved_188; + uint32_t reserved_189; + uint32_t reserved_190; + uint32_t reserved_191; + uint32_t iqtimer_pkt_header; + uint32_t iqtimer_pkt_dw0; + uint32_t iqtimer_pkt_dw1; + uint32_t iqtimer_pkt_dw2; + uint32_t iqtimer_pkt_dw3; + uint32_t iqtimer_pkt_dw4; + uint32_t iqtimer_pkt_dw5; + uint32_t iqtimer_pkt_dw6; + uint32_t iqtimer_pkt_dw7; + uint32_t iqtimer_pkt_dw8; + uint32_t iqtimer_pkt_dw9; + uint32_t iqtimer_pkt_dw10; + uint32_t iqtimer_pkt_dw11; + uint32_t iqtimer_pkt_dw12; + uint32_t iqtimer_pkt_dw13; + uint32_t iqtimer_pkt_dw14; + uint32_t iqtimer_pkt_dw15; + uint32_t iqtimer_pkt_dw16; + uint32_t iqtimer_pkt_dw17; + uint32_t iqtimer_pkt_dw18; + uint32_t iqtimer_pkt_dw19; + uint32_t iqtimer_pkt_dw20; + uint32_t iqtimer_pkt_dw21; + uint32_t iqtimer_pkt_dw22; + uint32_t iqtimer_pkt_dw23; + uint32_t iqtimer_pkt_dw24; + uint32_t iqtimer_pkt_dw25; + uint32_t iqtimer_pkt_dw26; + uint32_t iqtimer_pkt_dw27; + uint32_t iqtimer_pkt_dw28; + uint32_t iqtimer_pkt_dw29; + uint32_t iqtimer_pkt_dw30; + uint32_t iqtimer_pkt_dw31; + uint32_t reserved_225; + uint32_t reserved_226; + uint32_t reserved_227; + uint32_t set_resources_header; + uint32_t set_resources_dw1; + uint32_t set_resources_dw2; + uint32_t set_resources_dw3; + uint32_t set_resources_dw4; + uint32_t set_resources_dw5; + uint32_t set_resources_dw6; + uint32_t set_resources_dw7; + uint32_t reserved_236; + uint32_t reserved_237; + uint32_t reserved_238; + uint32_t reserved_239; + uint32_t queue_doorbell_id0; + uint32_t queue_doorbell_id1; + uint32_t queue_doorbell_id2; + uint32_t queue_doorbell_id3; + uint32_t queue_doorbell_id4; + uint32_t queue_doorbell_id5; + uint32_t queue_doorbell_id6; + uint32_t queue_doorbell_id7; + uint32_t queue_doorbell_id8; + uint32_t queue_doorbell_id9; + uint32_t queue_doorbell_id10; + uint32_t queue_doorbell_id11; + uint32_t queue_doorbell_id12; + uint32_t queue_doorbell_id13; + uint32_t queue_doorbell_id14; + uint32_t queue_doorbell_id15; +}; + +#endif /* VI_STRUCTS_H_ */ diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 42d2ffa08716..01ffe9bffe38 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -531,8 +531,6 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc, drm_crtc_vblank_off(crtc); - crtc->mode = *adj; - val = dcrtc->dumb_ctrl & ~CFG_DUMB_ENA; if (val != dcrtc->dumb_ctrl) { dcrtc->dumb_ctrl = val; diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c index 580e10acaa3a..60a688ef81c7 100644 --- a/drivers/gpu/drm/armada/armada_gem.c +++ b/drivers/gpu/drm/armada/armada_gem.c @@ -69,8 +69,9 @@ void armada_gem_free_object(struct drm_gem_object *obj) if (dobj->obj.import_attach) { /* We only ever display imported data */ - dma_buf_unmap_attachment(dobj->obj.import_attach, dobj->sgt, - DMA_TO_DEVICE); + if (dobj->sgt) + dma_buf_unmap_attachment(dobj->obj.import_attach, + dobj->sgt, DMA_TO_DEVICE); drm_prime_gem_destroy(&dobj->obj, NULL); } diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index c5b06fdb459c..e939faba7fcc 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -7,6 +7,7 @@ * published by the Free Software Foundation. */ #include <drm/drmP.h> +#include <drm/drm_plane_helper.h> #include "armada_crtc.h" #include "armada_drm.h" #include "armada_fb.h" @@ -85,16 +86,8 @@ static void armada_plane_vbl(struct armada_crtc *dcrtc, void *data) if (fb) armada_drm_queue_unref_work(dcrtc->crtc.dev, fb); -} -static unsigned armada_limit(int start, unsigned size, unsigned max) -{ - int end = start + size; - if (end < 0) - return 0; - if (start < 0) - start = 0; - return (unsigned)end > max ? max - start : end - start; + wake_up(&dplane->vbl.wait); } static int @@ -105,26 +98,39 @@ armada_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, { struct armada_plane *dplane = drm_to_armada_plane(plane); struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + struct drm_rect src = { + .x1 = src_x, + .y1 = src_y, + .x2 = src_x + src_w, + .y2 = src_y + src_h, + }; + struct drm_rect dest = { + .x1 = crtc_x, + .y1 = crtc_y, + .x2 = crtc_x + crtc_w, + .y2 = crtc_y + crtc_h, + }; + const struct drm_rect clip = { + .x2 = crtc->mode.hdisplay, + .y2 = crtc->mode.vdisplay, + }; uint32_t val, ctrl0; unsigned idx = 0; + bool visible; int ret; - crtc_w = armada_limit(crtc_x, crtc_w, dcrtc->crtc.mode.hdisplay); - crtc_h = armada_limit(crtc_y, crtc_h, dcrtc->crtc.mode.vdisplay); + ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, + 0, INT_MAX, true, false, &visible); + if (ret) + return ret; + ctrl0 = CFG_DMA_FMT(drm_fb_to_armada_fb(fb)->fmt) | CFG_DMA_MOD(drm_fb_to_armada_fb(fb)->mod) | CFG_CBSH_ENA | CFG_DMA_HSMOOTH | CFG_DMA_ENA; /* Does the position/size result in nothing to display? */ - if (crtc_w == 0 || crtc_h == 0) { + if (!visible) ctrl0 &= ~CFG_DMA_ENA; - } - - /* - * FIXME: if the starting point is off screen, we need to - * adjust src_x, src_y, src_w, src_h appropriately, and - * according to the scale. - */ if (!dcrtc->plane) { dcrtc->plane = plane; @@ -134,15 +140,19 @@ armada_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, /* FIXME: overlay on an interlaced display */ /* Just updating the position/size? */ if (plane->fb == fb && dplane->ctrl0 == ctrl0) { - val = (src_h & 0xffff0000) | src_w >> 16; + val = (drm_rect_height(&src) & 0xffff0000) | + drm_rect_width(&src) >> 16; dplane->src_hw = val; writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_HPXL_VLN); - val = crtc_h << 16 | crtc_w; + + val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest); dplane->dst_hw = val; writel_relaxed(val, dcrtc->base + LCD_SPU_DZM_HPXL_VLN); - val = crtc_y << 16 | crtc_x; + + val = dest.y1 << 16 | dest.x1; dplane->dst_yx = val; writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_OVSA_HPXL_VLN); + return 0; } else if (~dplane->ctrl0 & ctrl0 & CFG_DMA_ENA) { /* Power up the Y/U/V FIFOs on ENA 0->1 transitions */ @@ -150,15 +160,14 @@ armada_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, dcrtc->base + LCD_SPU_SRAM_PARA1); } - ret = wait_event_timeout(dplane->vbl.wait, - list_empty(&dplane->vbl.update.node), - HZ/25); - if (ret < 0) - return ret; + wait_event_timeout(dplane->vbl.wait, + list_empty(&dplane->vbl.update.node), + HZ/25); if (plane->fb != fb) { struct armada_gem_object *obj = drm_fb_obj(fb); - uint32_t sy, su, sv; + uint32_t addr[3], pixel_format; + int i, num_planes, hsub; /* * Take a reference on the new framebuffer - we want to @@ -178,26 +187,39 @@ armada_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, older_fb); } - src_y >>= 16; - src_x >>= 16; - sy = obj->dev_addr + fb->offsets[0] + src_y * fb->pitches[0] + - src_x * fb->bits_per_pixel / 8; - su = obj->dev_addr + fb->offsets[1] + src_y * fb->pitches[1] + - src_x; - sv = obj->dev_addr + fb->offsets[2] + src_y * fb->pitches[2] + - src_x; + src_y = src.y1 >> 16; + src_x = src.x1 >> 16; - armada_reg_queue_set(dplane->vbl.regs, idx, sy, + pixel_format = fb->pixel_format; + hsub = drm_format_horz_chroma_subsampling(pixel_format); + num_planes = drm_format_num_planes(pixel_format); + + /* + * Annoyingly, shifting a YUYV-format image by one pixel + * causes the U/V planes to toggle. Toggle the UV swap. + * (Unfortunately, this causes momentary colour flickering.) + */ + if (src_x & (hsub - 1) && num_planes == 1) + ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); + + for (i = 0; i < num_planes; i++) + addr[i] = obj->dev_addr + fb->offsets[i] + + src_y * fb->pitches[i] + + src_x * drm_format_plane_cpp(pixel_format, i); + for (; i < ARRAY_SIZE(addr); i++) + addr[i] = 0; + + armada_reg_queue_set(dplane->vbl.regs, idx, addr[0], LCD_SPU_DMA_START_ADDR_Y0); - armada_reg_queue_set(dplane->vbl.regs, idx, su, + armada_reg_queue_set(dplane->vbl.regs, idx, addr[1], LCD_SPU_DMA_START_ADDR_U0); - armada_reg_queue_set(dplane->vbl.regs, idx, sv, + armada_reg_queue_set(dplane->vbl.regs, idx, addr[2], LCD_SPU_DMA_START_ADDR_V0); - armada_reg_queue_set(dplane->vbl.regs, idx, sy, + armada_reg_queue_set(dplane->vbl.regs, idx, addr[0], LCD_SPU_DMA_START_ADDR_Y1); - armada_reg_queue_set(dplane->vbl.regs, idx, su, + armada_reg_queue_set(dplane->vbl.regs, idx, addr[1], LCD_SPU_DMA_START_ADDR_U1); - armada_reg_queue_set(dplane->vbl.regs, idx, sv, + armada_reg_queue_set(dplane->vbl.regs, idx, addr[2], LCD_SPU_DMA_START_ADDR_V1); val = fb->pitches[0] << 16 | fb->pitches[0]; @@ -208,24 +230,27 @@ armada_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, LCD_SPU_DMA_PITCH_UV); } - val = (src_h & 0xffff0000) | src_w >> 16; + val = (drm_rect_height(&src) & 0xffff0000) | drm_rect_width(&src) >> 16; if (dplane->src_hw != val) { dplane->src_hw = val; armada_reg_queue_set(dplane->vbl.regs, idx, val, LCD_SPU_DMA_HPXL_VLN); } - val = crtc_h << 16 | crtc_w; + + val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest); if (dplane->dst_hw != val) { dplane->dst_hw = val; armada_reg_queue_set(dplane->vbl.regs, idx, val, LCD_SPU_DZM_HPXL_VLN); } - val = crtc_y << 16 | crtc_x; + + val = dest.y1 << 16 | dest.x1; if (dplane->dst_yx != val) { dplane->dst_yx = val; armada_reg_queue_set(dplane->vbl.regs, idx, val, LCD_SPU_DMA_OVSA_HPXL_VLN); } + if (dplane->ctrl0 != ctrl0) { dplane->ctrl0 = ctrl0; armada_reg_queue_mod(dplane->vbl.regs, idx, ctrl0, @@ -279,7 +304,11 @@ static int armada_plane_disable(struct drm_plane *plane) static void armada_plane_destroy(struct drm_plane *plane) { - kfree(plane); + struct armada_plane *dplane = drm_to_armada_plane(plane); + + drm_plane_cleanup(plane); + + kfree(dplane); } static int armada_plane_set_property(struct drm_plane *plane, diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index f6f2fb58eb37..acebd1617264 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1463,24 +1463,18 @@ retry: if (get_user(obj_id, objs_ptr + copied_objs)) { ret = -EFAULT; - goto fail; + goto out; } obj = drm_mode_object_find(dev, obj_id, DRM_MODE_OBJECT_ANY); if (!obj || !obj->properties) { ret = -ENOENT; - goto fail; - } - - if (obj->type == DRM_MODE_OBJECT_PLANE) { - plane = obj_to_plane(obj); - plane_mask |= (1 << drm_plane_index(plane)); - plane->old_fb = plane->fb; + goto out; } if (get_user(count_props, count_props_ptr + copied_objs)) { ret = -EFAULT; - goto fail; + goto out; } copied_objs++; @@ -1492,28 +1486,34 @@ retry: if (get_user(prop_id, props_ptr + copied_props)) { ret = -EFAULT; - goto fail; + goto out; } prop = drm_property_find(dev, prop_id); if (!prop) { ret = -ENOENT; - goto fail; + goto out; } if (copy_from_user(&prop_value, prop_values_ptr + copied_props, sizeof(prop_value))) { ret = -EFAULT; - goto fail; + goto out; } ret = atomic_set_prop(state, obj, prop, prop_value); if (ret) - goto fail; + goto out; copied_props++; } + + if (obj->type == DRM_MODE_OBJECT_PLANE && count_props) { + plane = obj_to_plane(obj); + plane_mask |= (1 << drm_plane_index(plane)); + plane->old_fb = plane->fb; + } } if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT) { @@ -1523,7 +1523,7 @@ retry: e = create_vblank_event(dev, file_priv, arg->user_data); if (!e) { ret = -ENOMEM; - goto fail; + goto out; } crtc_state->event = e; @@ -1533,13 +1533,15 @@ retry: if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) { ret = drm_atomic_check_only(state); /* _check_only() does not free state, unlike _commit() */ - drm_atomic_state_free(state); + if (!ret) + drm_atomic_state_free(state); } else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) { ret = drm_atomic_async_commit(state); } else { ret = drm_atomic_commit(state); } +out: /* if succeeded, fixup legacy plane crtc/fb ptrs before dropping * locks (ie. while it is still safe to deref plane->state). We * need to do this here because the driver entry points cannot @@ -1552,41 +1554,35 @@ retry: drm_framebuffer_reference(new_fb); plane->fb = new_fb; plane->crtc = plane->state->crtc; - } else { - plane->old_fb = NULL; - } - if (plane->old_fb) { - drm_framebuffer_unreference(plane->old_fb); - plane->old_fb = NULL; + + if (plane->old_fb) + drm_framebuffer_unreference(plane->old_fb); } + plane->old_fb = NULL; } - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); - - return ret; + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + drm_modeset_backoff(&ctx); + goto retry; + } -fail: - if (ret == -EDEADLK) - goto backoff; + if (ret) { + if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT) { + for_each_crtc_in_state(state, crtc, crtc_state, i) { + if (!crtc_state->event) + continue; - if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT) { - for_each_crtc_in_state(state, crtc, crtc_state, i) { - destroy_vblank_event(dev, file_priv, crtc_state->event); - crtc_state->event = NULL; + destroy_vblank_event(dev, file_priv, + crtc_state->event); + } } - } - drm_atomic_state_free(state); + drm_atomic_state_free(state); + } drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; - -backoff: - drm_atomic_state_clear(state); - drm_modeset_backoff(&ctx); - - goto retry; } diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 5b59d5ad7d1c..f94cc371742e 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -665,10 +665,16 @@ drm_atomic_helper_update_legacy_modeset_state(struct drm_device *dev, /* set legacy state in the crtc structure */ for_each_crtc_in_state(old_state, crtc, old_crtc_state, i) { + struct drm_plane *primary = crtc->primary; + crtc->mode = crtc->state->mode; crtc->enabled = crtc->state->enable; - crtc->x = crtc->primary->state->src_x >> 16; - crtc->y = crtc->primary->state->src_y >> 16; + + if (drm_atomic_get_existing_plane_state(old_state, primary) && + primary->state->crtc == crtc) { + crtc->x = primary->state->src_x >> 16; + crtc->y = primary->state->src_y >> 16; + } if (crtc->state->enable) drm_calc_timestamping_constants(crtc, @@ -1915,10 +1921,6 @@ retry: if (ret != 0) goto fail; - /* TODO: ->page_flip is the only driver callback where the core - * doesn't update plane->fb. For now patch it up here. */ - plane->fb = plane->state->fb; - /* Driver takes ownership of state on successful async commit. */ return 0; fail: diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 9b23525c0ed0..192a5f9eeb74 100644 --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -53,6 +53,10 @@ struct drm_ctx_list { */ void drm_legacy_ctxbitmap_free(struct drm_device * dev, int ctx_handle) { + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return; + mutex_lock(&dev->struct_mutex); idr_remove(&dev->ctx_idr, ctx_handle); mutex_unlock(&dev->struct_mutex); @@ -85,10 +89,13 @@ static int drm_legacy_ctxbitmap_next(struct drm_device * dev) * * Initialise the drm_device::ctx_idr */ -int drm_legacy_ctxbitmap_init(struct drm_device * dev) +void drm_legacy_ctxbitmap_init(struct drm_device * dev) { + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return; + idr_init(&dev->ctx_idr); - return 0; } /** @@ -101,6 +108,10 @@ int drm_legacy_ctxbitmap_init(struct drm_device * dev) */ void drm_legacy_ctxbitmap_cleanup(struct drm_device * dev) { + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return; + mutex_lock(&dev->struct_mutex); idr_destroy(&dev->ctx_idr); mutex_unlock(&dev->struct_mutex); @@ -119,6 +130,10 @@ void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file) { struct drm_ctx_list *pos, *tmp; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return; + mutex_lock(&dev->ctxlist_mutex); list_for_each_entry_safe(pos, tmp, &dev->ctxlist, head) { @@ -161,6 +176,10 @@ int drm_legacy_getsareactx(struct drm_device *dev, void *data, struct drm_local_map *map; struct drm_map_list *_entry; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->struct_mutex); map = idr_find(&dev->ctx_idr, request->ctx_id); @@ -205,6 +224,10 @@ int drm_legacy_setsareactx(struct drm_device *dev, void *data, struct drm_local_map *map = NULL; struct drm_map_list *r_list = NULL; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + mutex_lock(&dev->struct_mutex); list_for_each_entry(r_list, &dev->maplist, head) { if (r_list->map @@ -305,6 +328,10 @@ int drm_legacy_resctx(struct drm_device *dev, void *data, struct drm_ctx ctx; int i; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if (res->count >= DRM_RESERVED_CONTEXTS) { memset(&ctx, 0, sizeof(ctx)); for (i = 0; i < DRM_RESERVED_CONTEXTS; i++) { @@ -335,6 +362,10 @@ int drm_legacy_addctx(struct drm_device *dev, void *data, struct drm_ctx_list *ctx_entry; struct drm_ctx *ctx = data; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + ctx->handle = drm_legacy_ctxbitmap_next(dev); if (ctx->handle == DRM_KERNEL_CONTEXT) { /* Skip kernel's context and get a new one. */ @@ -378,6 +409,10 @@ int drm_legacy_getctx(struct drm_device *dev, void *data, { struct drm_ctx *ctx = data; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + /* This is 0, because we don't handle any context flags */ ctx->flags = 0; @@ -400,6 +435,10 @@ int drm_legacy_switchctx(struct drm_device *dev, void *data, { struct drm_ctx *ctx = data; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + DRM_DEBUG("%d\n", ctx->handle); return drm_context_switch(dev, dev->last_context, ctx->handle); } @@ -420,6 +459,10 @@ int drm_legacy_newctx(struct drm_device *dev, void *data, { struct drm_ctx *ctx = data; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + DRM_DEBUG("%d\n", ctx->handle); drm_context_switch_complete(dev, file_priv, ctx->handle); @@ -442,6 +485,10 @@ int drm_legacy_rmctx(struct drm_device *dev, void *data, { struct drm_ctx *ctx = data; + if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && + drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + DRM_DEBUG("%d\n", ctx->handle); if (ctx->handle != DRM_KERNEL_CONTEXT) { if (dev->driver->context_dtor) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 9c978d9de3b8..a717d18e7a97 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2706,8 +2706,11 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; - /* For some reason crtc x/y offsets are signed internally. */ - if (crtc_req->x > INT_MAX || crtc_req->y > INT_MAX) + /* + * Universal plane src offsets are only 16.16, prevent havoc for + * drivers using universal plane code internally. + */ + if (crtc_req->x & 0xffff0000 || crtc_req->y & 0xffff0000) return -ERANGE; drm_modeset_lock_all(dev); @@ -4298,7 +4301,6 @@ void drm_property_unreference_blob(struct drm_property_blob *blob) mutex_unlock(&dev->mode_config.blob_lock); else might_lock(&dev->mode_config.blob_lock); - } EXPORT_SYMBOL(drm_property_unreference_blob); @@ -5344,13 +5346,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, /* Keep the old fb, don't unref it. */ crtc->primary->old_fb = NULL; } else { - /* - * Warn if the driver hasn't properly updated the crtc->fb - * field to reflect that the new framebuffer is now used. - * Failing to do so will screw with the reference counting - * on framebuffers. - */ - WARN_ON(crtc->primary->fb != fb); + crtc->primary->fb = fb; /* Unref only the old framebuffer. */ fb = NULL; } diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 393114df88a3..d3dfb0ebbeb2 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -928,15 +928,15 @@ int drm_helper_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mod if (crtc->funcs->atomic_duplicate_state) crtc_state = crtc->funcs->atomic_duplicate_state(crtc); else { - crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); - if (!crtc_state) - return -ENOMEM; - if (crtc->state) - __drm_atomic_helper_crtc_duplicate_state(crtc, crtc_state); - else - crtc_state->crtc = crtc; + if (!crtc->state) + drm_atomic_helper_crtc_reset(crtc); + + crtc_state = drm_atomic_helper_crtc_duplicate_state(crtc); } + if (!crtc_state) + return -ENOMEM; + crtc_state->planes_changed = true; crtc_state->mode_changed = true; ret = drm_atomic_set_mode_for_crtc(crtc_state, mode); @@ -957,11 +957,11 @@ int drm_helper_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mod ret = drm_helper_crtc_mode_set_base(crtc, x, y, old_fb); out: - if (crtc->funcs->atomic_destroy_state) - crtc->funcs->atomic_destroy_state(crtc, crtc_state); - else { - __drm_atomic_helper_crtc_destroy_state(crtc, crtc_state); - kfree(crtc_state); + if (crtc_state) { + if (crtc->funcs->atomic_destroy_state) + crtc->funcs->atomic_destroy_state(crtc, crtc_state); + else + drm_atomic_helper_crtc_destroy_state(crtc, crtc_state); } return ret; diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index b7bf4ce8c012..9b51fe11ff19 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -582,11 +582,7 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver, if (drm_ht_create(&dev->map_hash, 12)) goto err_minors; - ret = drm_legacy_ctxbitmap_init(dev); - if (ret) { - DRM_ERROR("Cannot allocate memory for context bitmap.\n"); - goto err_ht; - } + drm_legacy_ctxbitmap_init(dev); if (drm_core_check_feature(dev, DRIVER_GEM)) { ret = drm_gem_init(dev); @@ -600,7 +596,6 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver, err_ctxbitmap: drm_legacy_ctxbitmap_cleanup(dev); -err_ht: drm_ht_remove(&dev->map_hash); err_minors: drm_minor_free(dev, DRM_MINOR_LEGACY); diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index cac422916c7a..eaf652b389d9 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -429,24 +429,6 @@ static bool drm_fb_helper_force_kernel_mode(void) return error; } -static int drm_fb_helper_panic(struct notifier_block *n, unsigned long ununsed, - void *panic_str) -{ - /* - * It's a waste of time and effort to switch back to text console - * if the kernel should reboot before panic messages can be seen. - */ - if (panic_timeout < 0) - return 0; - - pr_err("panic occurred, switching back to text console\n"); - return drm_fb_helper_force_kernel_mode(); -} - -static struct notifier_block paniced = { - .notifier_call = drm_fb_helper_panic, -}; - static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper) { struct drm_device *dev = fb_helper->dev; @@ -672,9 +654,6 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper) if (!list_empty(&fb_helper->kernel_fb_list)) { list_del(&fb_helper->kernel_fb_list); if (list_empty(&kernel_fb_helper_list)) { - pr_info("drm: unregistered panic notifier\n"); - atomic_notifier_chain_unregister(&panic_notifier_list, - &paniced); unregister_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); } } @@ -1109,12 +1088,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, dev_info(fb_helper->dev->dev, "fb%d: %s frame buffer device\n", info->node, info->fix.id); - /* Switch back to kernel console on panic */ - /* multi card linked list maybe */ if (list_empty(&kernel_fb_helper_list)) { - dev_info(fb_helper->dev->dev, "registered panic notifier\n"); - atomic_notifier_chain_register(&panic_notifier_list, - &paniced); register_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); } diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 16a164770713..27a4228b4343 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -778,22 +778,14 @@ void drm_gem_vm_open(struct vm_area_struct *vma) struct drm_gem_object *obj = vma->vm_private_data; drm_gem_object_reference(obj); - - mutex_lock(&obj->dev->struct_mutex); - drm_vm_open_locked(obj->dev, vma); - mutex_unlock(&obj->dev->struct_mutex); } EXPORT_SYMBOL(drm_gem_vm_open); void drm_gem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; - struct drm_device *dev = obj->dev; - mutex_lock(&dev->struct_mutex); - drm_vm_close_locked(obj->dev, vma); - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); + drm_gem_object_unreference_unlocked(obj); } EXPORT_SYMBOL(drm_gem_vm_close); @@ -850,7 +842,6 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, */ drm_gem_object_reference(obj); - drm_vm_open_locked(dev, vma); return 0; } EXPORT_SYMBOL(drm_gem_mmap_obj); diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index aa8bbb460c57..ddfa6014c2c2 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -70,6 +70,8 @@ #define DRM_IOCTL_WAIT_VBLANK32 DRM_IOWR(0x3a, drm_wait_vblank32_t) +#define DRM_IOCTL_MODE_ADDFB232 DRM_IOWR(0xb8, drm_mode_fb_cmd232_t) + typedef struct drm_version_32 { int version_major; /**< Major version */ int version_minor; /**< Minor version */ @@ -93,7 +95,7 @@ static int compat_drm_version(struct file *file, unsigned int cmd, return -EFAULT; version = compat_alloc_user_space(sizeof(*version)); - if (!access_ok(VERIFY_WRITE, version, sizeof(*version))) + if (!version) return -EFAULT; if (__put_user(v32.name_len, &version->name_len) || __put_user((void __user *)(unsigned long)v32.name, @@ -140,7 +142,7 @@ static int compat_drm_getunique(struct file *file, unsigned int cmd, return -EFAULT; u = compat_alloc_user_space(sizeof(*u)); - if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) + if (!u) return -EFAULT; if (__put_user(uq32.unique_len, &u->unique_len) || __put_user((void __user *)(unsigned long)uq32.unique, @@ -168,7 +170,7 @@ static int compat_drm_setunique(struct file *file, unsigned int cmd, return -EFAULT; u = compat_alloc_user_space(sizeof(*u)); - if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) + if (!u) return -EFAULT; if (__put_user(uq32.unique_len, &u->unique_len) || __put_user((void __user *)(unsigned long)uq32.unique, @@ -200,7 +202,7 @@ static int compat_drm_getmap(struct file *file, unsigned int cmd, return -EFAULT; map = compat_alloc_user_space(sizeof(*map)); - if (!access_ok(VERIFY_WRITE, map, sizeof(*map))) + if (!map) return -EFAULT; if (__put_user(idx, &map->offset)) return -EFAULT; @@ -237,7 +239,7 @@ static int compat_drm_addmap(struct file *file, unsigned int cmd, return -EFAULT; map = compat_alloc_user_space(sizeof(*map)); - if (!access_ok(VERIFY_WRITE, map, sizeof(*map))) + if (!map) return -EFAULT; if (__put_user(m32.offset, &map->offset) || __put_user(m32.size, &map->size) @@ -277,7 +279,7 @@ static int compat_drm_rmmap(struct file *file, unsigned int cmd, return -EFAULT; map = compat_alloc_user_space(sizeof(*map)); - if (!access_ok(VERIFY_WRITE, map, sizeof(*map))) + if (!map) return -EFAULT; if (__put_user((void *)(unsigned long)handle, &map->handle)) return -EFAULT; @@ -306,7 +308,7 @@ static int compat_drm_getclient(struct file *file, unsigned int cmd, return -EFAULT; client = compat_alloc_user_space(sizeof(*client)); - if (!access_ok(VERIFY_WRITE, client, sizeof(*client))) + if (!client) return -EFAULT; if (__put_user(idx, &client->idx)) return -EFAULT; @@ -345,7 +347,7 @@ static int compat_drm_getstats(struct file *file, unsigned int cmd, int i, err; stats = compat_alloc_user_space(sizeof(*stats)); - if (!access_ok(VERIFY_WRITE, stats, sizeof(*stats))) + if (!stats) return -EFAULT; err = drm_ioctl(file, DRM_IOCTL_GET_STATS, (unsigned long)stats); @@ -382,8 +384,7 @@ static int compat_drm_addbufs(struct file *file, unsigned int cmd, unsigned long agp_start; buf = compat_alloc_user_space(sizeof(*buf)); - if (!access_ok(VERIFY_WRITE, buf, sizeof(*buf)) - || !access_ok(VERIFY_WRITE, argp, sizeof(*argp))) + if (!buf || !access_ok(VERIFY_WRITE, argp, sizeof(*argp))) return -EFAULT; if (__copy_in_user(buf, argp, offsetof(drm_buf_desc32_t, agp_start)) @@ -414,7 +415,7 @@ static int compat_drm_markbufs(struct file *file, unsigned int cmd, return -EFAULT; buf = compat_alloc_user_space(sizeof(*buf)); - if (!access_ok(VERIFY_WRITE, buf, sizeof(*buf))) + if (!buf) return -EFAULT; if (__put_user(b32.size, &buf->size) @@ -455,7 +456,7 @@ static int compat_drm_infobufs(struct file *file, unsigned int cmd, nbytes = sizeof(*request) + count * sizeof(struct drm_buf_desc); request = compat_alloc_user_space(nbytes); - if (!access_ok(VERIFY_WRITE, request, nbytes)) + if (!request) return -EFAULT; list = (struct drm_buf_desc *) (request + 1); @@ -516,7 +517,7 @@ static int compat_drm_mapbufs(struct file *file, unsigned int cmd, return -EINVAL; nbytes = sizeof(*request) + count * sizeof(struct drm_buf_pub); request = compat_alloc_user_space(nbytes); - if (!access_ok(VERIFY_WRITE, request, nbytes)) + if (!request) return -EFAULT; list = (struct drm_buf_pub *) (request + 1); @@ -563,7 +564,7 @@ static int compat_drm_freebufs(struct file *file, unsigned int cmd, return -EFAULT; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request))) + if (!request) return -EFAULT; if (__put_user(req32.count, &request->count) || __put_user((int __user *)(unsigned long)req32.list, @@ -589,7 +590,7 @@ static int compat_drm_setsareactx(struct file *file, unsigned int cmd, return -EFAULT; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request))) + if (!request) return -EFAULT; if (__put_user(req32.ctx_id, &request->ctx_id) || __put_user((void *)(unsigned long)req32.handle, @@ -613,7 +614,7 @@ static int compat_drm_getsareactx(struct file *file, unsigned int cmd, return -EFAULT; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request))) + if (!request) return -EFAULT; if (__put_user(ctx_id, &request->ctx_id)) return -EFAULT; @@ -646,7 +647,7 @@ static int compat_drm_resctx(struct file *file, unsigned int cmd, return -EFAULT; res = compat_alloc_user_space(sizeof(*res)); - if (!access_ok(VERIFY_WRITE, res, sizeof(*res))) + if (!res) return -EFAULT; if (__put_user(res32.count, &res->count) || __put_user((struct drm_ctx __user *) (unsigned long)res32.contexts, @@ -689,7 +690,7 @@ static int compat_drm_dma(struct file *file, unsigned int cmd, return -EFAULT; d = compat_alloc_user_space(sizeof(*d)); - if (!access_ok(VERIFY_WRITE, d, sizeof(*d))) + if (!d) return -EFAULT; if (__put_user(d32.context, &d->context) @@ -764,7 +765,7 @@ static int compat_drm_agp_info(struct file *file, unsigned int cmd, int err; info = compat_alloc_user_space(sizeof(*info)); - if (!access_ok(VERIFY_WRITE, info, sizeof(*info))) + if (!info) return -EFAULT; err = drm_ioctl(file, DRM_IOCTL_AGP_INFO, (unsigned long)info); @@ -807,7 +808,7 @@ static int compat_drm_agp_alloc(struct file *file, unsigned int cmd, return -EFAULT; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + if (!request || __put_user(req32.size, &request->size) || __put_user(req32.type, &request->type)) return -EFAULT; @@ -834,7 +835,7 @@ static int compat_drm_agp_free(struct file *file, unsigned int cmd, u32 handle; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + if (!request || get_user(handle, &argp->handle) || __put_user(handle, &request->handle)) return -EFAULT; @@ -858,7 +859,7 @@ static int compat_drm_agp_bind(struct file *file, unsigned int cmd, return -EFAULT; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + if (!request || __put_user(req32.handle, &request->handle) || __put_user(req32.offset, &request->offset)) return -EFAULT; @@ -874,7 +875,7 @@ static int compat_drm_agp_unbind(struct file *file, unsigned int cmd, u32 handle; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + if (!request || get_user(handle, &argp->handle) || __put_user(handle, &request->handle)) return -EFAULT; @@ -897,8 +898,7 @@ static int compat_drm_sg_alloc(struct file *file, unsigned int cmd, unsigned long x; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) - || !access_ok(VERIFY_WRITE, argp, sizeof(*argp)) + if (!request || !access_ok(VERIFY_WRITE, argp, sizeof(*argp)) || __get_user(x, &argp->size) || __put_user(x, &request->size)) return -EFAULT; @@ -923,8 +923,7 @@ static int compat_drm_sg_free(struct file *file, unsigned int cmd, unsigned long x; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) - || !access_ok(VERIFY_WRITE, argp, sizeof(*argp)) + if (!request || !access_ok(VERIFY_WRITE, argp, sizeof(*argp)) || __get_user(x, &argp->handle) || __put_user(x << PAGE_SHIFT, &request->handle)) return -EFAULT; @@ -952,7 +951,7 @@ static int compat_drm_update_draw(struct file *file, unsigned int cmd, return -EFAULT; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) || + if (!request || __put_user(update32.handle, &request->handle) || __put_user(update32.type, &request->type) || __put_user(update32.num, &request->num) || @@ -994,7 +993,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, return -EFAULT; request = compat_alloc_user_space(sizeof(*request)); - if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + if (!request || __put_user(req32.request.type, &request->request.type) || __put_user(req32.request.sequence, &request->request.sequence) || __put_user(req32.request.signal, &request->request.signal)) @@ -1016,6 +1015,63 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, return 0; } +typedef struct drm_mode_fb_cmd232 { + u32 fb_id; + u32 width; + u32 height; + u32 pixel_format; + u32 flags; + u32 handles[4]; + u32 pitches[4]; + u32 offsets[4]; + u64 modifier[4]; +} __attribute__((packed)) drm_mode_fb_cmd232_t; + +static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct drm_mode_fb_cmd232 __user *argp = (void __user *)arg; + struct drm_mode_fb_cmd232 req32; + struct drm_mode_fb_cmd2 __user *req64; + int i; + int err; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + req64 = compat_alloc_user_space(sizeof(*req64)); + + if (!access_ok(VERIFY_WRITE, req64, sizeof(*req64)) + || __put_user(req32.width, &req64->width) + || __put_user(req32.height, &req64->height) + || __put_user(req32.pixel_format, &req64->pixel_format) + || __put_user(req32.flags, &req64->flags)) + return -EFAULT; + + for (i = 0; i < 4; i++) { + if (__put_user(req32.handles[i], &req64->handles[i])) + return -EFAULT; + if (__put_user(req32.pitches[i], &req64->pitches[i])) + return -EFAULT; + if (__put_user(req32.offsets[i], &req64->offsets[i])) + return -EFAULT; + if (__put_user(req32.modifier[i], &req64->modifier[i])) + return -EFAULT; + } + + err = drm_ioctl(file, DRM_IOCTL_MODE_ADDFB2, (unsigned long)req64); + if (err) + return err; + + if (__get_user(req32.fb_id, &req64->fb_id)) + return -EFAULT; + + if (copy_to_user(argp, &req32, sizeof(req32))) + return -EFAULT; + + return 0; +} + static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version, [DRM_IOCTL_NR(DRM_IOCTL_GET_UNIQUE32)] = compat_drm_getunique, @@ -1048,6 +1104,7 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw, #endif [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank, + [DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2, }; /** diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index f9cc68fbd2a3..9fd784b8966b 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1267,7 +1267,7 @@ EXPORT_SYMBOL(drm_crtc_vblank_off); /** * drm_crtc_vblank_reset - reset vblank state to off on a CRTC - * @crtc: CRTC in question + * @drm_crtc: CRTC in question * * Drivers can use this function to reset the vblank state to off at load time. * Drivers should use this together with the drm_crtc_vblank_off() and diff --git a/drivers/gpu/drm/drm_legacy.h b/drivers/gpu/drm/drm_legacy.h index c1dc61473db5..9b731786e4db 100644 --- a/drivers/gpu/drm/drm_legacy.h +++ b/drivers/gpu/drm/drm_legacy.h @@ -42,7 +42,7 @@ struct drm_file; #define DRM_KERNEL_CONTEXT 0 #define DRM_RESERVED_CONTEXTS 1 -int drm_legacy_ctxbitmap_init(struct drm_device *dev); +void drm_legacy_ctxbitmap_init(struct drm_device *dev); void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev); void drm_legacy_ctxbitmap_free(struct drm_device *dev, int ctx_handle); void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file); diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index f861361a635e..4924d381b664 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -61,6 +61,9 @@ int drm_legacy_lock(struct drm_device *dev, void *data, struct drm_master *master = file_priv->master; int ret = 0; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + ++file_priv->lock_count; if (lock->context == DRM_KERNEL_CONTEXT) { @@ -153,6 +156,9 @@ int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_ struct drm_lock *lock = data; struct drm_master *master = file_priv->master; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", task_pid_nr(current), lock->context); diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 2f0ed11024eb..b07a213f5655 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -525,10 +525,12 @@ int drm_plane_helper_update(struct drm_plane *plane, struct drm_crtc *crtc, if (plane->funcs->atomic_duplicate_state) plane_state = plane->funcs->atomic_duplicate_state(plane); - else if (plane->state) + else { + if (!plane->state) + drm_atomic_helper_plane_reset(plane); + plane_state = drm_atomic_helper_plane_duplicate_state(plane); - else - plane_state = kzalloc(sizeof(*plane_state), GFP_KERNEL); + } if (!plane_state) return -ENOMEM; plane_state->plane = plane; @@ -572,10 +574,12 @@ int drm_plane_helper_disable(struct drm_plane *plane) if (plane->funcs->atomic_duplicate_state) plane_state = plane->funcs->atomic_duplicate_state(plane); - else if (plane->state) + else { + if (!plane->state) + drm_atomic_helper_plane_reset(plane); + plane_state = drm_atomic_helper_plane_duplicate_state(plane); - else - plane_state = kzalloc(sizeof(*plane_state), GFP_KERNEL); + } if (!plane_state) return -ENOMEM; plane_state->plane = plane; diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index 214eceefc981..e671ad369416 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -301,7 +301,7 @@ static void imx_tve_encoder_prepare(struct drm_encoder *encoder) switch (tve->mode) { case TVE_MODE_VGA: - imx_drm_set_bus_format_pins(encoder, MEDIA_BUS_FMT_YUV8_1X24, + imx_drm_set_bus_format_pins(encoder, MEDIA_BUS_FMT_GBR888_1X24, tve->hsync_pin, tve->vsync_pin); break; case TVE_MODE_TVOUT: diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 74a9ce40ddc4..b4deb9cf9d71 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -21,6 +21,7 @@ #include <drm/drm_panel.h> #include <linux/videodev2.h> #include <video/of_display_timing.h> +#include <linux/of_graph.h> #include "imx-drm.h" @@ -208,7 +209,7 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data) { struct drm_device *drm = data; struct device_node *np = dev->of_node; - struct device_node *panel_node; + struct device_node *port; const u8 *edidp; struct imx_parallel_display *imxpd; int ret; @@ -234,11 +235,19 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data) imxpd->bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI; } - panel_node = of_parse_phandle(np, "fsl,panel", 0); - if (panel_node) { - imxpd->panel = of_drm_find_panel(panel_node); - if (!imxpd->panel) - return -EPROBE_DEFER; + /* port@1 is the output port */ + port = of_graph_get_port_by_id(np, 1); + if (port) { + struct device_node *endpoint, *remote; + + endpoint = of_get_child_by_name(port, "endpoint"); + if (endpoint) { + remote = of_graph_get_remote_port_parent(endpoint); + if (remote) + imxpd->panel = of_drm_find_panel(remote); + if (!imxpd->panel) + return -EPROBE_DEFER; + } } imxpd->dev = dev; diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c index c36b8304042b..958cf3cf082d 100644 --- a/drivers/gpu/drm/mgag200/mgag200_fb.c +++ b/drivers/gpu/drm/mgag200/mgag200_fb.c @@ -167,7 +167,6 @@ static int mgag200fb_create(struct drm_fb_helper *helper, struct drm_framebuffer *fb; struct drm_gem_object *gobj = NULL; struct device *device = &dev->pdev->dev; - struct mgag200_bo *bo; int ret; void *sysram; int size; @@ -185,7 +184,6 @@ static int mgag200fb_create(struct drm_fb_helper *helper, DRM_ERROR("failed to create fbcon backing object %d\n", ret); return ret; } - bo = gem_to_mga_bo(gobj); sysram = vmalloc(size); if (!sysram) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index ad4b9010dfb0..cd75cff096e1 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -158,7 +158,7 @@ static int mga_g200se_set_plls(struct mga_device *mdev, long clock) static int mga_g200wb_set_plls(struct mga_device *mdev, long clock) { unsigned int vcomax, vcomin, pllreffreq; - unsigned int delta, tmpdelta, permitteddelta; + unsigned int delta, tmpdelta; unsigned int testp, testm, testn; unsigned int p, m, n; unsigned int computed; @@ -172,7 +172,6 @@ static int mga_g200wb_set_plls(struct mga_device *mdev, long clock) pllreffreq = 48000; delta = 0xffffffff; - permitteddelta = clock * 5 / 1000; for (testp = 1; testp < 9; testp++) { if (clock * testp > vcomax) @@ -298,7 +297,7 @@ static int mga_g200wb_set_plls(struct mga_device *mdev, long clock) static int mga_g200ev_set_plls(struct mga_device *mdev, long clock) { unsigned int vcomax, vcomin, pllreffreq; - unsigned int delta, tmpdelta, permitteddelta; + unsigned int delta, tmpdelta; unsigned int testp, testm, testn; unsigned int p, m, n; unsigned int computed; @@ -310,7 +309,6 @@ static int mga_g200ev_set_plls(struct mga_device *mdev, long clock) pllreffreq = 50000; delta = 0xffffffff; - permitteddelta = clock * 5 / 1000; for (testp = 16; testp > 0; testp--) { if (clock * testp > vcomax) @@ -392,7 +390,7 @@ static int mga_g200ev_set_plls(struct mga_device *mdev, long clock) static int mga_g200eh_set_plls(struct mga_device *mdev, long clock) { unsigned int vcomax, vcomin, pllreffreq; - unsigned int delta, tmpdelta, permitteddelta; + unsigned int delta, tmpdelta; unsigned int testp, testm, testn; unsigned int p, m, n; unsigned int computed; @@ -406,7 +404,6 @@ static int mga_g200eh_set_plls(struct mga_device *mdev, long clock) pllreffreq = 33333; delta = 0xffffffff; - permitteddelta = clock * 5 / 1000; for (testp = 16; testp > 0; testp >>= 1) { if (clock * testp > vcomax) diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index d16964ea0ed4..05108b505fbf 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -378,7 +378,7 @@ int mgag200_bo_pin(struct mgag200_bo *bo, u32 pl_flag, u64 *gpu_addr) int mgag200_bo_unpin(struct mgag200_bo *bo) { - int i, ret; + int i; if (!bo->pin_count) { DRM_ERROR("unpin bad %p\n", bo); return 0; @@ -389,11 +389,7 @@ int mgag200_bo_unpin(struct mgag200_bo *bo) for (i = 0; i < bo->placement.num_placement ; i++) bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; - ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); - if (ret) - return ret; - - return 0; + return ttm_bo_validate(&bo->bo, &bo->placement, false, false); } int mgag200_bo_push_sysram(struct mgag200_bo *bo) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 649024d4daf1..36b40c9252b5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -943,7 +943,8 @@ static struct drm_driver driver_stub = { .driver_features = DRIVER_USE_AGP | - DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_RENDER, + DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_RENDER | + DRIVER_KMS_LEGACY_CONTEXT, .load = nouveau_drm_load, .unload = nouveau_drm_unload, diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 8730562323a8..4a09947be244 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5818,7 +5818,7 @@ int ci_dpm_init(struct radeon_device *rdev) tmp |= DPM_ENABLED; break; default: - DRM_ERROR("Invalid PCC GPIO: %u!\n", gpio.shift); + DRM_DEBUG("Invalid PCC GPIO: %u!\n", gpio.shift); break; } WREG32_SMC(CNB_PWRMGT_CNTL, tmp); diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 5450fa95a47e..c4777c8d0312 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -260,8 +260,10 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, } } } - mb(); - radeon_gart_tlb_flush(rdev); + if (rdev->gart.ptr) { + mb(); + radeon_gart_tlb_flush(rdev); + } } /** @@ -306,8 +308,10 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, page_base += RADEON_GPU_PAGE_SIZE; } } - mb(); - radeon_gart_tlb_flush(rdev); + if (rdev->gart.ptr) { + mb(); + radeon_gart_tlb_flush(rdev); + } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 013ec7106e55..3dcc5733ff69 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -36,6 +36,7 @@ void radeon_gem_object_free(struct drm_gem_object *gobj) if (robj) { if (robj->gem_base.import_attach) drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); + radeon_mn_unregister(robj); radeon_bo_unref(&robj); } } diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index e476c331f3fa..9a4d69e59401 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -845,7 +845,8 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data; break; - case KGD_ENGINE_SDMA: + case KGD_ENGINE_SDMA1: + case KGD_ENGINE_SDMA2: hdr = (const union radeon_firmware_header *) rdev->sdma_fw->data; break; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 318165d4855c..676362769b8d 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -75,7 +75,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); - radeon_mn_unregister(bo); mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 1dbdf3230dae..787cd8fd897f 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2926,6 +2926,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, { 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 01b558fe3695..9a0c2911272a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -555,7 +555,6 @@ static struct platform_driver rockchip_drm_platform_driver = { .probe = rockchip_drm_platform_probe, .remove = rockchip_drm_platform_remove, .driver = { - .owner = THIS_MODULE, .name = "rockchip-drm", .of_match_table = rockchip_drm_dt_ids, .pm = &rockchip_drm_pm_ops, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index 77d52893d40f..002645bb5bbf 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -162,7 +162,8 @@ static void rockchip_drm_output_poll_changed(struct drm_device *dev) struct rockchip_drm_private *private = dev->dev_private; struct drm_fb_helper *fb_helper = &private->fbdev_helper; - drm_fb_helper_hotplug_event(fb_helper); + if (fb_helper) + drm_fb_helper_hotplug_event(fb_helper); } static const struct drm_mode_config_funcs rockchip_drm_mode_config_funcs = { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index eb2282cc4a56..eba5f8a52fbd 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -54,55 +54,56 @@ static void rockchip_gem_free_buf(struct rockchip_gem_object *rk_obj) &rk_obj->dma_attrs); } -int rockchip_gem_mmap_buf(struct drm_gem_object *obj, - struct vm_area_struct *vma) +static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) + { + int ret; struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj); struct drm_device *drm = obj->dev; - unsigned long vm_size; - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; - vm_size = vma->vm_end - vma->vm_start; - - if (vm_size > obj->size) - return -EINVAL; + /* + * dma_alloc_attrs() allocated a struct page table for rk_obj, so clear + * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap(). + */ + vma->vm_flags &= ~VM_PFNMAP; - return dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, + ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, obj->size, &rk_obj->dma_attrs); + if (ret) + drm_gem_vm_close(vma); + + return ret; } -/* drm driver mmap file operations */ -int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma) +int rockchip_gem_mmap_buf(struct drm_gem_object *obj, + struct vm_area_struct *vma) { - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_gem_object *obj; - struct drm_vma_offset_node *node; + struct drm_device *drm = obj->dev; int ret; - if (drm_device_is_unplugged(dev)) - return -ENODEV; + mutex_lock(&drm->struct_mutex); + ret = drm_gem_mmap_obj(obj, obj->size, vma); + mutex_unlock(&drm->struct_mutex); + if (ret) + return ret; - mutex_lock(&dev->struct_mutex); + return rockchip_drm_gem_object_mmap(obj, vma); +} - node = drm_vma_offset_exact_lookup(dev->vma_offset_manager, - vma->vm_pgoff, - vma_pages(vma)); - if (!node) { - mutex_unlock(&dev->struct_mutex); - DRM_ERROR("failed to find vma node.\n"); - return -EINVAL; - } else if (!drm_vma_node_is_allowed(node, filp)) { - mutex_unlock(&dev->struct_mutex); - return -EACCES; - } +/* drm driver mmap file operations */ +int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_gem_object *obj; + int ret; - obj = container_of(node, struct drm_gem_object, vma_node); - ret = rockchip_gem_mmap_buf(obj, vma); + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; - mutex_unlock(&dev->struct_mutex); + obj = vma->vm_private_data; - return ret; + return rockchip_drm_gem_object_mmap(obj, vma); } struct rockchip_gem_object * diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index dc65161d7cad..34b78e736532 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -170,6 +170,7 @@ struct vop_win_phy { struct vop_reg enable; struct vop_reg format; + struct vop_reg rb_swap; struct vop_reg act_info; struct vop_reg dsp_info; struct vop_reg dsp_st; @@ -199,8 +200,12 @@ struct vop_data { static const uint32_t formats_01[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, DRM_FORMAT_RGB888, + DRM_FORMAT_BGR888, DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV24, @@ -209,8 +214,12 @@ static const uint32_t formats_01[] = { static const uint32_t formats_234[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, DRM_FORMAT_RGB888, + DRM_FORMAT_BGR888, DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, }; static const struct vop_win_phy win01_data = { @@ -218,6 +227,7 @@ static const struct vop_win_phy win01_data = { .nformats = ARRAY_SIZE(formats_01), .enable = VOP_REG(WIN0_CTRL0, 0x1, 0), .format = VOP_REG(WIN0_CTRL0, 0x7, 1), + .rb_swap = VOP_REG(WIN0_CTRL0, 0x1, 12), .act_info = VOP_REG(WIN0_ACT_INFO, 0x1fff1fff, 0), .dsp_info = VOP_REG(WIN0_DSP_INFO, 0x0fff0fff, 0), .dsp_st = VOP_REG(WIN0_DSP_ST, 0x1fff1fff, 0), @@ -234,6 +244,7 @@ static const struct vop_win_phy win23_data = { .nformats = ARRAY_SIZE(formats_234), .enable = VOP_REG(WIN2_CTRL0, 0x1, 0), .format = VOP_REG(WIN2_CTRL0, 0x7, 1), + .rb_swap = VOP_REG(WIN2_CTRL0, 0x1, 12), .dsp_info = VOP_REG(WIN2_DSP_INFO0, 0x0fff0fff, 0), .dsp_st = VOP_REG(WIN2_DSP_ST0, 0x1fff1fff, 0), .yrgb_mst = VOP_REG(WIN2_MST0, 0xffffffff, 0), @@ -242,15 +253,6 @@ static const struct vop_win_phy win23_data = { .dst_alpha_ctl = VOP_REG(WIN2_DST_ALPHA_CTRL, 0xff, 0), }; -static const struct vop_win_phy cursor_data = { - .data_formats = formats_234, - .nformats = ARRAY_SIZE(formats_234), - .enable = VOP_REG(HWC_CTRL0, 0x1, 0), - .format = VOP_REG(HWC_CTRL0, 0x7, 1), - .dsp_st = VOP_REG(HWC_DSP_ST, 0x1fff1fff, 0), - .yrgb_mst = VOP_REG(HWC_MST, 0xffffffff, 0), -}; - static const struct vop_ctrl ctrl_data = { .standby = VOP_REG(SYS_CTRL, 0x1, 22), .gate_en = VOP_REG(SYS_CTRL, 0x1, 23), @@ -282,14 +284,14 @@ static const struct vop_reg_data vop_init_reg_table[] = { /* * Note: rk3288 has a dedicated 'cursor' window, however, that window requires * special support to get alpha blending working. For now, just use overlay - * window 1 for the drm cursor. + * window 3 for the drm cursor. + * */ static const struct vop_win_data rk3288_vop_win_data[] = { { .base = 0x00, .phy = &win01_data, .type = DRM_PLANE_TYPE_PRIMARY }, - { .base = 0x40, .phy = &win01_data, .type = DRM_PLANE_TYPE_CURSOR }, + { .base = 0x40, .phy = &win01_data, .type = DRM_PLANE_TYPE_OVERLAY }, { .base = 0x00, .phy = &win23_data, .type = DRM_PLANE_TYPE_OVERLAY }, - { .base = 0x50, .phy = &win23_data, .type = DRM_PLANE_TYPE_OVERLAY }, - { .base = 0x00, .phy = &cursor_data, .type = DRM_PLANE_TYPE_OVERLAY }, + { .base = 0x50, .phy = &win23_data, .type = DRM_PLANE_TYPE_CURSOR }, }; static const struct vop_data rk3288_vop = { @@ -352,15 +354,32 @@ static inline void vop_mask_write_relaxed(struct vop *vop, uint32_t offset, } } +static bool has_rb_swapped(uint32_t format) +{ + switch (format) { + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_BGR888: + case DRM_FORMAT_BGR565: + return true; + default: + return false; + } +} + static enum vop_data_format vop_convert_format(uint32_t format) { switch (format) { case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: return VOP_FMT_ARGB8888; case DRM_FORMAT_RGB888: + case DRM_FORMAT_BGR888: return VOP_FMT_RGB888; case DRM_FORMAT_RGB565: + case DRM_FORMAT_BGR565: return VOP_FMT_RGB565; case DRM_FORMAT_NV12: return VOP_FMT_YUV420SP; @@ -378,6 +397,7 @@ static bool is_alpha_support(uint32_t format) { switch (format) { case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_ABGR8888: return true; default: return false; @@ -588,6 +608,7 @@ static int vop_update_plane_event(struct drm_plane *plane, enum vop_data_format format; uint32_t val; bool is_alpha; + bool rb_swap; bool visible; int ret; struct drm_rect dest = { @@ -621,6 +642,7 @@ static int vop_update_plane_event(struct drm_plane *plane, return 0; is_alpha = is_alpha_support(fb->pixel_format); + rb_swap = has_rb_swapped(fb->pixel_format); format = vop_convert_format(fb->pixel_format); if (format < 0) return format; @@ -689,6 +711,7 @@ static int vop_update_plane_event(struct drm_plane *plane, val = (dsp_sty - 1) << 16; val |= (dsp_stx - 1) & 0xffff; VOP_WIN_SET(vop, win, dsp_st, val); + VOP_WIN_SET(vop, win, rb_swap, rb_swap); if (is_alpha) { VOP_WIN_SET(vop, win, dst_alpha_ctl, diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index 3077f1554099..624d941aaad1 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -963,14 +963,13 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) } else { pool->npages_free += count; list_splice(&ttm_dma->pages_list, &pool->free_list); - npages = count; - if (pool->npages_free > _manager->options.max_size) { + /* + * Wait to have at at least NUM_PAGES_TO_ALLOC number of pages + * to free in order to minimize calls to set_memory_wb(). + */ + if (pool->npages_free >= (_manager->options.max_size + + NUM_PAGES_TO_ALLOC)) npages = pool->npages_free - _manager->options.max_size; - /* free at least NUM_PAGES_TO_ALLOC number of pages - * to reduce calls to set_memory_wb */ - if (npages < NUM_PAGES_TO_ALLOC) - npages = NUM_PAGES_TO_ALLOC; - } } spin_unlock_irqrestore(&pool->lock, irq_flags); diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 6d2f39d36e44..00f2058944e5 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1107,6 +1107,9 @@ static int ipu_irq_init(struct ipu_soc *ipu) return ret; } + for (i = 0; i < IPU_NUM_IRQS; i += 32) + ipu_cm_write(ipu, 0, IPU_INT_CTRL(i / 32)); + for (i = 0; i < IPU_NUM_IRQS; i += 32) { gc = irq_get_domain_generic_chip(ipu->domain, i); gc->reg_base = ipu->cm_reg; |