From 284710fa6c3a5fddbc0f8c6b3a07861a312c18d2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 30 Jan 2017 11:09:31 +0100 Subject: drm/amdgpu: add basic PRT support (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Future hardware generations can handle PRT flags on a per page basis, but current hardware can only turn it on globally. Add the basic handling for both, a global callback to enable/disable triggered by setting a per mapping flag. v2: agd: rebase fixes Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c1b913541739..618f12884eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -294,6 +294,8 @@ struct amdgpu_gart_funcs { uint32_t gpu_page_idx, /* pte/pde to update */ uint64_t addr, /* addr to write into pte/pde */ uint32_t flags); /* access flags */ + /* enable/disable PRT support */ + void (*set_prt)(struct amdgpu_device *adev, bool enable); }; /* provided by the ih block */ -- cgit v1.2.3 From b85891bd6d1bf887b3398f4c44b7a30b37f4485e Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Mon, 16 Jan 2017 13:59:01 +0800 Subject: drm/amdgpu: IOCTL interface for PRT support v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Till GFX8 we can only enable PRT support globally, but with the next hardware generation we can do this on a per page basis. Keep the interface consistent by adding PRT mappings and enable support globally on current hardware when the first mapping is made. v2: disable PRT support delayed and on all error paths v3: PRT and other permissions are mutal exclusive, PRT mappings don't need a BO. v4: update PRT mappings durign CS as well, make va_flags 64bit Signed-off-by: Junwei Zhang Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 62 ++++++++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 ++++++ include/uapi/drm/amdgpu_drm.h | 2 ++ 5 files changed, 64 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 618f12884eed..b9212537b17d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -701,6 +701,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); struct amdgpu_fpriv { struct amdgpu_vm vm; + struct amdgpu_bo_va *prt_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 99424cb8020b..89dcb07ab213 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -759,10 +759,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_bo_unref(&parser->uf_entry.robj); } -static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, - struct amdgpu_vm *vm) +static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) { struct amdgpu_device *adev = p->adev; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; struct amdgpu_bo *bo; int i, r; @@ -779,6 +780,15 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (r) return r; + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + if (r) + return r; + + r = amdgpu_sync_fence(adev, &p->job->sync, + fpriv->prt_va->last_pt_update); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { struct dma_fence *f; bo_va = vm->csa_bo_va; @@ -855,7 +865,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, if (p->job->vm) { p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); - r = amdgpu_bo_vm_update_pte(p, vm); + r = amdgpu_bo_vm_update_pte(p); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 106cf83c2e6b..3c22656aa1bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -553,6 +553,12 @@ error: int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE | + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE; + const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE | + AMDGPU_VM_PAGE_PRT; + struct drm_amdgpu_gem_va *args = data; struct drm_gem_object *gobj; struct amdgpu_device *adev = dev->dev_private; @@ -563,7 +569,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct list_head list; - uint32_t invalid_flags, va_flags = 0; + uint64_t va_flags = 0; int r = 0; if (!adev->vm_manager.enabled) @@ -577,11 +583,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | - AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE); - if ((args->flags & invalid_flags)) { - dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n", - args->flags, invalid_flags); + if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { + dev_err(&dev->pdev->dev, "invalid flags combination 0x%08X\n", + args->flags); return -EINVAL; } @@ -595,28 +599,34 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - gobj = drm_gem_object_lookup(filp, args->handle); - if (gobj == NULL) - return -ENOENT; - abo = gem_to_amdgpu_bo(gobj); INIT_LIST_HEAD(&list); - tv.bo = &abo->tbo; - tv.shared = false; - list_add(&tv.head, &list); + if (!(args->flags & AMDGPU_VM_PAGE_PRT)) { + gobj = drm_gem_object_lookup(filp, args->handle); + if (gobj == NULL) + return -ENOENT; + abo = gem_to_amdgpu_bo(gobj); + tv.bo = &abo->tbo; + tv.shared = false; + list_add(&tv.head, &list); + } else { + gobj = NULL; + abo = NULL; + } amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - drm_gem_object_unreference_unlocked(gobj); - return r; - } + if (r) + goto error_unref; - bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo); - if (!bo_va) { - ttm_eu_backoff_reservation(&ticket, &list); - drm_gem_object_unreference_unlocked(gobj); - return -ENOENT; + if (abo) { + bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo); + if (!bo_va) { + r = -ENOENT; + goto error_backoff; + } + } else { + bo_va = fpriv->prt_va; } switch (args->operation) { @@ -627,6 +637,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, va_flags |= AMDGPU_PTE_WRITEABLE; if (args->flags & AMDGPU_VM_PAGE_EXECUTABLE) va_flags |= AMDGPU_PTE_EXECUTABLE; + if (args->flags & AMDGPU_VM_PAGE_PRT) + va_flags |= AMDGPU_PTE_PRT; r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -637,11 +649,13 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && - !amdgpu_vm_debug) + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug) amdgpu_gem_va_update_vm(adev, bo_va, &list, args->operation); + +error_backoff: ttm_eu_backoff_reservation(&ticket, &list); +error_unref: drm_gem_object_unreference_unlocked(gobj); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 61d94c745672..49f93ee019e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -655,6 +655,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } + fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); + if (!fpriv->prt_va) { + r = -ENOMEM; + amdgpu_vm_fini(adev, &fpriv->vm); + kfree(fpriv); + goto out_suspend; + } + if (amdgpu_sriov_vf(adev)) { r = amdgpu_map_static_csa(adev, &fpriv->vm); if (r) @@ -699,6 +707,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_uvd_free_handles(adev, file_priv); amdgpu_vce_free_handles(adev, file_priv); + amdgpu_vm_bo_rmv(adev, fpriv->prt_va); + if (amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false)); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5797283c2d79..1c0ddf71193e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -361,6 +361,8 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) /* executable mapping, new for VI */ #define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) +/* partially resident texture */ +#define AMDGPU_VM_PAGE_PRT (1 << 4) struct drm_amdgpu_gem_va { /** GEM object handle */ -- cgit v1.2.3 From f7c35abe933c2ee34008c7415578611adcf3fcc6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 27 Jan 2017 11:56:05 +0100 Subject: drm/amdgpu: implement PRT for GFX6 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable/disable the handling globally for now and print a warning when we enable it for the first time. v2: write to the correct register, adjust bits to that hw generation v3: fix compilation, add the missing register bit definitions Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 55 ++++++++++++++++++++++ .../drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h | 4 ++ 3 files changed, 60 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b9212537b17d..3edc8719e1be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -569,6 +569,7 @@ struct amdgpu_mc { uint32_t vram_type; uint32_t srbm_soft_reset; struct amdgpu_mode_mc_save save; + bool prt_warning; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 0635829b18cf..33284287cdf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -400,6 +400,60 @@ static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, WREG32(mmVM_CONTEXT1_CNTL, tmp); } + /** + + * gmc_v8_0_set_prt - set PRT VM fault + + * + + * @adev: amdgpu_device pointer + + * @enable: enable/disable VM fault handling for PRT + +*/ +static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) +{ + u32 tmp; + + if (enable && !adev->mc.prt_warning) { + dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); + adev->mc.prt_warning = true; + } + + tmp = RREG32(mmVM_PRT_CNTL); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS, + enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS, + enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L2_CACHE_STORE_INVALID_ENTRIES, + enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L1_TLB_STORE_INVALID_ENTRIES, + enable); + WREG32(mmVM_PRT_CNTL, tmp); + + if (enable) { + uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t high = adev->vm_manager.max_pfn; + + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high); + } else { + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0); + } +} + static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) { int r, i; @@ -1082,6 +1136,7 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = { .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v6_0_gart_set_pte_pde, + .set_prt = gmc_v6_0_set_prt, }; static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h index 0f6c6c8d089b..7155312326e8 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h @@ -11891,5 +11891,9 @@ #define VM_PRT_CNTL__L1_TLB_STORE_INVALID_ENTRIES__SHIFT 0x00000003 #define VM_PRT_CNTL__L2_CACHE_STORE_INVALID_ENTRIES_MASK 0x00000004L #define VM_PRT_CNTL__L2_CACHE_STORE_INVALID_ENTRIES__SHIFT 0x00000002 +#define VM_PRT_CNTL__CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS_MASK 0x00000001L +#define VM_PRT_CNTL__CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS__SHIFT 0x00000000 +#define VM_PRT_CNTL__TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS_MASK 0x00000002L +#define VM_PRT_CNTL__TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS__SHIFT 0x00000001 #endif -- cgit v1.2.3 From 15d72fd7456b6273b89bd9c43e167e5867af389d Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 Jan 2017 15:07:40 +0800 Subject: drm/amdgpu:impl RREG32 no kiq version some registers are PF & VF copy, and we can safely use mmio method to access them. and sometime we are forbid to use kiq to access registers for example in INTR context. we need a MACRO that always disable KIQ for regs accessing Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Xiangliang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 21 ++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++------ 2 files changed, 20 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3edc8719e1be..3ba21ffdc6c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1505,9 +1505,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev); int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, - bool always_indirect); + uint32_t acc_flags); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - bool always_indirect); + uint32_t acc_flags); u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); @@ -1517,11 +1517,18 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); /* * Registers read & write functions. */ -#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false) -#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), true) -#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), false)) -#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), false) -#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), true) + +#define AMDGPU_REGS_IDX (1<<0) +#define AMDGPU_REGS_NO_KIQ (1<<1) + +#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) +#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) + +#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) +#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) +#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) +#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_IDX) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c2491a4e9ce7..ae9e9e789e9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -90,16 +90,16 @@ bool amdgpu_device_is_px(struct drm_device *dev) * MMIO register access helper functions. */ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, - bool always_indirect) + uint32_t acc_flags) { uint32_t ret; - if (amdgpu_sriov_runtime(adev)) { + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { BUG_ON(in_interrupt()); return amdgpu_virt_kiq_rreg(adev, reg); } - if ((reg * 4) < adev->rmmio_size && !always_indirect) + if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); else { unsigned long flags; @@ -114,16 +114,16 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, } void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - bool always_indirect) + uint32_t acc_flags) { trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); - if (amdgpu_sriov_runtime(adev)) { + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { BUG_ON(in_interrupt()); return amdgpu_virt_kiq_wreg(adev, reg, v); } - if ((reg * 4) < adev->rmmio_size && !always_indirect) + if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); else { unsigned long flags; -- cgit v1.2.3 From 223049cd54bdf32be581aff4c2cc0f8a9cb7cef0 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 26 Jan 2017 15:32:16 +0800 Subject: drm/amdgpu:new field in_reset introduced for gfx use it to seperate driver load and gpu reset/resume because gfx IP need different approach for different hw_init trigger source Signed-off-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3ba21ffdc6c0..9b3b2a3001cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -903,6 +903,7 @@ struct amdgpu_gfx { /* reset mask */ uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; + bool in_reset; }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, -- cgit v1.2.3 From 9b49c3ab06a7d0ad4bd3a8474cf0f2640aaadfd6 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 26 Jan 2017 15:33:52 +0800 Subject: drm/amdgpu:alloc mqd backup this is required for restoring the mqds after GPU reset. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Xiangliang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9b3b2a3001cc..fa831a8030e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -783,6 +783,7 @@ struct amdgpu_mec { u32 num_pipe; u32 num_mec; u32 num_queue; + struct vi_mqd *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 919c02f975fe..b2bfee3e5b67 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -7315,6 +7315,11 @@ static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev) dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); return r; } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } /* create MQD for each KCQ */ @@ -7329,6 +7334,11 @@ static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev) dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); return r; } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } } -- cgit v1.2.3 From 59a82d7d69cf63da641314384b1e691aa8c12999 Mon Sep 17 00:00:00 2001 From: Xiangliang Yu Date: Fri, 17 Feb 2017 16:03:10 +0800 Subject: drm/amdgpu: change pointer of mqd_ptr & mqd_backup to void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vi_mqd is only used by VI family but mqd_ptr and mqd_backup is common for all ASIC, so change the pointer to void. Signed-off-by: Xiangliang Yu Reviewed-by: Alex Deucher Reviewed-by: Christian König Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 26 +++++++++++++++++--------- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index fa831a8030e4..80ab8516db7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -783,7 +783,7 @@ struct amdgpu_mec { u32 num_pipe; u32 num_mec; u32 num_queue; - struct vi_mqd *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; + void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 5d0f7c52fc01..da702dc6e2e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -162,7 +162,7 @@ struct amdgpu_ring { u32 queue; struct amdgpu_bo *mqd_obj; uint64_t mqd_gpu_addr; - struct vi_mqd *mqd_ptr; + void *mqd_ptr; u32 doorbell_index; bool use_doorbell; unsigned wptr_offs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 61e23926eb7f..8c4b40002dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4940,8 +4940,10 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) gfx_v8_0_cp_compute_enable(adev, true); ring = &adev->gfx.kiq.ring; - if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr)) { - r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr); + if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) { + r = gfx_v8_0_kiq_init_queue(ring, + (struct vi_mqd *)ring->mqd_ptr, + ring->mqd_gpu_addr); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; if (r) @@ -4952,8 +4954,10 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - if (!amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr)) { - r = gfx_v8_0_kiq_init_queue(ring, ring->mqd_ptr, ring->mqd_gpu_addr); + if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) { + r = gfx_v8_0_kiq_init_queue(ring, + (struct vi_mqd *)ring->mqd_ptr, + ring->mqd_gpu_addr); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; if (r) @@ -7333,7 +7337,7 @@ static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev) if (!ring->mqd_obj) { r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); + &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); return r; @@ -7352,7 +7356,7 @@ static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev) if (!ring->mqd_obj) { r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); + &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); return r; @@ -7375,9 +7379,13 @@ static void gfx_v8_0_compute_mqd_soft_fini(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); } ring = &adev->gfx.kiq.ring; - amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); -} \ No newline at end of file + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); +} -- cgit v1.2.3 From df6e2c4aeb263f9b9b904c1a087411ddf25c5e94 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Fri, 17 Feb 2017 11:05:49 +0800 Subject: drm/amdgpu: export gfx config double offchip LDS buffers (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: move the config struct to drm_amdgpu_info_device v3: move the config feature to amdgpu_gca_config Signed-off-by: Junwei Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 14 ++++++++++++++ include/uapi/drm/amdgpu_drm.h | 2 ++ 6 files changed, 33 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 80ab8516db7c..ef72f52a6a20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -845,6 +845,9 @@ struct amdgpu_gca_config { uint32_t macrotile_mode_array[16]; struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; + + /* gfx configure feature */ + uint32_t double_offchip_lds_buf; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 027692bf8457..096386515f2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -528,6 +528,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.vram_type = adev->mc.vram_type; dev_info.vram_bit_width = adev->mc.vram_width; dev_info.vce_harvest_config = adev->vce.harvest_config; + dev_info.gc_double_offchip_lds_buf = + adev->gfx.config.double_offchip_lds_buf; return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 2086e7e68de4..e78433799a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1579,6 +1579,11 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +static void gfx_v6_0_config_init(struct amdgpu_device *adev) +{ + adev->gfx.config.double_offchip_lds_buf = 1; +} + static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) { u32 gb_addr_config = 0; @@ -1736,6 +1741,7 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) gfx_v6_0_setup_spi(adev); gfx_v6_0_get_cu_info(adev); + gfx_v6_0_config_init(adev); WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 1f9354541f29..286d6763afa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1876,6 +1876,11 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } +static void gfx_v7_0_config_init(struct amdgpu_device *adev) +{ + adev->gfx.config.double_offchip_lds_buf = 1; +} + /** * gfx_v7_0_gpu_init - setup the 3D engine * @@ -1899,6 +1904,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) gfx_v7_0_setup_rb(adev); gfx_v7_0_get_cu_info(adev); + gfx_v7_0_config_init(adev); /* set HW defaults for 3D engine */ WREG32(mmCP_MEQ_THRESHOLDS, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index c2a325185753..5682d945e588 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3846,6 +3846,19 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } +static void gfx_v8_0_config_init(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + default: + adev->gfx.config.double_offchip_lds_buf = 1; + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + adev->gfx.config.double_offchip_lds_buf = 0; + break; + } +} + static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) { u32 tmp; @@ -3859,6 +3872,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) gfx_v8_0_tiling_mode_table_init(adev); gfx_v8_0_setup_rb(adev); gfx_v8_0_get_cu_info(adev); + gfx_v8_0_config_init(adev); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index a30fe693175f..732c662fad79 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -726,6 +726,8 @@ struct drm_amdgpu_info_device { __u32 vram_bit_width; /* vce harvesting instance */ __u32 vce_harvest_config; + /* gfx double offchip LDS buffers */ + __u32 gc_double_offchip_lds_buf; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3 From ea323f88a23fae24df7cbfd77a375e668394c7dc Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Tue, 21 Feb 2017 10:32:37 +0800 Subject: drm/amdgpu: rename amdgpu_gca_config to amdgpu_gfx_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Junwei Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ef72f52a6a20..4015fac77e99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -815,7 +815,7 @@ struct amdgpu_rb_config { uint32_t raster_config_1; }; -struct amdgpu_gca_config { +struct amdgpu_gfx_config { unsigned max_shader_engines; unsigned max_tile_pipes; unsigned max_cu_per_sh; @@ -867,7 +867,7 @@ struct amdgpu_gfx_funcs { struct amdgpu_gfx { struct mutex gpu_clock_mutex; - struct amdgpu_gca_config config; + struct amdgpu_gfx_config config; struct amdgpu_rlc rlc; struct amdgpu_mec mec; struct amdgpu_kiq kiq; -- cgit v1.2.3 From 81522f71b1871a85bd32567904f414a42e8431af Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 15 Nov 2016 08:15:28 -0500 Subject: drm/amdgpu: remove unused sync testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not used in a while. Signed-off-by: Christian König Reviewed-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 -- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 --- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 79 ------------------------------ 3 files changed, 89 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4015fac77e99..f0e8b2ac84f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1121,10 +1121,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number); * Testing */ void amdgpu_test_moves(struct amdgpu_device *adev); -void amdgpu_test_ring_sync(struct amdgpu_device *adev, - struct amdgpu_ring *cpA, - struct amdgpu_ring *cpB); -void amdgpu_test_syncing(struct amdgpu_device *adev); /* * MMU Notifier diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e71a89c73684..ffaa703e89f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1912,12 +1912,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, else DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); } - if ((amdgpu_testing & 2)) { - if (adev->accel_working) - amdgpu_test_syncing(adev); - else - DRM_INFO("amdgpu: acceleration disabled, skipping sync tests\n"); - } if (amdgpu_benchmarking) { if (adev->accel_working) amdgpu_benchmark(adev, amdgpu_benchmarking); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b0483e6e536f..15510dadde01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -237,82 +237,3 @@ void amdgpu_test_moves(struct amdgpu_device *adev) if (adev->mman.buffer_funcs) amdgpu_do_test_moves(adev); } - -void amdgpu_test_ring_sync(struct amdgpu_device *adev, - struct amdgpu_ring *ringA, - struct amdgpu_ring *ringB) -{ -} - -static void amdgpu_test_ring_sync2(struct amdgpu_device *adev, - struct amdgpu_ring *ringA, - struct amdgpu_ring *ringB, - struct amdgpu_ring *ringC) -{ -} - -static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA, - struct amdgpu_ring *ringB) -{ - if (ringA == &ringA->adev->vce.ring[0] && - ringB == &ringB->adev->vce.ring[1]) - return false; - - return true; -} - -void amdgpu_test_syncing(struct amdgpu_device *adev) -{ - int i, j, k; - - for (i = 1; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ringA = adev->rings[i]; - if (!ringA || !ringA->ready) - continue; - - for (j = 0; j < i; ++j) { - struct amdgpu_ring *ringB = adev->rings[j]; - if (!ringB || !ringB->ready) - continue; - - if (!amdgpu_test_sync_possible(ringA, ringB)) - continue; - - DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); - amdgpu_test_ring_sync(adev, ringA, ringB); - - DRM_INFO("Testing syncing between rings %d and %d...\n", j, i); - amdgpu_test_ring_sync(adev, ringB, ringA); - - for (k = 0; k < j; ++k) { - struct amdgpu_ring *ringC = adev->rings[k]; - if (!ringC || !ringC->ready) - continue; - - if (!amdgpu_test_sync_possible(ringA, ringC)) - continue; - - if (!amdgpu_test_sync_possible(ringB, ringC)) - continue; - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k); - amdgpu_test_ring_sync2(adev, ringA, ringB, ringC); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j); - amdgpu_test_ring_sync2(adev, ringA, ringC, ringB); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k); - amdgpu_test_ring_sync2(adev, ringB, ringA, ringC); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i); - amdgpu_test_ring_sync2(adev, ringB, ringC, ringA); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j); - amdgpu_test_ring_sync2(adev, ringC, ringA, ringB); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i); - amdgpu_test_ring_sync2(adev, ringC, ringB, ringA); - } - } - } -} -- cgit v1.2.3 From 04e30c9c8667494847f3876fe3e6c8b39fa6fd1b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 8 Mar 2017 15:12:52 +0100 Subject: drm/amdgpu: Merge pre/postclose hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Again no apparent explanation for the split except hysterical raisins. Merging them also makes it a bit more obviuos what's going on wrt the runtime pm refdancing. Cc: Alex Deucher Cc: Christian König Cc: amd-gfx@lists.freedesktop.org Reviewed-by: Christian König Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 17 ++--------------- 3 files changed, 2 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f0e8b2ac84f4..946ef0c6c75d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1774,8 +1774,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev); int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv); void amdgpu_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv); -void amdgpu_driver_preclose_kms(struct drm_device *dev, - struct drm_file *file_priv); int amdgpu_suspend(struct amdgpu_device *adev); int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ad174d17ca32..482145c55101 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -689,7 +689,6 @@ static struct drm_driver kms_driver = { DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, - .preclose = amdgpu_driver_preclose_kms, .postclose = amdgpu_driver_postclose_kms, .lastclose = amdgpu_driver_lastclose_kms, .set_busid = drm_pci_set_busid, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 096386515f2f..099a90e98a39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -779,6 +779,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (!fpriv) return; + pm_runtime_get_sync(dev->dev); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_uvd_free_handles(adev, file_priv); @@ -809,21 +811,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pm_runtime_put_autosuspend(dev->dev); } -/** - * amdgpu_driver_preclose_kms - drm callback for pre close - * - * @dev: drm dev pointer - * @file_priv: drm file - * - * On device pre close, tear down hyperz and cmask filps on r1xx-r5xx - * (all asics). - */ -void amdgpu_driver_preclose_kms(struct drm_device *dev, - struct drm_file *file_priv) -{ - pm_runtime_get_sync(dev->dev); -} - /* * VBlank related functions. */ -- cgit v1.2.3 From 8fe733289bc00914e9ace101088857cda20a1c51 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Thu, 10 Mar 2016 14:20:39 +0800 Subject: drm/amdgpu: init aperture definitions (v2) v2: agd: move apertures to mc structure Signed-off-by: Flora Cui Reviewed-by: Jammy Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++++++++++++---- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 8 ++++++++ 5 files changed, 55 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 946ef0c6c75d..d5eed6b86314 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -570,6 +570,11 @@ struct amdgpu_mc { uint32_t srbm_soft_reset; struct amdgpu_mode_mc_save save; bool prt_warning; + /* apertures */ + u64 shared_aperture_start; + u64 shared_aperture_end; + u64 private_aperture_start; + u64 private_aperture_end; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index e471c08dd249..1b8b4941dcf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1889,7 +1889,8 @@ static void gfx_v7_0_config_init(struct amdgpu_device *adev) */ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) { - u32 tmp, sh_mem_cfg; + u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; + u32 tmp; int i; WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); @@ -1920,15 +1921,32 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); + sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE, + MTYPE_NC); + sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE, + MTYPE_UC); + sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0); + + sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, + SWIZZLE_ENABLE, 1); + sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, + ELEMENT_SIZE, 1); + sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, + INDEX_STRIDE, 3); mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { + for (i = 0; i < adev->vm_manager.num_ids; i++) { + if (i == 0) + sh_mem_base = 0; + else + sh_mem_base = adev->mc.shared_aperture_start >> 48; cik_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_cfg); WREG32(mmSH_MEM_APE1_BASE, 1); WREG32(mmSH_MEM_APE1_LIMIT, 0); - WREG32(mmSH_MEM_BASES, 0); + WREG32(mmSH_MEM_BASES, sh_mem_base); + WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5dcf8dbdd5b3..fefec6e6379b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3859,7 +3859,7 @@ static void gfx_v8_0_config_init(struct amdgpu_device *adev) static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) { - u32 tmp; + u32 tmp, sh_static_mem_cfg; int i; WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); @@ -3874,8 +3874,14 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ + sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, + SWIZZLE_ENABLE, 1); + sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, + ELEMENT_SIZE, 1); + sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, + INDEX_STRIDE, 3); mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { + for (i = 0; i < adev->vm_manager.num_ids; i++) { vi_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ if (i == 0) { @@ -3884,17 +3890,20 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32(mmSH_MEM_CONFIG, tmp); + WREG32(mmSH_MEM_BASES, 0); } else { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32(mmSH_MEM_CONFIG, tmp); + tmp = adev->mc.shared_aperture_start >> 48; + WREG32(mmSH_MEM_BASES, tmp); } WREG32(mmSH_MEM_APE1_BASE, 1); WREG32(mmSH_MEM_APE1_LIMIT, 0); - WREG32(mmSH_MEM_BASES, 0); + WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); } vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 552bf6b7851c..13efb679dfa1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -934,6 +934,14 @@ static int gmc_v7_0_early_init(void *handle) gmc_v7_0_set_gart_funcs(adev); gmc_v7_0_set_irq_funcs(adev); + adev->mc.shared_aperture_start = 0x2000000000000000ULL; + adev->mc.shared_aperture_end = + adev->mc.shared_aperture_start + (4ULL << 30) - 1; + adev->mc.private_aperture_start = + adev->mc.shared_aperture_end + 1; + adev->mc.private_aperture_end = + adev->mc.private_aperture_start + (4ULL << 30) - 1; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index f2bd0164bdfd..952ba1e02a77 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -939,6 +939,14 @@ static int gmc_v8_0_early_init(void *handle) gmc_v8_0_set_gart_funcs(adev); gmc_v8_0_set_irq_funcs(adev); + adev->mc.shared_aperture_start = 0x2000000000000000ULL; + adev->mc.shared_aperture_end = + adev->mc.shared_aperture_start + (4ULL << 30) - 1; + adev->mc.private_aperture_start = + adev->mc.shared_aperture_end + 1; + adev->mc.private_aperture_end = + adev->mc.private_aperture_start + (4ULL << 30) - 1; + return 0; } -- cgit v1.2.3 From 536fbf946cf84ff60cdef471c23ab96058e62f39 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Sat, 12 Mar 2016 09:32:30 +0800 Subject: drm/amdgpu: change wptr to 64 bits (v2) Newer asics need 64 bit wptrs. If the wptr is now smaller than the rptr that doesn't indicate a wrap-around anymore. v2: integrate Christian's comments. Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 12 +++++++----- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 12 +++++++----- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 11 ++++++----- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 18 ++++++++++-------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 23 +++++++++++++---------- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 11 ++++++----- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 15 ++++++++------- drivers/gpu/drm/amd/amdgpu/si_dma.c | 12 +++++++----- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 10 ++++++---- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 17 +++++++++-------- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 24 +++++++++++++----------- 15 files changed, 107 insertions(+), 83 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d5eed6b86314..93ecf692b9f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1601,7 +1601,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) { if (ring->count_dw <= 0) DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - ring->ring[ring->wptr++] = v; + ring->ring[ring->wptr++ & ring->buf_mask] = v; ring->wptr &= ring->ptr_mask; ring->count_dw--; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index cead88ac3788..c9b536f4b019 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -232,7 +232,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } amdgpu_ring_clear_ring(ring); } - ring->ptr_mask = (ring->ring_size / 4) - 1; + ring->buf_mask = (ring->ring_size / 4) - 1; + ring->ptr_mask = ring->funcs->support_64bit_ptrs ? + 0xffffffffffffffff : ring->buf_mask; + ring->max_dw = max_dw; if (amdgpu_debugfs_ring_init(adev, ring)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index da702dc6e2e5..dada0a022bed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -96,10 +96,11 @@ struct amdgpu_ring_funcs { enum amdgpu_ring_type type; uint32_t align_mask; u32 nop; + bool support_64bit_ptrs; /* ring read/write ptr handling */ - u32 (*get_rptr)(struct amdgpu_ring *ring); - u32 (*get_wptr)(struct amdgpu_ring *ring); + u64 (*get_rptr)(struct amdgpu_ring *ring); + u64 (*get_wptr)(struct amdgpu_ring *ring); void (*set_wptr)(struct amdgpu_ring *ring); /* validating and patching of IBs */ int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); @@ -148,13 +149,14 @@ struct amdgpu_ring { struct amdgpu_bo *ring_obj; volatile uint32_t *ring; unsigned rptr_offs; - unsigned wptr; - unsigned wptr_old; + u64 wptr; + u64 wptr_old; unsigned ring_size; unsigned max_dw; int count_dw; uint64_t gpu_addr; - uint32_t ptr_mask; + uint64_t ptr_mask; + uint32_t buf_mask; bool ready; u32 idx; u32 me; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c33bc1bb4655..131f69b3f70e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -158,7 +158,7 @@ out: * * Get the current rptr from the hardware (CIK+). */ -static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) { u32 rptr; @@ -174,7 +174,7 @@ static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (CIK+). */ -static uint32_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; @@ -194,7 +194,8 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); } static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -225,7 +226,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, u32 extra_bits = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8); + cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ @@ -432,7 +433,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); /* enable DMA RB */ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], @@ -1209,6 +1210,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), + .support_64bit_ptrs = false, .get_rptr = cik_sdma_ring_get_rptr, .get_wptr = cik_sdma_ring_get_wptr, .set_wptr = cik_sdma_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 02ca2322c30b..1a1de6499517 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2192,12 +2192,12 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) return 0; } -static u32 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) +static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2215,7 +2215,7 @@ static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } @@ -2224,10 +2224,10 @@ static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->gfx.compute_ring[0]) { - WREG32(mmCP_RB1_WPTR, ring->wptr); + WREG32(mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB1_WPTR); } else if (ring == &adev->gfx.compute_ring[1]) { - WREG32(mmCP_RB2_WPTR, ring->wptr); + WREG32(mmCP_RB2_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB2_WPTR); } else { BUG(); @@ -3631,6 +3631,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { .type = AMDGPU_RING_TYPE_GFX, .align_mask = 0xff, .nop = 0x80000000, + .support_64bit_ptrs = false, .get_rptr = gfx_v6_0_ring_get_rptr, .get_wptr = gfx_v6_0_ring_get_wptr, .set_wptr = gfx_v6_0_ring_set_wptr_gfx, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 1b8b4941dcf3..1de2e5318b67 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2629,7 +2629,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); /* set the wb address wether it's enabled or not */ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); @@ -2658,12 +2658,12 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) return 0; } -static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2674,11 +2674,11 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } -static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->wptr_offs]; @@ -2689,8 +2689,8 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } /** @@ -3160,7 +3160,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->queue_state.cp_hqd_pq_wptr = ring->wptr; + mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); @@ -5206,6 +5206,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .type = AMDGPU_RING_TYPE_GFX, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v7_0_ring_get_rptr, .get_wptr = gfx_v7_0_ring_get_wptr_gfx, .set_wptr = gfx_v7_0_ring_set_wptr_gfx, @@ -5236,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .type = AMDGPU_RING_TYPE_COMPUTE, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v7_0_ring_get_rptr, .get_wptr = gfx_v7_0_ring_get_wptr_compute, .set_wptr = gfx_v7_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index fefec6e6379b..03ff1399fbfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4490,7 +4490,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); /* set the wb address wether it's enabled or not */ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); @@ -5204,7 +5204,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->cp_hqd_pq_wptr = ring->wptr; + mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); @@ -6458,12 +6458,12 @@ static int gfx_v8_0_set_clockgating_state(void *handle, return 0; } -static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -6480,10 +6480,10 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } } @@ -6671,7 +6671,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, } } -static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->wptr_offs]; } @@ -6681,8 +6681,8 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, @@ -7037,6 +7037,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .type = AMDGPU_RING_TYPE_GFX, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_gfx, .set_wptr = gfx_v8_0_ring_set_wptr_gfx, @@ -7069,6 +7070,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .type = AMDGPU_RING_TYPE_COMPUTE, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_compute, .set_wptr = gfx_v8_0_ring_set_wptr_compute, @@ -7097,6 +7099,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { .type = AMDGPU_RING_TYPE_KIQ, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_compute, .set_wptr = gfx_v8_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index a881cf475a19..a733c0f63bba 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -186,7 +186,7 @@ out: * * Get the current rptr from the hardware (VI+). */ -static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->rptr_offs] >> 2; @@ -199,7 +199,7 @@ static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (VI+). */ -static uint32_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; @@ -220,7 +220,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); } static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -251,7 +251,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, u32 vmid = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); @@ -466,7 +466,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); @@ -1206,6 +1206,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = false, .get_rptr = sdma_v2_4_ring_get_rptr, .get_wptr = sdma_v2_4_ring_get_wptr, .set_wptr = sdma_v2_4_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index c4d7dd7f73a8..cafa3852143d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -337,7 +337,7 @@ out: * * Get the current rptr from the hardware (VI+). */ -static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->rptr_offs] >> 2; @@ -350,7 +350,7 @@ static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (VI+). */ -static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 wptr; @@ -380,12 +380,12 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr << 2; - WDOORBELL32(ring->doorbell_index, ring->wptr << 2); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr) << 2; + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); } else { int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); } } @@ -417,7 +417,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, u32 vmid = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); @@ -660,7 +660,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); @@ -1579,6 +1579,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = false, .get_rptr = sdma_v3_0_ring_get_rptr, .get_wptr = sdma_v3_0_ring_get_wptr, .set_wptr = sdma_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 3372a071bb85..c4fb3f94c26f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -37,12 +37,12 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); -static uint32_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs>>2]; } -static uint32_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; @@ -55,7 +55,8 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(DMA_RB_WPTR + sdma_offsets[me], + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -65,7 +66,7 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ - while ((ring->wptr & 7) != 5) + while ((lower_32_bits(ring->wptr) & 7) != 5) amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); @@ -184,7 +185,7 @@ static int si_dma_start(struct amdgpu_device *adev) WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); ring->ready = true; @@ -766,6 +767,7 @@ static const struct amdgpu_ring_funcs si_dma_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0), + .support_64bit_ptrs = false, .get_rptr = si_dma_ring_get_rptr, .get_wptr = si_dma_ring_get_wptr, .set_wptr = si_dma_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index b34cefc7ebd5..4bcb2f37cb7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -55,7 +55,7 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -69,7 +69,7 @@ static uint32_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -87,7 +87,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v4_2_early_init(void *handle) @@ -367,7 +367,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0x0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); /* set the ring address */ WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr); @@ -770,6 +770,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .nop = PACKET0(mmUVD_NO_OP, 0), + .support_64bit_ptrs = false, .get_rptr = uvd_v4_2_ring_get_rptr, .get_wptr = uvd_v4_2_ring_get_wptr, .set_wptr = uvd_v4_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index ad8c02e423d4..35008c181363 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -51,7 +51,7 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -65,7 +65,7 @@ static uint32_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -83,7 +83,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v5_0_early_init(void *handle) @@ -424,7 +424,7 @@ static int uvd_v5_0_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_P(mmUVD_RBC_RB_CNTL, 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); @@ -879,6 +879,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .nop = PACKET0(mmUVD_NO_OP, 0), + .support_64bit_ptrs = false, .get_rptr = uvd_v5_0_ring_get_rptr, .get_wptr = uvd_v5_0_ring_get_wptr, .set_wptr = uvd_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 18a6de4e1512..46fe4980accc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -54,7 +54,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -68,7 +68,7 @@ static uint32_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -86,7 +86,7 @@ static void uvd_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v6_0_early_init(void *handle) @@ -521,7 +521,7 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); @@ -1108,6 +1108,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .nop = PACKET0(mmUVD_NO_OP, 0), + .support_64bit_ptrs = false, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1134,6 +1135,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .nop = PACKET0(mmUVD_NO_OP, 0), + .support_64bit_ptrs = false, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index cb0b730ff77a..3433a73ae04b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -52,7 +52,7 @@ static void vce_v2_0_set_irq_funcs(struct amdgpu_device *adev); * * Returns the current hardware read pointer */ -static uint32_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -69,7 +69,7 @@ static uint32_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -91,9 +91,9 @@ static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->vce.ring[0]) - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); } static int vce_v2_0_lmi_clean(struct amdgpu_device *adev) @@ -241,15 +241,15 @@ static int vce_v2_0_start(struct amdgpu_device *adev) vce_v2_0_mc_resume(adev); ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, ring->wptr); - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, ring->wptr); - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); @@ -631,6 +631,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .type = AMDGPU_RING_TYPE_VCE, .align_mask = 0xf, .nop = VCE_CMD_NO_OP, + .support_64bit_ptrs = false, .get_rptr = vce_v2_0_ring_get_rptr, .get_wptr = vce_v2_0_ring_get_wptr, .set_wptr = vce_v2_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 93ec8815bb13..2c5f88cad8ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -73,7 +73,7 @@ static int vce_v3_0_wait_for_idle(void *handle); * * Returns the current hardware read pointer */ -static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -92,7 +92,7 @@ static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -116,11 +116,11 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->vce.ring[0]) - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else if (ring == &adev->vce.ring[1]) - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); else - WREG32(mmVCE_RB_WPTR3, ring->wptr); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); } static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) @@ -231,22 +231,22 @@ static int vce_v3_0_start(struct amdgpu_device *adev) int idx, r; ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, ring->wptr); - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, ring->wptr); - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); ring = &adev->vce.ring[2]; - WREG32(mmVCE_RB_RPTR3, ring->wptr); - WREG32(mmVCE_RB_WPTR3, ring->wptr); + WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); @@ -860,6 +860,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_VCE, .align_mask = 0xf, .nop = VCE_CMD_NO_OP, + .support_64bit_ptrs = false, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, @@ -882,6 +883,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCE, .align_mask = 0xf, .nop = VCE_CMD_NO_OP, + .support_64bit_ptrs = false, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, -- cgit v1.2.3 From 7014285ade54bae2fe03aa397aa45846a0cd3e31 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Fri, 18 Mar 2016 15:08:49 +0800 Subject: drm/amdgpu: add 64bit wb functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer asics need 64 bit writeback slots. Reviewed-by: Christian König Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 39 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 50 ++++++++++++++++++++++-------- 3 files changed, 78 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 93ecf692b9f6..9c1ce40b33b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1021,6 +1021,8 @@ struct amdgpu_wb { int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb); void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb); +int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb); +void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb); void amdgpu_get_pcie_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ffaa703e89f5..fcf83178b113 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -515,6 +515,29 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) } } +/** + * amdgpu_wb_get_64bit - Allocate a wb entry + * + * @adev: amdgpu_device pointer + * @wb: wb index + * + * Allocate a wb slot for use by the driver (all asics). + * Returns 0 on success or -EINVAL on failure. + */ +int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb) +{ + unsigned long offset = bitmap_find_next_zero_area_off(adev->wb.used, + adev->wb.num_wb, 0, 2, 7, 0); + if ((offset + 1) < adev->wb.num_wb) { + __set_bit(offset, adev->wb.used); + __set_bit(offset + 1, adev->wb.used); + *wb = offset; + return 0; + } else { + return -EINVAL; + } +} + /** * amdgpu_wb_free - Free a wb entry * @@ -529,6 +552,22 @@ void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) __clear_bit(wb, adev->wb.used); } +/** + * amdgpu_wb_free_64bit - Free a wb entry + * + * @adev: amdgpu_device pointer + * @wb: wb index + * + * Free a wb slot allocated for use by the driver (all asics) + */ +void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) +{ + if ((wb + 1) < adev->wb.num_wb) { + __clear_bit(wb, adev->wb.used); + __clear_bit(wb + 1, adev->wb.used); + } +} + /** * amdgpu_vram_location - try to find VRAM location * @adev: amdgpu device structure holding all necessary informations diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index c9b536f4b019..bfd4022210ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -182,16 +182,32 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } - r = amdgpu_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } + if (ring->funcs->support_64bit_ptrs) { + r = amdgpu_wb_get_64bit(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } + + r = amdgpu_wb_get_64bit(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } + + } else { + r = amdgpu_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } + + r = amdgpu_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; } r = amdgpu_wb_get(adev, &ring->fence_offs); @@ -256,10 +272,18 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { ring->ready = false; - amdgpu_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_wb_free(ring->adev, ring->fence_offs); - amdgpu_wb_free(ring->adev, ring->rptr_offs); - amdgpu_wb_free(ring->adev, ring->wptr_offs); + if (ring->funcs->support_64bit_ptrs) { + amdgpu_wb_free_64bit(ring->adev, ring->cond_exe_offs); + amdgpu_wb_free_64bit(ring->adev, ring->fence_offs); + amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs); + amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs); + } else { + amdgpu_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_wb_free(ring->adev, ring->fence_offs); + amdgpu_wb_free(ring->adev, ring->rptr_offs); + amdgpu_wb_free(ring->adev, ring->wptr_offs); + } + amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, -- cgit v1.2.3 From 832be4041d4999e008839d12d1efe118da27bd99 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Fri, 18 Mar 2016 15:23:08 +0800 Subject: drm/amdgpu: add 64bit doorbell functions (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer asics need 64 bit doorbells. v2: fix comment (Nils) Reviewed-by: Christian König Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 ++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9c1ce40b33b2..8c36765d3f02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1521,6 +1521,8 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); +u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); +void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); /* * Registers read & write functions. @@ -1575,6 +1577,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) +#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) +#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v)) #define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT #define REG_FIELD_MASK(reg, field) reg##__##field##_MASK diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fcf83178b113..ff0b9920ea79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -194,6 +194,44 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) } } +/** + * amdgpu_mm_rdoorbell64 - read a doorbell Qword + * + * @adev: amdgpu_device pointer + * @index: doorbell index + * + * Returns the value in the doorbell aperture at the + * requested doorbell index (VEGA10+). + */ +u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) +{ + if (index < adev->doorbell.num_doorbells) { + return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); + } else { + DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); + return 0; + } +} + +/** + * amdgpu_mm_wdoorbell64 - write a doorbell Qword + * + * @adev: amdgpu_device pointer + * @index: doorbell index + * @v: value to write + * + * Writes @v to the doorbell aperture at the + * requested doorbell index (VEGA10+). + */ +void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) +{ + if (index < adev->doorbell.num_doorbells) { + atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); + } else { + DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); + } +} + /** * amdgpu_invalid_rreg - dummy reg read function * -- cgit v1.2.3 From 6b777607c1442857e160a57d02b9951d8e7639b4 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 21 Sep 2016 16:19:19 +0800 Subject: drm/amdgpu: expand pte flags to uint64_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Necessary for new asics. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++++++++--------- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/si_dma.c | 2 +- 11 files changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8c36765d3f02..2541d7013e03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -280,7 +280,7 @@ struct amdgpu_vm_pte_funcs { void (*set_pte_pde)(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags); + uint32_t incr, uint64_t flags); }; /* provided by the gmc block */ @@ -293,7 +293,7 @@ struct amdgpu_gart_funcs { void *cpu_pt_addr, /* cpu addr of page table */ uint32_t gpu_page_idx, /* pte/pde to update */ uint64_t addr, /* addr to write into pte/pde */ - uint32_t flags); /* access flags */ + uint64_t flags); /* access flags */ /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); }; @@ -539,7 +539,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages); int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, - dma_addr_t *dma_addr, uint32_t flags); + dma_addr_t *dma_addr, uint64_t flags); int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); /* @@ -1746,7 +1746,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); -uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, +uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base); void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 964d2a946ed5..2916fabf3d1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -229,7 +229,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, unsigned p; int i, j; u64 page_base; - uint32_t flags = AMDGPU_PTE_SYSTEM; + uint64_t flags = AMDGPU_PTE_SYSTEM; if (!adev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); @@ -271,7 +271,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, */ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, - uint32_t flags) + uint64_t flags) { unsigned t; unsigned p; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4c6094eefc51..f2241bb67987 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -746,7 +746,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) { struct ttm_tt *ttm = bo->ttm; struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; - uint32_t flags; + uint64_t flags; int r; if (!ttm || amdgpu_ttm_is_bound(ttm)) @@ -1027,10 +1027,10 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); } -uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, +uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem) { - uint32_t flags = 0; + uint64_t flags = 0; if (mem && mem->mem_type != TTM_PL_SYSTEM) flags |= AMDGPU_PTE_VALID; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7e22c3558b29..a45de6e6a0f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -64,7 +64,7 @@ struct amdgpu_pte_update_params { /* Function which actually does the update */ void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, - uint32_t flags); + uint64_t flags); /* indicate update pt or its shadow */ bool shadow; }; @@ -496,7 +496,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, - uint32_t flags) + uint64_t flags) { trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); @@ -525,7 +525,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, - uint32_t flags) + uint64_t flags) { uint64_t src = (params->src + (addr >> 12) * 8); @@ -718,7 +718,7 @@ error_free: static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, struct amdgpu_vm *vm, uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) + uint64_t dst, uint64_t flags) { const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; @@ -808,7 +808,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, struct amdgpu_vm *vm, uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) + uint64_t dst, uint64_t flags) { /** * The MC L1 TLB supports variable sized pages, based on a fragment @@ -885,7 +885,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, dma_addr_t *pages_addr, struct amdgpu_vm *vm, uint64_t start, uint64_t last, - uint32_t flags, uint64_t addr, + uint64_t flags, uint64_t addr, struct dma_fence **fence) { struct amdgpu_ring *ring; @@ -1023,11 +1023,11 @@ error_free: */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, - uint32_t gtt_flags, + uint64_t gtt_flags, dma_addr_t *pages_addr, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, - uint32_t flags, + uint64_t flags, struct drm_mm_node *nodes, struct dma_fence **fence) { @@ -1114,7 +1114,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_vm *vm = bo_va->vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; - uint32_t gtt_flags, flags; + uint64_t gtt_flags, flags; struct ttm_mem_reg *mem; struct drm_mm_node *nodes; struct dma_fence *exclusive; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 3eee569701d7..0b62764caa28 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -749,7 +749,7 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, */ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) + uint32_t incr, uint64_t flags) { /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index d3b7fe88452c..c3c6853051cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -367,7 +367,7 @@ static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, uint32_t gpu_page_idx, uint64_t addr, - uint32_t flags) + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 9f761e4e03db..e95af8ae0ac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -439,7 +439,7 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, uint32_t gpu_page_idx, uint64_t addr, - uint32_t flags) + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 724ec3745c8c..17d19f06ce4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -531,7 +531,7 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, uint32_t gpu_page_idx, uint64_t addr, - uint32_t flags) + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 0b1aa9462608..1a4b351f350b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -798,7 +798,7 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) + uint32_t incr, uint64_t flags) { /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 610bc3e4a728..49a099aa9c88 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1007,7 +1007,7 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) + uint32_t incr, uint64_t flags) { /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index d2edd3212344..a6862b185bbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -398,7 +398,7 @@ static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) + uint32_t incr, uint64_t flags) { uint64_t value; unsigned ndw; -- cgit v1.2.3 From 4b98e0c4aeb9e246fd877640d45dfbdcdf6283a9 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Tue, 14 Feb 2017 12:31:36 -0500 Subject: drm/amdgpu: set GART PTE asic specific flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set asic specific gart pte flags in the gmc IP module for each asic. Signed-off-by: Alex Xie Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +--- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 + 5 files changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2541d7013e03..abf2b39fd968 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -524,6 +524,10 @@ struct amdgpu_gart { struct page **pages; #endif bool ready; + + /* Asic default pte flags */ + uint64_t gart_pte_flags; + const struct amdgpu_gart_funcs *gart_funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f2241bb67987..987f8f02348d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1042,9 +1042,7 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, flags |= AMDGPU_PTE_SNOOPED; } - if (adev->asic_type >= CHIP_TONGA) - flags |= AMDGPU_PTE_EXECUTABLE; - + flags |= adev->gart.gart_pte_flags; flags |= AMDGPU_PTE_READABLE; if (!amdgpu_ttm_tt_is_readonly(ttm)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index c3c6853051cd..6a1f566632be 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -554,6 +554,7 @@ static int gmc_v6_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = 0; return amdgpu_gart_table_vram_alloc(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index e95af8ae0ac8..9f41d9fa5ed5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -658,6 +658,7 @@ static int gmc_v7_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = 0; return amdgpu_gart_table_vram_alloc(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 17d19f06ce4a..233ac3504fe9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -789,6 +789,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = AMDGPU_PTE_EXECUTABLE; return amdgpu_gart_table_vram_alloc(adev); } -- cgit v1.2.3 From 5463545b9280093f4614049df2dfee3f9e662172 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Tue, 14 Feb 2017 12:22:57 -0500 Subject: drm/amdgpu: add a callback to set vm mapping flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This lets each asic set whichever flags it supports. Signed-off-by: Alex Xie Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 +++--------- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 16 ++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 16 ++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 18 ++++++++++++++++++ 5 files changed, 57 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index abf2b39fd968..00553bf26e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -296,6 +296,9 @@ struct amdgpu_gart_funcs { uint64_t flags); /* access flags */ /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); + /* set pte flags based per asic */ + uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, + uint32_t flags); }; /* provided by the ih block */ @@ -1682,6 +1685,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) +#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags)) #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 3c22656aa1bf..6daf004955b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -569,7 +569,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct list_head list; - uint64_t va_flags = 0; + uint64_t va_flags; int r = 0; if (!adev->vm_manager.enabled) @@ -631,14 +631,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - if (args->flags & AMDGPU_VM_PAGE_READABLE) - va_flags |= AMDGPU_PTE_READABLE; - if (args->flags & AMDGPU_VM_PAGE_WRITEABLE) - va_flags |= AMDGPU_PTE_WRITEABLE; - if (args->flags & AMDGPU_VM_PAGE_EXECUTABLE) - va_flags |= AMDGPU_PTE_EXECUTABLE; - if (args->flags & AMDGPU_VM_PAGE_PRT) - va_flags |= AMDGPU_PTE_PRT; + va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); + r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 6a1f566632be..35f74c6d1f29 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -379,6 +379,21 @@ static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev, return 0; } +static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { @@ -1138,6 +1153,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = { .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v6_0_gart_set_pte_pde, .set_prt = gmc_v6_0_set_prt, + .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags }; static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 9f41d9fa5ed5..dc9b6d62d1ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -451,6 +451,21 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev, return 0; } +static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -1323,6 +1338,7 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = { .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v7_0_gart_set_pte_pde, .set_prt = gmc_v7_0_set_prt, + .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags }; static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 233ac3504fe9..c087b00598cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -563,6 +563,23 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev, return 0; } +static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -1562,6 +1579,7 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = { .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v8_0_gart_set_pte_pde, .set_prt = gmc_v8_0_set_prt, + .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags }; static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { -- cgit v1.2.3 From bbf282d884793f95b89dc35059f8703eaf48ca00 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 3 Mar 2017 17:26:10 -0500 Subject: drm/amdgpu: add asic callback to get memsize register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer asics use different registers so abstract it. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/cik.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/si.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/vi.c | 6 ++++++ 5 files changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 00553bf26e66..0cb305bf9c51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1221,6 +1221,8 @@ struct amdgpu_asic_funcs { /* static power management */ int (*get_pcie_lanes)(struct amdgpu_device *adev); void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes); + /* get config memsize register */ + u32 (*get_config_memsize)(struct amdgpu_device *adev); }; /* @@ -1680,6 +1682,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev)) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) +#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a70247203f18..c0d12e83d5ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -713,7 +713,7 @@ bool amdgpu_need_post(struct amdgpu_device *adev) return true; } /* then check MEM_SIZE, in case the crtcs are off */ - reg = RREG32(mmCONFIG_MEMSIZE); + reg = amdgpu_asic_get_config_memsize(adev); if (reg) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index c4d4b35e54ec..dbaedb4f2a6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1212,6 +1212,11 @@ static int cik_asic_reset(struct amdgpu_device *adev) return r; } +static u32 cik_get_config_memsize(struct amdgpu_device *adev) +{ + return RREG32(mmCONFIG_MEMSIZE); +} + static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock, u32 cntl_reg, u32 status_reg) { @@ -1641,6 +1646,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, .set_vce_clocks = &cik_set_vce_clocks, + .get_config_memsize = &cik_get_config_memsize, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index b71e3faa40db..c0b1aabf282f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -45,6 +45,7 @@ #include "gmc/gmc_6_0_d.h" #include "dce/dce_6_0_d.h" #include "uvd/uvd_4_0_d.h" +#include "bif/bif_3_0_d.h" static const u32 tahiti_golden_registers[] = { @@ -1155,6 +1156,11 @@ static int si_asic_reset(struct amdgpu_device *adev) return 0; } +static u32 si_get_config_memsize(struct amdgpu_device *adev) +{ + return RREG32(mmCONFIG_MEMSIZE); +} + static void si_vga_set_state(struct amdgpu_device *adev, bool state) { uint32_t temp; @@ -1206,6 +1212,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .get_xclk = &si_get_xclk, .set_uvd_clocks = &si_set_uvd_clocks, .set_vce_clocks = NULL, + .get_config_memsize = &si_get_config_memsize, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 7fa314c217c9..f4751a287bfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -751,6 +751,11 @@ static int vi_asic_reset(struct amdgpu_device *adev) return r; } +static u32 vi_get_config_memsize(struct amdgpu_device *adev) +{ + return RREG32(mmCONFIG_MEMSIZE); +} + static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock, u32 cntl_reg, u32 status_reg) { @@ -900,6 +905,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, .set_vce_clocks = &vi_set_vce_clocks, + .get_config_memsize = &vi_get_config_memsize, }; static int vi_common_early_init(void *handle) -- cgit v1.2.3 From 4df654d293c6424afdd28b1b27b487c565fffb3e Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Mon, 2 Jan 2017 10:07:33 -0500 Subject: drm/amdgpu: move amdgpu_uvd structure to uvd header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 30 +----------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0cb305bf9c51..d7f9f1e6ae60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -59,6 +59,7 @@ #include "amd_powerplay.h" #include "amdgpu_dpm.h" #include "amdgpu_acp.h" +#include "amdgpu_uvd.h" #include "gpu_scheduler.h" #include "amdgpu_virt.h" @@ -1033,35 +1034,6 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb); void amdgpu_get_pcie_info(struct amdgpu_device *adev); -/* - * UVD - */ -#define AMDGPU_DEFAULT_UVD_HANDLES 10 -#define AMDGPU_MAX_UVD_HANDLES 40 -#define AMDGPU_UVD_STACK_SIZE (200*1024) -#define AMDGPU_UVD_HEAP_SIZE (256*1024) -#define AMDGPU_UVD_SESSION_SIZE (50*1024) -#define AMDGPU_UVD_FIRMWARE_OFFSET 256 - -struct amdgpu_uvd { - struct amdgpu_bo *vcpu_bo; - void *cpu_addr; - uint64_t gpu_addr; - unsigned fw_version; - void *saved_bo; - unsigned max_handles; - atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; - struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; - struct delayed_work idle_work; - const struct firmware *fw; /* UVD firmware */ - struct amdgpu_ring ring; - struct amdgpu_irq_src irq; - bool address_64_bit; - bool use_ctx_buf; - struct amd_sched_entity entity; - uint32_t srbm_soft_reset; -}; - /* * VCE */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index c10682baccae..797210dd52de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -24,6 +24,32 @@ #ifndef __AMDGPU_UVD_H__ #define __AMDGPU_UVD_H__ +#define AMDGPU_DEFAULT_UVD_HANDLES 10 +#define AMDGPU_MAX_UVD_HANDLES 40 +#define AMDGPU_UVD_STACK_SIZE (200*1024) +#define AMDGPU_UVD_HEAP_SIZE (256*1024) +#define AMDGPU_UVD_SESSION_SIZE (50*1024) +#define AMDGPU_UVD_FIRMWARE_OFFSET 256 + +struct amdgpu_uvd { + struct amdgpu_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + unsigned fw_version; + void *saved_bo; + unsigned max_handles; + atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; + struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; + struct delayed_work idle_work; + const struct firmware *fw; /* UVD firmware */ + struct amdgpu_ring ring; + struct amdgpu_irq_src irq; + bool address_64_bit; + bool use_ctx_buf; + struct amd_sched_entity entity; + uint32_t srbm_soft_reset; +}; + int amdgpu_uvd_sw_init(struct amdgpu_device *adev); int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); -- cgit v1.2.3 From 5e5681788befb2d84d0780f81ea72821e6723ba1 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 10 Jan 2017 11:02:58 -0500 Subject: drm/amdgpu: move amdgpu_vce structure to vce header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 29 +---------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d7f9f1e6ae60..a17a54fc8f27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -60,6 +60,7 @@ #include "amdgpu_dpm.h" #include "amdgpu_acp.h" #include "amdgpu_uvd.h" +#include "amdgpu_vce.h" #include "gpu_scheduler.h" #include "amdgpu_virt.h" @@ -1034,34 +1035,6 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb); void amdgpu_get_pcie_info(struct amdgpu_device *adev); -/* - * VCE - */ -#define AMDGPU_MAX_VCE_HANDLES 16 -#define AMDGPU_VCE_FIRMWARE_OFFSET 256 - -#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0) -#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1) - -struct amdgpu_vce { - struct amdgpu_bo *vcpu_bo; - uint64_t gpu_addr; - unsigned fw_version; - unsigned fb_version; - atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; - struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES]; - uint32_t img_size[AMDGPU_MAX_VCE_HANDLES]; - struct delayed_work idle_work; - struct mutex idle_mutex; - const struct firmware *fw; /* VCE firmware */ - struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; - struct amdgpu_irq_src irq; - unsigned harvest_config; - struct amd_sched_entity entity; - uint32_t srbm_soft_reset; - unsigned num_rings; -}; - /* * SDMA */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index d98041f7508d..0a7f18c461e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -24,6 +24,31 @@ #ifndef __AMDGPU_VCE_H__ #define __AMDGPU_VCE_H__ +#define AMDGPU_MAX_VCE_HANDLES 16 +#define AMDGPU_VCE_FIRMWARE_OFFSET 256 + +#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0) +#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1) + +struct amdgpu_vce { + struct amdgpu_bo *vcpu_bo; + uint64_t gpu_addr; + unsigned fw_version; + unsigned fb_version; + atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; + struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES]; + uint32_t img_size[AMDGPU_MAX_VCE_HANDLES]; + struct delayed_work idle_work; + struct mutex idle_mutex; + const struct firmware *fw; /* VCE firmware */ + struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; + struct amdgpu_irq_src irq; + unsigned harvest_config; + struct amd_sched_entity entity; + uint32_t srbm_soft_reset; + unsigned num_rings; +}; + int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); -- cgit v1.2.3 From 0cdd500560e233aef4e0749c9f014e9ee8f4d752 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Feb 2017 16:01:58 -0500 Subject: amdgpu: detect if we are using atomfirmware or atombios for vbios (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supposedly atomfirmware rom header is 3.3 atombios is 1.1. v2: rebased on newer kernel Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a17a54fc8f27..2790129c0b76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1308,6 +1308,7 @@ struct amdgpu_device { bool have_disp_power_ref; /* BIOS */ + bool is_atom_fw; uint8_t *bios; uint32_t bios_size; struct amdgpu_bo *stollen_vga_memory; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 821f7cc2051f..365e735f6647 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -86,6 +86,18 @@ static bool check_atom_bios(uint8_t *bios, size_t size) return false; } +static bool is_atom_fw(uint8_t *bios) +{ + uint16_t bios_header_start = bios[0x48] | (bios[0x49] << 8); + uint8_t frev = bios[bios_header_start + 2]; + uint8_t crev = bios[bios_header_start + 3]; + + if ((frev < 3) || + ((frev == 3) && (crev < 3))) + return false; + + return true; +} /* If you boot an IGP board with a discrete card as the primary, * the IGP rom is not accessible via the rom bar as the IGP rom is @@ -419,26 +431,30 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) bool amdgpu_get_bios(struct amdgpu_device *adev) { if (amdgpu_atrm_get_bios(adev)) - return true; + goto success; if (amdgpu_acpi_vfct_bios(adev)) - return true; + goto success; if (igp_read_bios_from_vram(adev)) - return true; + goto success; if (amdgpu_read_bios(adev)) - return true; + goto success; if (amdgpu_read_bios_from_rom(adev)) - return true; + goto success; if (amdgpu_read_disabled_bios(adev)) - return true; + goto success; if (amdgpu_read_platform_bios(adev)) - return true; + goto success; DRM_ERROR("Unable to locate a BIOS ROM\n"); return false; + +success: + adev->is_atom_fw = is_atom_fw(adev->bios); + return true; } -- cgit v1.2.3 From a5bde2f964fa0657b8750156d9bc0ca156e18d2b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 23 Sep 2016 16:23:41 -0400 Subject: drm/amdgpu: add basic support for atomfirmware.h (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds basic support for asics that use atomfirmware.h to define their vbios tables. v2: rebase v3: squash in num scratch reg fix Acked-by: Christian König Reviewed-by: Ken Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 112 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h | 33 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 35 ++++--- 5 files changed, 172 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2814aad81752..9a4e9ec10646 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ - amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o + amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2790129c0b76..a7108ba94794 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -111,7 +111,7 @@ extern int amdgpu_vram_page_split; #define AMDGPU_IB_POOL_SIZE 16 #define AMDGPU_DEBUGFS_MAX_COMPONENTS 32 #define AMDGPUFB_CONN_LIMIT 4 -#define AMDGPU_BIOS_NUM_SCRATCH 8 +#define AMDGPU_BIOS_NUM_SCRATCH 16 /* max number of IP instances */ #define AMDGPU_MAX_SDMA_INSTANCES 2 @@ -1312,6 +1312,7 @@ struct amdgpu_device { uint8_t *bios; uint32_t bios_size; struct amdgpu_bo *stollen_vga_memory; + uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; /* Register/doorbell mmio */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c new file mode 100644 index 000000000000..4b9abd68e04f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -0,0 +1,112 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include +#include "amdgpu.h" +#include "atomfirmware.h" +#include "amdgpu_atomfirmware.h" +#include "atom.h" + +#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) + +bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) +{ + int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + uint16_t data_offset; + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, + NULL, NULL, &data_offset)) { + struct atom_firmware_info_v3_1 *firmware_info = + (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + + data_offset); + + if (le32_to_cpu(firmware_info->firmware_capability) & + ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION) + return true; + } + return false; +} + +void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev) +{ + int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + uint16_t data_offset; + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, + NULL, NULL, &data_offset)) { + struct atom_firmware_info_v3_1 *firmware_info = + (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + + data_offset); + + adev->bios_scratch_reg_offset = + le32_to_cpu(firmware_info->bios_scratch_reg_startaddr); + } +} + +void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) + adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i); +} + +void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) + WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); +} + +int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) +{ + struct atom_context *ctx = adev->mode_info.atom_context; + int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + vram_usagebyfirmware); + uint16_t data_offset; + int usage_bytes = 0; + + if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { + struct vram_usagebyfirmware_v2_1 *firmware_usage = + (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); + + DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n", + le32_to_cpu(firmware_usage->start_address_in_kb), + le16_to_cpu(firmware_usage->used_by_firmware_in_kb), + le16_to_cpu(firmware_usage->used_by_driver_in_kb)); + + usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) * 1024; + } + ctx->scratch_size_bytes = 0; + if (usage_bytes == 0) + usage_bytes = 20 * 1024; + /* allocate some scratch memory */ + ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL); + if (!ctx->scratch) + return -ENOMEM; + ctx->scratch_size_bytes = usage_bytes; + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h new file mode 100644 index 000000000000..d0c4dcd7fa96 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -0,0 +1,33 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_ATOMFIRMWARE_H__ +#define __AMDGPU_ATOMFIRMWARE_H__ + +bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); +void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); +void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev); +void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev); +int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index dfbfd56bcc25..fbacc13534d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -40,6 +40,7 @@ #include "amdgpu_i2c.h" #include "atom.h" #include "amdgpu_atombios.h" +#include "amdgpu_atomfirmware.h" #include "amd_pcie.h" #ifdef CONFIG_DRM_AMDGPU_SI #include "si.h" @@ -993,8 +994,13 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev) } mutex_init(&adev->mode_info.atom_context->mutex); - amdgpu_atombios_scratch_regs_init(adev); - amdgpu_atombios_allocate_fb_scratch(adev); + if (adev->is_atom_fw) { + amdgpu_atomfirmware_scratch_regs_init(adev); + amdgpu_atomfirmware_allocate_fb_scratch(adev); + } else { + amdgpu_atombios_scratch_regs_init(adev); + amdgpu_atombios_allocate_fb_scratch(adev); + } return 0; } @@ -1759,8 +1765,13 @@ static int amdgpu_resume(struct amdgpu_device *adev) static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { - if (amdgpu_atombios_has_gpu_virtualization_table(adev)) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + if (adev->is_atom_fw) { + if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + } else { + if (amdgpu_atombios_has_gpu_virtualization_table(adev)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + } } /** @@ -1931,14 +1942,16 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("GPU post is not needed\n"); } - /* Initialize clocks */ - r = amdgpu_atombios_get_clock_info(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); - goto failed; + if (!adev->is_atom_fw) { + /* Initialize clocks */ + r = amdgpu_atombios_get_clock_info(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); + return r; + } + /* init i2c buses */ + amdgpu_atombios_i2c_init(adev); } - /* init i2c buses */ - amdgpu_atombios_i2c_init(adev); /* Fence driver */ r = amdgpu_fence_driver_init(adev); -- cgit v1.2.3 From e635ee07456ac686b3c26ab3c5735936faebfb2e Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 1 Nov 2016 15:35:38 +0800 Subject: drm/amdgpu: use new flag to handle different firmware loading method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces a new flag named "amdgpu_firmware_load_type" to handle different firmware loading method. Since Vega10, there are three ways to load firmware. It would be better to use a flag and a fw_load_type kernel parameter to configure it. Acked-by: Christian König Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 67 +++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 3 ++ drivers/gpu/drm/amd/amdgpu/cik.c | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vi.c | 4 +- 10 files changed, 90 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a7108ba94794..c83d7bd5b712 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -81,7 +81,7 @@ extern int amdgpu_pcie_gen2; extern int amdgpu_msi; extern int amdgpu_lockup_timeout; extern int amdgpu_dpm; -extern int amdgpu_smc_load_fw; +extern int amdgpu_fw_load_type; extern int amdgpu_aspm; extern int amdgpu_runtime_pm; extern unsigned amdgpu_ip_block_mask; @@ -1063,9 +1063,15 @@ struct amdgpu_sdma { /* * Firmware */ +enum amdgpu_firmware_load_type { + AMDGPU_FW_LOAD_DIRECT = 0, + AMDGPU_FW_LOAD_SMU, + AMDGPU_FW_LOAD_PSP, +}; + struct amdgpu_firmware { struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM]; - bool smu_load; + enum amdgpu_firmware_load_type load_type; struct amdgpu_bo *fw_buf; unsigned int fw_size; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 778d16f835ab..7292f4e7bb1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -80,7 +80,7 @@ int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; int amdgpu_lockup_timeout = 0; int amdgpu_dpm = -1; -int amdgpu_smc_load_fw = 1; +int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; int amdgpu_runtime_pm = -1; unsigned amdgpu_ip_block_mask = 0xffffffff; @@ -140,8 +140,8 @@ module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(dpm, amdgpu_dpm, int, 0444); -MODULE_PARM_DESC(smc_load_fw, "SMC firmware loading(1 = enable, 0 = disable)"); -module_param_named(smc_load_fw, amdgpu_smc_load_fw, int, 0444); +MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)"); +module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444); MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(aspm, amdgpu_aspm, int, 0444); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index d56d200a3646..96a5113b948f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -163,7 +163,7 @@ static int amdgpu_pp_hw_init(void *handle) int ret = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->pp_enabled && adev->firmware.smu_load) + if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) amdgpu_ucode_init_bo(adev); if (adev->powerplay.ip_funcs->hw_init) @@ -190,7 +190,7 @@ static int amdgpu_pp_hw_fini(void *handle) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); - if (adev->pp_enabled && adev->firmware.smu_load) + if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) amdgpu_ucode_fini_bo(adev); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index be16377128c3..73c3e664d99a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -217,6 +217,49 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, return true; } +enum amdgpu_firmware_load_type +amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) +{ + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + return AMDGPU_FW_LOAD_DIRECT; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_HAWAII: + case CHIP_MULLINS: + return AMDGPU_FW_LOAD_DIRECT; +#endif + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + if (!load_type) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_SMU; + case CHIP_VEGA10: + if (!load_type) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_PSP; + default: + DRM_ERROR("Unknow firmware load type\n"); + } + + return AMDGPU_FW_LOAD_DIRECT; +} + static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode, uint64_t mc_addr, void *kptr) { @@ -273,7 +316,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) uint64_t fw_mc_addr; void *fw_buf_ptr = NULL; uint64_t fw_offset = 0; - int i, err; + int i, err, max; struct amdgpu_firmware_info *ucode = NULL; const struct common_firmware_header *header = NULL; @@ -306,7 +349,16 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) amdgpu_bo_unreserve(*bo); - for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) { + /* + * if SMU loaded firmware, it needn't add SMC, UVD, and VCE + * ucode info here + */ + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + max = AMDGPU_UCODE_ID_MAXIMUM - 3; + else + max = AMDGPU_UCODE_ID_MAXIMUM; + + for (i = 0; i < max; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { header = (const struct common_firmware_header *)ucode->fw->data; @@ -331,7 +383,8 @@ failed_pin: failed_reserve: amdgpu_bo_unref(bo); failed: - adev->firmware.smu_load = false; + if (err) + adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; return err; } @@ -340,8 +393,14 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev) { int i; struct amdgpu_firmware_info *ucode = NULL; + int max; + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + max = AMDGPU_UCODE_ID_MAXIMUM - 3; + else + max = AMDGPU_UCODE_ID_MAXIMUM; - for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) { + for (i = 0; i < max; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { ucode->mc_addr = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 19a584cd4527..2b212b04ce54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -176,4 +176,7 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, int amdgpu_ucode_init_bo(struct amdgpu_device *adev); int amdgpu_ucode_fini_bo(struct amdgpu_device *adev); +enum amdgpu_firmware_load_type +amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index dbaedb4f2a6b..9d33e5641419 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1785,6 +1785,8 @@ static int cik_common_early_init(void *handle) return -EINVAL; } + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + amdgpu_get_pcie_info(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1e177cd9b46b..423ed68c2e52 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1040,7 +1040,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) } } - if (adev->firmware.smu_load) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; info->fw = adev->gfx.pfp_fw; @@ -4253,7 +4253,7 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) gfx_v8_0_init_pg(adev); if (!adev->pp_enabled) { - if (!adev->firmware.smu_load) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { /* legacy rlc firmware loading */ r = gfx_v8_0_rlc_load_microcode(adev); if (r) @@ -5269,7 +5269,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) gfx_v8_0_enable_gui_idle_interrupt(adev, false); if (!adev->pp_enabled) { - if (!adev->firmware.smu_load) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { /* legacy firmware loading */ r = gfx_v8_0_cp_gfx_load_microcode(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 1a4b351f350b..182de5dc2081 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -158,7 +158,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) if (adev->sdma.instance[i].feature_version >= 20) adev->sdma.instance[i].burst_nop = true; - if (adev->firmware.smu_load) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; @@ -562,7 +562,7 @@ static int sdma_v2_4_start(struct amdgpu_device *adev) int r; if (!adev->pp_enabled) { - if (!adev->firmware.smu_load) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { r = sdma_v2_4_load_microcode(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 5f7812d1c90d..5be84df27cad 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -310,7 +310,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) if (adev->sdma.instance[i].feature_version >= 20) adev->sdma.instance[i].burst_nop = true; - if (adev->firmware.smu_load) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; @@ -771,7 +771,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) int r, i; if (!adev->pp_enabled) { - if (!adev->firmware.smu_load) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { r = sdma_v3_0_load_microcode(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 1ff36724fb2a..1fe654e8cb7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1117,8 +1117,8 @@ static int vi_common_early_init(void *handle) return -EINVAL; } - if (amdgpu_smc_load_fw && smc_enabled) - adev->firmware.smu_load = true; + /* vi use smc load by default */ + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); amdgpu_get_pcie_info(adev); -- cgit v1.2.3 From d0e95758e3b122c5fea63ce5c3898dcc5e5ea846 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Mon, 12 Dec 2016 13:40:37 -0500 Subject: drm/amdgpu: gb_addr_config struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Christian König Signed-off-by: Andrey Grodzovsky Reviewed-by: Ken Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c83d7bd5b712..4b92f36e18e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -829,6 +829,15 @@ struct amdgpu_rb_config { uint32_t raster_config_1; }; +struct gb_addr_config { + uint16_t pipe_interleave_size; + uint8_t num_pipes; + uint8_t max_compress_frags; + uint8_t num_banks; + uint8_t num_se; + uint8_t num_rb_per_se; +}; + struct amdgpu_gfx_config { unsigned max_shader_engines; unsigned max_tile_pipes; @@ -858,6 +867,7 @@ struct amdgpu_gfx_config { uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; + struct gb_addr_config gb_addr_config_fields; struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; /* gfx configure feature */ -- cgit v1.2.3 From 39807b939e2ca619d57eb9cae31f7d70a36392ed Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Fri, 18 Mar 2016 15:41:42 +0800 Subject: drm/amdgpu: add 64bit doorbell assignments Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4b92f36e18e6..75525980a5a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -618,6 +618,74 @@ struct amdgpu_doorbell { u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */ }; +/* + * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space + */ +typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT +{ + /* + * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in + * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range. + * Compute related doorbells are allocated from 0x00 to 0x8a + */ + + + /* kernel scheduling */ + AMDGPU_DOORBELL64_KIQ = 0x00, + + /* HSA interface queue and debug queue */ + AMDGPU_DOORBELL64_HIQ = 0x01, + AMDGPU_DOORBELL64_DIQ = 0x02, + + /* Compute engines */ + AMDGPU_DOORBELL64_MEC_RING0 = 0x03, + AMDGPU_DOORBELL64_MEC_RING1 = 0x04, + AMDGPU_DOORBELL64_MEC_RING2 = 0x05, + AMDGPU_DOORBELL64_MEC_RING3 = 0x06, + AMDGPU_DOORBELL64_MEC_RING4 = 0x07, + AMDGPU_DOORBELL64_MEC_RING5 = 0x08, + AMDGPU_DOORBELL64_MEC_RING6 = 0x09, + AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, + + /* User queue doorbell range (128 doorbells) */ + AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, + AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, + + /* Graphics engine */ + AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, + + /* + * Other graphics doorbells can be allocated here: from 0x8c to 0xef + * Graphics voltage island aperture 1 + * default non-graphics QWORD index is 0xF0 - 0xFF inclusive + */ + + /* sDMA engines */ + AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1, + AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3, + + /* Interrupt handler */ + AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */ + AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */ + AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */ + + /* VCN engine */ + AMDGPU_DOORBELL64_VCN0 = 0xF8, + AMDGPU_DOORBELL64_VCN1 = 0xF9, + AMDGPU_DOORBELL64_VCN2 = 0xFA, + AMDGPU_DOORBELL64_VCN3 = 0xFB, + AMDGPU_DOORBELL64_VCN4 = 0xFC, + AMDGPU_DOORBELL64_VCN5 = 0xFD, + AMDGPU_DOORBELL64_VCN6 = 0xFE, + AMDGPU_DOORBELL64_VCN7 = 0xFF, + + AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, + AMDGPU_DOORBELL64_INVALID = 0xFFFF +} AMDGPU_DOORBELL64_ASSIGNMENT; + + void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, phys_addr_t *aperture_base, size_t *aperture_size, -- cgit v1.2.3 From bce23e00f3369ce8c32c90f087e37c01f83002d1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Mar 2017 12:52:08 -0400 Subject: drm/amdgpu: add NGG parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NGG (Next Generation Graphics) is a new feature in GFX9.0. This adds the relevant parameters. Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 29 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 +++++++ include/uapi/drm/amdgpu_drm.h | 8 ++++++++ 4 files changed, 65 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 75525980a5a2..886f105958a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -103,6 +103,11 @@ extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern unsigned amdgpu_pp_feature_mask; extern int amdgpu_vram_page_split; +extern int amdgpu_ngg; +extern int amdgpu_prim_buf_per_se; +extern int amdgpu_pos_buf_per_se; +extern int amdgpu_cntl_sb_buf_per_se; +extern int amdgpu_param_buf_per_se; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ @@ -957,6 +962,28 @@ struct amdgpu_gfx_funcs { void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); }; +struct amdgpu_ngg_buf { + struct amdgpu_bo *bo; + uint64_t gpu_addr; + uint32_t size; + uint32_t bo_size; +}; + +enum { + PRIM = 0, + POS, + CNTL, + PARAM, + NGG_BUF_MAX +}; + +struct amdgpu_ngg { + struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; + uint32_t gds_reserve_addr; + uint32_t gds_reserve_size; + bool init; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; @@ -1000,6 +1027,8 @@ struct amdgpu_gfx { uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; bool in_reset; + /* NGG */ + struct amdgpu_ngg ngg; }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7292f4e7bb1a..2b05c891747b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -103,6 +103,11 @@ unsigned amdgpu_pg_mask = 0xffffffff; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; unsigned amdgpu_pp_feature_mask = 0xffffffff; +int amdgpu_ngg = 0; +int amdgpu_prim_buf_per_se = 0; +int amdgpu_pos_buf_per_se = 0; +int amdgpu_cntl_sb_buf_per_se = 0; +int amdgpu_param_buf_per_se = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -210,6 +215,22 @@ MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)"); module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); +MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))"); +module_param_named(ngg, amdgpu_ngg, int, 0444); + +MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)"); +module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); + +MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)"); +module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); + +MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)"); +module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); + +MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)"); +module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); + + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index ef91c8e2b8e3..a6d15978d821 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -541,6 +541,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; + if (amdgpu_ngg) { + dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; + dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; + dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; + dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; + } + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5d8e7090fe9e..d3f121a02bed 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -743,6 +743,14 @@ struct drm_amdgpu_info_device { __u32 vce_harvest_config; /* gfx double offchip LDS buffers */ __u32 gc_double_offchip_lds_buf; + /* NGG Primitive Buffer */ + __u64 prim_buf_gpu_addr; + /* NGG Position Buffer */ + __u64 pos_buf_gpu_addr; + /* NGG Control Sideband */ + __u64 cntl_sb_buf_gpu_addr; + /* NGG Parameter Cache */ + __u64 param_buf_gpu_addr; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3 From 2445b22751a0c039c2d1f35412e45350e847855d Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Fri, 3 Mar 2017 16:20:35 -0500 Subject: drm/amdgpu: rework common ucode handling for vega10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle ucode differences in vega10. Acked-by: Christian König Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 70 +++++++++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 5 +++ 3 files changed, 53 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 886f105958a5..f990eecacb11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1181,6 +1181,7 @@ struct amdgpu_firmware { enum amdgpu_firmware_load_type load_type; struct amdgpu_bo *fw_buf; unsigned int fw_size; + unsigned int max_ucodes; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 73c3e664d99a..a1891c93cdbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -260,10 +260,12 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; } -static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode, - uint64_t mc_addr, void *kptr) +static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, + struct amdgpu_firmware_info *ucode, + uint64_t mc_addr, void *kptr) { const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr = NULL; if (NULL == ucode->fw) return 0; @@ -276,11 +278,35 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode, header = (const struct common_firmware_header *)ucode->fw->data; - ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); - - memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + - le32_to_cpu(header->ucode_array_offset_bytes)), - ucode->ucode_size); + cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP || + (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) { + ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); + + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes)), + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1 || + ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2) { + ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4; + + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes)), + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT || + ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) { + ucode->ucode_size = le32_to_cpu(cp_hdr->jt_size) * 4; + + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes) + + le32_to_cpu(cp_hdr->jt_offset) * 4), + ucode->ucode_size); + } return 0; } @@ -306,17 +332,18 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, (le32_to_cpu(header->jt_offset) * 4); memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4); + ucode->ucode_size += le32_to_cpu(header->jt_size) * 4; + return 0; } - int amdgpu_ucode_init_bo(struct amdgpu_device *adev) { struct amdgpu_bo **bo = &adev->firmware.fw_buf; uint64_t fw_mc_addr; void *fw_buf_ptr = NULL; uint64_t fw_offset = 0; - int i, err, max; + int i, err; struct amdgpu_firmware_info *ucode = NULL; const struct common_firmware_header *header = NULL; @@ -349,29 +376,32 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) amdgpu_bo_unreserve(*bo); + memset(fw_buf_ptr, 0, adev->firmware.fw_size); + /* * if SMU loaded firmware, it needn't add SMC, UVD, and VCE * ucode info here */ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - max = AMDGPU_UCODE_ID_MAXIMUM - 3; + adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4; else - max = AMDGPU_UCODE_ID_MAXIMUM; + adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM; - for (i = 0; i < max; i++) { + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { header = (const struct common_firmware_header *)ucode->fw->data; - amdgpu_ucode_init_single_fw(ucode, fw_mc_addr + fw_offset, - fw_buf_ptr + fw_offset); - if (i == AMDGPU_UCODE_ID_CP_MEC1) { + amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset, + (void *)((uint8_t *)fw_buf_ptr + fw_offset)); + if (i == AMDGPU_UCODE_ID_CP_MEC1 && + adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { const struct gfx_firmware_header_v1_0 *cp_hdr; cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset, fw_buf_ptr + fw_offset); fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); } - fw_offset += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE); } } return 0; @@ -393,14 +423,8 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev) { int i; struct amdgpu_firmware_info *ucode = NULL; - int max; - - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - max = AMDGPU_UCODE_ID_MAXIMUM - 3; - else - max = AMDGPU_UCODE_ID_MAXIMUM; - for (i = 0; i < max; i++) { + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { ucode->mc_addr = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 2b212b04ce54..39a074980c8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -128,9 +128,14 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_PFP, AMDGPU_UCODE_ID_CP_ME, AMDGPU_UCODE_ID_CP_MEC1, + AMDGPU_UCODE_ID_CP_MEC1_JT, AMDGPU_UCODE_ID_CP_MEC2, + AMDGPU_UCODE_ID_CP_MEC2_JT, AMDGPU_UCODE_ID_RLC_G, AMDGPU_UCODE_ID_STORAGE, + AMDGPU_UCODE_ID_SMC, + AMDGPU_UCODE_ID_UVD, + AMDGPU_UCODE_ID_VCE, AMDGPU_UCODE_ID_MAXIMUM, }; -- cgit v1.2.3 From e60f8db5e4c85d7f8868a06b02f06c0ae0e4c332 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Thu, 9 Mar 2017 11:36:26 -0500 Subject: drm/amdgpu: Add GMC 9.0 support (v2) On SOC-15 parts, the GMC (Graphics Memory Controller) consists of two hubs: GFX (graphics and compute) and MM (sdma, uvd, vce). v2: drop sdma from Makefile, fix duplicate return statement. Signed-off-by: Alex Xie Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 30 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28 +- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 447 +++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h | 35 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 825 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h | 30 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 585 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h | 35 ++ drivers/gpu/drm/amd/include/amd_shared.h | 2 + 10 files changed, 2013 insertions(+), 7 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c create mode 100644 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h create mode 100644 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c create mode 100644 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h create mode 100644 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c create mode 100644 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 4ae5ddc9ac69..ccbc4c0f5b42 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -39,7 +39,8 @@ amdgpu-y += \ # add GMC block amdgpu-y += \ gmc_v7_0.o \ - gmc_v8_0.o + gmc_v8_0.o \ + gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o # add IH block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f990eecacb11..13ea68a81ac1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -121,6 +121,11 @@ extern int amdgpu_param_buf_per_se; /* max number of IP instances */ #define AMDGPU_MAX_SDMA_INSTANCES 2 +/* max number of VMHUB */ +#define AMDGPU_MAX_VMHUBS 2 +#define AMDGPU_MMHUB 0 +#define AMDGPU_GFXHUB 1 + /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (8 << 20) @@ -308,6 +313,12 @@ struct amdgpu_gart_funcs { uint32_t flags); }; +/* provided by the mc block */ +struct amdgpu_mc_funcs { + /* adjust mc addr in fb for APU case */ + u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr); +}; + /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ @@ -556,6 +567,21 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, dma_addr_t *dma_addr, uint64_t flags); int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); +/* + * VMHUB structures, functions & helpers + */ +struct amdgpu_vmhub { + uint32_t ctx0_ptb_addr_lo32; + uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_req; + uint32_t vm_inv_eng0_ack; + uint32_t vm_context0_cntl; + uint32_t vm_l2_pro_fault_status; + uint32_t vm_l2_pro_fault_cntl; + uint32_t (*get_invalidate_req)(unsigned int vm_id); + uint32_t (*get_vm_protection_bits)(void); +}; + /* * GPU MC structures, functions & helpers */ @@ -589,6 +615,9 @@ struct amdgpu_mc { u64 shared_aperture_end; u64 private_aperture_start; u64 private_aperture_end; + /* protects concurrent invalidation */ + spinlock_t invalidate_lock; + const struct amdgpu_mc_funcs *mc_funcs; }; /* @@ -1473,6 +1502,7 @@ struct amdgpu_device { struct amdgpu_gart gart; struct amdgpu_dummy_page dummy_page; struct amdgpu_vm_manager vm_manager; + struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; /* memory management */ struct amdgpu_mman mman; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2d9ec9cac447..3d6b9a904350 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -447,6 +447,16 @@ static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) return false; } +static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +{ + u64 addr = mc_addr; + + if (adev->mc.mc_funcs && adev->mc.mc_funcs->adjust_mc_addr) + addr = adev->mc.mc_funcs->adjust_mc_addr(adev, addr); + + return addr; +} + /** * amdgpu_vm_flush - hardware flush the vm * @@ -477,9 +487,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) if (ring->funcs->emit_vm_flush && (job->vm_needs_flush || amdgpu_vm_is_gpu_reset(adev, id))) { struct dma_fence *fence; + u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); - trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); + trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id); + amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) @@ -715,15 +726,18 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { + uint64_t pt_addr = + amdgpu_vm_adjust_mc_addr(adev, last_pt); + if (shadow) amdgpu_vm_do_set_ptes(¶ms, last_shadow, - last_pt, count, + pt_addr, count, incr, AMDGPU_PTE_VALID); amdgpu_vm_do_set_ptes(¶ms, last_pde, - last_pt, count, incr, + pt_addr, count, incr, AMDGPU_PTE_VALID); } @@ -737,11 +751,13 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } if (count) { + uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); + if (vm->page_directory->shadow) - amdgpu_vm_do_set_ptes(¶ms, last_shadow, last_pt, + amdgpu_vm_do_set_ptes(¶ms, last_shadow, pt_addr, count, incr, AMDGPU_PTE_VALID); - amdgpu_vm_do_set_ptes(¶ms, last_pde, last_pt, + amdgpu_vm_do_set_ptes(¶ms, last_pde, pt_addr, count, incr, AMDGPU_PTE_VALID); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c new file mode 100644 index 000000000000..1ff019cda42d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -0,0 +1,447 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "gfxhub_v1_0.h" + +#include "vega10/soc15ip.h" +#include "vega10/GC/gc_9_0_offset.h" +#include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/GC/gc_9_0_default.h" +#include "vega10/vega10_enum.h" + +#include "soc15_common.h" + +int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +{ + u32 tmp; + u64 value; + u32 i; + + /* Program MC. */ + /* Update configuration */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), + adev->mc.vram_start >> 18); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), + adev->mc.vram_end >> 18); + + value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32(SOC15_REG_OFFSET(GC, 0, + mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), + (u32)(value >> 12)); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), + (u32)(value >> 44)); + + /* Disable AGP. */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF); + + /* GART Enable. */ + + /* Setup TLB control */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, + 3); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 1); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, + 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ECO_BITS, + 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + MTYPE, + MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ATC_EN, + 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + + /* Setup L2 cache */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, + 0); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0);/* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, + 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, + 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp); + + tmp = mmVM_L2_CNTL3_DEFAULT; + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4)); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, + 0); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, + 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4), tmp); + + /* setup context0 */ + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), + (u32)(adev->mc.gtt_start >> 12)); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), + (u32)(adev->mc.gtt_start >> 44)); + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), + (u32)(adev->mc.gtt_end >> 12)); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), + (u32)(adev->mc.gtt_end >> 44)); + + BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); + value = adev->gart.table_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + value &= 0x0000FFFFFFFFF000ULL; + value |= 0x1; /*valid bit*/ + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), + (u32)value); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), + (u32)(value >> 32)); + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), + (u32)(adev->dummy_page.addr >> 12)); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), + (u32)(adev->dummy_page.addr >> 44)); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, + 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp); + + /* Disable identity aperture.*/ + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); + + for (i = 0; i <= 14; i++) { + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + amdgpu_vm_block_size - 9); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, + adev->vm_manager.max_pfn - 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, 0); + } + + + return 0; +} + +void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0); + + /* Setup TLB control */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + + /* Setup L2 cache */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0); +} + +/** + * gfxhub_v1_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); +} + +static uint32_t gfxhub_v1_0_get_invalidate_req(unsigned int vm_id) +{ + u32 req = 0; + + /* invalidate using legacy mode on vm_id*/ + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vm_id); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static uint32_t gfxhub_v1_0_get_vm_protection_bits(void) +{ + return (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK); +} + +static int gfxhub_v1_0_early_init(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_late_init(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + + hub->get_invalidate_req = gfxhub_v1_0_get_invalidate_req; + hub->get_vm_protection_bits = gfxhub_v1_0_get_vm_protection_bits; + + return 0; +} + +static int gfxhub_v1_0_sw_fini(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + + 2 * i, 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + + 2 * i, 0x1f); + } + + return 0; +} + +static int gfxhub_v1_0_hw_fini(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_suspend(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_resume(void *handle) +{ + return 0; +} + +static bool gfxhub_v1_0_is_idle(void *handle) +{ + return true; +} + +static int gfxhub_v1_0_wait_for_idle(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_soft_reset(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int gfxhub_v1_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = { + .name = "gfxhub_v1_0", + .early_init = gfxhub_v1_0_early_init, + .late_init = gfxhub_v1_0_late_init, + .sw_init = gfxhub_v1_0_sw_init, + .sw_fini = gfxhub_v1_0_sw_fini, + .hw_init = gfxhub_v1_0_hw_init, + .hw_fini = gfxhub_v1_0_hw_fini, + .suspend = gfxhub_v1_0_suspend, + .resume = gfxhub_v1_0_resume, + .is_idle = gfxhub_v1_0_is_idle, + .wait_for_idle = gfxhub_v1_0_wait_for_idle, + .soft_reset = gfxhub_v1_0_soft_reset, + .set_clockgating_state = gfxhub_v1_0_set_clockgating_state, + .set_powergating_state = gfxhub_v1_0_set_powergating_state, +}; + +const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GFXHUB, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &gfxhub_v1_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h new file mode 100644 index 000000000000..5129a8ff0932 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFXHUB_V1_0_H__ +#define __GFXHUB_V1_0_H__ + +int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev); +void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); +void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value); + +extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs; +extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c new file mode 100644 index 000000000000..c069db71afae --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -0,0 +1,825 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include "amdgpu.h" +#include "gmc_v9_0.h" + +#include "vega10/soc15ip.h" +#include "vega10/HDP/hdp_4_0_offset.h" +#include "vega10/HDP/hdp_4_0_sh_mask.h" +#include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/vega10_enum.h" + +#include "soc15_common.h" + +#include "nbio_v6_1.h" +#include "gfxhub_v1_0.h" +#include "mmhub_v1_0.h" + +#define mmDF_CS_AON0_DramBaseAddress0 0x0044 +#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0 +//DF_CS_AON0_DramBaseAddress0 +#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 +#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 +#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4 +#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8 +#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc +#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L +#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L +#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L +#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L +#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L + +/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ +#define AMDGPU_NUM_OF_VMIDS 8 + +static const u32 golden_settings_vega10_hdp[] = +{ + 0xf64, 0x0fffffff, 0x00000000, + 0xf65, 0x0fffffff, 0x00000000, + 0xf66, 0x0fffffff, 0x00000000, + 0xf67, 0x0fffffff, 0x00000000, + 0xf68, 0x0fffffff, 0x00000000, + 0xf6a, 0x0fffffff, 0x00000000, + 0xf6b, 0x0fffffff, 0x00000000, + 0xf6c, 0x0fffffff, 0x00000000, + 0xf6d, 0x0fffffff, 0x00000000, + 0xf6e, 0x0fffffff, 0x00000000, +}; + +static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + struct amdgpu_vmhub *hub; + u32 tmp, reg, bits, i; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + /* MM HUB */ + hub = &adev->vmhub[AMDGPU_MMHUB]; + bits = hub->get_vm_protection_bits(); + for (i = 0; i< 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp &= ~bits; + WREG32(reg, tmp); + } + + /* GFX HUB */ + hub = &adev->vmhub[AMDGPU_GFXHUB]; + bits = hub->get_vm_protection_bits(); + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp &= ~bits; + WREG32(reg, tmp); + } + break; + case AMDGPU_IRQ_STATE_ENABLE: + /* MM HUB */ + hub = &adev->vmhub[AMDGPU_MMHUB]; + bits = hub->get_vm_protection_bits(); + for (i = 0; i< 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp |= bits; + WREG32(reg, tmp); + } + + /* GFX HUB */ + hub = &adev->vmhub[AMDGPU_GFXHUB]; + bits = hub->get_vm_protection_bits(); + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp |= bits; + WREG32(reg, tmp); + } + break; + default: + break; + } + + return 0; +} + +static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB]; + struct amdgpu_vmhub *mmhub = &adev->vmhub[AMDGPU_MMHUB]; + uint32_t status; + u64 addr; + + addr = (u64)entry->src_data[0] << 12; + addr |= ((u64)entry->src_data[1] & 0xf) << 44; + + if (entry->vm_id_src) { + status = RREG32(mmhub->vm_l2_pro_fault_status); + WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1); + } else { + status = RREG32(gfxhub->vm_l2_pro_fault_status); + WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1); + } + + DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) " + "at page 0x%016llx from %d\n" + "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + entry->vm_id_src ? "mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id, + addr, entry->client_id, status); + + return 0; +} + +static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { + .set = gmc_v9_0_vm_fault_interrupt_state, + .process = gmc_v9_0_process_interrupt, +}; + +static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->mc.vm_fault.num_types = 1; + adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs; +} + +/* + * GART + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the amdgpu vm/hsa code. + */ + +/** + * gmc_v9_0_gart_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * + * Flush the TLB for the requested page table. + */ +static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, + uint32_t vmid) +{ + /* Use register 17 for GART */ + const unsigned eng = 17; + unsigned i, j; + + /* flush hdp cache */ + nbio_v6_1_hdp_flush(adev); + + spin_lock(&adev->mc.invalidate_lock); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmhub *hub = &adev->vmhub[i]; + u32 tmp = hub->get_invalidate_req(vmid); + + WREG32(hub->vm_inv_eng0_req + eng, tmp); + + /* Busy wait for ACK.*/ + for (j = 0; j < 100; j++) { + tmp = RREG32(hub->vm_inv_eng0_ack + eng); + tmp &= 1 << vmid; + if (tmp) + break; + cpu_relax(); + } + if (j < 100) + continue; + + /* Wait for ACK with a delay.*/ + for (j = 0; j < adev->usec_timeout; j++) { + tmp = RREG32(hub->vm_inv_eng0_ack + eng); + tmp &= 1 << vmid; + if (tmp) + break; + udelay(1); + } + if (j < adev->usec_timeout) + continue; + + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); + } + + spin_unlock(&adev->mc.invalidate_lock); +} + +/** + * gmc_v9_0_gart_set_pte_pde - update the page tables using MMIO + * + * @adev: amdgpu_device pointer + * @cpu_pt_addr: cpu address of the page table + * @gpu_page_idx: entry in the page table to update + * @addr: dst addr to write into pte/pde + * @flags: access flags + * + * Update the page tables using the CPU. + */ +static int gmc_v9_0_gart_set_pte_pde(struct amdgpu_device *adev, + void *cpu_pt_addr, + uint32_t gpu_page_idx, + uint64_t addr, + uint64_t flags) +{ + void __iomem *ptr = (void *)cpu_pt_addr; + uint64_t value; + + /* + * PTE format on VEGA 10: + * 63:59 reserved + * 58:57 mtype + * 56 F + * 55 L + * 54 P + * 53 SW + * 52 T + * 50:48 reserved + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 Z + * 2 snooped + * 1 system + * 0 valid + * + * PDE format on VEGA 10: + * 63:59 block fragment size + * 58:55 reserved + * 54 P + * 53:48 reserved + * 47:6 physical base address of PD or PTE + * 5:3 reserved + * 2 C + * 1 system + * 0 valid + */ + + /* + * The following is for PTE only. GART does not have PDEs. + */ + value = addr & 0x0000FFFFFFFFF000ULL; + value |= flags; + writeq(value, ptr + (gpu_page_idx * 8)); + return 0; +} + +static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) + +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + + switch (flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_NC: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_CC: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_UC); + break; + default: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + break; + } + + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + +static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { + .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, + .set_pte_pde = gmc_v9_0_gart_set_pte_pde, + .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags +}; + +static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) +{ + if (adev->gart.gart_funcs == NULL) + adev->gart.gart_funcs = &gmc_v9_0_gart_funcs; +} + +static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +{ + return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start; +} + +static const struct amdgpu_mc_funcs gmc_v9_0_mc_funcs = { + .adjust_mc_addr = gmc_v9_0_adjust_mc_addr, +}; + +static void gmc_v9_0_set_mc_funcs(struct amdgpu_device *adev) +{ + adev->mc.mc_funcs = &gmc_v9_0_mc_funcs; +} + +static int gmc_v9_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gmc_v9_0_set_gart_funcs(adev); + gmc_v9_0_set_mc_funcs(adev); + gmc_v9_0_set_irq_funcs(adev); + + return 0; +} + +static int gmc_v9_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); +} + +static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, + struct amdgpu_mc *mc) +{ + u64 base = mmhub_v1_0_get_fb_location(adev); + amdgpu_vram_location(adev, &adev->mc, base); + adev->mc.gtt_base_align = 0; + amdgpu_gtt_location(adev, mc); +} + +/** + * gmc_v9_0_mc_init - initialize the memory controller driver params + * + * @adev: amdgpu_device pointer + * + * Look up the amount of vram, vram width, and decide how to place + * vram and gart within the GPU's physical address space. + * Returns 0 for success. + */ +static int gmc_v9_0_mc_init(struct amdgpu_device *adev) +{ + u32 tmp; + int chansize, numchan; + + /* hbm memory channel size */ + chansize = 128; + + tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0)); + tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + switch (tmp) { + case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 0; + break; + case 3: + numchan = 4; + break; + case 4: + numchan = 0; + break; + case 5: + numchan = 8; + break; + case 6: + numchan = 0; + break; + case 7: + numchan = 16; + break; + case 8: + numchan = 2; + break; + } + adev->mc.vram_width = numchan * chansize; + + /* Could aper size report 0 ? */ + adev->mc.aper_base = pci_resource_start(adev->pdev, 0); + adev->mc.aper_size = pci_resource_len(adev->pdev, 0); + /* size in MB on si */ + adev->mc.mc_vram_size = + nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL; + adev->mc.real_vram_size = adev->mc.mc_vram_size; + adev->mc.visible_vram_size = adev->mc.aper_size; + + /* In case the PCI BAR is larger than the actual amount of vram */ + if (adev->mc.visible_vram_size > adev->mc.real_vram_size) + adev->mc.visible_vram_size = adev->mc.real_vram_size; + + /* unless the user had overridden it, set the gart + * size equal to the 1024 or vram, whichever is larger. + */ + if (amdgpu_gart_size == -1) + adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + else + adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; + + gmc_v9_0_vram_gtt_location(adev, &adev->mc); + + return 0; +} + +static int gmc_v9_0_gart_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->gart.robj) { + WARN(1, "VEGA10 PCIE GART already initialized\n"); + return 0; + } + /* Initialize common gart structure */ + r = amdgpu_gart_init(adev); + if (r) + return r; + adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE(MTYPE_UC) | + AMDGPU_PTE_EXECUTABLE; + return amdgpu_gart_table_vram_alloc(adev); +} + +/* + * vm + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the amdgpu vm/hsa code. + */ +/** + * gmc_v9_0_vm_init - vm init callback + * + * @adev: amdgpu_device pointer + * + * Inits vega10 specific vm parameters (number of VMs, base of vram for + * VMIDs 1-15) (vega10). + * Returns 0 for success. + */ +static int gmc_v9_0_vm_init(struct amdgpu_device *adev) +{ + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + amdgpu_vm_manager_init(adev); + + /* base offset of vram pages */ + /*XXX This value is not zero for APU*/ + adev->vm_manager.vram_base_offset = 0; + + return 0; +} + +/** + * gmc_v9_0_vm_fini - vm fini callback + * + * @adev: amdgpu_device pointer + * + * Tear down any asic specific VM setup. + */ +static void gmc_v9_0_vm_fini(struct amdgpu_device *adev) +{ + return; +} + +static int gmc_v9_0_sw_init(void *handle) +{ + int r; + int dma_bits; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + spin_lock_init(&adev->mc.invalidate_lock); + + if (adev->flags & AMD_IS_APU) { + adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + } else { + /* XXX Don't know how to get VRAM type yet. */ + adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM; + } + + /* This interrupt is VMC page fault.*/ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, + &adev->mc.vm_fault); + + if (r) + return r; + + /* Adjust VM size here. + * Currently default to 64GB ((16 << 20) 4k pages). + * Max GPUVM size is 48 bits. + */ + adev->vm_manager.max_pfn = amdgpu_vm_size << 18; + + /* Set the internal MC address mask + * This is the max address of the GPU's + * internal address space. + */ + adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + + /* set DMA mask + need_dma32 flags. + * PCIE - can handle 44-bits. + * IGP - can handle 44-bits + * PCI - dma32 for legacy pci gart, 44 bits on vega10 + */ + adev->need_dma32 = false; + dma_bits = adev->need_dma32 ? 32 : 44; + r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + if (r) { + adev->need_dma32 = true; + dma_bits = 32; + printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + } + r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + if (r) { + pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); + printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); + } + + r = gmc_v9_0_mc_init(adev); + if (r) + return r; + + /* Memory manager */ + r = amdgpu_bo_init(adev); + if (r) + return r; + + r = gmc_v9_0_gart_init(adev); + if (r) + return r; + + if (!adev->vm_manager.enabled) { + r = gmc_v9_0_vm_init(adev); + if (r) { + dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); + return r; + } + adev->vm_manager.enabled = true; + } + return r; +} + +/** + * gmc_v8_0_gart_fini - vm fini callback + * + * @adev: amdgpu_device pointer + * + * Tears down the driver GART/VM setup (CIK). + */ +static void gmc_v9_0_gart_fini(struct amdgpu_device *adev) +{ + amdgpu_gart_table_vram_free(adev); + amdgpu_gart_fini(adev); +} + +static int gmc_v9_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->vm_manager.enabled) { + amdgpu_vm_manager_fini(adev); + gmc_v9_0_vm_fini(adev); + adev->vm_manager.enabled = false; + } + gmc_v9_0_gart_fini(adev); + amdgpu_gem_force_release(adev); + amdgpu_bo_fini(adev); + + return 0; +} + +static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + break; + default: + break; + } +} + +/** + * gmc_v9_0_gart_enable - gart enable + * + * @adev: amdgpu_device pointer + */ +static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) +{ + int r; + bool value; + u32 tmp; + + amdgpu_program_register_sequence(adev, + golden_settings_vega10_hdp, + (const u32)ARRAY_SIZE(golden_settings_vega10_hdp)); + + if (adev->gart.robj == NULL) { + dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; + } + r = amdgpu_gart_table_vram_pin(adev); + if (r) + return r; + + /* After HDP is initialized, flush HDP.*/ + nbio_v6_1_hdp_flush(adev); + + r = gfxhub_v1_0_gart_enable(adev); + if (r) + return r; + + r = mmhub_v1_0_gart_enable(adev); + if (r) + return r; + + tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL)); + tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL)); + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp); + + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + value = false; + else + value = true; + + gfxhub_v1_0_set_fault_enable_default(adev, value); + mmhub_v1_0_set_fault_enable_default(adev, value); + + gmc_v9_0_gart_flush_gpu_tlb(adev, 0); + + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(adev->mc.gtt_size >> 20), + (unsigned long long)adev->gart.table_addr); + adev->gart.ready = true; + return 0; +} + +static int gmc_v9_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* The sequence of these two function calls matters.*/ + gmc_v9_0_init_golden_registers(adev); + + r = gmc_v9_0_gart_enable(adev); + + return r; +} + +/** + * gmc_v9_0_gart_disable - gart disable + * + * @adev: amdgpu_device pointer + * + * This disables all VM page table. + */ +static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) +{ + gfxhub_v1_0_gart_disable(adev); + mmhub_v1_0_gart_disable(adev); + amdgpu_gart_table_vram_unpin(adev); +} + +static int gmc_v9_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); + gmc_v9_0_gart_disable(adev); + + return 0; +} + +static int gmc_v9_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->vm_manager.enabled) { + gmc_v9_0_vm_fini(adev); + adev->vm_manager.enabled = false; + } + gmc_v9_0_hw_fini(adev); + + return 0; +} + +static int gmc_v9_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = gmc_v9_0_hw_init(adev); + if (r) + return r; + + if (!adev->vm_manager.enabled) { + r = gmc_v9_0_vm_init(adev); + if (r) { + dev_err(adev->dev, + "vm manager initialization failed (%d).\n", r); + return r; + } + adev->vm_manager.enabled = true; + } + + return r; +} + +static bool gmc_v9_0_is_idle(void *handle) +{ + /* MC is always ready in GMC v9.*/ + return true; +} + +static int gmc_v9_0_wait_for_idle(void *handle) +{ + /* There is no need to wait for MC idle in GMC v9.*/ + return 0; +} + +static int gmc_v9_0_soft_reset(void *handle) +{ + /* XXX for emulation.*/ + return 0; +} + +static int gmc_v9_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int gmc_v9_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs gmc_v9_0_ip_funcs = { + .name = "gmc_v9_0", + .early_init = gmc_v9_0_early_init, + .late_init = gmc_v9_0_late_init, + .sw_init = gmc_v9_0_sw_init, + .sw_fini = gmc_v9_0_sw_fini, + .hw_init = gmc_v9_0_hw_init, + .hw_fini = gmc_v9_0_hw_fini, + .suspend = gmc_v9_0_suspend, + .resume = gmc_v9_0_resume, + .is_idle = gmc_v9_0_is_idle, + .wait_for_idle = gmc_v9_0_wait_for_idle, + .soft_reset = gmc_v9_0_soft_reset, + .set_clockgating_state = gmc_v9_0_set_clockgating_state, + .set_powergating_state = gmc_v9_0_set_powergating_state, +}; + +const struct amdgpu_ip_block_version gmc_v9_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 9, + .minor = 0, + .rev = 0, + .funcs = &gmc_v9_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h new file mode 100644 index 000000000000..b030ca5ea107 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GMC_V9_0_H__ +#define __GMC_V9_0_H__ + +extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; +extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c new file mode 100644 index 000000000000..b1e0e6b796b8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -0,0 +1,585 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "mmhub_v1_0.h" + +#include "vega10/soc15ip.h" +#include "vega10/MMHUB/mmhub_1_0_offset.h" +#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" +#include "vega10/MMHUB/mmhub_1_0_default.h" +#include "vega10/ATHUB/athub_1_0_offset.h" +#include "vega10/ATHUB/athub_1_0_sh_mask.h" +#include "vega10/ATHUB/athub_1_0_default.h" +#include "vega10/vega10_enum.h" + +#include "soc15_common.h" + +u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) +{ + u64 base = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE)); + + base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) +{ + u32 tmp; + u64 value; + uint64_t addr; + u32 i; + + /* Program MC. */ + /* Update configuration */ + DRM_INFO("%s -- in\n", __func__); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), + adev->mc.vram_start >> 18); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), + adev->mc.vram_end >> 18); + value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), + (u32)(value >> 12)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), + (u32)(value >> 44)); + + /* Disable AGP. */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BOT), 0x00FFFFFF); + + /* GART Enable. */ + + /* Setup TLB control */ + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, + 3); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 1); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, + 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ECO_BITS, + 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + MTYPE, + MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ATC_EN, + 1); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + + /* Setup L2 cache */ + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, + 0); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0);/* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, + 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, + 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2), tmp); + + tmp = mmVM_L2_CNTL3_DEFAULT; + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4)); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, + 0); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, + 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4), tmp); + + /* setup context0 */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), + (u32)(adev->mc.gtt_start >> 12)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), + (u32)(adev->mc.gtt_start >> 44)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), + (u32)(adev->mc.gtt_end >> 12)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), + (u32)(adev->mc.gtt_end >> 44)); + + BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); + value = adev->gart.table_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + value &= 0x0000FFFFFFFFF000ULL; + value |= 0x1; /* valid bit */ + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), + (u32)value); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), + (u32)(value >> 32)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), + (u32)(adev->dummy_page.addr >> 12)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), + (u32)(adev->dummy_page.addr >> 44)); + + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, + 1); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); + + addr = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL); + tmp = RREG32(addr); + + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL), tmp); + + tmp = RREG32(addr); + + /* Disable identity aperture.*/ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); + + for (i = 0; i <= 14; i++) { + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + + i); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_DEPTH, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + amdgpu_vm_block_size - 9); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, + adev->vm_manager.max_pfn - 1); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, 0); + } + + return 0; +} + +void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL) + i, 0); + + /* Setup TLB control */ + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + + /* Setup L2 cache */ + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), 0); +} + +/** + * mmhub_v1_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ + u32 tmp; + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); +} + +static uint32_t mmhub_v1_0_get_invalidate_req(unsigned int vm_id) +{ + u32 req = 0; + + /* invalidate using legacy mode on vm_id*/ + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vm_id); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static uint32_t mmhub_v1_0_get_vm_protection_bits(void) +{ + return (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK); +} + +static int mmhub_v1_0_early_init(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_late_init(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + + hub->get_invalidate_req = mmhub_v1_0_get_invalidate_req; + hub->get_vm_protection_bits = mmhub_v1_0_get_vm_protection_bits; + + return 0; +} + +static int mmhub_v1_0_sw_fini(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + + 2 * i, 0xffffffff); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + + 2 * i, 0x1f); + } + + return 0; +} + +static int mmhub_v1_0_hw_fini(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_suspend(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_resume(void *handle) +{ + return 0; +} + +static bool mmhub_v1_0_is_idle(void *handle) +{ + return true; +} + +static int mmhub_v1_0_wait_for_idle(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_soft_reset(void *handle) +{ + return 0; +} + +static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1, def2, data2; + + def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + def1 = data1 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2)); + def2 = data2 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data |= ATC_L2_MISC_CG__ENABLE_MASK; + + data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + + data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } else { + data &= ~ATC_L2_MISC_CG__ENABLE_MASK; + + data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + + data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def != data) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); + + if (def1 != data1) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2), data1); + + if (def2 != data2) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2), data2); +} + +static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); +} + +static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); +} + +static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if(def != data) + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); +} + +static int mmhub_v1_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_VEGA10: + mmhub_v1_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + mmhub_v1_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +static int mmhub_v1_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs mmhub_v1_0_ip_funcs = { + .name = "mmhub_v1_0", + .early_init = mmhub_v1_0_early_init, + .late_init = mmhub_v1_0_late_init, + .sw_init = mmhub_v1_0_sw_init, + .sw_fini = mmhub_v1_0_sw_fini, + .hw_init = mmhub_v1_0_hw_init, + .hw_fini = mmhub_v1_0_hw_fini, + .suspend = mmhub_v1_0_suspend, + .resume = mmhub_v1_0_resume, + .is_idle = mmhub_v1_0_is_idle, + .wait_for_idle = mmhub_v1_0_wait_for_idle, + .soft_reset = mmhub_v1_0_soft_reset, + .set_clockgating_state = mmhub_v1_0_set_clockgating_state, + .set_powergating_state = mmhub_v1_0_set_powergating_state, +}; + +const struct amdgpu_ip_block_version mmhub_v1_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_MMHUB, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &mmhub_v1_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h new file mode 100644 index 000000000000..aadedf99c028 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V1_0_H__ +#define __MMHUB_V1_0_H__ + +u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev); +int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); +void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); +void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value); + +extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs; +extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 717d6bea7b52..a94420d680ec 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -74,6 +74,8 @@ enum amd_ip_block_type { AMD_IP_BLOCK_TYPE_UVD, AMD_IP_BLOCK_TYPE_VCE, AMD_IP_BLOCK_TYPE_ACP, + AMD_IP_BLOCK_TYPE_GFXHUB, + AMD_IP_BLOCK_TYPE_MMHUB }; enum amd_clockgating_state { -- cgit v1.2.3 From b1023571479020e9e9c15a51b43bf8e15406952b Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Fri, 3 Mar 2017 17:59:39 -0500 Subject: drm/amdgpu: implement GFX 9.0 support (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for gfx v9.0. v2: update golden settings from Ken Acked-by: Christian König Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3292 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h | 35 + 4 files changed, 3331 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 9c117d03b0e3..82047fce2db1 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -64,7 +64,8 @@ amdgpu-y += \ # add GFX block amdgpu-y += \ amdgpu_gfx.o \ - gfx_v8_0.o + gfx_v8_0.o \ + gfx_v9_0.o # add async DMA block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 13ea68a81ac1..23390468341c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -896,6 +896,8 @@ struct amdgpu_rlc { struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; + struct amdgpu_bo *mec_fw_obj; + u64 mec_fw_gpu_addr; u32 num_pipe; u32 num_mec; u32 num_queue; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c new file mode 100644 index 000000000000..e0a3cdc6e759 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -0,0 +1,3292 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "soc15.h" +#include "soc15d.h" + +#include "vega10/soc15ip.h" +#include "vega10/GC/gc_9_0_offset.h" +#include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/vega10_enum.h" +#include "vega10/HDP/hdp_4_0_offset.h" + +#include "soc15_common.h" +#include "clearstate_gfx9.h" +#include "v9_structs.h" + +#define GFX9_NUM_GFX_RINGS 1 +#define GFX9_NUM_COMPUTE_RINGS 8 +#define GFX9_NUM_SE 4 +#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 + +MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); +MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vega10_me.bin"); +MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); +MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); + +static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = +{ + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)} +}; + +static const u32 golden_settings_gc_9_0[] = +{ + SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400, + SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, + SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, + SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, + SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff +}; + +static const u32 golden_settings_gc_9_0_vg10[] = +{ + SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107, + SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, + SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042, + SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, + SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, + SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800, + SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1),0x0000000f, 0x00000007 +}; + +#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 + +static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); +static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); +static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); +static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); +static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info); +static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); +static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); + +static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + amdgpu_program_register_sequence(adev, + golden_settings_gc_9_0, + (const u32)ARRAY_SIZE(golden_settings_gc_9_0)); + amdgpu_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + break; + default: + break; + } +} + +static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) +{ + adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); + adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; +} + +static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, + bool wc, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | + WRITE_DATA_DST_SEL(0) | + (wc ? WR_CONFIRM : 0)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", + ring->idx, scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + return r; +} + +static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + uint32_t scratch; + uint32_t tmp = 0; + long r; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 256, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; + } + ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); + ib.ptr[2] = 0xDEADBEEF; + ib.length_dw = 3; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err2; + } + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } +err2: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err1: + amdgpu_gfx_scratch_free(adev, scratch); + return r; +} + +static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); + err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.pfp_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); + err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.me_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); + err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.ce_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.mec_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); + if (!err) { + err = amdgpu_ucode_validate(adev->gfx.mec2_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + } else { + err = 0; + adev->gfx.mec2_fw = NULL; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_G; + info->fw = adev->gfx.rlc_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; + info->fw = adev->gfx.mec_fw; + header = (const struct common_firmware_header *)info->fw->data; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; + info->fw = adev->gfx.mec_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + + if (adev->gfx.mec2_fw) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; + info->fw = adev->gfx.mec2_fw; + header = (const struct common_firmware_header *)info->fw->data; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; + info->fw = adev->gfx.mec2_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + } + + } + +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + } + return err; +} + +static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) +{ + int r; + + if (adev->gfx.mec.hpd_eop_obj) { + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); + amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + + amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); + adev->gfx.mec.hpd_eop_obj = NULL; + } + if (adev->gfx.mec.mec_fw_obj) { + r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r); + amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + + amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj); + adev->gfx.mec.mec_fw_obj = NULL; + } +} + +#define MEC_HPD_SIZE 2048 + +static int gfx_v9_0_mec_init(struct amdgpu_device *adev) +{ + int r; + u32 *hpd; + const __le32 *fw_data; + unsigned fw_size; + u32 *fw; + + const struct gfx_firmware_header_v1_0 *mec_hdr; + + /* + * we assign only 1 pipe because all other pipes will + * be handled by KFD + */ + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe = 1; + adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + + if (adev->gfx.mec.hpd_eop_obj == NULL) { + r = amdgpu_bo_create(adev, + adev->gfx.mec.num_queue * MEC_HPD_SIZE, + PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + &adev->gfx.mec.hpd_eop_obj); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + return r; + } + } + + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); + if (unlikely(r != 0)) { + gfx_v9_0_mec_fini(adev); + return r; + } + r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } + r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } + + memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); + + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; + + if (adev->gfx.mec.mec_fw_obj == NULL) { + r = amdgpu_bo_create(adev, + mec_hdr->header.ucode_size_bytes, + PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + &adev->gfx.mec.mec_fw_obj); + if (r) { + dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); + return r; + } + } + + r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false); + if (unlikely(r != 0)) { + gfx_v9_0_mec_fini(adev); + return r; + } + r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.mec_fw_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } + r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw); + if (r) { + dev_warn(adev->dev, "(%d) map firmware bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } + memcpy(fw, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + + + return 0; +} + +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) +{ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX), + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK)); + return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA)); +} + +static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t regno, uint32_t num, uint32_t *out) +{ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX), + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (regno << SQ_IND_INDEX__INDEX__SHIFT) | + (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK) | + (SQ_IND_INDEX__AUTO_INCR_MASK)); + while (num--) + *(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA)); +} + +static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +{ + /* type 1 wave data */ + dst[(*no_fields)++] = 1; + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); +} + +static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t start, + uint32_t size, uint32_t *dst) +{ + wave_read_regs( + adev, simd, wave, 0, + start + SQIND_WAVE_SGPRS_OFFSET, size, dst); +} + + +static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_0_select_se_sh, + .read_wave_data = &gfx_v9_0_read_wave_data, + .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, +}; + +static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + + adev->gfx.funcs = &gfx_v9_0_gfx_funcs; + + switch (adev->asic_type) { + case CHIP_VEGA10: + adev->gfx.config.max_shader_engines = 4; + adev->gfx.config.max_tile_pipes = 8; //?? + adev->gfx.config.max_cu_per_sh = 16; + adev->gfx.config.max_sh_per_se = 1; + adev->gfx.config.max_backends_per_se = 4; + adev->gfx.config.max_texture_channel_caches = 16; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 32; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; + break; + default: + BUG(); + break; + } + + adev->gfx.config.gb_addr_config = gb_addr_config; + + adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_PIPES); + adev->gfx.config.gb_addr_config_fields.num_banks = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_BANKS); + adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + MAX_COMPRESSED_FRAGS); + adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_RB_PER_SE); + adev->gfx.config.gb_addr_config_fields.num_se = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_SHADER_ENGINES); + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + PIPE_INTERLEAVE_SIZE)); +} + +static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, + struct amdgpu_ngg_buf *ngg_buf, + int size_se, + int default_size_se) +{ + int r; + + if (size_se < 0) { + dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); + return -EINVAL; + } + size_se = size_se ? size_se : default_size_se; + + ngg_buf->size = size_se * GFX9_NUM_SE; + r = amdgpu_bo_create_kernel(adev, ngg_buf->size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &ngg_buf->bo, + &ngg_buf->gpu_addr, + NULL); + if (r) { + dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); + return r; + } + ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); + + return r; +} + +static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < NGG_BUF_MAX; i++) + amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, + &adev->gfx.ngg.buf[i].gpu_addr, + NULL); + + memset(&adev->gfx.ngg.buf[0], 0, + sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); + + adev->gfx.ngg.init = false; + + return 0; +} + +static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_ngg || adev->gfx.ngg.init == true) + return 0; + + /* GDS reserve memory: 64 bytes alignment */ + adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); + adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; + adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; + adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base; + adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size; + + /* Primitive Buffer */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM], + amdgpu_prim_buf_per_se, + 64 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Primitive Buffer\n"); + goto err; + } + + /* Position Buffer */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS], + amdgpu_pos_buf_per_se, + 256 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Position Buffer\n"); + goto err; + } + + /* Control Sideband */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL], + amdgpu_cntl_sb_buf_per_se, + 256); + if (r) { + dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); + goto err; + } + + /* Parameter Cache, not created by default */ + if (amdgpu_param_buf_per_se <= 0) + goto out; + + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM], + amdgpu_param_buf_per_se, + 512 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Parameter Cache\n"); + goto err; + } + +out: + adev->gfx.ngg.init = true; + return 0; +err: + gfx_v9_0_ngg_fini(adev); + return r; +} + +static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; + int r; + u32 data; + u32 size; + u32 base; + + if (!amdgpu_ngg) + return 0; + + /* Program buffer size */ + data = 0; + size = adev->gfx.ngg.buf[PRIM].size / 256; + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size); + + size = adev->gfx.ngg.buf[POS].size / 256; + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data); + + data = 0; + size = adev->gfx.ngg.buf[CNTL].size / 256; + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size); + + size = adev->gfx.ngg.buf[PARAM].size / 1024; + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data); + + /* Program buffer base address */ + base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); + data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data); + + base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); + data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data); + + base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); + data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data); + + base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); + data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data); + + base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); + data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data); + + base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); + data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data); + + /* Clear GDS reserved memory */ + r = amdgpu_ring_alloc(ring, 17); + if (r) { + DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[0].mem_size, + (adev->gds.mem.total_size + + adev->gfx.ngg.gds_reserve_size) >> + AMDGPU_GDS_SHIFT); + + amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); + amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | + PACKET3_DMA_DATA_SRC_SEL(2))); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size); + + + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[0].mem_size, 0); + + amdgpu_ring_commit(ring); + + return 0; +} + +static int gfx_v9_0_sw_init(void *handle) +{ + int i, r; + struct amdgpu_ring *ring; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* EOP Event */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq); + if (r) + return r; + + /* Privileged reg */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184, + &adev->gfx.priv_reg_irq); + if (r) + return r; + + /* Privileged inst */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185, + &adev->gfx.priv_inst_irq); + if (r) + return r; + + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; + + gfx_v9_0_scratch_init(adev); + + r = gfx_v9_0_init_microcode(adev); + if (r) { + DRM_ERROR("Failed to load gfx firmware!\n"); + return r; + } + + r = gfx_v9_0_mec_init(adev); + if (r) { + DRM_ERROR("Failed to init MEC BOs!\n"); + return r; + } + + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + ring->ring_obj = NULL; + sprintf(ring->name, "gfx"); + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + if (r) + return r; + } + + /* set up the compute queues */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + unsigned irq_type; + + /* max 32 queues per MEC */ + if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { + DRM_ERROR("Too many (%d) compute rings!\n", i); + break; + } + ring = &adev->gfx.compute_ring[i]; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1; + ring->me = 1; /* first MEC */ + ring->pipe = i / 8; + ring->queue = i % 8; + sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + } + + /* reserve GDS, GWS and OA resource for gfx */ + r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, + &adev->gds.gds_gfx_bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, + &adev->gds.gws_gfx_bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, + &adev->gds.oa_gfx_bo, NULL, NULL); + if (r) + return r; + + adev->gfx.ce_ram_size = 0x8000; + + gfx_v9_0_gpu_early_init(adev); + + r = gfx_v9_0_ngg_init(adev); + if (r) + return r; + + return 0; +} + + +static int gfx_v9_0_sw_fini(void *handle) +{ + int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + + gfx_v9_0_mec_fini(adev); + gfx_v9_0_ngg_fini(adev); + + return 0; +} + + +static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) +{ + /* TODO */ +} + +static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) +{ + u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); + + if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) { + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); + } else if (se_num == 0xffffffff) { + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); + } else if (sh_num == 0xffffffff) { + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + } else { + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + } + WREG32( SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); +} + +static u32 gfx_v9_0_create_bitmask(u32 bit_width) +{ + return (u32)((1ULL << bit_width) - 1); +} + +static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE)); + data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE)); + + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; + data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; + + mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); + + return (~data) & mask; +} + +static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) +{ + int i, j; + u32 data, tmp, num_rbs = 0; + u32 active_rbs = 0; + u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + data = gfx_v9_0_get_rb_active_bitmap(adev); + active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * + rb_bitmap_width_per_sh); + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + adev->gfx.config.backend_enable_mask = active_rbs; + tmp = active_rbs; + while (tmp >>= 1) + num_rbs++; + adev->gfx.config.num_rbs = num_rbs; +} + +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) +static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + sh_mem_config = SH_MEM_ADDRESS_MODE_64 | + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + soc15_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + } + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) +{ + u32 tmp; + int i; + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_CNTL)); + tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_CNTL), tmp); + + gfx_v9_0_tiling_mode_table_init(adev); + + gfx_v9_0_setup_rb(adev); + gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < 16; i++) { + soc15_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + tmp = 0; + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0); + } + soc15_grbm_select(adev, 0, 0, 0, 0); + + mutex_unlock(&adev->srbm_mutex); + + gfx_v9_0_init_compute_vmid(adev); + + mutex_lock(&adev->grbm_idx_mutex); + /* + * making sure that the following register writes will be broadcasted + * to all the shaders + */ + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE), + (adev->gfx.config.sc_prim_fifo_size_frontend << + PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_prim_fifo_size_backend << + PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_hiz_tile_fifo_size << + PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_earlyz_tile_fifo_size << + PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); + mutex_unlock(&adev->grbm_idx_mutex); + +} + +static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) +{ + u32 i, j, k; + u32 mask; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + for (k = 0; k < adev->usec_timeout; k++) { + if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0) + break; + udelay(1); + } + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; + for (k = 0; k < adev->usec_timeout; k++) { + if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0) + break; + udelay(1); + } +} + +static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + + if (enable) + return; + + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp); +} + +void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) +{ + u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + + tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp); + + gfx_v9_0_enable_gui_idle_interrupt(adev, false); + + gfx_v9_0_wait_for_rlc_serdes(adev); +} + +static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) +{ + u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + + tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); + udelay(50); + tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); + udelay(50); +} + +static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) +{ +#ifdef AMDGPU_RLC_DEBUG_RETRY + u32 rlc_ucode_ver; +#endif + u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + + tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp); + + /* carrizo do enable cp interrupt after cp inited */ + if (!(adev->flags & AMD_IS_APU)) + gfx_v9_0_enable_gui_idle_interrupt(adev, true); + + udelay(50); + +#ifdef AMDGPU_RLC_DEBUG_RETRY + /* RLC_GPM_GENERAL_6 : RLC Ucode version */ + rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6)); + if(rlc_ucode_ver == 0x108) { + DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", + rlc_ucode_ver, adev->gfx.rlc_fw_version); + /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, + * default is 0x9C4 to create a 100us interval */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4); + /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr + * to disable the page fault retry interrupts, default is + * 0x100 (256) */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100); + } +#endif +} + +static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.rlc_fw) + return -EINVAL; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(&hdr->header); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), + RLCG_UCODE_LOADING_START_ADDRESS); + for (i = 0; i < fw_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version); + + return 0; +} + +static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) +{ + int r; + + gfx_v9_0_rlc_stop(adev); + + /* disable CG */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0); + + /* disable PG */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0); + + gfx_v9_0_rlc_reset(adev); + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* legacy rlc firmware loading */ + r = gfx_v9_0_rlc_load_microcode(adev); + if (r) + return r; + } + + gfx_v9_0_rlc_start(adev); + + return 0; +} + +static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)); + + if (enable) { + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); + } else { + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].ready = false; + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp); + udelay(50); +} + +static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v1_0 *pfp_hdr; + const struct gfx_firmware_header_v1_0 *ce_hdr; + const struct gfx_firmware_header_v1_0 *me_hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) + return -EINVAL; + + pfp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.pfp_fw->data; + ce_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.ce_fw->data; + me_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.me_fw->data; + + amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); + amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); + amdgpu_ucode_print_gfx_hdr(&me_hdr->header); + + gfx_v9_0_cp_gfx_enable(adev, false); + + /* PFP */ + fw_data = (const __le32 *) + (adev->gfx.pfp_fw->data + + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0); + for (i = 0; i < fw_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version); + + /* CE */ + fw_data = (const __le32 *) + (adev->gfx.ce_fw->data + + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0); + for (i = 0; i < fw_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version); + + /* ME */ + fw_data = (const __le32 *) + (adev->gfx.me_fw->data + + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0); + for (i = 0; i < fw_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version); + + return 0; +} + +static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } + } + /* pa_sc_raster_config/pa_sc_raster_config1 */ + count += 4; + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + int r, i; + + /* init the CP */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1); + + gfx_v9_0_cp_gfx_enable(adev, true); + + r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + return r; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, 0x80000000); + amdgpu_ring_write(ring, 0x80000000); + + for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + amdgpu_ring_write(ring, + PACKET3(PACKET3_SET_CONTEXT_REG, + ext->reg_count)); + amdgpu_ring_write(ring, + ext->reg_index - PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + amdgpu_ring_write(ring, ext->extent[i]); + } + } + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); + + amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); + amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); + amdgpu_ring_write(ring, 0x8000); + amdgpu_ring_write(ring, 0x8000); + + amdgpu_ring_commit(ring); + + return 0; +} + +static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 tmp; + u32 rb_bufsz; + u64 rb_addr, rptr_addr; + + /* Set the write pointer delay */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0); + + /* set the RB to use vmid 0 */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0); + + /* Set ring buffer size */ + ring = &adev->gfx.gfx_ring[0]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); +#endif + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp); + + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr)); + + /* set the wb address wether it's enabled or not */ + rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + + mdelay(1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr)); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL)); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp); + + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, ring->doorbell_index); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER), + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); + + + /* start the ring */ + gfx_v9_0_cp_gfx_start(adev); + ring->ready = true; + + return 0; +} + +static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + + if (enable) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0); + } else { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), + (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].ready = false; + } + udelay(50); +} + +static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev) +{ + gfx_v9_0_cp_compute_enable(adev, true); + + return 0; +} + +static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v1_0 *mec_hdr; + const __le32 *fw_data; + unsigned i; + u32 tmp; + + if (!adev->gfx.mec_fw) + return -EINVAL; + + gfx_v9_0_cp_compute_enable(adev, false); + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + tmp = 0; + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO), + adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI), + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + + /* MEC1 */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR), + mec_hdr->jt_offset); + for (i = 0; i < mec_hdr->jt_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA), + le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR), + adev->gfx.mec_fw_version); + /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ + + return 0; +} + +static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + if (ring->mqd_obj) { + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); + + amdgpu_bo_unpin(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + amdgpu_bo_unref(&ring->mqd_obj); + ring->mqd_obj = NULL; + } + } +} + +static int gfx_v9_0_init_queue(struct amdgpu_ring *ring); + +static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev) +{ + int i, r; + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + if (gfx_v9_0_init_queue(ring)) + dev_warn(adev->dev, "compute queue %d init failed!\n", i); + } + + r = gfx_v9_0_cp_compute_start(adev); + if (r) + return r; + + return 0; +} + +static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) +{ + int r,i; + struct amdgpu_ring *ring; + + if (!(adev->flags & AMD_IS_APU)) + gfx_v9_0_enable_gui_idle_interrupt(adev, false); + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* legacy firmware loading */ + r = gfx_v9_0_cp_gfx_load_microcode(adev); + if (r) + return r; + + r = gfx_v9_0_cp_compute_load_microcode(adev); + if (r) + return r; + } + + r = gfx_v9_0_cp_gfx_resume(adev); + if (r) + return r; + + r = gfx_v9_0_cp_compute_resume(adev); + if (r) + return r; + + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + return r; + } + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) + ring->ready = false; + } + + gfx_v9_0_enable_gui_idle_interrupt(adev, true); + + return 0; +} + +static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) +{ + gfx_v9_0_cp_gfx_enable(adev, enable); + gfx_v9_0_cp_compute_enable(adev, enable); +} + +static int gfx_v9_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gfx_v9_0_init_golden_registers(adev); + + gfx_v9_0_gpu_init(adev); + + r = gfx_v9_0_rlc_resume(adev); + if (r) + return r; + + r = gfx_v9_0_cp_resume(adev); + if (r) + return r; + + r = gfx_v9_0_ngg_en(adev); + if (r) + return r; + + return r; +} + +static int gfx_v9_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + gfx_v9_0_cp_enable(adev, false); + gfx_v9_0_rlc_stop(adev); + gfx_v9_0_cp_compute_fini(adev); + + return 0; +} + +static int gfx_v9_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return gfx_v9_0_hw_fini(adev); +} + +static int gfx_v9_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return gfx_v9_0_hw_init(adev); +} + +static bool gfx_v9_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)), + GRBM_STATUS, GUI_ACTIVE)) + return false; + else + return true; +} + +static int gfx_v9_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) & + GRBM_STATUS__GUI_ACTIVE_MASK; + + if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static void gfx_v9_0_print_status(void *handle) +{ + int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + dev_info(adev->dev, "GFX 9.x registers\n"); + dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS))); + dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2))); + dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0))); + dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1))); + dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2))); + dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3))); + dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STAT))); + dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1))); + dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2))); + dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3))); + dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT))); + dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1))); + dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS))); + dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_BUSY_STAT))); + dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1))); + dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS))); + + for (i = 0; i < 32; i++) { + dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n", + i, RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_TILE_MODE0 ) + i*4)); + } + for (i = 0; i < 16; i++) { + dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n", + i, RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_MACROTILE_MODE0) + i*4)); + } + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + dev_info(adev->dev, " se: %d\n", i); + gfx_v9_0_select_se_sh(adev, i, 0xffffffff, 0xffffffff); + dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_RASTER_CONFIG))); + dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_RASTER_CONFIG_1))); + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + + dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))); + + dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEQ_THRESHOLDS))); + dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmSX_DEBUG_1))); + dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX))); + dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL))); + dev_info(adev->dev, " SQ_CONFIG=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG))); + dev_info(adev->dev, " DB_DEBUG=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG))); + dev_info(adev->dev, " DB_DEBUG2=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2))); + dev_info(adev->dev, " DB_DEBUG3=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG3))); + dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL))); + dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1))); + dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE))); + dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_NUM_INSTANCES))); + dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL))); + dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FORCE_EOV_MAX_CNTS))); + dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION))); + dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_GS_VERTEX_REUSE))); + dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE))); + dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_CL_ENHANCE))); + dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE))); + + dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL))); + dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT))); + dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID))); + + dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_SEM_WAIT_TIMER))); + + dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY))); + dev_info(adev->dev, " CP_RB_VMID=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID))); + dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL))); + dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR))); + dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR))); + dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI))); + dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL))); + dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE))); + dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI))); + dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL))); + + dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_ADDR))); + dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_UMSK))); + + dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0))); + dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTL))); + dev_info(adev->dev, " RLC_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL))); + dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL))); + dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTR_INIT))); + dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTR_MAX))); + dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_INIT_CU_MASK))); + dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_PARAMS))); + dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTL))); + dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_UCODE_CNTL))); + + dev_info(adev->dev, " RLC_GPM_GENERAL_6=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6))); + dev_info(adev->dev, " RLC_GPM_GENERAL_12=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12))); + dev_info(adev->dev, " RLC_GPM_TIMER_INT_3=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3))); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < 16; i++) { + soc15_grbm_select(adev, 0, 0, 0, i); + dev_info(adev->dev, " VM %d:\n", i); + dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))); + dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n", + RREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES))); + } + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static int gfx_v9_0_soft_reset(void *handle) +{ + u32 grbm_soft_reset = 0; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* GRBM_STATUS */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)); + if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | + GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | + GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | + GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | + GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | + GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); + } + + if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, 1); + } + + /* GRBM_STATUS2 */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)); + if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + + + if (grbm_soft_reset ) { + gfx_v9_0_print_status((void *)adev); + /* stop the rlc */ + gfx_v9_0_rlc_stop(adev); + + /* Disable GFX parsing/prefetching */ + gfx_v9_0_cp_gfx_enable(adev, false); + + /* Disable MEC parsing/prefetching */ + gfx_v9_0_cp_compute_enable(adev, false); + + if (grbm_soft_reset) { + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + } + + /* Wait a little for things to settle down */ + udelay(50); + gfx_v9_0_print_status((void *)adev); + } + return 0; +} + +static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + uint64_t clock; + + mutex_lock(&adev->gfx.gpu_clock_mutex); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1); + clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) | + ((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL); + mutex_unlock(&adev->gfx.gpu_clock_mutex); + return clock; +} + +static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, + uint32_t vmid, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) +{ + gds_base = gds_base >> AMDGPU_GDS_SHIFT; + gds_size = gds_size >> AMDGPU_GDS_SHIFT; + + gws_base = gws_base >> AMDGPU_GWS_SHIFT; + gws_size = gws_size >> AMDGPU_GWS_SHIFT; + + oa_base = oa_base >> AMDGPU_OA_SHIFT; + oa_size = oa_size >> AMDGPU_OA_SHIFT; + + /* GDS Base */ + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[vmid].mem_base, + gds_base); + + /* GDS Size */ + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[vmid].mem_size, + gds_size); + + /* GWS */ + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[vmid].gws, + gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); + + /* OA */ + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[vmid].oa, + (1 << (oa_size + oa_base)) - (1 << oa_base)); +} + +static int gfx_v9_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS; + gfx_v9_0_set_ring_funcs(adev); + gfx_v9_0_set_irq_funcs(adev); + gfx_v9_0_set_gds_init(adev); + gfx_v9_0_set_rlc_funcs(adev); + + return 0; +} + +static int gfx_v9_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); + if (r) + return r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); + if (r) + return r; + + return 0; +} + +static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) +{ + uint32_t rlc_setting, data; + unsigned i; + + if (adev->gfx.rlc.in_safe_mode) + return; + + /* if RLC is not enabled, do nothing */ + rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return; + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG)) { + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } + adev->gfx.rlc.in_safe_mode = true; + } +} + +static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) +{ + uint32_t rlc_setting, data; + + if (!adev->gfx.rlc.in_safe_mode) + return; + + /* if RLC is not enabled, do nothing */ + rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return; + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { + /* + * Try to exit safe mode only if it is already in safe + * mode. + */ + data = RLC_SAFE_MODE__CMD_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data); + adev->gfx.rlc.in_safe_mode = false; + } +} + +static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + /* It is disabled by HW by default */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + + /* only for Vega10 & Raven1 */ + data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + + /* MGLS is a global flag to control all MGLS in GFX */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { + /* 2 - RLC memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); + data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data); + } + /* 3 - CP memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data); + } + } + } else { + /* 1 - MGCG_OVERRIDE */ + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + + /* 2 - disable MGLS in RLC */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { + data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data); + } + + /* 3 - disable MGLS in CP */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { + data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data); + } + } +} + +static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + adev->gfx.rlc.funcs->enter_safe_mode(adev); + + /* Enable 3D CGCG/CGLS */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + /* write cmd to clear cgcg/cgls ov */ + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + /* enable 3Dcgcg FSM(0x0020003f) */ + def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); + } else { + /* Disable CGCG/CGLS */ + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + /* disable cgcg, cgls should be disabled */ + data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data); + } + + adev->gfx.rlc.funcs->exit_safe_mode(adev); +} + +static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + adev->gfx.rlc.funcs->enter_safe_mode(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + + /* enable cgcg FSM(0x0020003F) */ + def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); + data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); + } else { + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); + /* reset CGCG/CGLS bits */ + data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data); + } + + adev->gfx.rlc.funcs->exit_safe_mode(adev); +} + +static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + if (enable) { + /* CGCG/CGLS should be enabled after MGCG/MGLS + * === MGCG + MGLS === + */ + gfx_v9_0_update_medium_grain_clock_gating(adev, enable); + /* === CGCG /CGLS for GFX 3D Only === */ + gfx_v9_0_update_3d_clock_gating(adev, enable); + /* === CGCG + CGLS === */ + gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); + } else { + /* CGCG/CGLS should be disabled before MGCG/MGLS + * === CGCG + CGLS === + */ + gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); + /* === CGCG /CGLS for GFX 3D Only === */ + gfx_v9_0_update_3d_clock_gating(adev, enable); + /* === MGCG + MGLS === */ + gfx_v9_0_update_medium_grain_clock_gating(adev, enable); + } + return 0; +} + +static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { + .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode, + .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode +}; + +static int gfx_v9_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static int gfx_v9_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_VEGA10: + gfx_v9_0_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; +} + +static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ +} + +static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); + } else { + wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR)); + wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32; + } + + return wptr; +} + +static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr)); + } +} + +static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + u32 ref_and_mask, reg_mem_engine; + struct nbio_hdp_flush_reg *nbio_hf_reg; + + if (ring->adev->asic_type == CHIP_VEGA10) + nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + switch (ring->me) { + case 1: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; + break; + case 2: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; + break; + default: + return; + } + reg_mem_engine = 0; + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + reg_mem_engine = 1; /* pfp */ + } + + gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, + nbio_hf_reg->hdp_flush_req_offset, + nbio_hf_reg->hdp_flush_done_offset, + ref_and_mask, ref_and_mask, 0x20); +} + +static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) +{ + gfx_v9_0_write_data_to_reg(ring, 0, true, + SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1); +} + +static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) +{ + u32 header, control = 0; + + if (ib->flags & AMDGPU_IB_FLAG_CE) + header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + + control |= ib->length_dw | (vm_id << 24); + + amdgpu_ring_write(ring, header); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +#define INDIRECT_BUFFER_VALID (1 << 23) + +static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) +{ + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); + + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); + + /* + * the address should be Qword aligned if 64bit write, Dword + * aligned if only send 32bit data low (discard data high) + */ + if (write64bit) + BUG_ON(addr & 0x7); + else + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, + lower_32_bits(addr), upper_32_bits(addr), + seq, 0xffffffff, 4); +} + +static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + unsigned eng = ring->idx; + unsigned i; + + pd_addr = pd_addr | 0x1; /* valid bit */ + /* now only use physical base address of PDE and valid */ + BUG_ON(pd_addr & 0xFFFF00000000003EULL); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; + uint32_t req = hub->get_invalidate_req(vm_id); + + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->ctx0_ptb_addr_lo32 + + (2 * vm_id), + lower_32_bits(pd_addr)); + + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->ctx0_ptb_addr_hi32 + + (2 * vm_id), + upper_32_bits(pd_addr)); + + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->vm_inv_eng0_req + eng, req); + + /* wait for the invalidate to complete */ + gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + + eng, 0, 1 << vm_id, 1 << vm_id, 0x20); + } + + /* compute doesn't have PFP */ + if (usepfp) { + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); + /* Emits 128 dw nop to prevent CE access VM before vm_flush finish */ + amdgpu_ring_insert_nop(ring, 128); + } +} + +static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ +} + +static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +{ + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); + else + BUG(); + return wptr; +} + +static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else{ + BUG(); /* only DOORBELL method supported on gfx9 now */ + } +} + +static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +{ + uint32_t dw2 = 0; + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & AMDGPU_HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs for GFX */ + dw2 |= 0x10002; + + /* set load_ce_ram if preamble presented */ + if (AMDGPU_PREAMBLE_IB_PRESENT & flags) + dw2 |= 0x10000000; + } else { + /* still load_ce_ram if this is the first time preamble presented + * although there is no context switch happens. + */ + if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) + dw2 |= 0x10000000; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + cp_int_cntl = + REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl); + break; + default: + break; + } +} + +static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, + int me, int pipe, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl, mec_int_cntl_reg; + + /* + * amdgpu controls only pipe 0 of MEC1. That's why this function only + * handles the setting of interrupts for this specific pipe. All other + * pipes' interrupts are set by amdkfd. + */ + + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 0); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 1); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } +} + +static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl); + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl); + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (type) { + case AMDGPU_CP_IRQ_GFX_EOP: + gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); + break; + default: + break; + } + return 0; +} + +static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int i; + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + + DRM_DEBUG("IH: CP EOP\n"); + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead of per queue. + */ + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) + amdgpu_fence_process(ring); + } + break; + } + return 0; +} + +static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal register access in command stream\n"); + schedule_work(&adev->reset_work); + return 0; +} + +static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in command stream\n"); + schedule_work(&adev->reset_work); + return 0; +} + +const struct amd_ip_funcs gfx_v9_0_ip_funcs = { + .name = "gfx_v9_0", + .early_init = gfx_v9_0_early_init, + .late_init = gfx_v9_0_late_init, + .sw_init = gfx_v9_0_sw_init, + .sw_fini = gfx_v9_0_sw_fini, + .hw_init = gfx_v9_0_hw_init, + .hw_fini = gfx_v9_0_hw_fini, + .suspend = gfx_v9_0_suspend, + .resume = gfx_v9_0_resume, + .is_idle = gfx_v9_0_is_idle, + .wait_for_idle = gfx_v9_0_wait_for_idle, + .soft_reset = gfx_v9_0_soft_reset, + .set_clockgating_state = gfx_v9_0_set_clockgating_state, + .set_powergating_state = gfx_v9_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { + .type = AMDGPU_RING_TYPE_GFX, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v9_0_ring_get_rptr_gfx, + .get_wptr = gfx_v9_0_ring_get_wptr_gfx, + .set_wptr = gfx_v9_0_ring_set_wptr_gfx, + .emit_frame_size = + 20 + /* gfx_v9_0_ring_emit_gds_switch */ + 7 + /* gfx_v9_0_ring_emit_hdp_flush */ + 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ + 8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ + 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ + 128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */ + 2 + /* gfx_v9_ring_emit_sb */ + 3, /* gfx_v9_ring_emit_cntxcntl */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ + .emit_ib = gfx_v9_0_ring_emit_ib_gfx, + .emit_fence = gfx_v9_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, + .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate, + .test_ring = gfx_v9_0_ring_test_ring, + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_switch_buffer = gfx_v9_ring_emit_sb, + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, +}; + +static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { + .type = AMDGPU_RING_TYPE_COMPUTE, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v9_0_ring_get_rptr_compute, + .get_wptr = gfx_v9_0_ring_get_wptr_compute, + .set_wptr = gfx_v9_0_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v9_0_ring_emit_gds_switch */ + 7 + /* gfx_v9_0_ring_emit_hdp_flush */ + 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ + 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ + 64 + /* gfx_v9_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib = gfx_v9_0_ring_emit_ib_compute, + .emit_fence = gfx_v9_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, + .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate, + .test_ring = gfx_v9_0_ring_test_ring, + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, +}; + + +static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; +} + +static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { + .set = gfx_v9_0_set_eop_interrupt_state, + .process = gfx_v9_0_eop_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { + .set = gfx_v9_0_set_priv_reg_fault_state, + .process = gfx_v9_0_priv_reg_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { + .set = gfx_v9_0_set_priv_inst_fault_state, + .process = gfx_v9_0_priv_inst_irq, +}; + +static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; + adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; + + adev->gfx.priv_reg_irq.num_types = 1; + adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; + + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; +} + +static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; + break; + default: + break; + } +} + +static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) +{ + /* init asci gds info */ + adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE)); + adev->gds.gws.total_size = 64; + adev->gds.oa.total_size = 16; + + if (adev->gds.mem.total_size == 64 * 1024) { + adev->gds.mem.gfx_partition_size = 4096; + adev->gds.mem.cs_partition_size = 4096; + + adev->gds.gws.gfx_partition_size = 4; + adev->gds.gws.cs_partition_size = 4; + + adev->gds.oa.gfx_partition_size = 4; + adev->gds.oa.cs_partition_size = 1; + } else { + adev->gds.mem.gfx_partition_size = 1024; + adev->gds.mem.cs_partition_size = 1024; + + adev->gds.gws.gfx_partition_size = 16; + adev->gds.gws.cs_partition_size = 16; + + adev->gds.oa.gfx_partition_size = 4; + adev->gds.oa.cs_partition_size = 4; + } +} + +static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG)); + data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG)); + + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + + mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + + return (~data) & mask; +} + +static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info) +{ + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + + if (!adev || !cu_info) + return -EINVAL; + + memset(cu_info, 0, sizeof(*cu_info)); + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + bitmap = gfx_v9_0_get_cu_active_bitmap(adev); + cu_info->bitmap[i][j] = bitmap; + + for (k = 0; k < 16; k ++) { + if (bitmap & mask) { + if (counter < 2) + ao_bitmap |= mask; + counter ++; + } + mask <<= 1; + } + active_cu_number += counter; + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; + + return 0; +} + +static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) +{ + int r, j; + u32 tmp; + bool use_doorbell = true; + u64 hqd_gpu_addr; + u64 mqd_gpu_addr; + u64 eop_gpu_addr; + u64 wb_gpu_addr; + u32 *buf; + struct v9_mqd *mqd; + struct amdgpu_device *adev; + + adev = ring->adev; + if (ring->mqd_obj == NULL) { + r = amdgpu_bo_create(adev, + sizeof(struct v9_mqd), + PAGE_SIZE,true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, + NULL, &ring->mqd_obj); + if (r) { + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); + return r; + } + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + gfx_v9_0_cp_compute_fini(adev); + return r; + } + + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, + &mqd_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); + gfx_v9_0_cp_compute_fini(adev); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); + if (r) { + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); + gfx_v9_0_cp_compute_fini(adev); + return r; + } + + /* init the mqd struct */ + memset(buf, 0, sizeof(struct v9_mqd)); + + mqd = (struct v9_mqd *)buf; + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, + ring->pipe, + ring->queue, 0); + /* disable wptr polling */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL)); + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL), tmp); + + /* write the EOP addr */ + BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */ + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE); + eop_gpu_addr >>= 8; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr)); + mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr); + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(MEC_HPD_SIZE / 4) - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp); + + /* enable doorbell? */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + if (use_doorbell) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + else + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp); + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1)) + break; + udelay(1); + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), mqd->cp_hqd_dequeue_request); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), mqd->cp_hqd_pq_rptr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address wether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR), + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI), + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (use_doorbell) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER), + (AMDGPU_DOORBELL64_KIQ * 2) << 2); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER), + (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2); + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); + mqd->cp_hqd_pq_doorbell_control = tmp; + + } else { + mqd->cp_hqd_pq_doorbell_control = 0; + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp); + mqd->cp_hqd_persistent_state = tmp; + + /* activate the queue */ + mqd->cp_hqd_active = 1; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + if (use_doorbell) { + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS)); + tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS), tmp); + } + + return 0; +} + +const struct amdgpu_ip_block_version gfx_v9_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 9, + .minor = 0, + .rev = 0, + .funcs = &gfx_v9_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h new file mode 100644 index 000000000000..56ef652a575d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFX_V9_0_H__ +#define __GFX_V9_0_H__ + +extern const struct amd_ip_funcs gfx_v9_0_ip_funcs; +extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block; + +void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); + +uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); +int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); + +#endif -- cgit v1.2.3 From 0e5ca0d1ac07ef8b3a52d3b0404482207cb4da5a Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Fri, 3 Mar 2017 18:37:23 -0500 Subject: drm/amdgpu: add PSP driver for vega10 (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PSP is responsible for firmware loading on SOC-15 asics. v2: fix memory leak (Ken) Acked-by: Christian König Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 481 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 127 ++++++++ drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 269 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 507 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/psp_v3_1.h | 50 +++ drivers/gpu/drm/amd/include/amd_shared.h | 1 + 9 files changed, 1450 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h create mode 100644 drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h create mode 100644 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c create mode 100644 drivers/gpu/drm/amd/amdgpu/psp_v3_1.h (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index cab8eecf2ecf..48a9c03914c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -51,6 +51,11 @@ amdgpu-y += \ cz_ih.o \ vega10_ih.o +# add PSP block +amdgpu-y += \ + amdgpu_psp.o \ + psp_v3_1.o + # add SMC block amdgpu-y += \ amdgpu_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 23390468341c..c571f6835848 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -52,6 +52,7 @@ #include "amdgpu_irq.h" #include "amdgpu_ucode.h" #include "amdgpu_ttm.h" +#include "amdgpu_psp.h" #include "amdgpu_gds.h" #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -1213,6 +1214,10 @@ struct amdgpu_firmware { struct amdgpu_bo *fw_buf; unsigned int fw_size; unsigned int max_ucodes; + /* firmwares are loaded by psp instead of smu from vega10 */ + const struct amdgpu_psp_funcs *funcs; + struct amdgpu_bo *rbuf; + struct mutex mutex; }; /* @@ -1571,6 +1576,9 @@ struct amdgpu_device { /* firmwares */ struct amdgpu_firmware firmware; + /* PSP */ + struct psp_context psp; + /* GDS */ struct amdgpu_gds gds; @@ -1825,6 +1833,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a)) +#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) /* Common functions */ int amdgpu_gpu_reset(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index dd9f493006df..cf45fb902aed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1837,6 +1837,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, * can recall function without having locking issues */ mutex_init(&adev->vm_manager.lock); atomic_set(&adev->irq.ih.lock, 0); + mutex_init(&adev->firmware.mutex); mutex_init(&adev->pm.mutex); mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->srbm_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c new file mode 100644 index 000000000000..4731015f6101 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -0,0 +1,481 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ + +#include +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v3_1.h" + +static void psp_set_funcs(struct amdgpu_device *adev); + +static int psp_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + psp_set_funcs(adev); + + return 0; +} + +static int psp_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + int ret; + + switch (adev->asic_type) { + case CHIP_VEGA10: + psp->init_microcode = psp_v3_1_init_microcode; + psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv; + psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos; + psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; + psp->ring_init = psp_v3_1_ring_init; + psp->cmd_submit = psp_v3_1_cmd_submit; + psp->compare_sram_data = psp_v3_1_compare_sram_data; + psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; + break; + default: + return -EINVAL; + } + + psp->adev = adev; + + ret = psp_init_microcode(psp); + if (ret) { + DRM_ERROR("Failed to load psp firmware!\n"); + return ret; + } + + return 0; +} + +static int psp_sw_fini(void *handle) +{ + return 0; +} + +int psp_wait_for(struct psp_context *psp, uint32_t reg_index, + uint32_t reg_val, uint32_t mask, bool check_changed) +{ + uint32_t val; + int i; + struct amdgpu_device *adev = psp->adev; + + val = RREG32(reg_index); + + for (i = 0; i < adev->usec_timeout; i++) { + if (check_changed) { + if (val != reg_val) + return 0; + } else { + if ((val & mask) == reg_val) + return 0; + } + udelay(1); + } + + return -ETIME; +} + +static int +psp_cmd_submit_buf(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr, + int index) +{ + int ret; + struct amdgpu_bo *cmd_buf_bo; + uint64_t cmd_buf_mc_addr; + struct psp_gfx_cmd_resp *cmd_buf_mem; + struct amdgpu_device *adev = psp->adev; + + ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &cmd_buf_bo, &cmd_buf_mc_addr, + (void **)&cmd_buf_mem); + if (ret) + return ret; + + memset(cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); + + memcpy(cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); + + ret = psp_cmd_submit(psp, ucode, cmd_buf_mc_addr, + fence_mc_addr, index); + + while (*((unsigned int *)psp->fence_buf) != index) { + msleep(1); + }; + + amdgpu_bo_free_kernel(&cmd_buf_bo, + &cmd_buf_mc_addr, + (void **)&cmd_buf_mem); + + return ret; +} + +static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t tmr_mc, uint32_t size) +{ + cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; + cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = (uint32_t)tmr_mc; + cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = (uint32_t)(tmr_mc >> 32); + cmd->cmd.cmd_setup_tmr.buf_size = size; +} + +/* Set up Trusted Memory Region */ +static int psp_tmr_init(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + /* + * Allocate 3M memory aligned to 1M from Frame Buffer (local + * physical). + * + * Note: this memory need be reserved till the driver + * uninitializes. + */ + ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + if (ret) + goto failed; + + psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr, 1); + if (ret) + goto failed_mem; + + kfree(cmd); + + return 0; + +failed_mem: + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); +failed: + kfree(cmd); + return ret; +} + +static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t asd_mc, uint64_t asd_mc_shared, + uint32_t size, uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_ASD; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc); + cmd->cmd.cmd_load_ta.app_len = size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_asd_load(struct psp_context *psp) +{ + int ret; + struct amdgpu_bo *asd_bo, *asd_shared_bo; + uint64_t asd_mc_addr, asd_shared_mc_addr; + void *asd_buf, *asd_shared_buf; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for shared ASD <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &asd_shared_bo, &asd_shared_mc_addr, &asd_buf); + if (ret) + goto failed; + + /* + * Allocate 256k memory aligned to 4k from Frame Buffer (local + * physical) for ASD firmware + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_BIN_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &asd_bo, &asd_mc_addr, &asd_buf); + if (ret) + goto failed_mem; + + memcpy(asd_buf, psp->asd_start_addr, psp->asd_ucode_size); + + psp_prep_asd_cmd_buf(cmd, asd_mc_addr, asd_shared_mc_addr, + psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr, 2); + if (ret) + goto failed_mem1; + + amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf); + amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf); + kfree(cmd); + + return 0; + +failed_mem1: + amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf); +failed_mem: + amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf); +failed: + kfree(cmd); + return ret; +} + +static int psp_load_fw(struct amdgpu_device *adev) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + int i; + struct amdgpu_firmware_info *ucode; + struct psp_context *psp = &adev->psp; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + ret = psp_bootloader_load_sysdrv(psp); + if (ret) + goto failed; + + ret = psp_bootloader_load_sos(psp); + if (ret) + goto failed; + + ret = psp_ring_init(psp, PSP_RING_TYPE__KM); + if (ret) + goto failed; + + ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->fence_buf_bo, + &psp->fence_buf_mc_addr, + &psp->fence_buf); + if (ret) + goto failed; + + memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); + + ret = psp_tmr_init(psp); + if (ret) + goto failed_mem; + + ret = psp_asd_load(psp); + if (ret) + goto failed_mem; + + for (i = 0; i < adev->firmware.max_ucodes; i++) { + ucode = &adev->firmware.ucode[i]; + if (!ucode->fw) + continue; + + if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && + psp_smu_reload_quirk(psp)) + continue; + + ret = psp_prep_cmd_buf(ucode, cmd); + if (ret) + goto failed_mem; + + ret = psp_cmd_submit_buf(psp, ucode, cmd, + psp->fence_buf_mc_addr, i + 3); + if (ret) + goto failed_mem; + +#if 0 + /* check if firmware loaded sucessfully */ + if (!amdgpu_psp_check_fw_loading_status(adev, i)) + return -EINVAL; +#endif + } + + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + kfree(cmd); + + return 0; + +failed_mem: + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); +failed: + kfree(cmd); + return ret; +} + +static int psp_hw_init(void *handle) +{ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + + mutex_lock(&adev->firmware.mutex); + /* + * This sequence is just used on hw_init only once, no need on + * resume. + */ + ret = amdgpu_ucode_init_bo(adev); + if (ret) + goto failed; + + ret = psp_load_fw(adev); + if (ret) { + DRM_ERROR("PSP firmware loading failed\n"); + goto failed; + } + + mutex_unlock(&adev->firmware.mutex); + return 0; + +failed: + adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; + mutex_unlock(&adev->firmware.mutex); + return -EINVAL; +} + +static int psp_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) + amdgpu_ucode_fini_bo(adev); + + if (psp->tmr_buf) + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + + return 0; +} + +static int psp_suspend(void *handle) +{ + return 0; +} + +static int psp_resume(void *handle) +{ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + + mutex_lock(&adev->firmware.mutex); + + ret = psp_load_fw(adev); + if (ret) + DRM_ERROR("PSP resume failed\n"); + + mutex_unlock(&adev->firmware.mutex); + + return ret; +} + +static bool psp_check_fw_loading_status(struct amdgpu_device *adev, + enum AMDGPU_UCODE_ID ucode_type) +{ + struct amdgpu_firmware_info *ucode = NULL; + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + DRM_INFO("firmware is not loaded by PSP\n"); + return true; + } + + if (!adev->firmware.fw_size) + return false; + + ucode = &adev->firmware.ucode[ucode_type]; + if (!ucode->fw || !ucode->ucode_size) + return false; + + return psp_compare_sram_data(&adev->psp, ucode, ucode_type); +} + +static int psp_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int psp_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs psp_ip_funcs = { + .name = "psp", + .early_init = psp_early_init, + .late_init = NULL, + .sw_init = psp_sw_init, + .sw_fini = psp_sw_fini, + .hw_init = psp_hw_init, + .hw_fini = psp_hw_fini, + .suspend = psp_suspend, + .resume = psp_resume, + .is_idle = NULL, + .wait_for_idle = NULL, + .soft_reset = NULL, + .set_clockgating_state = psp_set_clockgating_state, + .set_powergating_state = psp_set_powergating_state, +}; + +static const struct amdgpu_psp_funcs psp_funcs = { + .check_fw_loading_status = psp_check_fw_loading_status, +}; + +static void psp_set_funcs(struct amdgpu_device *adev) +{ + if (NULL == adev->firmware.funcs) + adev->firmware.funcs = &psp_funcs; +} + +const struct amdgpu_ip_block_version psp_v3_1_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 3, + .minor = 1, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h new file mode 100644 index 000000000000..e9f35e025b59 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -0,0 +1,127 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ +#ifndef __AMDGPU_PSP_H__ +#define __AMDGPU_PSP_H__ + +#include "amdgpu.h" +#include "psp_gfx_if.h" + +#define PSP_FENCE_BUFFER_SIZE 0x1000 +#define PSP_CMD_BUFFER_SIZE 0x1000 +#define PSP_ASD_BIN_SIZE 0x40000 +#define PSP_ASD_SHARED_MEM_SIZE 0x4000 + +enum psp_ring_type +{ + PSP_RING_TYPE__INVALID = 0, + /* + * These values map to the way the PSP kernel identifies the + * rings. + */ + PSP_RING_TYPE__UM = 1, /* User mode ring (formerly called RBI) */ + PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */ +}; + +struct psp_ring +{ + enum psp_ring_type ring_type; + struct psp_gfx_rb_frame *ring_mem; + uint64_t ring_mem_mc_addr; + void *ring_mem_handle; + uint32_t ring_size; +}; + +struct psp_context +{ + struct amdgpu_device *adev; + struct psp_ring km_ring; + + int (*init_microcode)(struct psp_context *psp); + int (*bootloader_load_sysdrv)(struct psp_context *psp); + int (*bootloader_load_sos)(struct psp_context *psp); + int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd); + int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); + int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index); + bool (*compare_sram_data)(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type); + bool (*smu_reload_quirk)(struct psp_context *psp); + + /* sos firmware */ + const struct firmware *sos_fw; + uint32_t sos_fw_version; + uint32_t sos_feature_version; + uint32_t sys_bin_size; + uint32_t sos_bin_size; + uint8_t *sys_start_addr; + uint8_t *sos_start_addr; + + /* tmr buffer */ + struct amdgpu_bo *tmr_bo; + uint64_t tmr_mc_addr; + void *tmr_buf; + + /* asd firmware */ + const struct firmware *asd_fw; + uint32_t asd_fw_version; + uint32_t asd_feature_version; + uint32_t asd_ucode_size; + uint8_t *asd_start_addr; + + /* fence buffer */ + struct amdgpu_bo *fence_buf_bo; + uint64_t fence_buf_mc_addr; + void *fence_buf; +}; + +struct amdgpu_psp_funcs { + bool (*check_fw_loading_status)(struct amdgpu_device *adev, + enum AMDGPU_UCODE_ID); +}; + +#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) +#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) +#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ + (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) +#define psp_compare_sram_data(psp, ucode, type) \ + (psp)->compare_sram_data((psp), (ucode), (type)) +#define psp_init_microcode(psp) \ + ((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0) +#define psp_bootloader_load_sysdrv(psp) \ + ((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0) +#define psp_bootloader_load_sos(psp) \ + ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0) +#define psp_smu_reload_quirk(psp) \ + ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false) + +extern const struct amd_ip_funcs psp_ip_funcs; + +extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; +extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, + uint32_t field_val, uint32_t mask, bool check_changed); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h new file mode 100644 index 000000000000..8da6da90b1c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -0,0 +1,269 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _PSP_TEE_GFX_IF_H_ +#define _PSP_TEE_GFX_IF_H_ + +#define PSP_GFX_CMD_BUF_VERSION 0x00000001 + +#define GFX_CMD_STATUS_MASK 0x0000FFFF +#define GFX_CMD_ID_MASK 0x000F0000 +#define GFX_CMD_RESERVED_MASK 0x7FF00000 +#define GFX_CMD_RESPONSE_MASK 0x80000000 + +/* TEE Gfx Command IDs for the register interface. +* Command ID must be between 0x00010000 and 0x000F0000. +*/ +enum psp_gfx_crtl_cmd_id +{ + GFX_CTRL_CMD_ID_INIT_RBI_RING = 0x00010000, /* initialize RBI ring */ + GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */ + GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */ + GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */ + + GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ +}; + + +/* Control registers of the TEE Gfx interface. These are located in +* SRBM-to-PSP mailbox registers (total 8 registers). +*/ +struct psp_gfx_ctrl +{ + volatile uint32_t cmd_resp; /* +0 Command/Response register for Gfx commands */ + volatile uint32_t rbi_wptr; /* +4 Write pointer (index) of RBI ring */ + volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */ + volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */ + volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */ + volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */ + volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */ + volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */ + +}; + + +/* Response flag is set in the command when command is completed by PSP. +* Used in the GFX_CTRL.CmdResp. +* When PSP GFX I/F is initialized, the flag is set. +*/ +#define GFX_FLAG_RESPONSE 0x80000000 + + +/* TEE Gfx Command IDs for the ring buffer interface. */ +enum psp_gfx_cmd_id +{ + GFX_CMD_ID_LOAD_TA = 0x00000001, /* load TA */ + GFX_CMD_ID_UNLOAD_TA = 0x00000002, /* unload TA */ + GFX_CMD_ID_INVOKE_CMD = 0x00000003, /* send command to TA */ + GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ + GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ + GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ + +}; + + +/* Command to load Trusted Application binary into PSP OS. */ +struct psp_gfx_cmd_load_ta +{ + uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */ + uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */ + uint32_t app_len; /* length of the TA binary in bytes */ + uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */ + uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */ + + /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided + * for the TA. Each InvokeCommand can have dinamically mapped CMD buffer instead + * of using global persistent buffer. + */ +}; + + +/* Command to Unload Trusted Application binary from PSP OS. */ +struct psp_gfx_cmd_unload_ta +{ + uint32_t session_id; /* Session ID of the loaded TA to be unloaded */ + +}; + + +/* Shared buffers for InvokeCommand. +*/ +struct psp_gfx_buf_desc +{ + uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */ + uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */ + +}; + +/* Max number of descriptors for one shared buffer (in how many different +* physical locations one shared buffer can be stored). If buffer is too much +* fragmented, error will be returned. +*/ +#define GFX_BUF_MAX_DESC 64 + +struct psp_gfx_buf_list +{ + uint32_t num_desc; /* number of buffer descriptors in the list */ + uint32_t total_size; /* total size of all buffers in the list in bytes (must be multiple of 4 KB) */ + struct psp_gfx_buf_desc buf_desc[GFX_BUF_MAX_DESC]; /* list of buffer descriptors */ + + /* total 776 bytes */ +}; + +/* Command to execute InvokeCommand entry point of the TA. */ +struct psp_gfx_cmd_invoke_cmd +{ + uint32_t session_id; /* Session ID of the TA to be executed */ + uint32_t ta_cmd_id; /* Command ID to be sent to TA */ + struct psp_gfx_buf_list buf; /* one indirect buffer (scatter/gather list) */ + +}; + + +/* Command to setup TMR region. */ +struct psp_gfx_cmd_setup_tmr +{ + uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */ + uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ + +}; + + +/* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */ +enum psp_gfx_fw_type +{ + GFX_FW_TYPE_NONE = 0, + GFX_FW_TYPE_CP_ME = 1, + GFX_FW_TYPE_CP_PFP = 2, + GFX_FW_TYPE_CP_CE = 3, + GFX_FW_TYPE_CP_MEC = 4, + GFX_FW_TYPE_CP_MEC_ME1 = 5, + GFX_FW_TYPE_CP_MEC_ME2 = 6, + GFX_FW_TYPE_RLC_V = 7, + GFX_FW_TYPE_RLC_G = 8, + GFX_FW_TYPE_SDMA0 = 9, + GFX_FW_TYPE_SDMA1 = 10, + GFX_FW_TYPE_DMCU_ERAM = 11, + GFX_FW_TYPE_DMCU_ISR = 12, + GFX_FW_TYPE_VCN = 13, + GFX_FW_TYPE_UVD = 14, + GFX_FW_TYPE_VCE = 15, + GFX_FW_TYPE_ISP = 16, + GFX_FW_TYPE_ACP = 17, + GFX_FW_TYPE_SMU = 18, +}; + +/* Command to load HW IP FW. */ +struct psp_gfx_cmd_load_ip_fw +{ + uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */ + uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */ + uint32_t fw_size; /* FW buffer size in bytes */ + enum psp_gfx_fw_type fw_type; /* FW type */ + +}; + + +/* All GFX ring buffer commands. */ +union psp_gfx_commands +{ + struct psp_gfx_cmd_load_ta cmd_load_ta; + struct psp_gfx_cmd_unload_ta cmd_unload_ta; + struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd; + struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; + struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; + +}; + + +/* Structure of GFX Response buffer. +* For GPCOM I/F it is part of GFX_CMD_RESP buffer, for RBI +* it is separate buffer. +*/ +struct psp_gfx_resp +{ + uint32_t status; /* +0 status of command execution */ + uint32_t session_id; /* +4 session ID in response to LoadTa command */ + uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */ + uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */ + + uint32_t reserved[4]; + + /* total 32 bytes */ +}; + +/* Structure of Command buffer pointed by psp_gfx_rb_frame.cmd_buf_addr_hi +* and psp_gfx_rb_frame.cmd_buf_addr_lo. +*/ +struct psp_gfx_cmd_resp +{ + uint32_t buf_size; /* +0 total size of the buffer in bytes */ + uint32_t buf_version; /* +4 version of the buffer strusture; must be PSP_GFX_CMD_BUF_VERSION */ + uint32_t cmd_id; /* +8 command ID */ + + /* These fields are used for RBI only. They are all 0 in GPCOM commands + */ + uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */ + uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */ + uint32_t resp_offset; /* +20 offset within response buffer */ + uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */ + + union psp_gfx_commands cmd; /* +28 command specific structures */ + + uint8_t reserved_1[864 - sizeof(union psp_gfx_commands) - 28]; + + /* Note: Resp is part of this buffer for GPCOM ring. For RBI ring the response + * is separate buffer pointed by resp_buf_addr_hi and resp_buf_addr_lo. + */ + struct psp_gfx_resp resp; /* +864 response */ + + uint8_t reserved_2[1024 - 864 - sizeof(struct psp_gfx_resp)]; + + /* total size 1024 bytes */ +}; + + +#define FRAME_TYPE_DESTROY 1 /* frame sent by KMD driver when UMD Scheduler context is destroyed*/ + +/* Structure of the Ring Buffer Frame */ +struct psp_gfx_rb_frame +{ + uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */ + uint32_t cmd_buf_size; /* +8 command buffer size in bytes */ + uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */ + uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */ + uint32_t fence_value; /* +20 Fence value */ + uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */ + uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */ + uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ + uint8_t frame_type; /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */ + uint8_t reserved1[2]; /* +34 reserved, must be 0 */ + uint32_t reserved2[7]; /* +40 reserved, must be 0 */ + /* total 64 bytes */ +}; + +#endif /* _PSP_TEE_GFX_IF_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c new file mode 100644 index 000000000000..49c3844bb695 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -0,0 +1,507 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ + +#include +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v3_1.h" + +#include "vega10/soc15ip.h" +#include "vega10/MP/mp_9_0_offset.h" +#include "vega10/MP/mp_9_0_sh_mask.h" +#include "vega10/GC/gc_9_0_offset.h" +#include "vega10/SDMA0/sdma0_4_0_offset.h" +#include "vega10/NBIO/nbio_6_1_offset.h" + +MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); +MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); + +#define smnMP1_FIRMWARE_FLAGS 0x3010028 + +static int +psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) +{ + switch(ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + *type = GFX_FW_TYPE_SDMA0; + break; + case AMDGPU_UCODE_ID_SDMA1: + *type = GFX_FW_TYPE_SDMA1; + break; + case AMDGPU_UCODE_ID_CP_CE: + *type = GFX_FW_TYPE_CP_CE; + break; + case AMDGPU_UCODE_ID_CP_PFP: + *type = GFX_FW_TYPE_CP_PFP; + break; + case AMDGPU_UCODE_ID_CP_ME: + *type = GFX_FW_TYPE_CP_ME; + break; + case AMDGPU_UCODE_ID_CP_MEC1: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + *type = GFX_FW_TYPE_CP_MEC_ME1; + break; + case AMDGPU_UCODE_ID_CP_MEC2: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + *type = GFX_FW_TYPE_CP_MEC_ME2; + break; + case AMDGPU_UCODE_ID_RLC_G: + *type = GFX_FW_TYPE_RLC_G; + break; + case AMDGPU_UCODE_ID_SMC: + *type = GFX_FW_TYPE_SMU; + break; + case AMDGPU_UCODE_ID_UVD: + *type = GFX_FW_TYPE_UVD; + break; + case AMDGPU_UCODE_ID_VCE: + *type = GFX_FW_TYPE_VCE; + break; + case AMDGPU_UCODE_ID_MAXIMUM: + default: + return -EINVAL; + } + + return 0; +} + +int psp_v3_1_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + const char *chip_name; + char fw_name[30]; + int err = 0; + const struct psp_firmware_header_v1_0 *hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + default: BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name); + err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.sos_fw); + if (err) + goto out; + + hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; + adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version); + adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes); + adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) - + le32_to_cpu(hdr->sos_size_bytes); + adev->psp.sys_start_addr = (uint8_t *)hdr + + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(hdr->sos_offset_bytes); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); + err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.asd_fw); + if (err) + goto out; + + hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; + adev->psp.asd_fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->psp.asd_feature_version = le32_to_cpu(hdr->ucode_feature_version); + adev->psp.asd_ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes); + adev->psp.asd_start_addr = (uint8_t *)hdr + + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + + return 0; +out: + if (err) { + dev_err(adev->dev, + "psp v3.1: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->psp.sos_fw); + adev->psp.sos_fw = NULL; + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + } + + return err; +} + +int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_bo *psp_sysdrv; + void *psp_sysdrv_virt = NULL; + uint64_t psp_sysdrv_mem; + struct amdgpu_device *adev = psp->adev; + uint32_t size; + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + /* + * Create a 1 meg GART memory to store the psp sys driver + * binary with a 1 meg aligned address + */ + size = (psp->sys_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) & + (~(PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)); + + ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT, + AMDGPU_GEM_DOMAIN_GTT, + &psp_sysdrv, + &psp_sysdrv_mem, + &psp_sysdrv_virt); + if (ret) + return ret; + + /* Copy PSP System Driver binary to memory */ + memcpy(psp_sysdrv_virt, psp->sys_start_addr, psp->sys_bin_size); + + /* Provide the sys driver to bootrom */ + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), + (uint32_t)(psp_sysdrv_mem >> 20)); + psp_gfxdrv_command_reg = 1 << 16; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + + amdgpu_bo_free_kernel(&psp_sysdrv, &psp_sysdrv_mem, &psp_sysdrv_virt); + + return ret; +} + +int psp_v3_1_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_bo *psp_sos; + void *psp_sos_virt = NULL; + uint64_t psp_sos_mem; + struct amdgpu_device *adev = psp->adev; + uint32_t size; + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + size = (psp->sos_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) & + (~((uint64_t)PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)); + + ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT, + AMDGPU_GEM_DOMAIN_GTT, + &psp_sos, + &psp_sos_mem, + &psp_sos_virt); + if (ret) + return ret; + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp_sos_virt, psp->sos_start_addr, psp->sos_bin_size); + + /* Provide the PSP secure OS to bootrom */ + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), + (uint32_t)(psp_sos_mem >> 20)); + psp_gfxdrv_command_reg = 2 << 16; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); +#if 0 + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), + RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)), + 0, true); +#endif + + amdgpu_bo_free_kernel(&psp_sos, &psp_sos_mem, &psp_sos_virt); + + return ret; +} + +int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) +{ + int ret; + uint64_t fw_mem_mc_addr = ucode->mc_addr; + + memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + + cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = (uint32_t)fw_mem_mc_addr; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = (uint32_t)((uint64_t)fw_mem_mc_addr >> 32); + cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; + + ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); + if (ret) + DRM_ERROR("Unknown firmware type\n"); + + return ret; +} + +int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_70), psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_71), psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + return ret; +} + +int psp_v3_1_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + uint32_t ring_size_dw = ring->ring_size / 4; + uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; + + /* KM (GPCOM) prepare write pointer */ + psp_write_ptr_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67)); + + /* Update KM RB frame pointer to new frame */ + /* write_frame ptr increments by size of rb_frame in bytes */ + /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ + if ((psp_write_ptr_reg % ring_size_dw) == 0) + write_frame = ring->ring_mem; + else + write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw); + + /* Initialize KM RB frame */ + memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = (unsigned int)(cmd_buf_mc_addr >> 32); + write_frame->cmd_buf_addr_lo = (unsigned int)(cmd_buf_mc_addr); + write_frame->fence_addr_hi = (unsigned int)(fence_mc_addr >> 32); + write_frame->fence_addr_lo = (unsigned int)(fence_mc_addr); + write_frame->fence_value = index; + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67), psp_write_ptr_reg); + + return 0; +} + +static int +psp_v3_1_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) +{ + int ret = 0; + + switch(ucode_id) { +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SMC: + *sram_offset = 0; + *sram_addr_reg_offset = 0; + *sram_data_reg_offset = 0; + break; +#endif + + case AMDGPU_UCODE_ID_CP_CE: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_PFP: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_ME: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC1: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC2: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_RLC_G: + *sram_offset = 0x2000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_SDMA0: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + break; + +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SDMA1: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_UVD: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_VCE: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; +#endif + + case AMDGPU_UCODE_ID_MAXIMUM: + default: + ret = -EINVAL; + break; + } + + return ret; +} + +bool psp_v3_1_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type) +{ + int err = 0; + unsigned int fw_sram_reg_val = 0; + unsigned int fw_sram_addr_reg_offset = 0; + unsigned int fw_sram_data_reg_offset = 0; + unsigned int ucode_size; + uint32_t *ucode_mem = NULL; + struct amdgpu_device *adev = psp->adev; + + err = psp_v3_1_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset, + &fw_sram_data_reg_offset, ucode_type); + if (err) + return false; + + WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); + + ucode_size = ucode->ucode_size; + ucode_mem = (uint32_t *)ucode->kaddr; + while (!ucode_size) { + fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); + + if (*ucode_mem != fw_sram_reg_val) + return false; + + ucode_mem++; + /* 4 bytes */ + ucode_size -= 4; + } + + return true; +} + +bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t reg, reg_val; + + reg_val = (smnMP1_FIRMWARE_FLAGS & 0xffffffff) | 0x03b00000; + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), reg_val); + reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)); + if ((reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >> + MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT) + return true; + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h new file mode 100644 index 000000000000..e82eff741a08 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h @@ -0,0 +1,50 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ +#ifndef __PSP_V3_1_H__ +#define __PSP_V3_1_H__ + +#include "amdgpu_psp.h" + +enum { PSP_DIRECTORY_TABLE_ENTRIES = 4 }; +enum { PSP_BINARY_ALIGNMENT = 64 }; +enum { PSP_BOOTLOADER_1_MEG_ALIGNMENT = 0x100000 }; +enum { PSP_BOOTLOADER_8_MEM_ALIGNMENT = 0x800000 }; + +extern int psp_v3_1_init_microcode(struct psp_context *psp); +extern int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp); +extern int psp_v3_1_bootloader_load_sos(struct psp_context *psp); +extern int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd); +extern int psp_v3_1_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type); +extern int psp_v3_1_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index); +extern bool psp_v3_1_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type); +extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp); +#endif diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index a94420d680ec..2ccf44e580de 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -68,6 +68,7 @@ enum amd_ip_block_type { AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_IH, AMD_IP_BLOCK_TYPE_SMC, + AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_DCE, AMD_IP_BLOCK_TYPE_GFX, AMD_IP_BLOCK_TYPE_SDMA, -- cgit v1.2.3 From e6b3ecb4dbd9b03498a66fb777a54084f7182359 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 30 Dec 2016 16:18:56 +0800 Subject: drm/amdgpu/vega10:fix DOORBELL64 scheme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Signed-off-by: Xiangliang Yu Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c571f6835848..ec5d4ae0223e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -706,15 +706,24 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */ AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */ - /* VCN engine */ - AMDGPU_DOORBELL64_VCN0 = 0xF8, - AMDGPU_DOORBELL64_VCN1 = 0xF9, - AMDGPU_DOORBELL64_VCN2 = 0xFA, - AMDGPU_DOORBELL64_VCN3 = 0xFB, - AMDGPU_DOORBELL64_VCN4 = 0xFC, - AMDGPU_DOORBELL64_VCN5 = 0xFD, - AMDGPU_DOORBELL64_VCN6 = 0xFE, - AMDGPU_DOORBELL64_VCN7 = 0xFF, + /* VCN engine use 32 bits doorbell */ + AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_DOORBELL64_VCN2_3 = 0xF9, + AMDGPU_DOORBELL64_VCN4_5 = 0xFA, + AMDGPU_DOORBELL64_VCN6_7 = 0xFB, + + /* overlap the doorbell assignment with VCN as they are mutually exclusive + * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + */ + AMDGPU_DOORBELL64_RING0_1 = 0xF8, + AMDGPU_DOORBELL64_RING2_3 = 0xF9, + AMDGPU_DOORBELL64_RING4_5 = 0xFA, + AMDGPU_DOORBELL64_RING6_7 = 0xFB, + + AMDGPU_DOORBELL64_UVD_RING0_1 = 0xFC, + AMDGPU_DOORBELL64_UVD_RING2_3 = 0xFD, + AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFE, + AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFF, AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, AMDGPU_DOORBELL64_INVALID = 0xFFFF -- cgit v1.2.3 From 5846e355694f8a0be33a29d45ea8cafb0b5de22c Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 23 Mar 2017 16:10:04 +0800 Subject: drm/amdgpu:fix ring_write_multiple ring_write_multiple should use buf_mask instead of ptr_mask Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ec5d4ae0223e..262056778f52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1739,9 +1739,9 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr if (ring->count_dw < count_dw) { DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); } else { - occupied = ring->wptr & ring->ptr_mask; + occupied = ring->wptr & ring->buf_mask; dst = (void *)&ring->ring[occupied]; - chunk1 = ring->ptr_mask + 1 - occupied; + chunk1 = ring->buf_mask + 1 - occupied; chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; chunk2 = count_dw - chunk1; chunk1 <<= 2; -- cgit v1.2.3 From a9f87f6452543505108d7d9b36d948534ea28b0b Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Mar 2017 14:03:59 +0200 Subject: drm/amdgpu: use a 64bit interval tree for VM management v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This only makes a difference for 32-bit systems. The idea is to have a fixed virtual address space size with 4-level page tables and to minimize differences between 32 and 64-bit systems. v2: Update commit message. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 12 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 93 ++++++++++++++++--------------- 7 files changed, 71 insertions(+), 62 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 262056778f52..5c6bcacd11af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -379,7 +379,10 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va_mapping { struct list_head list; - struct interval_tree_node it; + struct rb_node rb; + uint64_t start; + uint64_t last; + uint64_t __subtree_last; uint64_t offset; uint64_t flags; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 97f661372a1c..9c7f7ed07ea0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -949,7 +949,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, } if ((chunk_ib->va_start + chunk_ib->ib_bytes) > - (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("IB va_start+ib_bytes is invalid\n"); return -EINVAL; } @@ -960,7 +960,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return r; } - offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE; + offset = m->start * AMDGPU_GPU_PAGE_SIZE; kptr += chunk_ib->va_start - offset; r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib); @@ -1388,8 +1388,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, continue; list_for_each_entry(mapping, &lobj->bo_va->valids, list) { - if (mapping->it.start > addr || - addr > mapping->it.last) + if (mapping->start > addr || + addr > mapping->last) continue; *bo = lobj->bo_va->bo; @@ -1397,8 +1397,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, } list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { - if (mapping->it.start > addr || - addr > mapping->it.last) + if (mapping->start > addr || + addr > mapping->last) continue; *bo = lobj->bo_va->bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 7ea3cacf9f9f..38f739fb727b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index a87de18160a8..ee9d0f346d75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -226,8 +226,8 @@ TRACE_EVENT(amdgpu_vm_bo_map, TP_fast_assign( __entry->bo = bo_va ? bo_va->bo : NULL; - __entry->start = mapping->it.start; - __entry->last = mapping->it.last; + __entry->start = mapping->start; + __entry->last = mapping->last; __entry->offset = mapping->offset; __entry->flags = mapping->flags; ), @@ -250,8 +250,8 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, TP_fast_assign( __entry->bo = bo_va->bo; - __entry->start = mapping->it.start; - __entry->last = mapping->it.last; + __entry->start = mapping->start; + __entry->last = mapping->last; __entry->offset = mapping->offset; __entry->flags = mapping->flags; ), @@ -270,8 +270,8 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping, ), TP_fast_assign( - __entry->soffset = mapping->it.start; - __entry->eoffset = mapping->it.last + 1; + __entry->soffset = mapping->start; + __entry->eoffset = mapping->last + 1; __entry->flags = mapping->flags; ), TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 0b92dd0c1d70..2ca09f111f08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -741,10 +741,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) start = amdgpu_bo_gpu_offset(bo); - end = (mapping->it.last + 1 - mapping->it.start); + end = (mapping->last + 1 - mapping->start); end = end * AMDGPU_GPU_PAGE_SIZE + start; - addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; + addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE; start += addr; amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 0184197eb000..c853400805d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -595,13 +595,13 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, } if ((addr + (uint64_t)size) > - ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { + (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", addr, lo, hi); return -EINVAL; } - addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; + addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE; addr += amdgpu_bo_gpu_offset(bo); addr -= ((uint64_t)size) * ((uint64_t)index); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0235d7933efd..5f3d932e77f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -26,6 +26,7 @@ * Jerome Glisse */ #include +#include #include #include #include "amdgpu.h" @@ -51,6 +52,15 @@ * SI supports 16. */ +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, + START, LAST, static, amdgpu_vm_it) + +#undef START +#undef LAST + /* Local structure. Encapsulate some VM table update parameters to reduce * the number of function parameters */ @@ -1301,7 +1311,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct drm_mm_node *nodes, struct dma_fence **fence) { - uint64_t pfn, src = 0, start = mapping->it.start; + uint64_t pfn, src = 0, start = mapping->start; int r; /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here @@ -1353,7 +1363,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } addr += pfn << PAGE_SHIFT; - last = min((uint64_t)mapping->it.last, start + max_entries - 1); + last = min((uint64_t)mapping->last, start + max_entries - 1); r = amdgpu_vm_bo_update_mapping(adev, exclusive, src, pages_addr, vm, start, last, flags, addr, @@ -1368,7 +1378,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } start = last + 1; - } while (unlikely(start != mapping->it.last + 1)); + } while (unlikely(start != mapping->last + 1)); return 0; } @@ -1724,9 +1734,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t saddr, uint64_t offset, uint64_t size, uint64_t flags) { - struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo_va_mapping *mapping, *tmp; struct amdgpu_vm *vm = bo_va->vm; - struct interval_tree_node *it; uint64_t eaddr; /* validate the parameters */ @@ -1743,14 +1752,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - it = interval_tree_iter_first(&vm->va, saddr, eaddr); - if (it) { - struct amdgpu_bo_va_mapping *tmp; - tmp = container_of(it, struct amdgpu_bo_va_mapping, it); + tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); + if (tmp) { /* bo and tmp overlap, invalid addr */ dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " - "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr, - tmp->it.start, tmp->it.last + 1); + "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, + tmp->start, tmp->last + 1); return -EINVAL; } @@ -1759,13 +1766,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, return -ENOMEM; INIT_LIST_HEAD(&mapping->list); - mapping->it.start = saddr; - mapping->it.last = eaddr; + mapping->start = saddr; + mapping->last = eaddr; mapping->offset = offset; mapping->flags = flags; list_add(&mapping->list, &bo_va->invalids); - interval_tree_insert(&mapping->it, &vm->va); + amdgpu_vm_it_insert(mapping, &vm->va); if (flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); @@ -1823,13 +1830,13 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - mapping->it.start = saddr; - mapping->it.last = eaddr; + mapping->start = saddr; + mapping->last = eaddr; mapping->offset = offset; mapping->flags = flags; list_add(&mapping->list, &bo_va->invalids); - interval_tree_insert(&mapping->it, &vm->va); + amdgpu_vm_it_insert(mapping, &vm->va); if (flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); @@ -1860,7 +1867,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; list_for_each_entry(mapping, &bo_va->valids, list) { - if (mapping->it.start == saddr) + if (mapping->start == saddr) break; } @@ -1868,7 +1875,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, valid = false; list_for_each_entry(mapping, &bo_va->invalids, list) { - if (mapping->it.start == saddr) + if (mapping->start == saddr) break; } @@ -1877,7 +1884,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, } list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); + amdgpu_vm_it_remove(mapping, &vm->va); trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (valid) @@ -1905,7 +1912,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, uint64_t saddr, uint64_t size) { struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; - struct interval_tree_node *it; LIST_HEAD(removed); uint64_t eaddr; @@ -1927,43 +1933,42 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, INIT_LIST_HEAD(&after->list); /* Now gather all removed mappings */ - it = interval_tree_iter_first(&vm->va, saddr, eaddr); - while (it) { - tmp = container_of(it, struct amdgpu_bo_va_mapping, it); - it = interval_tree_iter_next(it, saddr, eaddr); - + tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); + while (tmp) { /* Remember mapping split at the start */ - if (tmp->it.start < saddr) { - before->it.start = tmp->it.start; - before->it.last = saddr - 1; + if (tmp->start < saddr) { + before->start = tmp->start; + before->last = saddr - 1; before->offset = tmp->offset; before->flags = tmp->flags; list_add(&before->list, &tmp->list); } /* Remember mapping split at the end */ - if (tmp->it.last > eaddr) { - after->it.start = eaddr + 1; - after->it.last = tmp->it.last; + if (tmp->last > eaddr) { + after->start = eaddr + 1; + after->last = tmp->last; after->offset = tmp->offset; - after->offset += after->it.start - tmp->it.start; + after->offset += after->start - tmp->start; after->flags = tmp->flags; list_add(&after->list, &tmp->list); } list_del(&tmp->list); list_add(&tmp->list, &removed); + + tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr); } /* And free them up */ list_for_each_entry_safe(tmp, next, &removed, list) { - interval_tree_remove(&tmp->it, &vm->va); + amdgpu_vm_it_remove(tmp, &vm->va); list_del(&tmp->list); - if (tmp->it.start < saddr) - tmp->it.start = saddr; - if (tmp->it.last > eaddr) - tmp->it.last = eaddr; + if (tmp->start < saddr) + tmp->start = saddr; + if (tmp->last > eaddr) + tmp->last = eaddr; list_add(&tmp->list, &vm->freed); trace_amdgpu_vm_bo_unmap(NULL, tmp); @@ -1971,7 +1976,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping before the range */ if (!list_empty(&before->list)) { - interval_tree_insert(&before->it, &vm->va); + amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); } else { @@ -1980,7 +1985,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping after the range */ if (!list_empty(&after->list)) { - interval_tree_insert(&after->it, &vm->va); + amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); } else { @@ -2014,13 +2019,13 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); + amdgpu_vm_it_remove(mapping, &vm->va); trace_amdgpu_vm_bo_unmap(bo_va, mapping); list_add(&mapping->list, &vm->freed); } list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); + amdgpu_vm_it_remove(mapping, &vm->va); amdgpu_vm_free_mapping(adev, vm, mapping, bo_va->last_pt_update); } @@ -2162,9 +2167,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (!RB_EMPTY_ROOT(&vm->va)) { dev_err(adev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) { + rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); + amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); } list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { -- cgit v1.2.3 From 5a9b8e8a0bb45836d3678466cae325728127a5ba Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Mar 2017 14:37:23 +0200 Subject: drm/amdgpu: fix VMHUB order to match the hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Match our defines with what the hw uses. Reviewed-by: Junwei Zhang Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 +++--------- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5c6bcacd11af..a90b49a15335 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -124,8 +124,8 @@ extern int amdgpu_param_buf_per_se; /* max number of VMHUB */ #define AMDGPU_MAX_VMHUBS 2 -#define AMDGPU_MMHUB 0 -#define AMDGPU_GFXHUB 1 +#define AMDGPU_GFXHUB 0 +#define AMDGPU_MMHUB 1 /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (8 << 20) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index df69aae99df4..8dd99b2a0026 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -129,8 +129,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB]; - struct amdgpu_vmhub *mmhub = &adev->vmhub[AMDGPU_MMHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[entry->vm_id_src]; uint32_t status = 0; u64 addr; @@ -138,13 +137,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (!amdgpu_sriov_vf(adev)) { - if (entry->vm_id_src) { - status = RREG32(mmhub->vm_l2_pro_fault_status); - WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1); - } else { - status = RREG32(gfxhub->vm_l2_pro_fault_status); - WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1); - } + status = RREG32(hub->vm_l2_pro_fault_status); + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } if (printk_ratelimit()) { -- cgit v1.2.3 From eb60ef2b4d77c591539f1cf4af2f0c2022d57689 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Mar 2017 14:41:19 +0200 Subject: drm/amdgpu: move VM related defines into amdgpu_vm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to clean up amdgpu.h. Reviewed-by: Junwei Zhang Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 -------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a90b49a15335..46c22e7df99d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -122,14 +122,6 @@ extern int amdgpu_param_buf_per_se; /* max number of IP instances */ #define AMDGPU_MAX_SDMA_INSTANCES 2 -/* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS 2 -#define AMDGPU_GFXHUB 0 -#define AMDGPU_MMHUB 1 - -/* hardcode that limit for now */ -#define AMDGPU_VA_RESERVED_SIZE (8 << 20) - /* hard reset data */ #define AMDGPU_ASIC_RESET_DATA 0x39d5e86b diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fbe17bf73a00..2c2996c161f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -76,6 +76,14 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 +/* max number of VMHUB */ +#define AMDGPU_MAX_VMHUBS 2 +#define AMDGPU_GFXHUB 0 +#define AMDGPU_MMHUB 1 + +/* hardcode that limit for now */ +#define AMDGPU_VA_RESERVED_SIZE (8 << 20) + struct amdgpu_vm_pt { struct amdgpu_bo *bo; uint64_t addr; -- cgit v1.2.3 From f75e237c41587b5e438cbf5d5c90ffcda4080955 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Mar 2017 15:55:07 +0200 Subject: drm/amdgpu: move adjust_mc_addr into amdgpu_gart_funcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should probably rename amdgpu_gart_funcs sooner or later. Reviewed-by: Junwei Zhang Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 23 +++++++---------------- 3 files changed, 9 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 46c22e7df99d..0a617601fcca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -304,10 +304,6 @@ struct amdgpu_gart_funcs { /* set pte flags based per asic */ uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, uint32_t flags); -}; - -/* provided by the mc block */ -struct amdgpu_mc_funcs { /* adjust mc addr in fb for APU case */ u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr); }; @@ -613,7 +609,6 @@ struct amdgpu_mc { u64 private_aperture_end; /* protects concurrent invalidation */ spinlock_t invalidate_lock; - const struct amdgpu_mc_funcs *mc_funcs; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9225493c8366..14f0889f7563 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -571,8 +571,8 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) { u64 addr = mc_addr; - if (adev->mc.mc_funcs && adev->mc.mc_funcs->adjust_mc_addr) - addr = adev->mc.mc_funcs->adjust_mc_addr(adev, addr); + if (adev->gart.gart_funcs->adjust_mc_addr) + addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr); return addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8dd99b2a0026..268cedb421d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -331,30 +331,22 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { - .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, - .set_pte_pde = gmc_v9_0_gart_set_pte_pde, - .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags -}; - -static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) -{ - if (adev->gart.gart_funcs == NULL) - adev->gart.gart_funcs = &gmc_v9_0_gart_funcs; -} - static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) { return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start; } -static const struct amdgpu_mc_funcs gmc_v9_0_mc_funcs = { +static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { + .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, + .set_pte_pde = gmc_v9_0_gart_set_pte_pde, + .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, .adjust_mc_addr = gmc_v9_0_adjust_mc_addr, }; -static void gmc_v9_0_set_mc_funcs(struct amdgpu_device *adev) +static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) { - adev->mc.mc_funcs = &gmc_v9_0_mc_funcs; + if (adev->gart.gart_funcs == NULL) + adev->gart.gart_funcs = &gmc_v9_0_gart_funcs; } static int gmc_v9_0_early_init(void *handle) @@ -362,7 +354,6 @@ static int gmc_v9_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; gmc_v9_0_set_gart_funcs(adev); - gmc_v9_0_set_mc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); return 0; -- cgit v1.2.3 From 112501642669807640b572d00bd0b3138922a065 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Mar 2017 15:31:13 +0200 Subject: drm/amdgpu: cleanup VMHUB bit definitions v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two hubs are just instances of the same hardware, so the register bits are identical. v2: only remove get_vm_protection_bits for now Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 12 ------------ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++++---- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 12 ------------ 4 files changed, 8 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0a617601fcca..cd573caa43ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -571,7 +571,6 @@ struct amdgpu_vmhub { uint32_t vm_l2_pro_fault_status; uint32_t vm_l2_pro_fault_cntl; uint32_t (*get_invalidate_req)(unsigned int vm_id); - uint32_t (*get_vm_protection_bits)(void); }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 30ef3126c8a9..b808d4ce86d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -318,17 +318,6 @@ static uint32_t gfxhub_v1_0_get_invalidate_req(unsigned int vm_id) return req; } -static uint32_t gfxhub_v1_0_get_vm_protection_bits(void) -{ - return (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK); -} - static int gfxhub_v1_0_early_init(void *handle) { return 0; @@ -362,7 +351,6 @@ static int gfxhub_v1_0_sw_init(void *handle) SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); hub->get_invalidate_req = gfxhub_v1_0_get_invalidate_req; - hub->get_vm_protection_bits = gfxhub_v1_0_get_vm_protection_bits; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 268cedb421d2..d81372357ae6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -75,11 +75,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_vmhub *hub; u32 tmp, reg, bits, i; + bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB]; - bits = hub->get_vm_protection_bits(); for (i = 0; i< 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); @@ -89,7 +96,6 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB]; - bits = hub->get_vm_protection_bits(); for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); @@ -100,7 +106,6 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB]; - bits = hub->get_vm_protection_bits(); for (i = 0; i< 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); @@ -110,7 +115,6 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB]; - bits = hub->get_vm_protection_bits(); for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 266a0f47a908..a065b4394ea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -336,17 +336,6 @@ static uint32_t mmhub_v1_0_get_invalidate_req(unsigned int vm_id) return req; } -static uint32_t mmhub_v1_0_get_vm_protection_bits(void) -{ - return (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK); -} - static int mmhub_v1_0_early_init(void *handle) { return 0; @@ -380,7 +369,6 @@ static int mmhub_v1_0_sw_init(void *handle) SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); hub->get_invalidate_req = mmhub_v1_0_get_invalidate_req; - hub->get_vm_protection_bits = mmhub_v1_0_get_vm_protection_bits; return 0; } -- cgit v1.2.3 From ccaf3574451ffe814823e7920089417b3abc9b89 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Tue, 4 Apr 2017 09:14:13 -0400 Subject: drm/amd/amdgpu: Clean up gfx_v8_0_kiq_set_interrupt_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use new WREG32_FIELD_OFFSET() to clean up code. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 40 +++++++++++------------------------ 2 files changed, 15 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cd573caa43ec..9f244c39b65a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1701,6 +1701,9 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define WREG32_FIELD(reg, field, val) \ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_FIELD_OFFSET(reg, offset, field, val) \ + WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + /* * BIOS helpers. */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2e0f55635579..8f30189e8a2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6987,40 +6987,24 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, unsigned int type, enum amdgpu_interrupt_state state) { - uint32_t tmp, target; struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); - if (ring->me == 1) - target = mmCP_ME1_PIPE0_INT_CNTL; - else - target = mmCP_ME2_PIPE0_INT_CNTL; - target += ring->pipe; - switch (type) { case AMDGPU_CP_KIQ_IRQ_DRIVER0: - if (state == AMDGPU_IRQ_STATE_DISABLE) { - tmp = RREG32(mmCPC_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, - GENERIC2_INT_ENABLE, 0); - WREG32(mmCPC_INT_CNTL, tmp); - - tmp = RREG32(target); - tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, - GENERIC2_INT_ENABLE, 0); - WREG32(target, tmp); - } else { - tmp = RREG32(mmCPC_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, - GENERIC2_INT_ENABLE, 1); - WREG32(mmCPC_INT_CNTL, tmp); - - tmp = RREG32(target); - tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, - GENERIC2_INT_ENABLE, 1); - WREG32(target, tmp); - } + WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, + state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); + if (ring->me == 1) + WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL, + ring->pipe, + GENERIC2_INT_ENABLE, + state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); + else + WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL, + ring->pipe, + GENERIC2_INT_ENABLE, + state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); break; default: BUG(); /* kiq only support GENERIC2_INT now */ -- cgit v1.2.3 From 03f89feb57bf61749885ae98ce98b8c0fd28903b Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 4 Apr 2017 16:07:45 +0200 Subject: drm/amdgpu: cleanup get_invalidate_req v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two hubs are just instances of the same hardware, so the register bits are identical. v2: keep the function pointer Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 21 --------------------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 22 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 21 --------------------- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- 8 files changed, 27 insertions(+), 49 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9f244c39b65a..e0aee9ca9d4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -306,6 +306,7 @@ struct amdgpu_gart_funcs { uint32_t flags); /* adjust mc addr in fb for APU case */ u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr); + uint32_t (*get_invalidate_req)(unsigned int vm_id); }; /* provided by the ih block */ @@ -570,7 +571,6 @@ struct amdgpu_vmhub { uint32_t vm_context0_cntl; uint32_t vm_l2_pro_fault_status; uint32_t vm_l2_pro_fault_cntl; - uint32_t (*get_invalidate_req)(unsigned int vm_id); }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 669bb98fc45d..4e6702d764e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3148,6 +3148,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->idx; unsigned i; @@ -3157,7 +3158,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - uint32_t req = hub->get_invalidate_req(vm_id); gfx_v9_0_write_data_to_reg(ring, usepfp, true, hub->ctx0_ptb_addr_lo32 diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 70c21f9b904b..005075ff00f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -299,25 +299,6 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); } -static uint32_t gfxhub_v1_0_get_invalidate_req(unsigned int vm_id) -{ - u32 req = 0; - - /* invalidate using legacy mode on vm_id*/ - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - PER_VMID_INVALIDATE_REQ, 1 << vm_id); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); - - return req; -} - static int gfxhub_v1_0_early_init(void *handle) { return 0; @@ -350,8 +331,6 @@ static int gfxhub_v1_0_sw_init(void *handle) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); - hub->get_invalidate_req = gfxhub_v1_0_get_invalidate_req; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7632f4ad7aa9..6329be81f260 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -173,6 +173,25 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs; } +static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vm_id) +{ + u32 req = 0; + + /* invalidate using legacy mode on vm_id*/ + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vm_id); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -202,7 +221,7 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &adev->vmhub[i]; - u32 tmp = hub->get_invalidate_req(vmid); + u32 tmp = gmc_v9_0_get_invalidate_req(vmid); WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); @@ -345,6 +364,7 @@ static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { .set_pte_pde = gmc_v9_0_gart_set_pte_pde, .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, .adjust_mc_addr = gmc_v9_0_adjust_mc_addr, + .get_invalidate_req = gmc_v9_0_get_invalidate_req, }; static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 3c9e27effc6a..62684510ddcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -317,25 +317,6 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); } -static uint32_t mmhub_v1_0_get_invalidate_req(unsigned int vm_id) -{ - u32 req = 0; - - /* invalidate using legacy mode on vm_id*/ - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - PER_VMID_INVALIDATE_REQ, 1 << vm_id); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); - - return req; -} - static int mmhub_v1_0_early_init(void *handle) { return 0; @@ -368,8 +349,6 @@ static int mmhub_v1_0_sw_init(void *handle) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); - hub->get_invalidate_req = mmhub_v1_0_get_invalidate_req; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 2dd2b20d727e..21f38d882335 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1039,6 +1039,7 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->idx; unsigned i; @@ -1048,7 +1049,6 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - uint32_t req = hub->get_invalidate_req(vm_id); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 8a9a90b00c93..9bcf01469282 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1034,6 +1034,7 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); uint32_t data0, data1, mask; unsigned eng = ring->idx; unsigned i; @@ -1044,7 +1045,6 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - uint32_t req = hub->get_invalidate_req(vm_id); data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; data1 = upper_32_bits(pd_addr); @@ -1080,6 +1080,7 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vm_id, uint64_t pd_addr) { + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->idx; unsigned i; @@ -1089,7 +1090,6 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - uint32_t req = hub->get_invalidate_req(vm_id); amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 2a3db99fbf1e..edde5fe938d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -973,6 +973,7 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vm_id, uint64_t pd_addr) { + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->idx; unsigned i; @@ -982,7 +983,6 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - uint32_t req = hub->get_invalidate_req(vm_id); amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); amdgpu_ring_write(ring, -- cgit v1.2.3 From 714f88e06f2f84a396129cc6990d7369d3fdcad5 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Wed, 5 Apr 2017 11:07:13 -0400 Subject: drm/amdgpu: Move function amdgpu_has_atpx near other similar functions Signed-off-by: Alex Xie Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e0aee9ca9d4e..c4f6211640da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1879,12 +1879,14 @@ void amdgpu_unregister_atpx_handler(void); bool amdgpu_has_atpx_dgpu_power_cntl(void); bool amdgpu_is_atpx_hybrid(void); bool amdgpu_atpx_dgpu_req_power_for_displays(void); +bool amdgpu_has_atpx(void); #else static inline void amdgpu_register_atpx_handler(void) {} static inline void amdgpu_unregister_atpx_handler(void) {} static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; } static inline bool amdgpu_is_atpx_hybrid(void) { return false; } static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; } +static inline bool amdgpu_has_atpx(void) { return false; } #endif /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index dfb029ab3448..055d10a79ea5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -36,12 +36,6 @@ #include #include "amdgpu_amdkfd.h" -#if defined(CONFIG_VGA_SWITCHEROO) -bool amdgpu_has_atpx(void); -#else -static inline bool amdgpu_has_atpx(void) { return false; } -#endif - /** * amdgpu_driver_unload_kms - Main unload function for KMS. * -- cgit v1.2.3 From efe53d8a4615fd218ab379cef76af8c57e63ce1d Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 5 Apr 2017 08:32:13 -0400 Subject: drm/amd/amdgpu: cleanup gfx_v9_0_init_queue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce WREG32_FIELD15 macro for SOC15 architectures. Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +++-------- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c4f6211640da..6a8129949333 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1704,6 +1704,9 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define WREG32_FIELD_OFFSET(reg, offset, field, val) \ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_FIELD15(ip, idx, reg, field, val) \ + WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + /* * BIOS helpers. */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4e6702d764e2..7b9aff7c2806 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3975,9 +3975,7 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) ring->pipe, ring->queue, 0); /* disable wptr polling */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL)); - tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL), tmp); + WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); /* write the EOP addr */ BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */ @@ -4121,11 +4119,8 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) amdgpu_bo_kunmap(ring->mqd_obj); amdgpu_bo_unreserve(ring->mqd_obj); - if (use_doorbell) { - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS)); - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS), tmp); - } + if (use_doorbell) + WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); return 0; } -- cgit v1.2.3