summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c262
1 files changed, 237 insertions, 25 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 06cab2c6fd66..019610341df1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -68,6 +68,8 @@ static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
uint32_t wptr;
unsigned long flags;
@@ -81,12 +83,17 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
/* Make sure to wrap wptr if we need to */
wptr = get_wptr(ring);
- spin_unlock_irqrestore(&ring->preempt_lock, flags);
-
- /* Make sure everything is posted before making a decision */
- mb();
+ /* Update HW if this is the current ring and we are not in preempt*/
+ if (!a6xx_in_preempt(a6xx_gpu)) {
+ if (a6xx_gpu->cur_ring == ring)
+ gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+ else
+ ring->restore_wptr = true;
+ } else {
+ ring->restore_wptr = true;
+ }
- gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+ spin_unlock_irqrestore(&ring->preempt_lock, flags);
}
static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
@@ -101,20 +108,30 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
}
static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
- struct msm_ringbuffer *ring, struct msm_file_private *ctx)
+ struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
{
bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
+ struct msm_file_private *ctx = submit->queue->ctx;
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
phys_addr_t ttbr;
u32 asid;
u64 memptr = rbmemptr(ring, ttbr0);
- if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
+ if (ctx->seqno == ring->cur_ctx_seqno)
return;
if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
return;
+ if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
+ /* Wait for previous submit to complete before continuing: */
+ OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
+ OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
+ OUT_RING(ring, submit->seqno - 1);
+ }
+
if (!sysprof) {
if (!adreno_is_a7xx(adreno_gpu)) {
/* Turn off protected mode to write to special registers */
@@ -138,12 +155,14 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
/*
* Write the new TTBR0 to the memstore. This is good for debugging.
+ * Needed for preemption
*/
- OUT_PKT7(ring, CP_MEM_WRITE, 4);
+ OUT_PKT7(ring, CP_MEM_WRITE, 5);
OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
OUT_RING(ring, lower_32_bits(ttbr));
- OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
+ OUT_RING(ring, upper_32_bits(ttbr));
+ OUT_RING(ring, ctx->seqno);
/*
* Sync both threads after switching pagetables and enable BR only
@@ -193,7 +212,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
- a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
+ a6xx_set_pagetable(a6xx_gpu, ring, submit);
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
rbmemptr_stats(ring, index, cpcycles_start));
@@ -219,7 +238,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
break;
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
- if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
+ if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
break;
fallthrough;
case MSM_SUBMIT_CMD_BUF:
@@ -268,6 +287,46 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
a6xx_flush(gpu, ring);
}
+static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
+ struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
+{
+ u64 preempt_postamble;
+
+ OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);
+
+ OUT_RING(ring, SMMU_INFO);
+ /* don't save SMMU, we write the record from the kernel instead */
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
+ /* privileged and non secure buffer save */
+ OUT_RING(ring, NON_SECURE_SAVE_ADDR);
+ OUT_RING(ring, lower_32_bits(
+ a6xx_gpu->preempt_iova[ring->id]));
+ OUT_RING(ring, upper_32_bits(
+ a6xx_gpu->preempt_iova[ring->id]));
+
+ /* user context buffer save, seems to be unnused by fw */
+ OUT_RING(ring, NON_PRIV_SAVE_ADDR);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
+ OUT_RING(ring, COUNTER);
+ /* seems OK to set to 0 to disable it */
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
+ /* Emit postamble to clear perfcounters */
+ preempt_postamble = a6xx_gpu->preempt_postamble_iova;
+
+ OUT_PKT7(ring, CP_SET_AMBLE, 3);
+ OUT_RING(ring, lower_32_bits(preempt_postamble));
+ OUT_RING(ring, upper_32_bits(preempt_postamble));
+ OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
+ a6xx_gpu->preempt_postamble_len) |
+ CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
+}
+
static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
@@ -283,7 +342,14 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
- a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
+ a6xx_set_pagetable(a6xx_gpu, ring, submit);
+
+ /*
+ * If preemption is enabled, then set the pseudo register for the save
+ * sequence
+ */
+ if (gpu->nr_rings > 1)
+ a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
rbmemptr_stats(ring, index, cpcycles_start));
@@ -296,8 +362,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_PKT7(ring, CP_SET_MARKER, 1);
OUT_RING(ring, 0x101); /* IFPC disable */
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, 0x00d); /* IB1LIST start */
+ if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, 0x00d); /* IB1LIST start */
+ }
/* Submit the commands */
for (i = 0; i < submit->nr_cmds; i++) {
@@ -305,7 +373,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
break;
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
- if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
+ if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
break;
fallthrough;
case MSM_SUBMIT_CMD_BUF:
@@ -328,8 +396,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
update_shadow_rptr(gpu, ring);
}
- OUT_PKT7(ring, CP_SET_MARKER, 1);
- OUT_RING(ring, 0x00e); /* IB1LIST end */
+ if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, 0x00e); /* IB1LIST end */
+ }
get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
rbmemptr_stats(ring, index, cpcycles_end));
@@ -376,6 +446,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
OUT_RING(ring, submit->seqno);
+ a6xx_gpu->last_seqno[ring->id] = submit->seqno;
+
/* write the ringbuffer timestamp */
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
@@ -389,10 +461,32 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_PKT7(ring, CP_SET_MARKER, 1);
OUT_RING(ring, 0x100); /* IFPC enable */
+ /* If preemption is enabled */
+ if (gpu->nr_rings > 1) {
+ /* Yield the floor on command completion */
+ OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
+
+ /*
+ * If dword[2:1] are non zero, they specify an address for
+ * the CP to write the value of dword[3] to on preemption
+ * complete. Write 0 to skip the write
+ */
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x00);
+ /* Data value - not used if the address above is 0 */
+ OUT_RING(ring, 0x01);
+ /* generate interrupt on preemption completion */
+ OUT_RING(ring, 0x00);
+ }
+
+
trace_msm_gpu_submit_flush(submit,
gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
a6xx_flush(gpu, ring);
+
+ /* Check to see if we need to start preemption */
+ a6xx_preempt_trigger(gpu);
}
static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
@@ -541,6 +635,15 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
gpu->ubwc_config.macrotile_mode = 1;
}
+ if (adreno_is_a663(gpu)) {
+ gpu->ubwc_config.highest_bank_bit = 13;
+ gpu->ubwc_config.amsbc = 1;
+ gpu->ubwc_config.rgb565_predicator = 1;
+ gpu->ubwc_config.uavflagprd_inv = 2;
+ gpu->ubwc_config.macrotile_mode = 1;
+ gpu->ubwc_config.ubwc_swizzle = 0x4;
+ }
+
if (adreno_is_7c3(gpu)) {
gpu->ubwc_config.highest_bank_bit = 14;
gpu->ubwc_config.amsbc = 1;
@@ -599,6 +702,77 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
adreno_gpu->ubwc_config.macrotile_mode);
}
+static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ const struct adreno_reglist_list *reglist;
+ void *ptr = a6xx_gpu->pwrup_reglist_ptr;
+ struct cpu_gpu_lock *lock = ptr;
+ u32 *dest = (u32 *)&lock->regs[0];
+ int i;
+
+ reglist = adreno_gpu->info->a6xx->pwrup_reglist;
+
+ lock->gpu_req = lock->cpu_req = lock->turn = 0;
+ lock->ifpc_list_len = 0;
+ lock->preemption_list_len = reglist->count;
+
+ /*
+ * For each entry in each of the lists, write the offset and the current
+ * register value into the GPU buffer
+ */
+ for (i = 0; i < reglist->count; i++) {
+ *dest++ = reglist->regs[i];
+ *dest++ = gpu_read(gpu, reglist->regs[i]);
+ }
+
+ /*
+ * The overall register list is composed of
+ * 1. Static IFPC-only registers
+ * 2. Static IFPC + preemption registers
+ * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
+ *
+ * The first two lists are static. Size of these lists are stored as
+ * number of pairs in ifpc_list_len and preemption_list_len
+ * respectively. With concurrent binning, Some of the perfcounter
+ * registers being virtualized, CP needs to know the pipe id to program
+ * the aperture inorder to restore the same. Thus, third list is a
+ * dynamic list with triplets as
+ * (<aperture, shifted 12 bits> <address> <data>), and the length is
+ * stored as number for triplets in dynamic_list_len.
+ */
+ lock->dynamic_list_len = 0;
+}
+
+static int a7xx_preempt_start(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ struct msm_ringbuffer *ring = gpu->rb[0];
+
+ if (gpu->nr_rings <= 1)
+ return 0;
+
+ /* Turn CP protection off */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 0);
+
+ a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);
+
+ /* Yield the floor on command completion */
+ OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x00);
+ /* Generate interrupt on preemption completion */
+ OUT_RING(ring, 0x00);
+
+ a6xx_flush(gpu, ring);
+
+ return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
+}
+
static int a6xx_cp_init(struct msm_gpu *gpu)
{
struct msm_ringbuffer *ring = gpu->rb[0];
@@ -630,6 +804,8 @@ static int a6xx_cp_init(struct msm_gpu *gpu)
static int a7xx_cp_init(struct msm_gpu *gpu)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = gpu->rb[0];
u32 mask;
@@ -667,11 +843,11 @@ static int a7xx_cp_init(struct msm_gpu *gpu)
/* *Don't* send a power up reg list for concurrent binning (TODO) */
/* Lo address */
- OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
/* Hi address */
- OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
/* BIT(31) set => read the regs from the list */
- OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, BIT(31));
a6xx_flush(gpu, ring);
return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
@@ -795,6 +971,16 @@ static int a6xx_ucode_load(struct msm_gpu *gpu)
msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
}
+ a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE,
+ MSM_BO_WC | MSM_BO_MAP_PRIV,
+ gpu->aspace, &a6xx_gpu->pwrup_reglist_bo,
+ &a6xx_gpu->pwrup_reglist_iova);
+
+ if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
+ return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr);
+
+ msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist");
+
return 0;
}
@@ -854,6 +1040,7 @@ static int hw_init(struct msm_gpu *gpu)
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
u64 gmem_range_min;
+ unsigned int i;
int ret;
if (!adreno_has_gmu_wrapper(adreno_gpu)) {
@@ -1062,7 +1249,7 @@ static int hw_init(struct msm_gpu *gpu)
if (adreno_is_a690(adreno_gpu))
gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
/* Set dualQ + disable afull for A660 GPU */
- else if (adreno_is_a660(adreno_gpu))
+ else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu))
gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
else if (adreno_is_a7xx(adreno_gpu))
gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
@@ -1124,22 +1311,32 @@ static int hw_init(struct msm_gpu *gpu)
if (a6xx_gpu->shadow_bo) {
gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
shadowptr(a6xx_gpu, gpu->rb[0]));
+ for (unsigned int i = 0; i < gpu->nr_rings; i++)
+ a6xx_gpu->shadow[i] = 0;
}
/* ..which means "always" on A7xx, also for BV shadow */
if (adreno_is_a7xx(adreno_gpu)) {
gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
- rbmemptr(gpu->rb[0], bv_fence));
+ rbmemptr(gpu->rb[0], bv_rptr));
}
+ a6xx_preempt_hw_init(gpu);
+
/* Always come up on rb 0 */
a6xx_gpu->cur_ring = gpu->rb[0];
- gpu->cur_ctx_seqno = 0;
+ for (i = 0; i < gpu->nr_rings; i++)
+ gpu->rb[i]->cur_ctx_seqno = 0;
/* Enable the SQE_to start the CP engine */
gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
+ if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
+ a7xx_patch_pwrup_reglist(gpu);
+ a6xx_gpu->pwrup_reglist_emitted = true;
+ }
+
ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
if (ret)
goto out;
@@ -1177,6 +1374,10 @@ static int hw_init(struct msm_gpu *gpu)
out:
if (adreno_has_gmu_wrapper(adreno_gpu))
return ret;
+
+ /* Last step - yield the ringbuffer */
+ a7xx_preempt_start(gpu);
+
/*
* Tell the GMU that we are done touching the GPU and it can start power
* management
@@ -1554,8 +1755,13 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
a7xx_sw_fuse_violation_irq(gpu);
- if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
+ if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
msm_gpu_retire(gpu);
+ a6xx_preempt_trigger(gpu);
+ }
+
+ if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
+ a6xx_preempt_irq(gpu);
return IRQ_HANDLED;
}
@@ -2249,6 +2455,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
struct a6xx_gpu *a6xx_gpu;
struct adreno_gpu *adreno_gpu;
struct msm_gpu *gpu;
+ extern int enable_preemption;
bool is_a7xx;
int ret;
@@ -2287,7 +2494,10 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
return ERR_PTR(ret);
}
- if (is_a7xx)
+ if ((enable_preemption == 1) || (enable_preemption == -1 &&
+ (config->info->quirks & ADRENO_QUIRK_PREEMPTION)))
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4);
+ else if (is_a7xx)
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1);
else if (adreno_has_gmu_wrapper(adreno_gpu))
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1);
@@ -2328,6 +2538,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
a6xx_fault_handler);
a6xx_calc_ubwc_config(adreno_gpu);
+ /* Set up the preemption specific bits and pieces for each ringbuffer */
+ a6xx_preempt_init(gpu);
return gpu;
}