From 139768ff9266d9e58a29e9636d91cc649f6030ee Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Sat, 19 Mar 2016 16:12:11 +0100 Subject: drm/amdgpu: delete unused struct member suspend from amdgpu_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nils Wallménius Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 62a778012fe0..e33b18587769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1903,7 +1903,6 @@ struct amdgpu_device { int usec_timeout; const struct amdgpu_asic_funcs *asic_funcs; bool shutdown; - bool suspend; bool need_dma32; bool accel_working; struct work_struct reset_work; -- cgit v1.2.3 From 379548f509b5d4bdfcf252dd33093a45c01ce0d8 Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Sat, 19 Mar 2016 16:12:13 +0100 Subject: drm/amdgpu: do not store bios_header_start in amdgpu_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is only used locally in amdgpu_get_bios Signed-off-by: Nils Wallménius Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e33b18587769..2c2f2349dd82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1924,7 +1924,6 @@ struct amdgpu_device { /* BIOS */ uint8_t *bios; bool is_atom_bios; - uint16_t bios_header_start; struct amdgpu_bo *stollen_vga_memory; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 80add22375ee..99ca75baa47d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -349,7 +349,7 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) bool amdgpu_get_bios(struct amdgpu_device *adev) { bool r; - uint16_t tmp; + uint16_t tmp, bios_header_start; r = amdgpu_atrm_get_bios(adev); if (r == false) @@ -383,11 +383,11 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto free_bios; } - adev->bios_header_start = RBIOS16(0x48); - if (!adev->bios_header_start) { + bios_header_start = RBIOS16(0x48); + if (!bios_header_start) { goto free_bios; } - tmp = adev->bios_header_start + 4; + tmp = bios_header_start + 4; if (!memcmp(adev->bios + tmp, "ATOM", 4) || !memcmp(adev->bios + tmp, "MOTA", 4)) { adev->is_atom_bios = true; -- cgit v1.2.3 From 128cff1af68689cf4d85d3ba948c86a194dee30f Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 14 Jan 2016 18:08:16 +0800 Subject: drm/amdgpu: support cond exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the groundwork for conditional execution on SDMA which is necessary for preemption. 
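For context, the writeback slot introduced below works as follows: the engine reads one dword from memory when it reaches a conditional-execute packet; a non-zero value means "continue" and the following commands run, while a zero value makes the engine skip them, which is what lets a preempted job be dropped when the ring resumes. A minimal C model of that decision, with illustrative names only (cond_exe_flag, skip_count and cond_exec_model are not the real packet or register fields):

#include <stdint.h>
#include <stddef.h>

/* one writeback dword shared between driver and engine, 1 = CONTINUE */
static volatile uint32_t cond_exe_flag = 1;

/* model of the engine's decision: how many dwords to skip after the packet */
static size_t cond_exec_model(const volatile uint32_t *flag, uint32_t skip_count)
{
	if (*flag != 0)
		return 0;	/* continue: execute the commands that follow */
	return skip_count;	/* flag cleared (e.g. on preemption): skip them */
}

int main(void)
{
	/* the driver always resets the flag to CONTINUE before submission */
	return (int)cond_exec_model(&cond_exe_flag, 32);
}
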
Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 9 +++++++++ 3 files changed, 15 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2c2f2349dd82..49adb451c3bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -788,6 +788,9 @@ struct amdgpu_ring { struct amdgpu_ctx *current_ctx; enum amdgpu_ring_type type; char name[16]; + unsigned cond_exe_offs; + u64 cond_exe_gpu_addr; + volatile u32 *cond_exe_cpu_addr; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 8443cea6821a..a15d690d9089 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -160,6 +160,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_hdp_flush(ring); } + /* always set cond_exec_polling to CONTINUE */ + *ring->cond_exe_cpu_addr = 1; + old_ctx = ring->current_ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 972eed2ef787..dd79243d0a37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -267,6 +267,15 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->next_rptr_gpu_addr = adev->wb.gpu_addr + (ring->next_rptr_offs * 4); ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs]; + + r = amdgpu_wb_get(adev, &ring->cond_exe_offs); + if (r) { + dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); + return r; + } + ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4); + ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs]; + spin_lock_init(&ring->fence_lock); r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type); if (r) { -- cgit v1.2.3 From 03ccf481980f8d3363263e73c64473d8f2779dc0 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 14 Jan 2016 19:07:38 +0800 Subject: drm/amdgpu: patch cond exec for SDMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More ground work for conditional execution on SDMA necessary for preemption. 
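The patching scheme added below reserves one placeholder dword right after the conditional-execute packet, and once the size of the conditionally executed commands is known it overwrites the placeholder with the number of dwords to skip, taking ring wrap-around into account. A small worked example of that arithmetic with made-up numbers (in the driver, cur is ring->wptr - 1 and the dword count is ring_size >> 2):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ring_size_dw = 1024;	/* ring size in dwords */
	uint32_t offset = 1020;		/* wptr saved when the placeholder was written */
	uint32_t cur = 16;		/* position reached after the IBs, wrapped around */

	/* dwords to skip when the condition is false */
	uint32_t skip = (cur > offset) ? cur - offset
				       : ring_size_dw - offset + cur;

	assert(skip == 20);
	printf("patched skip count: %u dwords\n", skip);
	return 0;
}
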
Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 ++++++++- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 49adb451c3bc..412fc2f39fa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -302,6 +302,8 @@ struct amdgpu_ring_funcs { void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); + unsigned (*init_cond_exec)(struct amdgpu_ring *ring); + void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset); }; /* @@ -2182,6 +2184,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) +#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) +#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a15d690d9089..644336d76aca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -124,7 +124,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ctx *ctx, *old_ctx; struct amdgpu_vm *vm; struct fence *hwf; - unsigned i; + unsigned i, patch_offset = ~0; + int r = 0; if (num_ibs == 0) @@ -149,6 +150,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return r; } + if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec) + patch_offset = amdgpu_ring_init_cond_exec(ring); + if (vm) { /* do context switch */ amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, @@ -204,6 +208,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (f) *f = fence_get(hwf); + if (patch_offset != ~0 && ring->funcs->patch_cond_exec) + amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_commit(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 8c8ca98dd129..833d2658428f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -452,6 +452,31 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } +unsigned init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 1); + ret = ring->wptr;/* this is the offset we need patch later */ + amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + return ret; +} + +void patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 
+{ + unsigned cur; + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = ring->wptr - 1; + if (likely(cur > offset)) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->ring_size>>2) - offset + cur; +} + + /** * sdma_v3_0_gfx_stop - stop the gfx async dma engines * -- cgit v1.2.3 From e472d2588eef38c2f16f71d6160e58fb5948e84f Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 3 Mar 2016 19:00:50 +0800 Subject: drm/amdgpu: delay job free to when it's finished (v2) for those jobs submitted through scheduler, do not free it immediately after scheduled, instead free it in global workqueue by its sched fence signaling callback function. v2: call uf's bo_undef after job_run() call job's sync free after job_run() no static inline __amdgpu_job_free() anymore, just use kfree(job) to replace it. Signed-off-by: Monk Liu Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 11 ++++++++++- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 8 ++++++++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++++- 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 412fc2f39fa5..9bf72b24495c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2401,5 +2401,4 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo); #include "amdgpu_object.h" - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index eb0f7890401a..23468088a995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -28,6 +28,12 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +static void amdgpu_job_free_handler(struct work_struct *ws) +{ + struct amdgpu_job *job = container_of(ws, struct amdgpu_job, base.work_free_job); + kfree(job); +} + int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job) { @@ -45,6 +51,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->adev = adev; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; + INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler); amdgpu_sync_create(&(*job)->sync); @@ -80,7 +87,9 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_bo_unref(&job->uf.bo); amdgpu_sync_free(&job->sync); - kfree(job); + + if (!job->base.use_sched) + kfree(job); } int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index b9d5822bece8..8d49ea2e4134 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -319,6 +319,11 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) return added; } +static void amd_sched_free_job(struct fence *f, struct fence_cb *cb) { + struct amd_sched_job *job = container_of(cb, struct amd_sched_job, cb_free_job); + schedule_work(&job->work_free_job); +} + /** * Submit a job to the job queue * @@ -330,6 +335,9 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job) { struct amd_sched_entity *entity = sched_job->s_entity; + sched_job->use_sched = 1; + fence_add_callback(&sched_job->s_fence->base, + &sched_job->cb_free_job, amd_sched_free_job); trace_amd_sched_job(sched_job); 
wait_event(entity->sched->job_scheduled, amd_sched_entity_in(sched_job)); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 74bbec837f58..ee1e8127f863 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -37,7 +37,7 @@ extern atomic_t sched_fence_slab_ref; /** * A scheduler entity is a wrapper around a job queue or a group - * of other entities. Entities take turns emitting jobs from their + * of other entities. Entities take turns emitting jobs from their * job queues to corresponding hardware ring based on scheduling * policy. */ @@ -82,6 +82,9 @@ struct amd_sched_job { struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; struct amd_sched_fence *s_fence; + bool use_sched; /* true if the job goes to scheduler */ + struct fence_cb cb_free_job; + struct work_struct work_free_job; }; extern const struct fence_ops amd_sched_fence_ops; -- cgit v1.2.3 From 0de2479c953ae07fd11e7b1bc8d4fc831e6842bb Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 4 Mar 2016 18:51:02 +0800 Subject: drm/amdgpu: rework TDR in scheduler (v2) Add two callbacks to scheduler to maintain jobs, and invoked for job timeout calculations. Now TDR measures time gap from job is processed by hw. v2: fix typo Signed-off-by: Monk Liu Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 16 +++++++++++- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 37 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 +++++ drivers/gpu/drm/amd/scheduler/sched_fence.c | 1 + 6 files changed, 62 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9bf72b24495c..ccb28468ece8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -754,6 +754,7 @@ void amdgpu_job_free(struct amdgpu_job *job); int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct amd_sched_entity *entity, void *owner, struct fence **f); +void amdgpu_job_timeout_func(struct work_struct *work); struct amdgpu_ring { struct amdgpu_device *adev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 23266b454aec..9025671d21c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -871,6 +871,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, r = amd_sched_job_init(&job->base, &ring->sched, &p->ctx->rings[ring->idx].entity, + amdgpu_job_timeout_func, p->filp, &fence); if (r) { amdgpu_job_free(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 23468088a995..961cae4a1955 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -34,6 +34,15 @@ static void amdgpu_job_free_handler(struct work_struct *ws) kfree(job); } +void amdgpu_job_timeout_func(struct work_struct *work) +{ + struct amdgpu_job *job = container_of(work, struct amdgpu_job, base.work_tdr.work); + DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n", + job->base.sched->name, + (uint32_t)atomic_read(&job->ring->fence_drv.last_seq), + job->ring->fence_drv.sync_seq); +} + int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 
struct amdgpu_job **job) { @@ -103,7 +112,10 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, if (!f) return -EINVAL; - r = amd_sched_job_init(&job->base, &ring->sched, entity, owner, &fence); + r = amd_sched_job_init(&job->base, &ring->sched, + entity, owner, + amdgpu_job_timeout_func, + &fence); if (r) return r; @@ -180,4 +192,6 @@ err: struct amd_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, + .begin_job = amd_sched_job_begin, + .finish_job = amd_sched_job_finish, }; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 9a9fffdc272b..b7e8071448c6 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -324,6 +324,40 @@ static void amd_sched_free_job(struct fence *f, struct fence_cb *cb) { schedule_work(&job->work_free_job); } +/* job_finish is called after hw fence signaled, and + * the job had already been deleted from ring_mirror_list + */ +void amd_sched_job_finish(struct amd_sched_job *s_job) +{ + struct amd_sched_job *next; + struct amd_gpu_scheduler *sched = s_job->sched; + + if (sched->timeout != MAX_SCHEDULE_TIMEOUT) { + cancel_delayed_work(&s_job->work_tdr); /*TODO: how to deal the case that tdr is running */ + + /* queue TDR for next job */ + next = list_first_entry_or_null(&sched->ring_mirror_list, + struct amd_sched_job, node); + + if (next) { + INIT_DELAYED_WORK(&next->work_tdr, s_job->timeout_callback); + schedule_delayed_work(&next->work_tdr, sched->timeout); + } + } +} + +void amd_sched_job_begin(struct amd_sched_job *s_job) +{ + struct amd_gpu_scheduler *sched = s_job->sched; + + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && + list_first_entry_or_null(&sched->ring_mirror_list, struct amd_sched_job, node) == s_job) + { + INIT_DELAYED_WORK(&s_job->work_tdr, s_job->timeout_callback); + schedule_delayed_work(&s_job->work_tdr, sched->timeout); + } +} + /** * Submit a job to the job queue * @@ -347,6 +381,7 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job) int amd_sched_job_init(struct amd_sched_job *job, struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, + void (*timeout_cb)(struct work_struct *work), void *owner, struct fence **fence) { INIT_LIST_HEAD(&job->node); @@ -357,6 +392,7 @@ int amd_sched_job_init(struct amd_sched_job *job, return -ENOMEM; job->s_fence->s_job = job; + job->timeout_callback = timeout_cb; if (fence) *fence = &job->s_fence->base; @@ -415,6 +451,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) /* remove job from ring_mirror_list */ spin_lock_irqsave(&sched->job_list_lock, flags); list_del_init(&s_fence->s_job->node); + sched->ops->finish_job(s_fence->s_job); spin_unlock_irqrestore(&sched->job_list_lock, flags); amd_sched_fence_signal(s_fence); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index b26148d24a3d..a5700aded5bf 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -85,6 +85,8 @@ struct amd_sched_job { struct fence_cb cb_free_job; struct work_struct work_free_job; struct list_head node; + struct delayed_work work_tdr; + void (*timeout_callback) (struct work_struct *work); }; extern const struct fence_ops amd_sched_fence_ops; @@ -105,6 +107,8 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) struct amd_sched_backend_ops { struct fence 
*(*dependency)(struct amd_sched_job *sched_job); struct fence *(*run_job)(struct amd_sched_job *sched_job); + void (*begin_job)(struct amd_sched_job *sched_job); + void (*finish_job)(struct amd_sched_job *sched_job); }; enum amd_sched_priority { @@ -150,7 +154,10 @@ void amd_sched_fence_signal(struct amd_sched_fence *fence); int amd_sched_job_init(struct amd_sched_job *job, struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, + void (*timeout_cb)(struct work_struct *work), void *owner, struct fence **fence); void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched , struct amd_sched_job *s_job); +void amd_sched_job_finish(struct amd_sched_job *s_job); +void amd_sched_job_begin(struct amd_sched_job *s_job); #endif diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index 33ddd38185d5..2a732c490375 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -63,6 +63,7 @@ void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched , unsigned long flags; spin_lock_irqsave(&sched->job_list_lock, flags); list_add_tail(&s_job->node, &sched->ring_mirror_list); + sched->ops->begin_job(s_job); spin_unlock_irqrestore(&sched->job_list_lock, flags); } -- cgit v1.2.3 From b6723c8da55af5309cf06e71a5228f3c02846c5a Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 10 Mar 2016 12:14:44 +0800 Subject: drm/amdgpu: use ref to keep job alive this is to fix fatal page fault error that occured if: job is signaled/released after its timeout work is already put to the global queue (in this case the cancel_delayed_work will return false), which will lead to NX-protection error page fault during job_timeout_func. Signed-off-by: Monk Liu Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 15 ++++++++++++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 8 +++++++- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 13 +++++++++++++ 5 files changed, 35 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ccb28468ece8..2ac07ebecfd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -750,7 +750,9 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job); int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, struct amdgpu_job **job); + void amdgpu_job_free(struct amdgpu_job *job); +void amdgpu_job_free_func(struct kref *refcount); int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct amd_sched_entity *entity, void *owner, struct fence **f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9025671d21c3..d7e0b0b9a1bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -872,6 +872,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, r = amd_sched_job_init(&job->base, &ring->sched, &p->ctx->rings[ring->idx].entity, amdgpu_job_timeout_func, + amdgpu_job_free_func, p->filp, &fence); if (r) { amdgpu_job_free(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 961cae4a1955..a052ac2b131d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -31,7 +31,7 @@ static void amdgpu_job_free_handler(struct work_struct *ws) { struct amdgpu_job *job = container_of(ws, struct amdgpu_job, base.work_free_job); - kfree(job); + amd_sched_job_put(&job->base); } void amdgpu_job_timeout_func(struct work_struct *work) @@ -41,6 +41,8 @@ void amdgpu_job_timeout_func(struct work_struct *work) job->base.sched->name, (uint32_t)atomic_read(&job->ring->fence_drv.last_seq), job->ring->fence_drv.sync_seq); + + amd_sched_job_put(&job->base); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -101,6 +103,12 @@ void amdgpu_job_free(struct amdgpu_job *job) kfree(job); } +void amdgpu_job_free_func(struct kref *refcount) +{ + struct amdgpu_job *job = container_of(refcount, struct amdgpu_job, base.refcount); + kfree(job); +} + int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct amd_sched_entity *entity, void *owner, struct fence **f) @@ -113,9 +121,10 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, return -EINVAL; r = amd_sched_job_init(&job->base, &ring->sched, - entity, owner, + entity, amdgpu_job_timeout_func, - &fence); + amdgpu_job_free_func, + owner, &fence); if (r) return r; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index b7e8071448c6..639c70de217c 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -333,7 +333,8 @@ void amd_sched_job_finish(struct amd_sched_job *s_job) struct amd_gpu_scheduler *sched = s_job->sched; if (sched->timeout != MAX_SCHEDULE_TIMEOUT) { - cancel_delayed_work(&s_job->work_tdr); /*TODO: how to deal the case that tdr is running */ + if (cancel_delayed_work(&s_job->work_tdr)) + amd_sched_job_put(s_job); /* queue TDR for next job */ next = list_first_entry_or_null(&sched->ring_mirror_list, @@ -341,6 +342,7 @@ void amd_sched_job_finish(struct amd_sched_job *s_job) if (next) { INIT_DELAYED_WORK(&next->work_tdr, s_job->timeout_callback); + amd_sched_job_get(next); schedule_delayed_work(&next->work_tdr, sched->timeout); } } @@ -354,6 +356,7 @@ void amd_sched_job_begin(struct amd_sched_job *s_job) list_first_entry_or_null(&sched->ring_mirror_list, struct amd_sched_job, node) == s_job) { INIT_DELAYED_WORK(&s_job->work_tdr, s_job->timeout_callback); + amd_sched_job_get(s_job); schedule_delayed_work(&s_job->work_tdr, sched->timeout); } } @@ -382,9 +385,11 @@ int amd_sched_job_init(struct amd_sched_job *job, struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, void (*timeout_cb)(struct work_struct *work), + void (*free_cb)(struct kref *refcount), void *owner, struct fence **fence) { INIT_LIST_HEAD(&job->node); + kref_init(&job->refcount); job->sched = sched; job->s_entity = entity; job->s_fence = amd_sched_fence_create(entity, owner); @@ -393,6 +398,7 @@ int amd_sched_job_init(struct amd_sched_job *job, job->s_fence->s_job = job; job->timeout_callback = timeout_cb; + job->free_callback = free_cb; if (fence) *fence = &job->s_fence->base; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index a5700aded5bf..95ebfd069690 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -78,6 +78,7 @@ struct amd_sched_fence { }; struct amd_sched_job { + struct kref refcount; struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; struct amd_sched_fence *s_fence; @@ -87,6 +88,7 
@@ struct amd_sched_job { struct list_head node; struct delayed_work work_tdr; void (*timeout_callback) (struct work_struct *work); + void (*free_callback)(struct kref *refcount); }; extern const struct fence_ops amd_sched_fence_ops; @@ -155,9 +157,20 @@ int amd_sched_job_init(struct amd_sched_job *job, struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, void (*timeout_cb)(struct work_struct *work), + void (*free_cb)(struct kref* refcount), void *owner, struct fence **fence); void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched , struct amd_sched_job *s_job); void amd_sched_job_finish(struct amd_sched_job *s_job); void amd_sched_job_begin(struct amd_sched_job *s_job); +static inline void amd_sched_job_get(struct amd_sched_job *job) { + if (job) + kref_get(&job->refcount); +} + +static inline void amd_sched_job_put(struct amd_sched_job *job) { + if (job) + kref_put(&job->refcount, job->free_callback); +} + #endif -- cgit v1.2.3 From 4325198180e5aeff21f44ba32c9ec18d9dd3ba72 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 30 Mar 2016 10:54:16 +0200 Subject: drm/amdgpu: remove GART page addr array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed any more. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 22 ++++------------------ 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2ac07ebecfd1..e6f7bad735cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -612,7 +612,6 @@ struct amdgpu_gart { unsigned num_cpu_pages; unsigned table_size; struct page **pages; - dma_addr_t *pages_addr; bool ready; const struct amdgpu_gart_funcs *gart_funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 7312d729d300..a13603abed07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -240,8 +240,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, for (i = 0; i < pages; i++, p++) { if (adev->gart.pages[p]) { adev->gart.pages[p] = NULL; - adev->gart.pages_addr[p] = adev->dummy_page.addr; - page_base = adev->gart.pages_addr[p]; + page_base = adev->dummy_page.addr; if (!adev->gart.ptr) continue; @@ -287,10 +286,9 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { - adev->gart.pages_addr[p] = dma_addr[i]; adev->gart.pages[p] = pagelist[i]; if (adev->gart.ptr) { - page_base = adev->gart.pages_addr[p]; + page_base = dma_addr[i]; for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; @@ -312,7 +310,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, */ int amdgpu_gart_init(struct amdgpu_device *adev) { - int r, i; + int r; if (adev->gart.pages) { return 0; @@ -336,16 +334,6 @@ int amdgpu_gart_init(struct amdgpu_device *adev) amdgpu_gart_fini(adev); return -ENOMEM; } - adev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) * - adev->gart.num_cpu_pages); - if (adev->gart.pages_addr == NULL) { - amdgpu_gart_fini(adev); - return -ENOMEM; - } - /* set GART entry to point to the dummy page 
by default */ - for (i = 0; i < adev->gart.num_cpu_pages; i++) { - adev->gart.pages_addr[i] = adev->dummy_page.addr; - } return 0; } @@ -358,15 +346,13 @@ int amdgpu_gart_init(struct amdgpu_device *adev) */ void amdgpu_gart_fini(struct amdgpu_device *adev) { - if (adev->gart.pages && adev->gart.pages_addr && adev->gart.ready) { + if (adev->gart.pages && adev->gart.ready) { /* unbind pages */ amdgpu_gart_unbind(adev, 0, adev->gart.num_cpu_pages); } adev->gart.ready = false; vfree(adev->gart.pages); - vfree(adev->gart.pages_addr); adev->gart.pages = NULL; - adev->gart.pages_addr = NULL; amdgpu_dummy_page_fini(adev); } -- cgit v1.2.3 From a1d29476d666f5972f200c49e76df339ced6b07a Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 30 Mar 2016 14:42:57 +0200 Subject: drm/amdgpu: optionally enable GART debugfs file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keeping the pages array around can use a lot of system memory when you want a large GART. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 10 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 35 +++++++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 9 ++++++++ 4 files changed, 42 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index b30fcfa4b1f2..7335c0420c70 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -15,3 +15,13 @@ config DRM_AMDGPU_USERPTR help This option selects CONFIG_MMU_NOTIFIER if it isn't already selected to enabled full userptr support. + +config DRM_AMDGPU_GART_DEBUGFS + bool "Allow GART access through debugfs" + depends on DRM_AMDGPU + depends on DEBUG_FS + default n + help + Selecting this option creates a debugfs file to inspect the mapped + pages. Uses more memory for housekeeping, enable only for debugging. 
+ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e6f7bad735cc..b9a6fe9a2a31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -611,7 +611,9 @@ struct amdgpu_gart { unsigned num_gpu_pages; unsigned num_cpu_pages; unsigned table_size; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS struct page **pages; +#endif bool ready; const struct amdgpu_gart_funcs *gart_funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index a13603abed07..921bce2df0b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -238,17 +238,17 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { - if (adev->gart.pages[p]) { - adev->gart.pages[p] = NULL; - page_base = adev->dummy_page.addr; - if (!adev->gart.ptr) - continue; +#ifdef CONFIG_AMDGPU_GART_DEBUGFS + adev->gart.pages[p] = NULL; +#endif + page_base = adev->dummy_page.addr; + if (!adev->gart.ptr) + continue; - for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { - amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, - t, page_base, flags); - page_base += AMDGPU_GPU_PAGE_SIZE; - } + for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { + amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, + t, page_base, flags); + page_base += AMDGPU_GPU_PAGE_SIZE; } } mb(); @@ -286,7 +286,9 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { +#ifdef CONFIG_AMDGPU_GART_DEBUGFS adev->gart.pages[p] = pagelist[i]; +#endif if (adev->gart.ptr) { page_base = dma_addr[i]; for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { @@ -312,9 +314,9 @@ int amdgpu_gart_init(struct amdgpu_device *adev) { int r; - if (adev->gart.pages) { + if (adev->dummy_page.page) return 0; - } + /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */ if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("Page size is smaller than GPU page size!\n"); @@ -328,12 +330,16 @@ int amdgpu_gart_init(struct amdgpu_device *adev) adev->gart.num_gpu_pages = adev->mc.gtt_size / AMDGPU_GPU_PAGE_SIZE; DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); + +#ifdef CONFIG_AMDGPU_GART_DEBUGFS /* Allocate pages table */ adev->gart.pages = vzalloc(sizeof(void *) * adev->gart.num_cpu_pages); if (adev->gart.pages == NULL) { amdgpu_gart_fini(adev); return -ENOMEM; } +#endif + return 0; } @@ -346,13 +352,14 @@ int amdgpu_gart_init(struct amdgpu_device *adev) */ void amdgpu_gart_fini(struct amdgpu_device *adev) { - if (adev->gart.pages && adev->gart.ready) { + if (adev->gart.ready) { /* unbind pages */ amdgpu_gart_unbind(adev, 0, adev->gart.num_cpu_pages); } adev->gart.ready = false; +#ifdef CONFIG_AMDGPU_GART_DEBUGFS vfree(adev->gart.pages); adev->gart.pages = NULL; - +#endif amdgpu_dummy_page_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6f3369de232f..7a8bdfedff1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1216,6 +1216,8 @@ static const struct file_operations amdgpu_ttm_vram_fops = { .llseek = default_llseek }; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, size_t size, loff_t *pos) 
{ @@ -1263,6 +1265,8 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif +#endif + static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) @@ -1278,6 +1282,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) i_size_write(ent->d_inode, adev->mc.mc_vram_size); adev->mman.vram = ent; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS ent = debugfs_create_file("amdgpu_gtt", S_IFREG | S_IRUGO, root, adev, &amdgpu_ttm_gtt_fops); if (IS_ERR(ent)) @@ -1285,6 +1290,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) i_size_write(ent->d_inode, adev->mc.gtt_size); adev->mman.gtt = ent; +#endif count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); #ifdef CONFIG_SWIOTLB @@ -1306,7 +1312,10 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev) debugfs_remove(adev->mman.vram); adev->mman.vram = NULL; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS debugfs_remove(adev->mman.gtt); adev->mman.gtt = NULL; #endif + +#endif } -- cgit v1.2.3 From bcb1ba35a87be34d1312f6e050f1b5cc4d32f096 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 8 Mar 2016 15:40:11 +0100 Subject: drm/amdgpu: merge VM manager and VM context ID structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to have two of them any more. Signed-off-by: Christian König Acked-by: Alex Deucher Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 17 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 139 +++++++++++++++++---------------- 2 files changed, 78 insertions(+), 78 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b9a6fe9a2a31..c1b10046317e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -839,13 +839,6 @@ struct amdgpu_vm_pt { uint64_t addr; }; -struct amdgpu_vm_id { - struct amdgpu_vm_manager_id *mgr_id; - uint64_t pd_gpu_addr; - /* last flushed PD/PT update */ - struct fence *flushed_updates; -}; - struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root va; @@ -871,7 +864,7 @@ struct amdgpu_vm { struct amdgpu_vm_pt *page_tables; /* for id and flush management per ring */ - struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; + struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS]; /* protecting freed */ spinlock_t freed_lock; @@ -880,11 +873,15 @@ struct amdgpu_vm { struct amd_sched_entity entity; }; -struct amdgpu_vm_manager_id { +struct amdgpu_vm_id { struct list_head list; struct fence *active; atomic_long_t owner; + uint64_t pd_gpu_addr; + /* last flushed PD/PT update */ + struct fence *flushed_updates; + uint32_t gds_base; uint32_t gds_size; uint32_t gws_base; @@ -898,7 +895,7 @@ struct amdgpu_vm_manager { struct mutex lock; unsigned num_ids; struct list_head ids_lru; - struct amdgpu_vm_manager_id ids[AMDGPU_NUM_VM]; + struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; uint32_t max_pfn; /* vram base address for page table entry */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0c92e0450694..8a758b4fb3a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -166,43 +166,41 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, { uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); struct amdgpu_device *adev = ring->adev; - struct amdgpu_vm_id *id = &vm->ids[ring->idx]; + struct amdgpu_vm_id *id = 
vm->ids[ring->idx]; struct fence *updates = sync->last_vm_update; int r; mutex_lock(&adev->vm_manager.lock); /* check if the id is still valid */ - if (id->mgr_id) { + if (id) { struct fence *flushed = id->flushed_updates; - bool is_later; - long owner; + long owner = atomic_long_read(&id->owner); + bool usable = pd_addr == id->pd_gpu_addr; - if (!flushed) - is_later = true; + if (owner != (long)&vm->ids[ring->idx]) + usable = false; + else if (!flushed) + usable = false; else if (!updates) - is_later = false; + usable = true; else - is_later = fence_is_later(updates, flushed); + usable = !fence_is_later(updates, flushed); - owner = atomic_long_read(&id->mgr_id->owner); - if (!is_later && owner == (long)id && - pd_addr == id->pd_gpu_addr) { + if (usable) { - r = amdgpu_sync_fence(ring->adev, sync, - id->mgr_id->active); + r = amdgpu_sync_fence(ring->adev, sync, id->active); if (r) { mutex_unlock(&adev->vm_manager.lock); return r; } - fence_put(id->mgr_id->active); - id->mgr_id->active = fence_get(fence); + fence_put(id->active); + id->active = fence_get(fence); - list_move_tail(&id->mgr_id->list, - &adev->vm_manager.ids_lru); + list_move_tail(&id->list, &adev->vm_manager.ids_lru); - *vm_id = id->mgr_id - adev->vm_manager.ids; + *vm_id = id - adev->vm_manager.ids; *vm_pd_addr = AMDGPU_VM_NO_FLUSH; trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); @@ -212,38 +210,41 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } } - id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru, - struct amdgpu_vm_manager_id, - list); + id = list_first_entry(&adev->vm_manager.ids_lru, + struct amdgpu_vm_id, + list); - if (id->mgr_id->active && !fence_is_signaled(id->mgr_id->active)) { - struct amdgpu_vm_manager_id *mgr_id, *tmp; + if (id->active && !fence_is_signaled(id->active)) { + struct amdgpu_vm_id *tmp; struct list_head *head = &adev->vm_manager.ids_lru; - list_for_each_entry_safe(mgr_id, tmp, &adev->vm_manager.ids_lru, list) { - if (mgr_id->active && fence_is_signaled(mgr_id->active)) { - list_move(&mgr_id->list, head); - head = &mgr_id->list; + + list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru, + list) { + if (id->active && fence_is_signaled(id->active)) { + list_move(&id->list, head); + head = &id->list; } } - id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru, - struct amdgpu_vm_manager_id, - list); + id = list_first_entry(&adev->vm_manager.ids_lru, + struct amdgpu_vm_id, + list); } - r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active); + r = amdgpu_sync_fence(ring->adev, sync, id->active); if (!r) { - fence_put(id->mgr_id->active); - id->mgr_id->active = fence_get(fence); + fence_put(id->active); + id->active = fence_get(fence); fence_put(id->flushed_updates); id->flushed_updates = fence_get(updates); id->pd_gpu_addr = pd_addr; - list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru); - atomic_long_set(&id->mgr_id->owner, (long)id); + list_move_tail(&id->list, &adev->vm_manager.ids_lru); + atomic_long_set(&id->owner, (long)&vm->ids[ring->idx]); + vm->ids[ring->idx] = id; - *vm_id = id->mgr_id - adev->vm_manager.ids; + *vm_id = id - adev->vm_manager.ids; *vm_pd_addr = pd_addr; trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); } @@ -268,14 +269,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, uint32_t oa_base, uint32_t oa_size) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; + struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; bool gds_switch_needed = 
ring->funcs->emit_gds_switch && ( - mgr_id->gds_base != gds_base || - mgr_id->gds_size != gds_size || - mgr_id->gws_base != gws_base || - mgr_id->gws_size != gws_size || - mgr_id->oa_base != oa_base || - mgr_id->oa_size != oa_size); + id->gds_base != gds_base || + id->gds_size != gds_size || + id->gws_base != gws_base || + id->gws_size != gws_size || + id->oa_base != oa_base || + id->oa_size != oa_size); if (ring->funcs->emit_pipeline_sync && ( pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed)) @@ -287,12 +288,12 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, } if (gds_switch_needed) { - mgr_id->gds_base = gds_base; - mgr_id->gds_size = gds_size; - mgr_id->gws_base = gws_base; - mgr_id->gws_size = gws_size; - mgr_id->oa_base = oa_base; - mgr_id->oa_size = oa_size; + id->gds_base = gds_base; + id->gds_size = gds_size; + id->gws_base = gws_base; + id->gws_size = gws_size; + id->oa_base = oa_base; + id->oa_size = oa_size; amdgpu_ring_emit_gds_switch(ring, vm_id, gds_base, gds_size, gws_base, gws_size, @@ -310,14 +311,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, */ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id) { - struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; - - mgr_id->gds_base = 0; - mgr_id->gds_size = 0; - mgr_id->gws_base = 0; - mgr_id->gws_size = 0; - mgr_id->oa_base = 0; - mgr_id->oa_size = 0; + struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; + + id->gds_base = 0; + id->gds_size = 0; + id->gws_base = 0; + id->gws_size = 0; + id->oa_base = 0; + id->oa_size = 0; } /** @@ -1345,10 +1346,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amd_sched_rq *rq; int i, r; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - vm->ids[i].mgr_id = NULL; - vm->ids[i].flushed_updates = NULL; - } + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + vm->ids[i] = NULL; vm->va = RB_ROOT; spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); @@ -1443,12 +1442,12 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) fence_put(vm->page_directory_fence); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_vm_id *id = &vm->ids[i]; + struct amdgpu_vm_id *id = vm->ids[i]; - if (id->mgr_id) - atomic_long_cmpxchg(&id->mgr_id->owner, - (long)id, 0); - fence_put(id->flushed_updates); + if (!id) + continue; + + atomic_long_cmpxchg(&id->owner, (long)&vm->ids[i], 0); } } @@ -1486,6 +1485,10 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { unsigned i; - for (i = 0; i < AMDGPU_NUM_VM; ++i) - fence_put(adev->vm_manager.ids[i].active); + for (i = 0; i < AMDGPU_NUM_VM; ++i) { + struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; + + fence_put(id->active); + fence_put(id->flushed_updates); + } } -- cgit v1.2.3 From 832a902f9433b812f829e9f2257daf5d518cf0de Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 15 Feb 2016 12:33:02 +0100 Subject: drm/amdgpu: use a sync object for VMID fences v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: rebase & cleanup This way we can store more than one fence as user for each VMID. 
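The sync object used for this keeps at most one fence per fence context: adding a second fence from the same context only replaces the remembered fence if the new one is later. Because a fence context is a single in-order timeline, waiting for the latest fence per context is enough to wait for everything previously submitted on it. A minimal array-based model of that rule (the driver uses a hash table keyed by context; toy_fence and toy_sync_fence are illustrative names, not driver API):

#include <stdint.h>
#include <stddef.h>

struct toy_fence {
	uint64_t context;	/* timeline the fence belongs to */
	uint64_t seqno;		/* position on that timeline */
};

#define TOY_SLOTS 8
static struct toy_fence *slots[TOY_SLOTS];

/* remember f, keeping only the latest fence per context */
static void toy_sync_fence(struct toy_fence *f)
{
	size_t i, free_slot = TOY_SLOTS;

	for (i = 0; i < TOY_SLOTS; i++) {
		if (!slots[i]) {
			if (free_slot == TOY_SLOTS)
				free_slot = i;
			continue;
		}
		if (slots[i]->context == f->context) {
			if (f->seqno > slots[i]->seqno)
				slots[i] = f;	/* same timeline, keep the later fence */
			return;
		}
	}
	if (free_slot < TOY_SLOTS)
		slots[free_slot] = f;
}

int main(void)
{
	struct toy_fence a = { .context = 7, .seqno = 1 };
	struct toy_fence b = { .context = 7, .seqno = 2 };

	toy_sync_fence(&a);
	toy_sync_fence(&b);	/* replaces a: same context, later seqno */
	return slots[0] == &b ? 0 : 1;
}
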
Signed-off-by: Christian König Acked-by: Alex Deucher (v1) Reviewed-by: Chunming Zhou (v1) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 105 +++++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 53 ++++++++-------- 3 files changed, 132 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c1b10046317e..148e2c61463c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -588,6 +588,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, void *owner); +bool amdgpu_sync_is_idle(struct amdgpu_sync *sync); +int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src, + struct fence *fence); struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_wait(struct amdgpu_sync *sync); void amdgpu_sync_free(struct amdgpu_sync *sync); @@ -875,7 +878,8 @@ struct amdgpu_vm { struct amdgpu_vm_id { struct list_head list; - struct fence *active; + struct fence *first; + struct amdgpu_sync active; atomic_long_t owner; uint64_t pd_gpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c48b4fce5e57..34a92808bbd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -108,6 +108,29 @@ static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence) *keep = fence_get(fence); } +/** + * amdgpu_sync_add_later - add the fence to the hash + * + * @sync: sync object to add the fence to + * @f: fence to add + * + * Tries to add the fence to an existing hash entry. Returns true when an entry + * was found, false otherwise. + */ +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f) +{ + struct amdgpu_sync_entry *e; + + hash_for_each_possible(sync->fences, e, node, f->context) { + if (unlikely(e->fence->context != f->context)) + continue; + + amdgpu_sync_keep_later(&e->fence, f); + return true; + } + return false; +} + /** * amdgpu_sync_fence - remember to sync to this fence * @@ -127,13 +150,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM) amdgpu_sync_keep_later(&sync->last_vm_update, f); - hash_for_each_possible(sync->fences, e, node, f->context) { - if (unlikely(e->fence->context != f->context)) - continue; - - amdgpu_sync_keep_later(&e->fence, f); + if (amdgpu_sync_add_later(sync, f)) return 0; - } e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); if (!e) @@ -204,6 +222,81 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, return r; } +/** + * amdgpu_sync_is_idle - test if all fences are signaled + * + * @sync: the sync object + * + * Returns true if all fences in the sync object are signaled. 
+ */ +bool amdgpu_sync_is_idle(struct amdgpu_sync *sync) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + int i; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + struct fence *f = e->fence; + + if (fence_is_signaled(f)) { + hash_del(&e->node); + fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } + + return false; + } + + return true; +} + +/** + * amdgpu_sync_cycle_fences - move fences from one sync object into another + * + * @dst: the destination sync object + * @src: the source sync object + * @fence: fence to add to source + * + * Remove all fences from source and put them into destination and add + * fence as new one into source. + */ +int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src, + struct fence *fence) +{ + struct amdgpu_sync_entry *e, *newone; + struct hlist_node *tmp; + int i; + + /* Allocate the new entry before moving the old ones */ + newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); + if (!newone) + return -ENOMEM; + + hash_for_each_safe(src->fences, i, tmp, e, node) { + struct fence *f = e->fence; + + hash_del(&e->node); + if (fence_is_signaled(f)) { + fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } + + if (amdgpu_sync_add_later(dst, f)) { + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } + + hash_add(dst->fences, &e->node, f->context); + } + + hash_add(src->fences, &newone->node, fence->context); + newone->fence = fence_get(fence); + + return 0; +} + struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8a758b4fb3a9..d0cce7c3129a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -189,14 +189,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (usable) { - r = amdgpu_sync_fence(ring->adev, sync, id->active); - if (r) { - mutex_unlock(&adev->vm_manager.lock); - return r; - } + r = amdgpu_sync_fence(ring->adev, sync, id->first); + if (r) + goto error; - fence_put(id->active); - id->active = fence_get(fence); + r = amdgpu_sync_fence(ring->adev, &id->active, fence); + if (r) + goto error; list_move_tail(&id->list, &adev->vm_manager.ids_lru); @@ -214,13 +213,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_vm_id, list); - if (id->active && !fence_is_signaled(id->active)) { - struct amdgpu_vm_id *tmp; + if (!amdgpu_sync_is_idle(&id->active)) { struct list_head *head = &adev->vm_manager.ids_lru; + struct amdgpu_vm_id *tmp; list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru, list) { - if (id->active && fence_is_signaled(id->active)) { + if (amdgpu_sync_is_idle(&id->active)) { list_move(&id->list, head); head = &id->list; } @@ -230,25 +229,27 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, list); } - r = amdgpu_sync_fence(ring->adev, sync, id->active); - if (!r) { - fence_put(id->active); - id->active = fence_get(fence); + r = amdgpu_sync_cycle_fences(sync, &id->active, fence); + if (r) + goto error; - fence_put(id->flushed_updates); - id->flushed_updates = fence_get(updates); + fence_put(id->first); + id->first = fence_get(fence); - id->pd_gpu_addr = pd_addr; + fence_put(id->flushed_updates); + id->flushed_updates = fence_get(updates); - list_move_tail(&id->list, &adev->vm_manager.ids_lru); - atomic_long_set(&id->owner, (long)&vm->ids[ring->idx]); - vm->ids[ring->idx] = id; + id->pd_gpu_addr = 
pd_addr; - *vm_id = id - adev->vm_manager.ids; - *vm_pd_addr = pd_addr; - trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); - } + list_move_tail(&id->list, &adev->vm_manager.ids_lru); + atomic_long_set(&id->owner, (long)id); + vm->ids[ring->idx] = id; + *vm_id = id - adev->vm_manager.ids; + *vm_pd_addr = pd_addr; + trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); + +error: mutex_unlock(&adev->vm_manager.lock); return r; } @@ -1467,6 +1468,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) /* skip over VMID 0, since it is the system VM */ for (i = 1; i < adev->vm_manager.num_ids; ++i) { amdgpu_vm_reset_id(adev, i); + amdgpu_sync_create(&adev->vm_manager.ids[i].active); list_add_tail(&adev->vm_manager.ids[i].list, &adev->vm_manager.ids_lru); } @@ -1488,7 +1490,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_NUM_VM; ++i) { struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; - fence_put(id->active); + fence_put(adev->vm_manager.ids[i].first); + amdgpu_sync_free(&adev->vm_manager.ids[i].active); fence_put(id->flushed_updates); } } -- cgit v1.2.3 From 41d9eb2c5a2a21c9120e906d077e77562883510e Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 1 Mar 2016 16:46:18 +0100 Subject: drm/amdgpu: add a fence after the VM flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we can track when the flush is done. Signed-off-by: Christian König Acked-by: Alex Deucher Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 ++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 26 +++++++++++++++++++++----- 3 files changed, 35 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 148e2c61463c..66e51f9e593b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -880,6 +880,7 @@ struct amdgpu_vm_id { struct list_head list; struct fence *first; struct amdgpu_sync active; + struct fence *last_flush; atomic_long_t owner; uint64_t pd_gpu_addr; @@ -926,11 +927,11 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct fence *fence, unsigned *vm_id, uint64_t *vm_pd_addr); -void amdgpu_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size); +int amdgpu_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 644336d76aca..83973d051080 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -155,10 +155,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (vm) { /* do context switch */ - amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, - ib->gds_base, ib->gds_size, - ib->gws_base, ib->gws_size, - 
ib->oa_base, ib->oa_size); + r = amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, + ib->gds_base, ib->gds_size, + ib->gws_base, ib->gws_size, + ib->oa_base, ib->oa_size); + if (r) { + amdgpu_ring_undo(ring); + return r; + } if (ring->funcs->emit_hdp_flush) amdgpu_ring_emit_hdp_flush(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d0cce7c3129a..252445f578f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -236,6 +236,9 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, fence_put(id->first); id->first = fence_get(fence); + fence_put(id->last_flush); + id->last_flush = NULL; + fence_put(id->flushed_updates); id->flushed_updates = fence_get(updates); @@ -263,11 +266,11 @@ error: * * Emit a VM flush when it is necessary. */ -void amdgpu_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) +int amdgpu_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; @@ -278,14 +281,25 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, id->gws_size != gws_size || id->oa_base != oa_base || id->oa_size != oa_size); + int r; if (ring->funcs->emit_pipeline_sync && ( pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed)) amdgpu_ring_emit_pipeline_sync(ring); if (pd_addr != AMDGPU_VM_NO_FLUSH) { + struct fence *fence; + trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); + r = amdgpu_fence_emit(ring, &fence); + if (r) + return r; + + mutex_lock(&adev->vm_manager.lock); + fence_put(id->last_flush); + id->last_flush = fence; + mutex_unlock(&adev->vm_manager.lock); } if (gds_switch_needed) { @@ -300,6 +314,8 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, gws_base, gws_size, oa_base, oa_size); } + + return 0; } /** -- cgit v1.2.3 From f498d9ed26fdfa2694ef3d892f032c7dc6feba14 Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Sun, 10 Apr 2016 16:29:59 +0200 Subject: drm/amd: Mark some tables as const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch marks some compile-time constant tables 'const'. The tables marked in this patch are the low hanging fruit where little other changes were necesary to avoid casting away constness etc. Also mark some tables that are private to a file as static. 
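As a generic illustration of the pattern (not code from the patch): declaring a file-private, compile-time constant table static const gives it internal linkage and lets the compiler place it in read-only data, so nothing outside the file can modify it or reference it by name.

#include <stdint.h>

/* before: writable and externally visible
 * uint16_t example_lookup_table[2][4] = { {600, 1050, 3, 0}, {600, 1050, 6, 1} };
 */

/* after: read-only and private to this translation unit */
static const uint16_t example_lookup_table[2][4] = {
	{ 600, 1050, 3, 0 },
	{ 600, 1050, 6, 1 },
};

static uint16_t example_lookup(unsigned int row, unsigned int col)
{
	return example_lookup_table[row][col];
}

int main(void)
{
	return example_lookup(0, 2) == 3 ? 0 : 1;
}
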
Reviewed-by: Christian König Signed-off-by: Nils Wallménius Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c | 12 ++++++------ drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h | 2 +- drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c | 10 +++++----- drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c | 8 ++++---- drivers/gpu/drm/amd/powerplay/inc/fiji_pwrvirus.h | 2 +- drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c | 2 +- drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 6 +++--- drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 2 +- 13 files changed, 29 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 66e51f9e593b..5d05b5d67bbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2348,7 +2348,7 @@ static inline void amdgpu_unregister_atpx_handler(void) {} * KMS */ extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; -extern int amdgpu_max_kms_ioctl; +extern const int amdgpu_max_kms_ioctl; int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); int amdgpu_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 0535095c4d14..c835abe65df3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -596,20 +596,20 @@ const struct drm_mode_config_funcs amdgpu_mode_funcs = { .output_poll_changed = amdgpu_output_poll_changed }; -static struct drm_prop_enum_list amdgpu_underscan_enum_list[] = +static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = { { UNDERSCAN_OFF, "off" }, { UNDERSCAN_ON, "on" }, { UNDERSCAN_AUTO, "auto" }, }; -static struct drm_prop_enum_list amdgpu_audio_enum_list[] = +static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = { { AMDGPU_AUDIO_DISABLE, "off" }, { AMDGPU_AUDIO_ENABLE, "on" }, { AMDGPU_AUDIO_AUTO, "auto" }, }; /* XXX support different dither options? spatial, temporal, both, etc. 
*/ -static struct drm_prop_enum_list amdgpu_dither_enum_list[] = +static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = { { AMDGPU_FMT_DITHER_DISABLE, "off" }, { AMDGPU_FMT_DITHER_ENABLE, "on" }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 93462aea9faa..642578c01e09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -166,7 +166,7 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444); MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))"); module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); -static struct pci_device_id pciidlist[] = { +static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 762cfdb85147..9266c7b69808 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -498,7 +498,7 @@ static int amdgpu_irqdomain_map(struct irq_domain *d, return 0; } -static struct irq_domain_ops amdgpu_hw_irqdomain_ops = { +static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = { .map = amdgpu_irqdomain_map, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 598eb0cd5aab..4ac83c8b40d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -755,4 +755,4 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; -int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); +const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 025a3ed37492..55a006dd5c0e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c @@ -95,23 +95,23 @@ enum DPM_EVENT_SRC { /* [2.5%,~2.5%] Clock stretched is multiple of 2.5% vs * not and [Fmin, Fmax, LDO_REFSEL, USE_FOR_LOW_FREQ] */ -uint16_t fiji_clock_stretcher_lookup_table[2][4] = { {600, 1050, 3, 0}, - {600, 1050, 6, 1} }; +static const uint16_t fiji_clock_stretcher_lookup_table[2][4] = +{ {600, 1050, 3, 0}, {600, 1050, 6, 1} }; /* [FF, SS] type, [] 4 voltage ranges, and * [Floor Freq, Boundary Freq, VID min , VID max] */ -uint32_t fiji_clock_stretcher_ddt_table[2][4][4] = +static const uint32_t fiji_clock_stretcher_ddt_table[2][4][4] = { { {265, 529, 120, 128}, {325, 650, 96, 119}, {430, 860, 32, 95}, {0, 0, 0, 31} }, { {275, 550, 104, 112}, {319, 638, 96, 103}, {360, 720, 64, 95}, {384, 768, 32, 63} } }; /* [Use_For_Low_freq] value, [0%, 5%, 10%, 7.14%, 14.28%, 20%] * (coming from PWR_CKS_CNTL.stretch_amount reg spec) */ -uint8_t fiji_clock_stretch_amount_conversion[2][6] = { {0, 1, 3, 2, 4, 5}, - {0, 2, 4, 5, 6, 5} }; +static const uint8_t fiji_clock_stretch_amount_conversion[2][6] = +{ {0, 1, 3, 2, 4, 5}, {0, 2, 4, 5, 6, 5} }; -const unsigned long PhwFiji_Magic = (unsigned long)(PHM_VIslands_Magic); +static const unsigned long PhwFiji_Magic = (unsigned long)(PHM_VIslands_Magic); struct fiji_power_state *cast_phw_fiji_power_state( struct pp_hw_power_state *hw_ps) diff --git 
a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h index a16f7cd4c238..4b29d9ef07ce 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h @@ -263,7 +263,7 @@ struct fiji_hwmgr { bool enable_tdc_limit_feature; bool enable_pkg_pwr_tracking_feature; bool disable_uvd_power_tune_feature; - struct fiji_pt_defaults *power_tune_defaults; + const struct fiji_pt_defaults *power_tune_defaults; struct SMU73_Discrete_PmFuses power_tune_table; uint32_t dte_tj_offset; uint32_t fast_watermark_threshold; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c index 6efcb2bac45f..db23a4068baf 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c @@ -32,7 +32,7 @@ #define VOLTAGE_SCALE 4 #define POWERTUNE_DEFAULT_SET_MAX 1 -struct fiji_pt_defaults fiji_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { +const struct fiji_pt_defaults fiji_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { /*sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc */ {1, 0xF, 0xFD, /* TDC_MAWt, TdcWaterfallCtl, DTEAmbientTempBase */ @@ -143,7 +143,7 @@ static void get_scl_sda_value(uint8_t line, uint8_t *scl, uint8_t* sda) int fiji_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) { struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); - struct fiji_pt_defaults *defaults = data->power_tune_defaults; + const struct fiji_pt_defaults *defaults = data->power_tune_defaults; SMU73_Discrete_DpmTable *dpm_table = &(data->smc_state_table); struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); @@ -222,7 +222,7 @@ int fiji_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) static int fiji_populate_svi_load_line(struct pp_hwmgr *hwmgr) { struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); - struct fiji_pt_defaults *defaults = data->power_tune_defaults; + const struct fiji_pt_defaults *defaults = data->power_tune_defaults; data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn; data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC; @@ -238,7 +238,7 @@ static int fiji_populate_tdc_limit(struct pp_hwmgr *hwmgr) struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct fiji_pt_defaults *defaults = data->power_tune_defaults; + const struct fiji_pt_defaults *defaults = data->power_tune_defaults; /* TDC number of fraction bits are changed from 8 to 7 * for Fiji as requested by SMC team @@ -256,7 +256,7 @@ static int fiji_populate_tdc_limit(struct pp_hwmgr *hwmgr) static int fiji_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) { struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); - struct fiji_pt_defaults *defaults = data->power_tune_defaults; + const struct fiji_pt_defaults *defaults = data->power_tune_defaults; uint32_t temp; if (fiji_read_smc_sram_dword(hwmgr->smumgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index b9a27c666531..3bed991ffa40 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c @@ -89,17 +89,17 @@ typedef uint32_t PECI_RegistryValue; /* [2.5%,~2.5%] Clock stretched is multiple of 
2.5% vs not and [Fmin, Fmax, LDO_REFSEL, USE_FOR_LOW_FREQ] */ -uint16_t PP_ClockStretcherLookupTable[2][4] = { +static const uint16_t PP_ClockStretcherLookupTable[2][4] = { {600, 1050, 3, 0}, {600, 1050, 6, 1} }; /* [FF, SS] type, [] 4 voltage ranges, and [Floor Freq, Boundary Freq, VID min , VID max] */ -uint32_t PP_ClockStretcherDDTTable[2][4][4] = { +static const uint32_t PP_ClockStretcherDDTTable[2][4][4] = { { {265, 529, 120, 128}, {325, 650, 96, 119}, {430, 860, 32, 95}, {0, 0, 0, 31} }, { {275, 550, 104, 112}, {319, 638, 96, 103}, {360, 720, 64, 95}, {384, 768, 32, 63} } }; /* [Use_For_Low_freq] value, [0%, 5%, 10%, 7.14%, 14.28%, 20%] (coming from PWR_CKS_CNTL.stretch_amount reg spec) */ -uint8_t PP_ClockStretchAmountConversion[2][6] = { +static const uint8_t PP_ClockStretchAmountConversion[2][6] = { {0, 1, 3, 2, 4, 5}, {0, 2, 4, 5, 6, 5} }; @@ -113,7 +113,7 @@ enum DPM_EVENT_SRC { }; typedef enum DPM_EVENT_SRC DPM_EVENT_SRC; -const unsigned long PhwTonga_Magic = (unsigned long)(PHM_VIslands_Magic); +static const unsigned long PhwTonga_Magic = (unsigned long)(PHM_VIslands_Magic); struct tonga_power_state *cast_phw_tonga_power_state( struct pp_hw_power_state *hw_ps) diff --git a/drivers/gpu/drm/amd/powerplay/inc/fiji_pwrvirus.h b/drivers/gpu/drm/amd/powerplay/inc/fiji_pwrvirus.h index 0262ad35502a..8a31665321a8 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/fiji_pwrvirus.h +++ b/drivers/gpu/drm/amd/powerplay/inc/fiji_pwrvirus.h @@ -46,7 +46,7 @@ struct PWR_Command_Table typedef struct PWR_Command_Table PWR_Command_Table; #define PWR_VIRUS_TABLE_SIZE 10243 -static PWR_Command_Table PwrVirusTable[PWR_VIRUS_TABLE_SIZE] = +static const PWR_Command_Table PwrVirusTable[PWR_VIRUS_TABLE_SIZE] = { { PwrCmdWrite, 0x100100b6, mmPCIE_INDEX }, { PwrCmdWrite, 0x00000000, mmPCIE_DATA }, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c index ec222c665602..da18f44fd1c8 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c @@ -39,7 +39,7 @@ #define SIZE_ALIGN_32(x) (((x) + 31) / 32 * 32) -static enum cz_scratch_entry firmware_list[] = { +static const enum cz_scratch_entry firmware_list[] = { CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index cdbb9f89bf36..673a75c74e18 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -44,7 +44,7 @@ #define FIJI_SMC_SIZE 0x20000 -struct SMU73_Discrete_GraphicsLevel avfs_graphics_level[8] = { +static const struct SMU73_Discrete_GraphicsLevel avfs_graphics_level[8] = { /* Min Sclk pcie DeepSleep Activity CgSpll CgSpll spllSpread SpllSpread CcPwr CcPwr Sclk Display Enabled Enabled Voltage Power */ /* Voltage, Frequency, DpmLevel, DivId, Level, FuncCntl3, FuncCntl4, Spectrum, Spectrum2, DynRm, DynRm1 Did, Watermark, ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */ { 0x3c0fd047, 0x30750000, 0x00, 0x03, 0x1e00, 0x00200410, 0x87020000, 0x21680000, 0x0c000000, 0, 0, 0x16, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 }, @@ -189,7 +189,7 @@ int fiji_copy_bytes_to_smc(struct pp_smumgr *smumgr, int fiji_program_jump_on_start(struct pp_smumgr *smumgr) { - static unsigned char data[] = { 0xE0, 0x00, 0x80, 0x40 }; + static const unsigned char data[] = { 0xE0, 0x00, 0x80, 0x40 }; 
fiji_copy_bytes_to_smc(smumgr, 0x0, data, 4, sizeof(data) + 1); @@ -665,7 +665,7 @@ int fiji_setup_pwr_virus(struct pp_smumgr *smumgr) { int i, result = -1; uint32_t reg, data; - PWR_Command_Table *virus = PwrVirusTable; + const PWR_Command_Table *virus = PwrVirusTable; struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend); priv->avfs.AvfsBtcStatus = AVFS_LOAD_VIRUS; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index ebdb43a8daef..32820b680d88 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -145,7 +145,7 @@ out: int tonga_program_jump_on_start(struct pp_smumgr *smumgr) { - static unsigned char pData[] = { 0xE0, 0x00, 0x80, 0x40 }; + static const unsigned char pData[] = { 0xE0, 0x00, 0x80, 0x40 }; tonga_copy_bytes_to_smc(smumgr, 0x0, pData, 4, sizeof(pData)+1); -- cgit v1.2.3 From 62250a910a4090f88b729e04baf4369d78ba5bdc Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Sun, 10 Apr 2016 16:30:00 +0200 Subject: drm/amd/scheduler: Mark amdgpu_sched_ops const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This marks the struct amdgpu_sched_ops const and adjusts amd_sched_init to take a const pointer for the ops param. The ops member of struct amd_gpu_scheduler is also changed to const. Reviewed-by: Christian König Signed-off-by: Nils Wallménius Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 2 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5d05b5d67bbd..660213a1682a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -748,7 +748,7 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_VCE }; -extern struct amd_sched_backend_ops amdgpu_sched_ops; +extern const struct amd_sched_backend_ops amdgpu_sched_ops; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index a052ac2b131d..4eea2a18d8bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -198,7 +198,7 @@ err: return fence; } -struct amd_sched_backend_ops amdgpu_sched_ops = { +const struct amd_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, .begin_job = amd_sched_job_begin, diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 639c70de217c..c16248cee779 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -530,7 +530,7 @@ static int amd_sched_main(void *param) * Return 0 on success, otherwise error code. 
*/ int amd_sched_init(struct amd_gpu_scheduler *sched, - struct amd_sched_backend_ops *ops, + const struct amd_sched_backend_ops *ops, unsigned hw_submission, long timeout, const char *name) { int i; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 95ebfd069690..169f70fe949c 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -123,7 +123,7 @@ enum amd_sched_priority { * One scheduler is implemented for each hardware ring */ struct amd_gpu_scheduler { - struct amd_sched_backend_ops *ops; + const struct amd_sched_backend_ops *ops; uint32_t hw_submission_limit; long timeout; const char *name; @@ -137,7 +137,7 @@ struct amd_gpu_scheduler { }; int amd_sched_init(struct amd_gpu_scheduler *sched, - struct amd_sched_backend_ops *ops, + const struct amd_sched_backend_ops *ops, uint32_t hw_submission, long timeout, const char *name); void amd_sched_fini(struct amd_gpu_scheduler *sched); -- cgit v1.2.3 From 06ab6832ac06c77332e3b0415977acf68ea364cf Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Mon, 2 May 2016 12:46:15 -0400 Subject: drm/amdgpu: Mark all instances of struct drm_info_list as const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All these are compile-time constant and the drm_debugfs_create/remove_files functions take a const pointer argument. Reviewed-by: Christian König Signed-off-by: Nils Wallménius Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 660213a1682a..d1ad7634f351 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1701,12 +1701,12 @@ static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} * Debugfs */ struct amdgpu_debugfs { - struct drm_info_list *files; + const struct drm_info_list *files; unsigned num_files; }; int amdgpu_debugfs_add_files(struct amdgpu_device *adev, - struct drm_info_list *files, + const struct drm_info_list *files, unsigned nfiles); int amdgpu_debugfs_fence_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fa848ade51f1..c7974ff470e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2009,7 +2009,7 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev) * Debugfs */ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, - struct drm_info_list *files, + const struct drm_info_list *files, unsigned nfiles) { unsigned i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d81f1f4883a6..100f4c6a8c1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -639,7 +639,7 @@ static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data) return 0; } -static struct drm_info_list amdgpu_debugfs_fence_list[] = { +static const struct 
drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index fa6a27bff298..0635bb6b45c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -797,7 +797,7 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) return 0; } -static struct drm_info_list amdgpu_debugfs_gem_list[] = { +static const struct drm_info_list amdgpu_debugfs_gem_list[] = { {"amdgpu_gem_info", &amdgpu_debugfs_gem_info, 0, NULL}, }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 83973d051080..0129617a7962 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -329,7 +329,7 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data) } -static struct drm_info_list amdgpu_debugfs_sa_list[] = { +static const struct drm_info_list amdgpu_debugfs_sa_list[] = { {"amdgpu_sa_info", &amdgpu_debugfs_sa_info, 0, NULL}, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index ff9597ce268c..6d44d4a64698 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1212,7 +1212,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) return 0; } -static struct drm_info_list amdgpu_pm_info_list[] = { +static const struct drm_info_list amdgpu_pm_info_list[] = { {"amdgpu_pm_info", amdgpu_debugfs_pm_info, 0, NULL}, }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index dd79243d0a37..7bd31ae10b33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -427,7 +427,7 @@ static int r600_uvd_index = offsetof(struct amdgpu_device, uvd.ring); static int si_vce1_index = offsetof(struct amdgpu_device, vce.ring[0]); static int si_vce2_index = offsetof(struct amdgpu_device, vce.ring[1]); -static struct drm_info_list amdgpu_debugfs_ring_info_list[] = { +static const struct drm_info_list amdgpu_debugfs_ring_info_list[] = { {"amdgpu_ring_gfx", amdgpu_debugfs_ring_info, 0, &amdgpu_gfx_index}, {"amdgpu_ring_cp1", amdgpu_debugfs_ring_info, 0, &cayman_cp1_index}, {"amdgpu_ring_cp2", amdgpu_debugfs_ring_info, 0, &cayman_cp2_index}, @@ -445,7 +445,7 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ri #if defined(CONFIG_DEBUG_FS) unsigned i; for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) { - struct drm_info_list *info = &amdgpu_debugfs_ring_info_list[i]; + const struct drm_info_list *info = &amdgpu_debugfs_ring_info_list[i]; int roffset = *(int*)amdgpu_debugfs_ring_info_list[i].data; struct amdgpu_ring *other = (void *)(((uint8_t*)adev) + roffset); unsigned r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7a8bdfedff1d..7f5fc29354f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1165,7 +1165,7 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) static int ttm_pl_vram = TTM_PL_VRAM; static int ttm_pl_tt = TTM_PL_TT; -static struct drm_info_list amdgpu_ttm_debugfs_list[] = { +static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram}, {"amdgpu_gtt_mm", 
amdgpu_mm_dump_table, 0, &ttm_pl_tt}, {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, -- cgit v1.2.3 From c036554170fcc2238c32a7edd72c1b61b886428a Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Tue, 12 Apr 2016 13:46:15 +0200 Subject: drm/amdgpu: handle more than 10 UVD sessions (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change History -------------- v2: - Make the firmware version check correct. Firmware versions >= 1.80 should all support 40 UVD instances. - Replace AMDGPU_MAX_UVD_HANDLES with a max_handles variable. v1: - The firmware can handle up to 40 UVD sessions. Signed-off-by: Arindam Nath Signed-off-by: Ayyappa Chandolu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 +++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 30 ++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 7 +++-- .../gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h | 1 + 6 files changed, 41 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d1ad7634f351..c9fe2d56cebf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1593,16 +1593,19 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev); /* * UVD */ -#define AMDGPU_MAX_UVD_HANDLES 10 -#define AMDGPU_UVD_STACK_SIZE (1024*1024) -#define AMDGPU_UVD_HEAP_SIZE (1024*1024) -#define AMDGPU_UVD_FIRMWARE_OFFSET 256 +#define AMDGPU_DEFAULT_UVD_HANDLES 10 +#define AMDGPU_MAX_UVD_HANDLES 40 +#define AMDGPU_UVD_STACK_SIZE (200*1024) +#define AMDGPU_UVD_HEAP_SIZE (256*1024) +#define AMDGPU_UVD_SESSION_SIZE (50*1024) +#define AMDGPU_UVD_FIRMWARE_OFFSET 256 struct amdgpu_uvd { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; void *saved_bo; + unsigned max_handles; atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; struct delayed_work idle_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 338da80006b6..76ebc109e5e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -151,6 +151,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return r; } + /* Set the default UVD handles that the firmware can handle */ + adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES; + hdr = (const struct common_firmware_header *)adev->uvd.fw->data; family_id = le32_to_cpu(hdr->ucode_version) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; @@ -158,8 +161,19 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", version_major, version_minor, family_id); + /* + * Limit the number of UVD handles depending on microcode major + * and minor versions. The firmware version which has 40 UVD + * instances support is 1.80. So all subsequent versions should + * also have the same support. 
+ */ + if ((version_major > 0x01) || + ((version_major == 0x01) && (version_minor >= 0x50))) + adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) - + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE; + + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE + + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles; r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, @@ -202,7 +216,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return r; } - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + for (i = 0; i < adev->uvd.max_handles; ++i) { atomic_set(&adev->uvd.handles[i], 0); adev->uvd.filp[i] = NULL; } @@ -248,7 +262,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (adev->uvd.vcpu_bo == NULL) return 0; - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) + for (i = 0; i < adev->uvd.max_handles; ++i) if (atomic_read(&adev->uvd.handles[i])) break; @@ -303,7 +317,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) struct amdgpu_ring *ring = &adev->uvd.ring; int i, r; - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + for (i = 0; i < adev->uvd.max_handles; ++i) { uint32_t handle = atomic_read(&adev->uvd.handles[i]); if (handle != 0 && adev->uvd.filp[i] == filp) { struct fence *fence; @@ -563,7 +577,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, amdgpu_bo_kunmap(bo); /* try to alloc a new handle */ - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + for (i = 0; i < adev->uvd.max_handles; ++i) { if (atomic_read(&adev->uvd.handles[i]) == handle) { DRM_ERROR("Handle 0x%x already in use!\n", handle); return -EINVAL; @@ -586,7 +600,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, return r; /* validate the handle */ - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + for (i = 0; i < adev->uvd.max_handles; ++i) { if (atomic_read(&adev->uvd.handles[i]) == handle) { if (adev->uvd.filp[i] != ctx->parser->filp) { DRM_ERROR("UVD handle collision detected!\n"); @@ -601,7 +615,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, case 2: /* it's a destroy msg, free the handle */ - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) + for (i = 0; i < adev->uvd.max_handles; ++i) atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); amdgpu_bo_kunmap(bo); return 0; @@ -1013,7 +1027,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) fences = amdgpu_fence_count_emitted(&adev->uvd.ring); - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) + for (i = 0; i < adev->uvd.max_handles; ++i) if (atomic_read(&adev->uvd.handles[i])) ++handles; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index cb463753115b..0d6b9e2150cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -559,12 +559,13 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) WREG32(mmUVD_VCPU_CACHE_SIZE0, size); addr += size; - size = AMDGPU_UVD_STACK_SIZE >> 3; + size = AMDGPU_UVD_HEAP_SIZE >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET1, addr); WREG32(mmUVD_VCPU_CACHE_SIZE1, size); addr += size; - size = AMDGPU_UVD_HEAP_SIZE >> 3; + size = (AMDGPU_UVD_STACK_SIZE + + (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles)) >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET2, addr); WREG32(mmUVD_VCPU_CACHE_SIZE2, size); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index de459c8000a7..84abf89ef4f8 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -272,12 +272,13 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) WREG32(mmUVD_VCPU_CACHE_SIZE0, size); offset += size; - size = AMDGPU_UVD_STACK_SIZE; + size = AMDGPU_UVD_HEAP_SIZE; WREG32(mmUVD_VCPU_CACHE_OFFSET1, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE1, size); offset += size; - size = AMDGPU_UVD_HEAP_SIZE; + size = AMDGPU_UVD_STACK_SIZE + + (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles); WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE2, size); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 372d70a0daec..c633b1a26a7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -272,18 +272,21 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) WREG32(mmUVD_VCPU_CACHE_SIZE0, size); offset += size; - size = AMDGPU_UVD_STACK_SIZE; + size = AMDGPU_UVD_HEAP_SIZE; WREG32(mmUVD_VCPU_CACHE_OFFSET1, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE1, size); offset += size; - size = AMDGPU_UVD_HEAP_SIZE; + size = AMDGPU_UVD_STACK_SIZE + + (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles); WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE2, size); WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + + WREG32(mmUVD_GP_SCRATCH4, adev->uvd.max_handles); } #if 0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h index b2d4aaf045bc..6f6fb34742d2 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h @@ -111,5 +111,6 @@ #define mmUVD_MIF_RECON1_ADDR_CONFIG 0x39c5 #define ixUVD_MIF_SCLR_ADDR_CONFIG 0x4 #define mmUVD_JPEG_ADDR_CONFIG 0x3a1f +#define mmUVD_GP_SCRATCH4 0x3d38 #endif /* UVD_6_0_D_H */ -- cgit v1.2.3 From 110e6f26af80dfd90b6e5c645b1aed7228aa580d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 12 Apr 2016 13:25:48 +1000 Subject: drm/amd: make a type-safe cgs_device struct. (v2) This is just a type-safety thing to avoid everyone taking void *; it doesn't change anything. v2: agd5f: split out the dal changes into a separate patch.
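As an illustration (not part of the patch), here is a minimal C sketch of why a forward-declared handle type is preferable to a void * handle; struct gpu_handle, handle_read and struct other_object are hypothetical stand-ins for the CGS types:

#include <stdio.h>

struct gpu_handle;	/* forward declaration: opaque to callers, but a distinct type */

struct gpu_handle {	/* full definition, normally private to the implementation */
	int reg;
};

/* Taking 'struct gpu_handle *' instead of 'void *' lets the compiler
 * diagnose unrelated pointers instead of accepting them silently. */
static int handle_read(struct gpu_handle *h)
{
	return h->reg;
}

struct other_object {
	int id;
};

int main(void)
{
	struct gpu_handle h = { .reg = 42 };
	struct other_object o = { .id = 7 };

	printf("%d\n", handle_read(&h));	/* fine: prints 42 */
	/* handle_read(&o); is now an incompatible-pointer-type error;
	 * a 'void *' parameter would have accepted it and read the
	 * wrong memory. */
	(void)o;
	return 0;
}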
Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/acp/acp_hw.c | 2 +- drivers/gpu/drm/amd/acp/include/acp_gfx_if.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 92 ++++++++++++++-------------- drivers/gpu/drm/amd/include/cgs_common.h | 76 ++++++++++++----------- drivers/gpu/drm/amd/include/cgs_linux.h | 6 +- 7 files changed, 93 insertions(+), 98 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/acp/acp_hw.c b/drivers/gpu/drm/amd/acp/acp_hw.c index 7af83f142b4b..c7d7205c9b11 100644 --- a/drivers/gpu/drm/amd/acp/acp_hw.c +++ b/drivers/gpu/drm/amd/acp/acp_hw.c @@ -34,7 +34,7 @@ #define mmACP_AZALIA_I2S_SELECT 0x51d4 -int amd_acp_hw_init(void *cgs_device, +int amd_acp_hw_init(struct cgs_device *cgs_device, unsigned acp_version_major, unsigned acp_version_minor) { unsigned int acp_mode = ACP_MODE_I2S; diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h index bccf47b63899..a72ddb2f69ac 100644 --- a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h +++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h @@ -28,7 +28,7 @@ #include "cgs_linux.h" #include "cgs_common.h" -int amd_acp_hw_init(void *cgs_device, +int amd_acp_hw_init(struct cgs_device *cgs_device, unsigned acp_version_major, unsigned acp_version_minor); #endif /* _ACP_GFX_IF_H */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c9fe2d56cebf..a7a53ac5f413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1868,15 +1868,8 @@ struct amdgpu_atcs { /* * CGS */ -void *amdgpu_cgs_create_device(struct amdgpu_device *adev); -void amdgpu_cgs_destroy_device(void *cgs_device); - - -/* - * CGS - */ -void *amdgpu_cgs_create_device(struct amdgpu_device *adev); -void amdgpu_cgs_destroy_device(void *cgs_device); +struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev); +void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); /* GPU virtualization */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h index f6e32a639107..8a396313c86f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h @@ -30,7 +30,7 @@ struct amdgpu_acp { struct device *parent; - void *cgs_device; + struct cgs_device *cgs_device; struct amd_acp_private *private; struct mfd_cell *acp_cell; struct resource *acp_res; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 6043dc7c3a94..8b653f2c7086 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -42,7 +42,7 @@ struct amdgpu_cgs_device { struct amdgpu_device *adev = \ ((struct amdgpu_cgs_device *)cgs_device)->adev -static int amdgpu_cgs_gpu_mem_info(void *cgs_device, enum cgs_gpu_mem_type type, +static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t *mc_start, uint64_t *mc_size, uint64_t *mem_size) { @@ -73,7 +73,7 @@ static int amdgpu_cgs_gpu_mem_info(void *cgs_device, enum cgs_gpu_mem_type type, return 0; } -static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem, +static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem, uint64_t size, uint64_t min_offset, uint64_t max_offset, cgs_handle_t *kmem_handle, uint64_t *mcaddr) @@ -102,7 +102,7 @@ static int 
amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem, return ret; } -static int amdgpu_cgs_gunmap_kmem(void *cgs_device, cgs_handle_t kmem_handle) +static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle) { struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; @@ -118,7 +118,7 @@ static int amdgpu_cgs_gunmap_kmem(void *cgs_device, cgs_handle_t kmem_handle) return 0; } -static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, +static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t size, uint64_t align, uint64_t min_offset, uint64_t max_offset, @@ -208,7 +208,7 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, return ret; } -static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle) +static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) { struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; @@ -225,7 +225,7 @@ static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle) return 0; } -static int amdgpu_cgs_gmap_gpu_mem(void *cgs_device, cgs_handle_t handle, +static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, uint64_t *mcaddr) { int r; @@ -246,7 +246,7 @@ static int amdgpu_cgs_gmap_gpu_mem(void *cgs_device, cgs_handle_t handle, return r; } -static int amdgpu_cgs_gunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) +static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) { int r; struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; @@ -258,7 +258,7 @@ static int amdgpu_cgs_gunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) return r; } -static int amdgpu_cgs_kmap_gpu_mem(void *cgs_device, cgs_handle_t handle, +static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, void **map) { int r; @@ -271,7 +271,7 @@ static int amdgpu_cgs_kmap_gpu_mem(void *cgs_device, cgs_handle_t handle, return r; } -static int amdgpu_cgs_kunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) +static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) { int r; struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; @@ -283,20 +283,20 @@ static int amdgpu_cgs_kunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) return r; } -static uint32_t amdgpu_cgs_read_register(void *cgs_device, unsigned offset) +static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset) { CGS_FUNC_ADEV; return RREG32(offset); } -static void amdgpu_cgs_write_register(void *cgs_device, unsigned offset, +static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned offset, uint32_t value) { CGS_FUNC_ADEV; WREG32(offset, value); } -static uint32_t amdgpu_cgs_read_ind_register(void *cgs_device, +static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device, enum cgs_ind_reg space, unsigned index) { @@ -320,7 +320,7 @@ static uint32_t amdgpu_cgs_read_ind_register(void *cgs_device, return 0; } -static void amdgpu_cgs_write_ind_register(void *cgs_device, +static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, enum cgs_ind_reg space, unsigned index, uint32_t value) { @@ -343,7 +343,7 @@ static void amdgpu_cgs_write_ind_register(void *cgs_device, WARN(1, "Invalid indirect register space"); } -static uint8_t amdgpu_cgs_read_pci_config_byte(void *cgs_device, unsigned addr) +static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr) { CGS_FUNC_ADEV; uint8_t val; @@ -353,7 +353,7 @@ 
static uint8_t amdgpu_cgs_read_pci_config_byte(void *cgs_device, unsigned addr) return val; } -static uint16_t amdgpu_cgs_read_pci_config_word(void *cgs_device, unsigned addr) +static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr) { CGS_FUNC_ADEV; uint16_t val; @@ -363,7 +363,7 @@ static uint16_t amdgpu_cgs_read_pci_config_word(void *cgs_device, unsigned addr) return val; } -static uint32_t amdgpu_cgs_read_pci_config_dword(void *cgs_device, +static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device, unsigned addr) { CGS_FUNC_ADEV; @@ -374,7 +374,7 @@ static uint32_t amdgpu_cgs_read_pci_config_dword(void *cgs_device, return val; } -static void amdgpu_cgs_write_pci_config_byte(void *cgs_device, unsigned addr, +static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, unsigned addr, uint8_t value) { CGS_FUNC_ADEV; @@ -382,7 +382,7 @@ static void amdgpu_cgs_write_pci_config_byte(void *cgs_device, unsigned addr, WARN(ret, "pci_write_config_byte error"); } -static void amdgpu_cgs_write_pci_config_word(void *cgs_device, unsigned addr, +static void amdgpu_cgs_write_pci_config_word(struct cgs_device *cgs_device, unsigned addr, uint16_t value) { CGS_FUNC_ADEV; @@ -390,7 +390,7 @@ static void amdgpu_cgs_write_pci_config_word(void *cgs_device, unsigned addr, WARN(ret, "pci_write_config_word error"); } -static void amdgpu_cgs_write_pci_config_dword(void *cgs_device, unsigned addr, +static void amdgpu_cgs_write_pci_config_dword(struct cgs_device *cgs_device, unsigned addr, uint32_t value) { CGS_FUNC_ADEV; @@ -399,7 +399,7 @@ static void amdgpu_cgs_write_pci_config_dword(void *cgs_device, unsigned addr, } -static int amdgpu_cgs_get_pci_resource(void *cgs_device, +static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, enum cgs_resource_type resource_type, uint64_t size, uint64_t offset, @@ -433,7 +433,7 @@ static int amdgpu_cgs_get_pci_resource(void *cgs_device, } } -static const void *amdgpu_cgs_atom_get_data_table(void *cgs_device, +static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device, unsigned table, uint16_t *size, uint8_t *frev, uint8_t *crev) { @@ -449,7 +449,7 @@ static const void *amdgpu_cgs_atom_get_data_table(void *cgs_device, return NULL; } -static int amdgpu_cgs_atom_get_cmd_table_revs(void *cgs_device, unsigned table, +static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table, uint8_t *frev, uint8_t *crev) { CGS_FUNC_ADEV; @@ -462,7 +462,7 @@ static int amdgpu_cgs_atom_get_cmd_table_revs(void *cgs_device, unsigned table, return -EINVAL; } -static int amdgpu_cgs_atom_exec_cmd_table(void *cgs_device, unsigned table, +static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table, void *args) { CGS_FUNC_ADEV; @@ -471,33 +471,33 @@ static int amdgpu_cgs_atom_exec_cmd_table(void *cgs_device, unsigned table, adev->mode_info.atom_context, table, args); } -static int amdgpu_cgs_create_pm_request(void *cgs_device, cgs_handle_t *request) +static int amdgpu_cgs_create_pm_request(struct cgs_device *cgs_device, cgs_handle_t *request) { /* TODO */ return 0; } -static int amdgpu_cgs_destroy_pm_request(void *cgs_device, cgs_handle_t request) +static int amdgpu_cgs_destroy_pm_request(struct cgs_device *cgs_device, cgs_handle_t request) { /* TODO */ return 0; } -static int amdgpu_cgs_set_pm_request(void *cgs_device, cgs_handle_t request, +static int amdgpu_cgs_set_pm_request(struct cgs_device *cgs_device, cgs_handle_t request, int 
active) { /* TODO */ return 0; } -static int amdgpu_cgs_pm_request_clock(void *cgs_device, cgs_handle_t request, +static int amdgpu_cgs_pm_request_clock(struct cgs_device *cgs_device, cgs_handle_t request, enum cgs_clock clock, unsigned freq) { /* TODO */ return 0; } -static int amdgpu_cgs_pm_request_engine(void *cgs_device, cgs_handle_t request, +static int amdgpu_cgs_pm_request_engine(struct cgs_device *cgs_device, cgs_handle_t request, enum cgs_engine engine, int powered) { /* TODO */ @@ -506,7 +506,7 @@ static int amdgpu_cgs_pm_request_engine(void *cgs_device, cgs_handle_t request, -static int amdgpu_cgs_pm_query_clock_limits(void *cgs_device, +static int amdgpu_cgs_pm_query_clock_limits(struct cgs_device *cgs_device, enum cgs_clock clock, struct cgs_clock_limits *limits) { @@ -514,7 +514,7 @@ static int amdgpu_cgs_pm_query_clock_limits(void *cgs_device, return 0; } -static int amdgpu_cgs_set_camera_voltages(void *cgs_device, uint32_t mask, +static int amdgpu_cgs_set_camera_voltages(struct cgs_device *cgs_device, uint32_t mask, const uint32_t *voltages) { DRM_ERROR("not implemented"); @@ -565,7 +565,7 @@ static const struct amdgpu_irq_src_funcs cgs_irq_funcs = { .process = cgs_process_irq, }; -static int amdgpu_cgs_add_irq_source(void *cgs_device, unsigned src_id, +static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src_id, unsigned num_types, cgs_irq_source_set_func_t set, cgs_irq_handler_func_t handler, @@ -600,19 +600,19 @@ static int amdgpu_cgs_add_irq_source(void *cgs_device, unsigned src_id, return ret; } -static int amdgpu_cgs_irq_get(void *cgs_device, unsigned src_id, unsigned type) +static int amdgpu_cgs_irq_get(struct cgs_device *cgs_device, unsigned src_id, unsigned type) { CGS_FUNC_ADEV; return amdgpu_irq_get(adev, adev->irq.sources[src_id], type); } -static int amdgpu_cgs_irq_put(void *cgs_device, unsigned src_id, unsigned type) +static int amdgpu_cgs_irq_put(struct cgs_device *cgs_device, unsigned src_id, unsigned type) { CGS_FUNC_ADEV; return amdgpu_irq_put(adev, adev->irq.sources[src_id], type); } -int amdgpu_cgs_set_clockgating_state(void *cgs_device, +int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_clockgating_state state) { @@ -633,7 +633,7 @@ int amdgpu_cgs_set_clockgating_state(void *cgs_device, return r; } -int amdgpu_cgs_set_powergating_state(void *cgs_device, +int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_powergating_state state) { @@ -655,7 +655,7 @@ int amdgpu_cgs_set_powergating_state(void *cgs_device, } -static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type) +static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) { CGS_FUNC_ADEV; enum AMDGPU_UCODE_ID result = AMDGPU_UCODE_ID_MAXIMUM; @@ -695,7 +695,7 @@ static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type) return result; } -static int amdgpu_cgs_get_firmware_info(void *cgs_device, +static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) { @@ -774,7 +774,7 @@ static int amdgpu_cgs_get_firmware_info(void *cgs_device, return 0; } -static int amdgpu_cgs_query_system_info(void *cgs_device, +static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device, struct cgs_system_info *sys_info) { CGS_FUNC_ADEV; @@ -808,7 +808,7 @@ static int amdgpu_cgs_query_system_info(void *cgs_device, return 0; } -static int 
amdgpu_cgs_get_active_displays_info(void *cgs_device, +static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, struct cgs_display_info *info) { CGS_FUNC_ADEV; @@ -851,7 +851,7 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device, } -static int amdgpu_cgs_notify_dpm_enabled(void *cgs_device, bool enabled) +static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled) { CGS_FUNC_ADEV; @@ -867,7 +867,7 @@ static int amdgpu_cgs_notify_dpm_enabled(void *cgs_device, bool enabled) */ #if defined(CONFIG_ACPI) -static int amdgpu_cgs_acpi_eval_object(void *cgs_device, +static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, struct cgs_acpi_method_info *info) { CGS_FUNC_ADEV; @@ -1030,14 +1030,14 @@ error: return result; } #else -static int amdgpu_cgs_acpi_eval_object(void *cgs_device, +static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, struct cgs_acpi_method_info *info) { return -EIO; } #endif -int amdgpu_cgs_call_acpi_method(void *cgs_device, +int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device, uint32_t acpi_method, uint32_t acpi_function, void *pinput, void *poutput, @@ -1121,7 +1121,7 @@ static const struct cgs_os_ops amdgpu_cgs_os_ops = { amdgpu_cgs_irq_put }; -void *amdgpu_cgs_create_device(struct amdgpu_device *adev) +struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) { struct amdgpu_cgs_device *cgs_device = kmalloc(sizeof(*cgs_device), GFP_KERNEL); @@ -1135,10 +1135,10 @@ void *amdgpu_cgs_create_device(struct amdgpu_device *adev) cgs_device->base.os_ops = &amdgpu_cgs_os_ops; cgs_device->adev = adev; - return cgs_device; + return (struct cgs_device *)cgs_device; } -void amdgpu_cgs_destroy_device(void *cgs_device) +void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device) { kfree(cgs_device); } diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h index ab84d4947247..ca1e22917e84 100644 --- a/drivers/gpu/drm/amd/include/cgs_common.h +++ b/drivers/gpu/drm/amd/include/cgs_common.h @@ -26,6 +26,8 @@ #include "amd_shared.h" +struct cgs_device; + /** * enum cgs_gpu_mem_type - GPU memory types */ @@ -223,7 +225,7 @@ struct cgs_acpi_method_info { * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_gpu_mem_info_t)(void *cgs_device, enum cgs_gpu_mem_type type, +typedef int (*cgs_gpu_mem_info_t)(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t *mc_start, uint64_t *mc_size, uint64_t *mem_size); @@ -239,7 +241,7 @@ typedef int (*cgs_gpu_mem_info_t)(void *cgs_device, enum cgs_gpu_mem_type type, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_gmap_kmem_t)(void *cgs_device, void *kmem, uint64_t size, +typedef int (*cgs_gmap_kmem_t)(struct cgs_device *cgs_device, void *kmem, uint64_t size, uint64_t min_offset, uint64_t max_offset, cgs_handle_t *kmem_handle, uint64_t *mcaddr); @@ -250,7 +252,7 @@ typedef int (*cgs_gmap_kmem_t)(void *cgs_device, void *kmem, uint64_t size, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_gunmap_kmem_t)(void *cgs_device, cgs_handle_t kmem_handle); +typedef int (*cgs_gunmap_kmem_t)(struct cgs_device *cgs_device, cgs_handle_t kmem_handle); /** * cgs_alloc_gpu_mem() - Allocate GPU memory @@ -279,7 +281,7 @@ typedef int (*cgs_gunmap_kmem_t)(void *cgs_device, cgs_handle_t kmem_handle); * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_alloc_gpu_mem_t)(void *cgs_device, enum cgs_gpu_mem_type type, +typedef int (*cgs_alloc_gpu_mem_t)(struct 
cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t size, uint64_t align, uint64_t min_offset, uint64_t max_offset, cgs_handle_t *handle); @@ -291,7 +293,7 @@ typedef int (*cgs_alloc_gpu_mem_t)(void *cgs_device, enum cgs_gpu_mem_type type, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_free_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); +typedef int (*cgs_free_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle); /** * cgs_gmap_gpu_mem() - GPU-map GPU memory @@ -303,7 +305,7 @@ typedef int (*cgs_free_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_gmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle, +typedef int (*cgs_gmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle, uint64_t *mcaddr); /** @@ -315,7 +317,7 @@ typedef int (*cgs_gmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_gunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); +typedef int (*cgs_gunmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle); /** * cgs_kmap_gpu_mem() - Kernel-map GPU memory @@ -326,7 +328,7 @@ typedef int (*cgs_gunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_kmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle, +typedef int (*cgs_kmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle, void **map); /** @@ -336,7 +338,7 @@ typedef int (*cgs_kmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_kunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); +typedef int (*cgs_kunmap_gpu_mem_t)(struct cgs_device *cgs_device, cgs_handle_t handle); /** * cgs_read_register() - Read an MMIO register @@ -345,7 +347,7 @@ typedef int (*cgs_kunmap_gpu_mem_t)(void *cgs_device, cgs_handle_t handle); * * Return: register value */ -typedef uint32_t (*cgs_read_register_t)(void *cgs_device, unsigned offset); +typedef uint32_t (*cgs_read_register_t)(struct cgs_device *cgs_device, unsigned offset); /** * cgs_write_register() - Write an MMIO register @@ -353,7 +355,7 @@ typedef uint32_t (*cgs_read_register_t)(void *cgs_device, unsigned offset); * @offset: register offset * @value: register value */ -typedef void (*cgs_write_register_t)(void *cgs_device, unsigned offset, +typedef void (*cgs_write_register_t)(struct cgs_device *cgs_device, unsigned offset, uint32_t value); /** @@ -363,7 +365,7 @@ typedef void (*cgs_write_register_t)(void *cgs_device, unsigned offset, * * Return: register value */ -typedef uint32_t (*cgs_read_ind_register_t)(void *cgs_device, enum cgs_ind_reg space, +typedef uint32_t (*cgs_read_ind_register_t)(struct cgs_device *cgs_device, enum cgs_ind_reg space, unsigned index); /** @@ -372,7 +374,7 @@ typedef uint32_t (*cgs_read_ind_register_t)(void *cgs_device, enum cgs_ind_reg s * @offset: register offset * @value: register value */ -typedef void (*cgs_write_ind_register_t)(void *cgs_device, enum cgs_ind_reg space, +typedef void (*cgs_write_ind_register_t)(struct cgs_device *cgs_device, enum cgs_ind_reg space, unsigned index, uint32_t value); /** @@ -382,7 +384,7 @@ typedef void (*cgs_write_ind_register_t)(void *cgs_device, enum cgs_ind_reg spac * * Return: Value read */ -typedef uint8_t (*cgs_read_pci_config_byte_t)(void *cgs_device, unsigned addr); +typedef uint8_t (*cgs_read_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr); /** * 
cgs_read_pci_config_word() - Read word from PCI configuration space @@ -391,7 +393,7 @@ typedef uint8_t (*cgs_read_pci_config_byte_t)(void *cgs_device, unsigned addr); * * Return: Value read */ -typedef uint16_t (*cgs_read_pci_config_word_t)(void *cgs_device, unsigned addr); +typedef uint16_t (*cgs_read_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr); /** * cgs_read_pci_config_dword() - Read dword from PCI configuration space @@ -400,7 +402,7 @@ typedef uint16_t (*cgs_read_pci_config_word_t)(void *cgs_device, unsigned addr); * * Return: Value read */ -typedef uint32_t (*cgs_read_pci_config_dword_t)(void *cgs_device, +typedef uint32_t (*cgs_read_pci_config_dword_t)(struct cgs_device *cgs_device, unsigned addr); /** @@ -409,7 +411,7 @@ typedef uint32_t (*cgs_read_pci_config_dword_t)(void *cgs_device, * @addr: address * @value: value to write */ -typedef void (*cgs_write_pci_config_byte_t)(void *cgs_device, unsigned addr, +typedef void (*cgs_write_pci_config_byte_t)(struct cgs_device *cgs_device, unsigned addr, uint8_t value); /** @@ -418,7 +420,7 @@ typedef void (*cgs_write_pci_config_byte_t)(void *cgs_device, unsigned addr, * @addr: address, must be word-aligned * @value: value to write */ -typedef void (*cgs_write_pci_config_word_t)(void *cgs_device, unsigned addr, +typedef void (*cgs_write_pci_config_word_t)(struct cgs_device *cgs_device, unsigned addr, uint16_t value); /** @@ -427,7 +429,7 @@ typedef void (*cgs_write_pci_config_word_t)(void *cgs_device, unsigned addr, * @addr: address, must be dword-aligned * @value: value to write */ -typedef void (*cgs_write_pci_config_dword_t)(void *cgs_device, unsigned addr, +typedef void (*cgs_write_pci_config_dword_t)(struct cgs_device *cgs_device, unsigned addr, uint32_t value); @@ -441,7 +443,7 @@ typedef void (*cgs_write_pci_config_dword_t)(void *cgs_device, unsigned addr, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_get_pci_resource_t)(void *cgs_device, +typedef int (*cgs_get_pci_resource_t)(struct cgs_device *cgs_device, enum cgs_resource_type resource_type, uint64_t size, uint64_t offset, @@ -458,7 +460,7 @@ typedef int (*cgs_get_pci_resource_t)(void *cgs_device, * Return: Pointer to start of the table, or NULL on failure */ typedef const void *(*cgs_atom_get_data_table_t)( - void *cgs_device, unsigned table, + struct cgs_device *cgs_device, unsigned table, uint16_t *size, uint8_t *frev, uint8_t *crev); /** @@ -470,7 +472,7 @@ typedef const void *(*cgs_atom_get_data_table_t)( * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_atom_get_cmd_table_revs_t)(void *cgs_device, unsigned table, +typedef int (*cgs_atom_get_cmd_table_revs_t)(struct cgs_device *cgs_device, unsigned table, uint8_t *frev, uint8_t *crev); /** @@ -481,7 +483,7 @@ typedef int (*cgs_atom_get_cmd_table_revs_t)(void *cgs_device, unsigned table, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_atom_exec_cmd_table_t)(void *cgs_device, +typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device, unsigned table, void *args); /** @@ -491,7 +493,7 @@ typedef int (*cgs_atom_exec_cmd_table_t)(void *cgs_device, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_create_pm_request_t)(void *cgs_device, cgs_handle_t *request); +typedef int (*cgs_create_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t *request); /** * cgs_destroy_pm_request() - Destroy a power management request @@ -500,7 +502,7 @@ typedef int (*cgs_create_pm_request_t)(void *cgs_device, cgs_handle_t *request); * * Return: 0 
on success, -errno otherwise */ -typedef int (*cgs_destroy_pm_request_t)(void *cgs_device, cgs_handle_t request); +typedef int (*cgs_destroy_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request); /** * cgs_set_pm_request() - Activate or deactiveate a PM request @@ -516,7 +518,7 @@ typedef int (*cgs_destroy_pm_request_t)(void *cgs_device, cgs_handle_t request); * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_set_pm_request_t)(void *cgs_device, cgs_handle_t request, +typedef int (*cgs_set_pm_request_t)(struct cgs_device *cgs_device, cgs_handle_t request, int active); /** @@ -528,7 +530,7 @@ typedef int (*cgs_set_pm_request_t)(void *cgs_device, cgs_handle_t request, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_pm_request_clock_t)(void *cgs_device, cgs_handle_t request, +typedef int (*cgs_pm_request_clock_t)(struct cgs_device *cgs_device, cgs_handle_t request, enum cgs_clock clock, unsigned freq); /** @@ -540,7 +542,7 @@ typedef int (*cgs_pm_request_clock_t)(void *cgs_device, cgs_handle_t request, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_pm_request_engine_t)(void *cgs_device, cgs_handle_t request, +typedef int (*cgs_pm_request_engine_t)(struct cgs_device *cgs_device, cgs_handle_t request, enum cgs_engine engine, int powered); /** @@ -551,7 +553,7 @@ typedef int (*cgs_pm_request_engine_t)(void *cgs_device, cgs_handle_t request, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_pm_query_clock_limits_t)(void *cgs_device, +typedef int (*cgs_pm_query_clock_limits_t)(struct cgs_device *cgs_device, enum cgs_clock clock, struct cgs_clock_limits *limits); @@ -563,7 +565,7 @@ typedef int (*cgs_pm_query_clock_limits_t)(void *cgs_device, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_set_camera_voltages_t)(void *cgs_device, uint32_t mask, +typedef int (*cgs_set_camera_voltages_t)(struct cgs_device *cgs_device, uint32_t mask, const uint32_t *voltages); /** * cgs_get_firmware_info - Get the firmware information from core driver @@ -573,25 +575,25 @@ typedef int (*cgs_set_camera_voltages_t)(void *cgs_device, uint32_t mask, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_get_firmware_info)(void *cgs_device, +typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info); -typedef int(*cgs_set_powergating_state)(void *cgs_device, +typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_powergating_state state); -typedef int(*cgs_set_clockgating_state)(void *cgs_device, +typedef int(*cgs_set_clockgating_state)(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_clockgating_state state); typedef int(*cgs_get_active_displays_info)( - void *cgs_device, + struct cgs_device *cgs_device, struct cgs_display_info *info); -typedef int (*cgs_notify_dpm_enabled)(void *cgs_device, bool enabled); +typedef int (*cgs_notify_dpm_enabled)(struct cgs_device *cgs_device, bool enabled); -typedef int (*cgs_call_acpi_method)(void *cgs_device, +typedef int (*cgs_call_acpi_method)(struct cgs_device *cgs_device, uint32_t acpi_method, uint32_t acpi_function, void *pinput, void *poutput, @@ -599,7 +601,7 @@ typedef int (*cgs_call_acpi_method)(void *cgs_device, uint32_t input_size, uint32_t output_size); -typedef int (*cgs_query_system_info)(void *cgs_device, +typedef int (*cgs_query_system_info)(struct cgs_device *cgs_device, struct cgs_system_info *sys_info); struct cgs_ops 
{ diff --git a/drivers/gpu/drm/amd/include/cgs_linux.h b/drivers/gpu/drm/amd/include/cgs_linux.h index 3b47ae313e36..ca4f6007a9b3 100644 --- a/drivers/gpu/drm/amd/include/cgs_linux.h +++ b/drivers/gpu/drm/amd/include/cgs_linux.h @@ -66,7 +66,7 @@ typedef int (*cgs_irq_handler_func_t)(void *private_data, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_add_irq_source_t)(void *cgs_device, unsigned src_id, +typedef int (*cgs_add_irq_source_t)(struct cgs_device *cgs_device, unsigned src_id, unsigned num_types, cgs_irq_source_set_func_t set, cgs_irq_handler_func_t handler, @@ -83,7 +83,7 @@ typedef int (*cgs_add_irq_source_t)(void *cgs_device, unsigned src_id, * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_irq_get_t)(void *cgs_device, unsigned src_id, unsigned type); +typedef int (*cgs_irq_get_t)(struct cgs_device *cgs_device, unsigned src_id, unsigned type); /** * cgs_irq_put() - Indicate IRQ source is no longer needed @@ -98,7 +98,7 @@ typedef int (*cgs_irq_get_t)(void *cgs_device, unsigned src_id, unsigned type); * * Return: 0 on success, -errno otherwise */ -typedef int (*cgs_irq_put_t)(void *cgs_device, unsigned src_id, unsigned type); +typedef int (*cgs_irq_put_t)(struct cgs_device *cgs_device, unsigned src_id, unsigned type); struct cgs_os_ops { /* IRQ handling */ -- cgit v1.2.3 From 79e5412c58ef7bfdc54d1bb85b63c6c8e6d2c3f7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 8 Apr 2016 15:45:13 -0400 Subject: drm/amdgpu: add a new set of rlc function pointers Different asics tend to have different ways to interact with the RLC. This just covers enter/exit of safe mode for updating CG and PG state, but could be extended to cover other RLC operations in the future if necessary. Acked-by: Tom St Denis Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a7a53ac5f413..4a2b9a37271d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1037,6 +1037,11 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list); */ #include "clearstate_defs.h" +struct amdgpu_rlc_funcs { + void (*enter_safe_mode)(struct amdgpu_device *adev); + void (*exit_safe_mode)(struct amdgpu_device *adev); +}; + struct amdgpu_rlc { /* for power gating */ struct amdgpu_bo *save_restore_obj; @@ -1055,6 +1060,10 @@ struct amdgpu_rlc { uint64_t cp_table_gpu_addr; volatile uint32_t *cp_table_ptr; u32 cp_table_size; + + /* safe mode for updating CG/PG state */ + bool in_safe_mode; + const struct amdgpu_rlc_funcs *funcs; }; struct amdgpu_mec { -- cgit v1.2.3 From 68befebee4927036b0f350825a20d46104f375b5 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 14 Apr 2016 13:42:32 +0800 Subject: drm/amdgpu: only update last_flush when vmid doesn't have other new owner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4a2b9a37271d..e4e781658cf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ 
-881,6 +881,7 @@ struct amdgpu_vm_id { struct fence *first; struct amdgpu_sync active; struct fence *last_flush; + struct amdgpu_ring *last_user; atomic_long_t owner; uint64_t pd_gpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1425aab31233..2aff938f94c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -260,6 +260,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id->pd_gpu_addr = pd_addr; list_move_tail(&id->list, &adev->vm_manager.ids_lru); + id->last_user = ring; atomic_long_set(&id->owner, (long)vm); vm->ids[ring->idx] = id; @@ -307,13 +308,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); - r = amdgpu_fence_emit(ring, &fence); - if (r) - return r; mutex_lock(&adev->vm_manager.lock); - fence_put(id->last_flush); - id->last_flush = fence; + if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) { + r = amdgpu_fence_emit(ring, &fence); + if (r) { + mutex_unlock(&adev->vm_manager.lock); + return r; + } + fence_put(id->last_flush); + id->last_flush = fence; + } mutex_unlock(&adev->vm_manager.lock); } -- cgit v1.2.3 From 29b3259a3ab98f4823f3227de1ae695f3f7bc751 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 15 Apr 2016 17:19:16 +0200 Subject: drm/amdgpu: group BOs by log2 of the size on the LRU v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to have small BOs on the LRU before big ones. v2: fix of by one and list corruption bug Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 +++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e4e781658cf1..5399f3a2453f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -393,6 +393,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); /* * TTM. 
*/ + +#define AMDGPU_TTM_LRU_SIZE 20 + +struct amdgpu_mman_lru { + struct list_head *lru[TTM_NUM_MEM_TYPES]; + struct list_head *swap_lru; +}; + struct amdgpu_mman { struct ttm_bo_global_ref bo_global_ref; struct drm_global_reference mem_global_ref; @@ -410,6 +418,9 @@ struct amdgpu_mman { struct amdgpu_ring *buffer_funcs_ring; /* Scheduler entity for buffer moves */ struct amd_sched_entity entity; + + /* custom LRU management */ + struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE]; }; int amdgpu_copy_buffer(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d580ccccd2b8..a0065765ed2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -909,6 +909,52 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, return flags; } +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo) +{ + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev); + unsigned i, j; + + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i]; + + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) + if (&tbo->lru == lru->lru[j]) + lru->lru[j] = tbo->lru.prev; + + if (&tbo->swap == lru->swap_lru) + lru->swap_lru = tbo->swap.prev; + } +} + +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo) +{ + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev); + unsigned log2_size = min(ilog2(tbo->num_pages), + AMDGPU_TTM_LRU_SIZE - 1); + + return &adev->mman.log2_size[log2_size]; +} + +static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo) +{ + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo); + struct list_head *res = lru->lru[tbo->mem.mem_type]; + + lru->lru[tbo->mem.mem_type] = &tbo->lru; + + return res; +} + +static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo) +{ + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo); + struct list_head *res = lru->swap_lru; + + lru->swap_lru = &tbo->swap; + + return res; +} + static struct ttm_bo_driver amdgpu_bo_driver = { .ttm_tt_create = &amdgpu_ttm_tt_create, .ttm_tt_populate = &amdgpu_ttm_tt_populate, @@ -922,12 +968,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, - .lru_tail = &ttm_bo_default_lru_tail, - .swap_lru_tail = &ttm_bo_default_swap_lru_tail, + .lru_removal = &amdgpu_ttm_lru_removal, + .lru_tail = &amdgpu_ttm_lru_tail, + .swap_lru_tail = &amdgpu_ttm_swap_lru_tail, }; int amdgpu_ttm_init(struct amdgpu_device *adev) { + unsigned i, j; int r; r = amdgpu_ttm_global_init(adev); @@ -945,6 +993,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; } + + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i]; + + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) + lru->lru[j] = &adev->mman.bdev.man[j].lru; + lru->swap_lru = &adev->mman.bdev.glob->swap_lru; + } + adev->mman.initialized = true; r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, adev->mc.real_vram_size >> PAGE_SHIFT); -- cgit v1.2.3 From 2b6cd9779707f1f7974205320a058ef80aa2cdd4 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 14 Apr 2016 17:26:07 -0400 Subject: drm/amd/amdgpu: add power gating initialization support for GFX8.0 Signed-off-by: Eric Huang Reviewed-by: Alex Deucher 
Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 14 ++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 353 +++++++++++++++++++++++++++++++++- 2 files changed, 364 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5399f3a2453f..6cc174f9d583 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1076,6 +1076,20 @@ struct amdgpu_rlc { /* safe mode for updating CG/PG state */ bool in_safe_mode; const struct amdgpu_rlc_funcs *funcs; + + /* for firmware data */ + u32 save_and_restore_offset; + u32 clear_state_descriptor_offset; + u32 avail_scratch_ram_locations; + u32 reg_restore_list_size; + u32 reg_list_format_start; + u32 reg_list_format_separate_start; + u32 starting_offsets_start; + u32 reg_list_format_size_bytes; + u32 reg_list_size_bytes; + + u32 *register_list_format; + u32 *register_restore; }; struct amdgpu_mec { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ce1054d0f391..89d7b1576a66 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -86,6 +86,8 @@ enum { BPM_REG_FGCG_MAX }; +#define RLC_FormatDirectRegListLength 14 + MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); @@ -633,6 +635,7 @@ static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); +static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -838,6 +841,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL, i; DRM_DEBUG("\n"); @@ -905,9 +910,49 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + 
adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } + + tmp = (unsigned int *)((uint64_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uint64_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -1008,6 +1053,148 @@ out: return err; } +static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = + cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } else { + return; + } + } + } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - + PACKET3_SET_CONTEXT_REG_START); + switch (adev->asic_type) { + case CHIP_TONGA: + buffer[count++] = cpu_to_le32(0x16000012); + buffer[count++] = cpu_to_le32(0x0000002A); + break; + case CHIP_FIJI: + buffer[count++] = cpu_to_le32(0x3a00161a); + buffer[count++] = cpu_to_le32(0x0000002e); + break; + case CHIP_TOPAZ: + case CHIP_CARRIZO: + buffer[count++] = cpu_to_le32(0x00000002); + buffer[count++] = cpu_to_le32(0x00000000); + break; + case CHIP_STONEY: + buffer[count++] = cpu_to_le32(0x00000000); + buffer[count++] = cpu_to_le32(0x00000000); + break; + default: + buffer[count++] = cpu_to_le32(0x00000000); + buffer[count++] = cpu_to_le32(0x00000000); + break; + } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + +static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) +{ + int r; + + /* clear state block */ + if (adev->gfx.rlc.clear_state_obj) { + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); + 
adev->gfx.rlc.clear_state_obj = NULL; + } +} + +static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) +{ + volatile u32 *dst_ptr; + u32 dws; + const struct cs_section_def *cs_data; + int r; + + adev->gfx.rlc.cs_data = vi_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* clear state block */ + adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); + + if (adev->gfx.rlc.clear_state_obj == NULL) { + r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, + NULL, NULL, + &adev->gfx.rlc.clear_state_obj); + if (r) { + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + } + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) { + gfx_v8_0_rlc_fini(adev); + return r; + } + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_gpu_addr); + if (r) { + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + + r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); + if (r) { + dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + /* set up the cs buffer */ + dst_ptr = adev->gfx.rlc.cs_ptr; + gfx_v8_0_get_csb_buffer(adev, dst_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } + + return 0; +} + static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { int r; @@ -1681,6 +1868,12 @@ static int gfx_v8_0_sw_init(void *handle) return r; } + r = gfx_v8_0_rlc_init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + r = gfx_v8_0_mec_init(adev); if (r) { DRM_ERROR("Failed to init MEC BOs!\n"); @@ -1780,6 +1973,10 @@ static int gfx_v8_0_sw_fini(void *handle) gfx_v8_0_mec_fini(adev); + gfx_v8_0_rlc_fini(adev); + + kfree(adev->gfx.rlc.register_list_format); + return 0; } @@ -3322,6 +3519,154 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32(mmCP_INT_CNTL_RING0, tmp); } +static void gfx_v8_0_init_csb(struct amdgpu_device *adev) +{ + /* csib */ + WREG32(mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32(mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32(mmRLC_CSIB_LENGTH, + adev->gfx.rlc.clear_state_size); +} + +static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, + int ind_offset, + int list_size, + int *unique_indices, + int *indices_count, + int max_indices, + int *ind_start_offsets, + int *offset_count, + int max_offset) +{ + int indices; + bool new_entry = true; + + for (; ind_offset < list_size; ind_offset++) { + + if (new_entry) { + new_entry = false; + ind_start_offsets[*offset_count] = ind_offset; + *offset_count = *offset_count + 1; + BUG_ON(*offset_count >= max_offset); + } + + if (register_list_format[ind_offset] == 0xFFFFFFFF) { + new_entry = true; + continue; + } + + ind_offset += 2; + + /* look for the matching indice */ + for (indices = 0; + indices < *indices_count; + indices++) { + if (unique_indices[indices] == + register_list_format[ind_offset]) + break; + } + + if (indices >= *indices_count) { + unique_indices[*indices_count] = + register_list_format[ind_offset]; + indices = *indices_count; + *indices_count = *indices_count + 1; + BUG_ON(*indices_count >= max_indices); + } + + register_list_format[ind_offset] = 
indices; + } +} + +static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) +{ + int i, temp, data; + int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; + int indices_count = 0; + int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int offset_count = 0; + + int list_size; + unsigned int *register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + if (register_list_format == NULL) + return -ENOMEM; + memcpy(register_list_format, adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes); + + gfx_v8_0_parse_ind_reg_list(register_list_format, + RLC_FormatDirectRegListLength, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indices, + &indices_count, + sizeof(unique_indices) / sizeof(int), + indirect_start_offsets, + &offset_count, + sizeof(indirect_start_offsets)/sizeof(int)); + + /* save and restore list */ + temp = RREG32(mmRLC_SRM_CNTL); + temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + WREG32(mmRLC_SRM_CNTL, temp); + + WREG32(mmRLC_SRM_ARAM_ADDR, 0); + for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) + WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); + + /* indirect list */ + WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); + for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); + + list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; + list_size = list_size >> 1; + WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); + WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); + + /* starting offsets starts */ + WREG32(mmRLC_GPM_SCRATCH_ADDR, + adev->gfx.rlc.starting_offsets_start); + for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + WREG32(mmRLC_GPM_SCRATCH_DATA, + indirect_start_offsets[i]); + + /* unique indices */ + temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; + data = mmRLC_SRM_INDEX_CNTL_DATA_0; + for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { + amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); + amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); + } + kfree(register_list_format); + + return 0; +} + +static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32(mmRLC_SRM_CNTL); + data |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(mmRLC_SRM_CNTL, data); +} + +static void gfx_v8_0_init_pg(struct amdgpu_device *adev) +{ + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GDS | + AMD_PG_SUPPORT_RLC_SMU_HS)) { + gfx_v8_0_init_csb(adev); + gfx_v8_0_init_save_restore_list(adev); + gfx_v8_0_enable_save_restore_machine(adev); + } +} + void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32(mmRLC_CNTL); @@ -3401,6 +3746,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) gfx_v8_0_rlc_reset(adev); + gfx_v8_0_init_pg(adev); + if (!adev->pp_enabled) { if (!adev->firmware.smu_load) { /* legacy rlc firmware loading */ -- cgit v1.2.3 From adcec288bc066065310a6668755c673264ee12d9 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 15 Apr 2016 13:08:44 -0400 Subject: drm/amd/amdgpu: Add debugfs entries for smc/didt/pcie This adds 3 new files that can be read/written to access indirect GPU registers. 
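For illustration only (not part of the patch): each of the new files treats the file offset as a byte offset selecting a register and rejects transfers that are not 4-byte aligned multiples of 4 bytes, so it can be exercised from user space with plain pread()/pwrite(). In the sketch below the debugfs mount point, the DRI minor number (0) and the register offset are assumptions; the file names (amdgpu_regs_smc, amdgpu_regs_didt, amdgpu_regs_pcie) are the ones this patch creates.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* assumes debugfs is mounted at /sys/kernel/debug and the GPU is DRI minor 0 */
	const char *path = "/sys/kernel/debug/dri/0/amdgpu_regs_smc";
	off_t offset = 0x0;	/* hypothetical register offset, must be 4-byte aligned */
	uint32_t value;
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (pread(fd, &value, sizeof(value), offset) != sizeof(value)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("reg @ 0x%llx = 0x%08x\n", (unsigned long long)offset, value);
	close(fd);
	return 0;
}

Writes go through the matching pwrite() path into the *_write handlers in the same 4-byte units.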
Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 230 ++++++++++++++++++++++++++++- 2 files changed, 223 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6cc174f9d583..1012bd3f3482 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1953,7 +1953,7 @@ struct amdgpu_device { struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; unsigned debugfs_count; #if defined(CONFIG_DEBUG_FS) - struct dentry *debugfs_regs; + struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; #endif struct amdgpu_atif atif; struct amdgpu_atcs atcs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 14e832fe83db..8f037e5e08fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2134,32 +2134,246 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, return result; } +static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_PCIE(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_PCIE(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_DIDT(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_DIDT(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_SMC(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t 
amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_SMC(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + static const struct file_operations amdgpu_debugfs_regs_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_read, .write = amdgpu_debugfs_regs_write, .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_regs_didt_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_didt_read, + .write = amdgpu_debugfs_regs_didt_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_pcie_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_pcie_read, + .write = amdgpu_debugfs_regs_pcie_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_smc_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_smc_read, + .write = amdgpu_debugfs_regs_smc_write, + .llseek = default_llseek +}; + +static const struct file_operations *debugfs_regs[] = { + &amdgpu_debugfs_regs_fops, + &amdgpu_debugfs_regs_didt_fops, + &amdgpu_debugfs_regs_pcie_fops, + &amdgpu_debugfs_regs_smc_fops, +}; + +static const char *debugfs_regs_names[] = { + "amdgpu_regs", + "amdgpu_regs_didt", + "amdgpu_regs_pcie", + "amdgpu_regs_smc", +}; static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { struct drm_minor *minor = adev->ddev->primary; struct dentry *ent, *root = minor->debugfs_root; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { + ent = debugfs_create_file(debugfs_regs_names[i], + S_IFREG | S_IRUGO, root, + adev, debugfs_regs[i]); + if (IS_ERR(ent)) { + for (j = 0; j < i; j++) { + debugfs_remove(adev->debugfs_regs[i]); + adev->debugfs_regs[i] = NULL; + } + return PTR_ERR(ent); + } - ent = debugfs_create_file("amdgpu_regs", S_IFREG | S_IRUGO, root, - adev, &amdgpu_debugfs_regs_fops); - if (IS_ERR(ent)) - return PTR_ERR(ent); - i_size_write(ent->d_inode, adev->rmmio_size); - adev->debugfs_regs = ent; + if (!i) + i_size_write(ent->d_inode, adev->rmmio_size); + adev->debugfs_regs[i] = ent; + } return 0; } static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { - debugfs_remove(adev->debugfs_regs); - adev->debugfs_regs = NULL; + unsigned i; + + for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { + if (adev->debugfs_regs[i]) { + debugfs_remove(adev->debugfs_regs[i]); + adev->debugfs_regs[i] = NULL; + } + } } int amdgpu_debugfs_init(struct drm_minor *minor) -- cgit v1.2.3 From 031e2983e8e385b9c99367586decabf6323ae049 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 25 Apr 2016 10:19:13 +0800 Subject: drm/amdgpu: add client id for every vm This adds a unique id for each vm client so we can properly track them. 
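As a rough user-space model of the idea (an illustrative sketch, not kernel code): every VM gets a unique 64-bit id from a global atomic counter at creation time, in the spirit of atomic64_inc_return(), and a shared slot can later record which client used it last so that a reuse check reduces to an id comparison. All names below (vm, vmid_slot, client_counter) are invented for the example.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t client_counter;	/* zero-initialized */

struct vm { uint64_t client_id; };
struct vmid_slot { _Atomic uint64_t owner; };

static void vm_init(struct vm *vm)
{
	/* hand out a fresh id per VM, like atomic64_inc_return() on the counter */
	vm->client_id = atomic_fetch_add(&client_counter, 1) + 1;
}

static int slot_owned_by(struct vmid_slot *slot, const struct vm *vm)
{
	return atomic_load(&slot->owner) == vm->client_id;
}

int main(void)
{
	struct vm a, b;
	struct vmid_slot slot;

	atomic_init(&slot.owner, 0);	/* 0: no owner recorded yet */
	vm_init(&a);
	vm_init(&b);
	atomic_store(&slot.owner, a.client_id);
	printf("a=%llu b=%llu owned_by_a=%d owned_by_b=%d\n",
	       (unsigned long long)a.client_id, (unsigned long long)b.client_id,
	       slot_owned_by(&slot, &a), slot_owned_by(&slot, &b));
	return 0;
}

Later patches in this series compare id->owner against vm->client_id in exactly this way when deciding whether a VMID can be reused.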
Signed-off-by: Chunming Zhou Reviewed-by: Alex Deucher Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 113fd4bf9b64..e72cf4518c30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -363,6 +363,7 @@ struct amdgpu_fence_driver { /* some special values for the owner field */ #define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) #define AMDGPU_FENCE_OWNER_VM ((void*)1ul) +#define AMDGPU_CLIENT_ID_RESERVED 2 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) @@ -885,6 +886,9 @@ struct amdgpu_vm { /* Scheduler entity for page table updates */ struct amd_sched_entity entity; + + /* client id */ + u64 client_id; }; struct amdgpu_vm_id { @@ -924,6 +928,8 @@ struct amdgpu_vm_manager { struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; unsigned vm_pte_num_rings; atomic_t vm_pte_next_ring; + /* client id counter */ + atomic64_t client_counter; }; void amdgpu_vm_manager_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e06d0661549f..275378c46b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1386,6 +1386,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) vm->ids[i] = NULL; vm->va = RB_ROOT; + vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->cleared); @@ -1514,6 +1515,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) } atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); + atomic64_set(&adev->vm_manager.client_counter, AMDGPU_CLIENT_ID_RESERVED); } /** -- cgit v1.2.3 From c5637837ba5d5b5e962e73f5a1a7c5456fa85a68 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 19 Apr 2016 20:11:32 +0800 Subject: drm/amdgpu: keep vm in job instead of ib (v2) ib.vm is a legacy way to get vm, after scheduler implemented vm should be get from job, and all ibs from one job share the same vm, no need to keep ib.vm just move vm field to job. this patch as well add job as paramter to ib_schedule so it can get vm from job->vm. v2: agd: sqaush in: drm/amdgpu: check if ring emit_vm_flush exists in vm flush No vm flush on engines that don't support VM. 
bug: https://bugs.freedesktop.org/show_bug.cgi?id=95195 Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 ++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- 12 files changed, 25 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e72cf4518c30..959008ad65a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -740,7 +740,6 @@ struct amdgpu_ib { uint64_t gpu_addr; uint32_t *ptr; struct amdgpu_user_fence *user; - struct amdgpu_vm *vm; unsigned vm_id; uint64_t vm_pd_addr; struct amdgpu_ctx *ctx; @@ -763,7 +762,7 @@ enum amdgpu_ring_type { extern const struct amd_sched_backend_ops amdgpu_sched_ops; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job); + struct amdgpu_job **job, struct amdgpu_vm *vm); int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, struct amdgpu_job **job); @@ -1191,7 +1190,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f); int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ib, struct fence *last_vm_update, - struct fence **f); + struct amdgpu_job *job, struct fence **f); int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); @@ -1247,6 +1246,7 @@ struct amdgpu_cs_parser { struct amdgpu_job { struct amd_sched_job base; struct amdgpu_device *adev; + struct amdgpu_vm *vm; struct amdgpu_ring *ring; struct amdgpu_sync sync; struct amdgpu_ib *ibs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2ebba295d0e4..1a065961981a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -120,6 +120,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array; @@ -214,7 +215,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } } - ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job); + ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm); if (ret) goto free_all_kdata; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 0129617a7962..0ed643036361 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -74,7 +74,6 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } - ib->vm = vm; ib->vm_id = 0; return 0; @@ -117,13 +116,13 @@ void amdgpu_ib_free(struct 
amdgpu_device *adev, struct amdgpu_ib *ib, struct fen */ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ibs, struct fence *last_vm_update, - struct fence **f) + struct amdgpu_job *job, struct fence **f) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; struct amdgpu_ctx *ctx, *old_ctx; - struct amdgpu_vm *vm; struct fence *hwf; + struct amdgpu_vm *vm = NULL; unsigned i, patch_offset = ~0; int r = 0; @@ -132,7 +131,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; ctx = ibs->ctx; - vm = ibs->vm; + if (job) /* for domain0 job like ring test, ibs->job is not assigned */ + vm = job->vm; if (!ring->ready) { dev_err(adev->dev, "couldn't schedule ib\n"); @@ -174,14 +174,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, old_ctx = ring->current_ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; - - if (ib->ctx != ctx || ib->vm != vm) { - ring->current_ctx = old_ctx; - if (ib->vm_id) - amdgpu_vm_reset_id(adev, ib->vm_id); - amdgpu_ring_undo(ring); - return -EINVAL; - } amdgpu_ring_emit_ib(ring, ib); ring->current_ctx = ctx; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4eea2a18d8bb..917c6f3bfa09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -46,7 +46,7 @@ void amdgpu_job_timeout_func(struct work_struct *work) } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job) + struct amdgpu_job **job, struct amdgpu_vm *vm) { size_t size = sizeof(struct amdgpu_job); @@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, return -ENOMEM; (*job)->adev = adev; + (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler); @@ -74,7 +75,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, { int r; - r = amdgpu_job_alloc(adev, 1, job); + r = amdgpu_job_alloc(adev, 1, job, NULL); if (r) return r; @@ -138,7 +139,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) { struct amdgpu_job *job = to_amdgpu_job(sched_job); - struct amdgpu_vm *vm = job->ibs->vm; + struct amdgpu_vm *vm = job->vm; struct fence *fence = amdgpu_sync_get_fence(&job->sync); @@ -186,7 +187,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job) trace_amdgpu_sched_run_job(job); r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, - job->sync.last_vm_update, &fence); + job->sync.last_vm_update, job, &fence); if (r) { DRM_ERROR("Error scheduling IBs (%d)\n", r); goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index abda242980ba..3f953759002f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -910,7 +910,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); job->fence = f; if (r) goto err_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 22a4d96fedb7..79ba2aae0d7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -436,7 +436,7 @@ int 
amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); job->fence = f; if (r) goto err; @@ -498,7 +498,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[i] = 0x0; if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); job->fence = f; if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2c3d9557e1a2..692d0d02b644 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -303,7 +303,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed)) amdgpu_ring_emit_pipeline_sync(ring); - if (pd_addr != AMDGPU_VM_NO_FLUSH) { + if (ring->funcs->emit_vm_flush && + pd_addr != AMDGPU_VM_NO_FLUSH) { struct fence *fence; trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index b7ed9d376001..8d69c6555e02 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -643,7 +643,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[3] = 1; ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 6686c9c3005d..03108909a275 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2136,7 +2136,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err2; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 021c17e50d51..a82945f3a5d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -800,7 +800,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err2; @@ -1551,7 +1551,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); /* shedule the ib on the ring */ - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) { DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); goto fail; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index e6d3544fda06..27ca46d16bc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -701,7 +701,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 00b43700c956..278b1fe35385 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -925,7 +925,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err1; -- cgit v1.2.3 From 4d9c514d8ccff2b4bd0db5d3e178c0c0b3f3bc77 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 3 May 2016 18:46:19 +0200 Subject: drm/amdgpu: two minor 80 char fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 ++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 7 ++++--- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 959008ad65a8..e42b0a357fb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -508,9 +508,10 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg); +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg); struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags); @@ -1187,7 +1188,8 @@ struct amdgpu_gfx { int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f); +void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, + struct fence *f); int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ib, struct fence *last_vm_update, struct amdgpu_job *job, struct fence **f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 89f33d5e8731..910556136fde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -88,7 +88,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, * * Free an IB (all asics). 
*/ -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f) +void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, + struct fence *f) { amdgpu_sa_bo_free(adev, &ib->sa_bo, f); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index be6388f73ba2..7700dc22f243 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -57,9 +57,10 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) ttm_bo_kunmap(&bo->dma_buf_vmap); } -struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg) +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg) { struct reservation_object *resv = attach->dmabuf->resv; struct amdgpu_device *adev = dev->dev_private; -- cgit v1.2.3 From 0ea54b9b6c4ebc04cc6f68246b03577a25dbd4bb Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 4 May 2016 10:20:01 +0200 Subject: drm/amdgpu: make the VMID owner always 64bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we could (in theory) run into problems on 32bit systems. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e42b0a357fb8..130c0a7c65d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -897,7 +897,7 @@ struct amdgpu_vm_id { struct amdgpu_sync active; struct fence *last_flush; struct amdgpu_ring *last_user; - atomic_long_t owner; + atomic64_t owner; uint64_t pd_gpu_addr; /* last flushed PD/PT update */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 62ce7253e917..cd578987d6c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -185,7 +185,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (!id) continue; - if (atomic_long_read(&id->owner) != vm->client_id) + if (atomic64_read(&id->owner) != vm->client_id) continue; if (pd_addr != id->pd_gpu_addr) @@ -261,7 +261,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, list_move_tail(&id->list, &adev->vm_manager.ids_lru); id->last_user = ring; - atomic_long_set(&id->owner, vm->client_id); + atomic64_set(&id->owner, vm->client_id); vm->ids[ring->idx] = id; *vm_id = id - adev->vm_manager.ids; -- cgit v1.2.3 From b1c8a81fdd346274e3c38909740eec7182ef8f8a Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 4 May 2016 10:34:03 +0200 Subject: drm/amdgpu: remove define for reserved client ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just set it to zero instead. 
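A brief note on why no reserved value is needed, with a tiny user-space sketch of the arithmetic (an assumption: ids keep coming from an increment-and-return as in amdgpu_vm_init()): if the counter starts at zero, the first id handed out is 1, so zero itself is never assigned to a VM and does not have to be carved out with a #define.

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t client_counter;	/* starts at 0, as in the patch */

static uint64_t next_client_id(void)
{
	/* increment-then-read, matching atomic64_inc_return() semantics */
	return atomic_fetch_add(&client_counter, 1) + 1;
}

int main(void)
{
	assert(next_client_id() == 1);	/* 0 is never handed out */
	assert(next_client_id() == 2);
	return 0;
}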
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 130c0a7c65d1..0ab5fcc72273 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -363,7 +363,6 @@ struct amdgpu_fence_driver { /* some special values for the owner field */ #define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) #define AMDGPU_FENCE_OWNER_VM ((void*)1ul) -#define AMDGPU_CLIENT_ID_RESERVED 2 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 67f6c2eb8282..ea708cb94862 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1504,7 +1504,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) } atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); - atomic64_set(&adev->vm_manager.client_counter, AMDGPU_CLIENT_ID_RESERVED); + atomic64_set(&adev->vm_manager.client_counter, 0); } /** -- cgit v1.2.3 From edf600dac65eecb6c8bcf21fa986db30ee21a2ac Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 3 May 2016 15:54:54 +0200 Subject: drm/amd: cleanup remaining spaces and tabs v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the result of running the following commands: find drivers/gpu/drm/amd/ -name "*.h" -exec sed -i 's/[ \t]\+$//' {} \; find drivers/gpu/drm/amd/ -name "*.c" -exec sed -i 's/[ \t]\+$//' {} \; find drivers/gpu/drm/amd/ -name "*.h" -exec sed -i 's/ \+\t/\t/' {} \; find drivers/gpu/drm/amd/ -name "*.c" -exec sed -i 's/ \+\t/\t/' {} \; v2: drop changes to DAL and internal headers Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/atom.h | 2 +- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 2 +- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 2 +- drivers/gpu/drm/amd/amdgpu/cikd.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/cz_ih.c | 2 +- drivers/gpu/drm/amd/amdgpu/cz_smumgr.h | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/vid.h | 2 +- drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c | 6 +++--- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 8 ++++---- drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c | 4 ++-- drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c | 8 ++++---- drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h | 18 +++++++++--------- drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 2 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 +- 23 files changed, 56 insertions(+), 56 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0ab5fcc72273..abe62ecaaef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -369,7 +369,7 
@@ struct amdgpu_fence_driver { struct amdgpu_user_fence { /* write-back bo */ - struct amdgpu_bo *bo; + struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; }; @@ -777,7 +777,7 @@ struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; - struct amd_gpu_scheduler sched; + struct amd_gpu_scheduler sched; spinlock_t fence_lock; struct amdgpu_bo *ring_obj; @@ -1247,7 +1247,7 @@ struct amdgpu_cs_parser { struct amdgpu_job { struct amd_sched_job base; struct amdgpu_device *adev; - struct amdgpu_vm *vm; + struct amdgpu_vm *vm; struct amdgpu_ring *ring; struct amdgpu_sync sync; struct amdgpu_ib *ibs; @@ -1701,7 +1701,7 @@ struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; - int num_instances; + int num_instances; }; /* @@ -1955,11 +1955,11 @@ struct amdgpu_device { bool shutdown; bool need_dma32; bool accel_working; - struct work_struct reset_work; + struct work_struct reset_work; struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; - unsigned debugfs_count; + unsigned debugfs_count; #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index eacd810fc09b..35d0856738ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -263,7 +263,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, for (i = 0; i < args->in.bo_number; ++i) { if (copy_from_user(&info[i], uptr, bytes)) goto error_free; - + uptr += args->in.bo_info_size; } } @@ -271,7 +271,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: r = amdgpu_bo_list_create(fpriv, &list, &handle); - if (r) + if (r) goto error_free; r = amdgpu_bo_list_set(adev, filp, list, info, @@ -281,7 +281,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, goto error_free; break; - + case AMDGPU_BO_LIST_OP_DESTROY: amdgpu_bo_list_destroy(fpriv, handle); handle = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 778330529ff5..0d44e6a41eda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -348,7 +348,7 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev) adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); - adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), + adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), AMDGPU_DOORBELL_MAX_ASSIGNMENT+1); if (adev->doorbell.num_doorbells == 0) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index c3f4e85594ff..503d54098128 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -43,7 +43,7 @@ struct amdgpu_ring; struct amdgpu_bo; struct amdgpu_gds_asic_info { - uint32_t total_size; + uint32_t total_size; uint32_t gfx_partition_size; uint32_t cs_partition_size; }; @@ -52,8 +52,8 @@ struct amdgpu_gds { struct amdgpu_gds_asic_info mem; struct amdgpu_gds_asic_info gws; struct amdgpu_gds_asic_info oa; 
- /* At present, GDS, GWS and OA resources for gfx (graphics) - * is always pre-allocated and available for graphics operation. + /* At present, GDS, GWS and OA resources for gfx (graphics) + * is always pre-allocated and available for graphics operation. * Such resource is shared between all gfx clients. * TODO: move this operation to user space * */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 81bd964d3dfc..8a253aa0b551 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -530,7 +530,7 @@ struct amdgpu_framebuffer { ((em) == ATOM_ENCODER_MODE_DP_MST)) /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ -#define USE_REAL_VBLANKSTART (1 << 30) +#define USE_REAL_VBLANKSTART (1 << 30) #define GET_DISTANCE_TO_VBLANKSTART (1 << 31) void amdgpu_link_encoder_connector(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 3f953759002f..0081cf56c87b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -45,9 +45,9 @@ /* Firmware Names */ #ifdef CONFIG_DRM_AMDGPU_CIK #define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin" -#define FIRMWARE_KABINI "radeon/kabini_uvd.bin" -#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin" -#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin" +#define FIRMWARE_KABINI "radeon/kabini_uvd.bin" +#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin" +#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin" #define FIRMWARE_MULLINS "radeon/mullins_uvd.bin" #endif #define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 79ba2aae0d7a..7b7b0f64530a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -41,9 +41,9 @@ /* Firmware Names */ #ifdef CONFIG_DRM_AMDGPU_CIK #define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin" -#define FIRMWARE_KABINI "radeon/kabini_vce.bin" -#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin" -#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin" +#define FIRMWARE_KABINI "radeon/kabini_vce.bin" +#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin" +#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin" #define FIRMWARE_MULLINS "radeon/mullins_vce.bin" #endif #define FIRMWARE_TONGA "amdgpu/tonga_vce.bin" diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index fece8f45dc7a..49daf6d723e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -92,7 +92,7 @@ #define ATOM_WS_AND_MASK 0x45 #define ATOM_WS_FB_WINDOW 0x46 #define ATOM_WS_ATTRIBUTES 0x47 -#define ATOM_WS_REGPTR 0x48 +#define ATOM_WS_REGPTR 0x48 #define ATOM_IIO_NOP 0 #define ATOM_IIO_START 1 diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 90f83b21b38c..2f247975fdd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -6363,7 +6363,7 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, } static int ci_dpm_process_interrupt(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, + struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { bool queue_thermal = false; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index f2f14fe26784..7e750a459499 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -243,7 +243,7 @@ 
static void cik_ih_decode_iv(struct amdgpu_device *adev, /* wptr/rptr are in bytes! */ u32 ring_index = adev->irq.ih.rptr >> 2; uint32_t dw[4]; - + dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 60d4493206dd..c4f6f00d62bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -190,8 +190,8 @@ # define MACRO_TILE_ASPECT(x) ((x) << 4) # define NUM_BANKS(x) ((x) << 6) -#define MSG_ENTER_RLC_SAFE_MODE 1 -#define MSG_EXIT_RLC_SAFE_MODE 0 +#define MSG_ENTER_RLC_SAFE_MODE 1 +#define MSG_EXIT_RLC_SAFE_MODE 0 /* * PM4 diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 23bd9122b15d..874b92899797 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -222,7 +222,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, /* wptr/rptr are in bytes! */ u32 ring_index = adev->irq.ih.rptr >> 2; uint32_t dw[4]; - + dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); diff --git a/drivers/gpu/drm/amd/amdgpu/cz_smumgr.h b/drivers/gpu/drm/amd/amdgpu/cz_smumgr.h index 924d355b4e2c..026342fcf0f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_smumgr.h +++ b/drivers/gpu/drm/amd/amdgpu/cz_smumgr.h @@ -77,7 +77,7 @@ struct cz_smu_private_data { uint8_t driver_buffer_length; uint8_t scratch_buffer_length; uint16_t toc_entry_used_count; - uint16_t toc_entry_initialize_index; + uint16_t toc_entry_initialize_index; uint16_t toc_entry_power_profiling_index; uint16_t toc_entry_aram; uint16_t toc_entry_ih_register_restore_task_index; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a82945f3a5d2..4ea4b4eb0bc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -603,7 +603,7 @@ static const u32 stoney_golden_settings_a11[] = mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, - mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index e1d6ae7e1629..55b35daa1ac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -40,9 +40,9 @@ #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10 -#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616 -#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617 -#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618 #define VCE_V3_0_FW_SIZE (384 * 1024) #define VCE_V3_0_STACK_SIZE (64 * 1024) diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index ace49976f7be..3bf7172ede43 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -365,7 +365,7 @@ #define VCE_CMD_IB 0x00000002 #define VCE_CMD_FENCE 0x00000003 #define VCE_CMD_TRAP 0x00000004 -#define VCE_CMD_IB_AUTO 0x00000005 +#define 
VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 55e877c4b862..d05a5e0ab87f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c @@ -465,14 +465,14 @@ static int fiji_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) table_info->vdd_dep_on_mclk; PP_ASSERT_WITH_CODE(allowed_sclk_vdd_table != NULL, - "VDD dependency on SCLK table is missing. \ + "VDD dependency on SCLK table is missing. \ This table is mandatory", return -EINVAL); PP_ASSERT_WITH_CODE(allowed_sclk_vdd_table->count >= 1, - "VDD dependency on SCLK table has to have is missing. \ + "VDD dependency on SCLK table has to have is missing. \ This table is mandatory", return -EINVAL); PP_ASSERT_WITH_CODE(allowed_mclk_vdd_table != NULL, - "VDD dependency on MCLK table is missing. \ + "VDD dependency on MCLK table is missing. \ This table is mandatory", return -EINVAL); PP_ASSERT_WITH_CODE(allowed_mclk_vdd_table->count >= 1, "VDD dependency on MCLK table has to have is missing. \ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 010199fb7126..dbdcc68b17b2 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -2900,14 +2900,14 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) table_info->vdd_dep_on_mclk; PP_ASSERT_WITH_CODE(allowed_sclk_vdd_table != NULL, - "VDD dependency on SCLK table is missing. \ + "VDD dependency on SCLK table is missing. \ This table is mandatory", return -EINVAL); PP_ASSERT_WITH_CODE(allowed_sclk_vdd_table->count >= 1, - "VDD dependency on SCLK table has to have is missing. \ + "VDD dependency on SCLK table has to have is missing. \ This table is mandatory", return -EINVAL); PP_ASSERT_WITH_CODE(allowed_mclk_vdd_table != NULL, - "VDD dependency on MCLK table is missing. \ + "VDD dependency on MCLK table is missing. \ This table is mandatory", return -EINVAL); PP_ASSERT_WITH_CODE(allowed_mclk_vdd_table->count >= 1, "VDD dependency on MCLK table has to have is missing. \ @@ -4628,7 +4628,7 @@ int polaris10_upload_mc_firmware(struct pp_hwmgr *hwmgr) data->need_long_memory_training = true; /* - * PPMCME_FirmwareDescriptorEntry *pfd = NULL; + * PPMCME_FirmwareDescriptorEntry *pfd = NULL; pfd = &tonga_mcmeFirmware; if (0 == PHM_READ_FIELD(hwmgr->device, MC_SEQ_SUP_CNTL, RUN)) polaris10_load_mc_microcode(hwmgr, pfd->dpmThreshold, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c index 8ba3ad5e7111..da9f5f1b6dc2 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c @@ -1041,10 +1041,10 @@ int atomctrl_calculate_voltage_evv_on_sclk( } /** atomctrl_get_voltage_evv_on_sclk gets voltage via call to ATOM COMMAND table. - * @param hwmgr input: pointer to hwManager + * @param hwmgr input: pointer to hwManager * @param voltage_type input: type of EVV voltage VDDC or VDDGFX * @param sclk input: in 10Khz unit. DPM state SCLK frequency - * which is define in PPTable SCLK/VDDC dependence + * which is define in PPTable SCLK/VDDC dependence * table associated with this virtual_voltage_Id * @param virtual_voltage_Id input: voltage id which match per voltage DPM state: 0xff01, 0xff02.. 
0xff08 * @param voltage output: real voltage level in unit of mv diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index 670b6288933f..d79af48ca9a5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c @@ -2683,7 +2683,7 @@ static int tonga_populate_all_memory_levels(struct pp_hwmgr *hwmgr) struct TONGA_DLL_SPEED_SETTING { uint16_t Min; /* Minimum Data Rate*/ uint16_t Max; /* Maximum Data Rate*/ - uint32_t dll_speed; /* The desired DLL_SPEED setting*/ + uint32_t dll_speed; /* The desired DLL_SPEED setting*/ }; static int tonga_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) @@ -3316,14 +3316,14 @@ static int tonga_set_private_var_based_on_pptale(struct pp_hwmgr *hwmgr) pptable_info->vdd_dep_on_mclk; PP_ASSERT_WITH_CODE(allowed_sclk_vdd_table != NULL, - "VDD dependency on SCLK table is missing. \ + "VDD dependency on SCLK table is missing. \ This table is mandatory", return -1); PP_ASSERT_WITH_CODE(allowed_sclk_vdd_table->count >= 1, - "VDD dependency on SCLK table has to have is missing. \ + "VDD dependency on SCLK table has to have is missing. \ This table is mandatory", return -1); PP_ASSERT_WITH_CODE(allowed_mclk_vdd_table != NULL, - "VDD dependency on MCLK table is missing. \ + "VDD dependency on MCLK table is missing. \ This table is mandatory", return -1); PP_ASSERT_WITH_CODE(allowed_mclk_vdd_table->count >= 1, "VDD dependency on MCLK table has to have is missing. \ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h index c6a6b4006dc1..573cd39fe78d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h @@ -74,7 +74,7 @@ struct tonga_power_state { }; struct _phw_tonga_dpm_level { - bool enabled; + bool enabled; uint32_t value; uint32_t param1; }; @@ -237,20 +237,20 @@ struct tonga_hwmgr { irq_handler_func_t ctf_callback; void *ctf_context; - phw_tonga_clock_registers clock_registers; + phw_tonga_clock_registers clock_registers; phw_tonga_voltage_smio_registers voltage_smio_registers; - bool is_memory_GDDR5; + bool is_memory_GDDR5; uint16_t acpi_vddc; - bool pspp_notify_required; /* Flag to indicate if PSPP notification to SBIOS is required */ + bool pspp_notify_required; /* Flag to indicate if PSPP notification to SBIOS is required */ uint16_t force_pcie_gen; /* The forced PCI-E speed if not 0xffff */ uint16_t acpi_pcie_gen; /* The PCI-E speed at ACPI time */ uint32_t pcie_gen_cap; /* The PCI-E speed capabilities bitmap from CAIL */ uint32_t pcie_lane_cap; /* The PCI-E lane capabilities bitmap from CAIL */ uint32_t pcie_spc_cap; /* Symbol Per Clock Capabilities from registry */ - phw_tonga_leakage_voltage vddc_leakage; /* The Leakage VDDC supported (based on leakage ID).*/ - phw_tonga_leakage_voltage vddcgfx_leakage; /* The Leakage VDDC supported (based on leakage ID). */ - phw_tonga_leakage_voltage vddci_leakage; /* The Leakage VDDCI supported (based on leakage ID). */ + phw_tonga_leakage_voltage vddc_leakage; /* The Leakage VDDC supported (based on leakage ID).*/ + phw_tonga_leakage_voltage vddcgfx_leakage; /* The Leakage VDDC supported (based on leakage ID). */ + phw_tonga_leakage_voltage vddci_leakage; /* The Leakage VDDCI supported (based on leakage ID). 
*/ uint32_t mvdd_control; uint32_t vddc_mask_low; @@ -263,8 +263,8 @@ struct tonga_hwmgr { uint32_t mclk_stutter_mode_threshold; uint32_t mclk_edc_enable_threshold; uint32_t mclk_edc_wr_enable_threshold; - bool is_uvd_enabled; - bool is_xdma_enabled; + bool is_uvd_enabled; + bool is_xdma_enabled; phw_tonga_vbios_boot_state vbios_boot_state; bool battery_state; diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index c96e5b1baae0..fd4ce7aaeee9 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -500,7 +500,7 @@ struct phm_dynamic_state_info { struct phm_ppm_table *ppm_parameter_table; struct phm_cac_tdp_table *cac_dtp_table; struct phm_clock_voltage_dependency_table *vdd_gfx_dependency_on_sclk; - struct phm_vq_budgeting_table *vq_budgeting_table; + struct phm_vq_budgeting_table *vq_budgeting_table; }; struct pp_fan_info { diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 169f70fe949c..070095a9433c 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -74,7 +74,7 @@ struct amd_sched_fence { struct amd_gpu_scheduler *sched; spinlock_t lock; void *owner; - struct amd_sched_job *s_job; + struct amd_sched_job *s_job; }; struct amd_sched_job { -- cgit v1.2.3 From 7dae69a2905c34a97678645c8e4c95095f5390b8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 3 May 2016 16:25:53 -0400 Subject: drm/amdgpu: fetch cu_info once at init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fetch this info once at init and just store the results for future requests. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 20 +++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 9 ++++----- drivers/gpu/drm/amd/amdgpu/cik.c | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 23 +++++++---------------- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 11 ++++------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h | 1 - drivers/gpu/drm/amd/amdgpu/vi.c | 1 - 9 files changed, 25 insertions(+), 46 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index abe62ecaaef4..6589d43bd094 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1148,6 +1148,12 @@ struct amdgpu_gca_config { uint32_t macrotile_mode_array[16]; }; +struct amdgpu_cu_info { + uint32_t number; /* total active CU number */ + uint32_t ao_cu_mask; + uint32_t bitmap[4][4]; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gca_config config; @@ -1180,9 +1186,10 @@ struct amdgpu_gfx { struct amdgpu_irq_src priv_reg_irq; struct amdgpu_irq_src priv_inst_irq; /* gfx status */ - uint32_t gfx_current_status; + uint32_t gfx_current_status; /* ce ram size*/ - unsigned ce_ram_size; + unsigned ce_ram_size; + struct amdgpu_cu_info cu_info; }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -1794,13 +1801,6 @@ struct amdgpu_allowed_register_entry { bool grbm_indexed; }; -struct amdgpu_cu_info { - uint32_t number; /* total active CU number */ - uint32_t ao_cu_mask; - uint32_t bitmap[4][4]; -}; - - /* * ASIC specific functions. 
*/ @@ -1818,7 +1818,6 @@ struct amdgpu_asic_funcs { u32 (*get_xclk)(struct amdgpu_device *adev); /* get the gpu clock counter */ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); - int (*get_cu_info)(struct amdgpu_device *adev, struct amdgpu_cu_info *info); /* MM block clocks */ int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk); int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk); @@ -2210,7 +2209,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev)) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) -#define amdgpu_asic_get_cu_info(adev, info) (adev)->asic_funcs->get_cu_info((adev), (info)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 490464e39322..199f76baf22c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -793,7 +793,6 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device, struct cgs_system_info *sys_info) { CGS_FUNC_ADEV; - struct amdgpu_cu_info cu_info; if (NULL == sys_info) return -ENODEV; @@ -818,8 +817,7 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device, sys_info->value = adev->pg_flags; break; case CGS_SYSTEM_INFO_GFX_CU_INFO: - amdgpu_asic_get_cu_info(adev, &cu_info); - sys_info->value = cu_info.number; + sys_info->value = adev->gfx.cu_info.number; break; default: return -ENODEV; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 7db271278e70..40a23704a981 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -427,7 +427,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device dev_info = {}; - struct amdgpu_cu_info cu_info; dev_info.device_id = dev->pdev->device; dev_info.chip_rev = adev->rev_id; @@ -461,11 +460,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file AMDGPU_GPU_PAGE_SIZE; dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; - amdgpu_asic_get_cu_info(adev, &cu_info); - dev_info.cu_active_number = cu_info.number; - dev_info.cu_ao_mask = cu_info.ao_cu_mask; + dev_info.cu_active_number = adev->gfx.cu_info.number; + dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; dev_info.ce_ram_size = adev->gfx.ce_ram_size; - memcpy(&dev_info.cu_bitmap[0], &cu_info.bitmap[0], sizeof(cu_info.bitmap)); + memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], + sizeof(adev->gfx.cu_info.bitmap)); dev_info.vram_type = adev->mc.vram_type; dev_info.vram_bit_width = adev->mc.vram_width; dev_info.vce_harvest_config = adev->vce.harvest_config; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index c6127d66de11..68e569853c95 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ 
-2007,7 +2007,6 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, .set_vce_clocks = &cik_set_vce_clocks, - .get_cu_info = &gfx_v7_0_get_cu_info, /* these should be moved to their own ip modules */ .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter, .wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 03108909a275..0014a9ceaf29 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -53,7 +53,6 @@ static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); -int gfx_v7_0_get_cu_info(struct amdgpu_device *, struct amdgpu_cu_info *); MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); MODULE_FIRMWARE("radeon/bonaire_me.bin"); @@ -882,6 +881,7 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev); static void gfx_v7_0_init_pg(struct amdgpu_device *adev); +static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); /* * Core functions @@ -1718,6 +1718,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) gfx_v7_0_tiling_mode_table_init(adev); gfx_v7_0_setup_rb(adev); + gfx_v7_0_get_cu_info(adev); /* set HW defaults for 3D engine */ WREG32(mmCP_MEQ_THRESHOLDS, @@ -3869,18 +3870,13 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev) { - uint32_t tmp, active_cu_number; - struct amdgpu_cu_info cu_info; - - gfx_v7_0_get_cu_info(adev, &cu_info); - tmp = cu_info.ao_cu_mask; - active_cu_number = cu_info.number; + u32 tmp; - WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, tmp); + WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); tmp = RREG32(mmRLC_MAX_PG_CU); tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK; - tmp |= (active_cu_number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT); + tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT); WREG32(mmRLC_MAX_PG_CU, tmp); } @@ -5015,14 +5011,11 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) } -int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, - struct amdgpu_cu_info *cu_info) +static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - - if (!adev || !cu_info) - return -EINVAL; + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; memset(cu_info, 0, sizeof(*cu_info)); @@ -5053,6 +5046,4 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; - - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index c04bfbabfc88..e747aa935c88 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h @@ -32,6 +32,5 @@ void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev); void gfx_v7_0_rlc_stop(struct amdgpu_device *adev); uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev); void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); -int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); #endif diff --git 
a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4ea4b4eb0bc5..7525f99858f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -636,6 +636,7 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); +static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -3431,6 +3432,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) gfx_v8_0_tiling_mode_table_init(adev); gfx_v8_0_setup_rb(adev); + gfx_v8_0_get_cu_info(adev); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -6212,14 +6214,11 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) return (~data) & mask; } -int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, - struct amdgpu_cu_info *cu_info) +static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - - if (!adev || !cu_info) - return -EINVAL; + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; memset(cu_info, 0, sizeof(*cu_info)); @@ -6250,6 +6249,4 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; - - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h index 021e05193cb9..16a49f53a2fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h @@ -28,6 +28,5 @@ extern const struct amd_ip_funcs gfx_v8_0_ip_funcs; uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev); void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); -int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 31dd6304f208..b7da094f8bf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1118,7 +1118,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, .set_vce_clocks = &vi_set_vce_clocks, - .get_cu_info = &gfx_v8_0_get_cu_info, /* these should be moved to their own ip modules */ .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, .wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle, -- cgit v1.2.3 From cb9e59d7e999c68b79f23d6016b08fc5d0bb8a8d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 May 2016 16:03:57 -0400 Subject: drm/amdgpu: Support DRM_MODE_PAGE_FLIP_ASYNC (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When this flag is set, we program the hardware to execute the flip during horizontal blank (i.e. for the next scanline) instead of during vertical blank (i.e. for the next frame). 
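As a concrete illustration of what this enables for userspace, a client can opt into the tearing flip per request; the sketch below is a minimal libdrm-based example, not part of this patch, and the function name and arguments are placeholders assuming a valid DRM fd, CRTC id and framebuffer id:

    #include <stdint.h>
    #include <xf86drm.h>
    #include <xf86drmMode.h>

    /* Ask for the flip to be executed at the next scanline (horizontal blank)
     * instead of waiting for the next vertical blank. The flag is only
     * honoured once the driver advertises mode_config.async_page_flip. */
    static int queue_async_flip(int fd, uint32_t crtc_id, uint32_t fb_id,
                                void *user_data)
    {
        return drmModePageFlip(fd, crtc_id, fb_id,
                               DRM_MODE_PAGE_FLIP_EVENT | DRM_MODE_PAGE_FLIP_ASYNC,
                               user_data);
    }

Without DRM_MODE_PAGE_FLIP_ASYNC the flip is still programmed for the next vertical blank, as before.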
Ported from radeon commit: drm/radeon: Support DRM_MODE_PAGE_FLIP_ASYNC v2: drop DAL change for upstream Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 25 +++++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 26 ++++++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 20 ++++++++++++-------- 6 files changed, 52 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6589d43bd094..ac26b13678b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -727,6 +727,7 @@ struct amdgpu_flip_work { unsigned shared_count; struct fence **shared; struct fence_cb cb; + bool async; }; @@ -2243,7 +2244,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h)) #define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev)) #define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev)) -#define amdgpu_display_page_flip(adev, crtc, base) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base)) +#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async)) #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos)) #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c835abe65df3..726803069fef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -132,7 +132,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work) vblank->linedur_ns / 1000, stat, vpos, hpos); /* Do the flip (mmio) */ - adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); + adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base, work->async); /* Set the flip status */ amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED; @@ -197,6 +197,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc, work->event = event; work->adev = adev; work->crtc_id = amdgpu_crtc->crtc_id; + work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; /* schedule unpin of the old buffer */ old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 8a253aa0b551..6b1d7d306564 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -283,7 +283,7 @@ struct amdgpu_display_funcs { u32 (*hpd_get_gpio_reg)(struct amdgpu_device *adev); /* pageflipping */ void (*page_flip)(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base); + int crtc_id, u64 crtc_base, bool async); int (*page_flip_get_scanoutpos)(struct amdgpu_device *adev, int crtc, u32 *vbl, u32 *position); /* display topology setup */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 8af5fbc60e5b..578328860396 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -284,10 +284,16 @@ static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * surface base address. */ static void dce_v10_0_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base) + int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + u32 tmp; + /* flip at hsync for async, default is vsync */ + tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); + tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, + GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0); + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); /* update the primary scanout address */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -2211,6 +2217,14 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, dce_v10_0_vga_enable(crtc, false); + /* Make sure surface address is updated at vertical blank rather than + * horizontal blank + */ + tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); + tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, + GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(fb_location)); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, @@ -2261,13 +2275,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset, (viewport_w << 16) | viewport_h); - /* pageflip setup */ - /* make sure flip is at vb rather than hb */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, - GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* set pageflip to happen only at start of vblank interval (front porch) */ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 3); @@ -2992,6 +2999,8 @@ static int dce_v10_0_sw_init(void *handle) adev->ddev->mode_config.funcs = &amdgpu_mode_funcs; + adev->ddev->mode_config.async_page_flip = true; + adev->ddev->mode_config.max_width = 16384; adev->ddev->mode_config.max_height = 16384; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index e7b951f00421..60bfeb1b956e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -302,10 +302,17 @@ static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * surface base address. */ static void dce_v11_0_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base) + int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + u32 tmp; + /* flip at hsync for async, default is vsync */ + /* use UPDATE_IMMEDIATE_EN instead for async? */ + tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); + tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, + GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 
1 : 0); + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -2185,6 +2192,14 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, dce_v11_0_vga_enable(crtc, false); + /* Make sure surface address is updated at vertical blank rather than + * horizontal blank + */ + tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); + tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, + GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(fb_location)); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, @@ -2235,13 +2250,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset, (viewport_w << 16) | viewport_h); - /* pageflip setup */ - /* make sure flip is at vb rather than hb */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, - GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* set pageflip to happen only at start of vblank interval (front porch) */ WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 3); @@ -3042,6 +3050,8 @@ static int dce_v11_0_sw_init(void *handle) adev->ddev->mode_config.funcs = &amdgpu_mode_funcs; + adev->ddev->mode_config.async_page_flip = true; + adev->ddev->mode_config.max_width = 16384; adev->ddev->mode_config.max_height = 16384; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 25e6af03c406..c73993472059 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -233,10 +233,13 @@ static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * surface base address. */ static void dce_v8_0_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base) + int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + /* flip at hsync for async, default is vsync */ + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? 
+ GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); /* update the primary scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -1999,7 +2002,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, uint32_t fb_format, fb_pitch_pixels; u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); u32 pipe_config; - u32 tmp, viewport_w, viewport_h; + u32 viewport_w, viewport_h; int r; bool bypass_lut = false; @@ -2135,6 +2138,11 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, dce_v8_0_vga_enable(crtc, false); + /* Make sure surface address is updated at vertical blank rather than + * horizontal blank + */ + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, 0); + WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(fb_location)); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, @@ -2182,12 +2190,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset, (viewport_w << 16) | viewport_h); - /* pageflip setup */ - /* make sure flip is at vb rather than hb */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp &= ~GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK; - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* set pageflip to happen only at start of vblank interval (front porch) */ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 3); @@ -2902,6 +2904,8 @@ static int dce_v8_0_sw_init(void *handle) adev->ddev->mode_config.funcs = &amdgpu_mode_funcs; + adev->ddev->mode_config.async_page_flip = true; + adev->ddev->mode_config.max_width = 16384; adev->ddev->mode_config.max_height = 16384; -- cgit v1.2.3 From aa3b73f67bda66637f17c3d847a8a36d3649f3f8 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 3 May 2016 15:17:40 +0200 Subject: drm/amdgpu: use fence_context to judge ctx switch v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use of the ctx pointer is not safe, because it is likely already assigned to another ctx by the time we do the comparison. v2: recreate from scratch, avoid all unnecessary changes.
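The hazard is easiest to see in a reduced form: a freed context object can be reallocated at the same address, so a pointer comparison may claim two unrelated submissions share a context, whereas the 64-bit context number handed out by fence_context_alloc() is monotonically increasing and never reused. A simplified sketch of the safe check, not the driver code itself and with abridged names:

    #include <stdbool.h>
    #include <stdint.h>

    struct ring_state {
        uint64_t current_ctx;   /* fence context of the last submitted IB */
    };

    /* Comparing the stable fence_context number instead of a (possibly
     * recycled) struct amdgpu_ctx pointer makes the check reliable. */
    static bool needs_ctx_switch(const struct ring_state *ring, uint64_t submit_ctx)
    {
        return ring->current_ctx != submit_ctx;
    }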
Signed-off-by: Christian König Reviewed-by: Monk.Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ac26b13678b1..781f0471ef06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -743,7 +743,7 @@ struct amdgpu_ib { struct amdgpu_user_fence *user; unsigned vm_id; uint64_t vm_pd_addr; - struct amdgpu_ctx *ctx; + uint64_t ctx; uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; @@ -806,7 +806,7 @@ struct amdgpu_ring { unsigned wptr_offs; unsigned next_rptr_offs; unsigned fence_offs; - struct amdgpu_ctx *current_ctx; + uint64_t current_ctx; enum amdgpu_ring_type type; char name[16]; unsigned cond_exe_offs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1a065961981a..87ec1136a0bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -741,7 +741,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ib->length_dw = chunk_ib->ib_bytes / 4; ib->flags = chunk_ib->flags; - ib->ctx = parser->ctx; + ib->ctx = parser->ctx->rings[ring->idx].entity.fence_context; j++; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 910556136fde..f670519efbbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,7 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; - struct amdgpu_ctx *ctx, *old_ctx; + uint64_t ctx, old_ctx; struct fence *hwf; struct amdgpu_vm *vm = NULL; unsigned i, patch_offset = ~0; -- cgit v1.2.3 From f153d2867bf74f84d47f67c377a8e3a34865e562 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 May 2016 15:31:19 +0200 Subject: drm/amdgpu: move context switch handling into common code v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was a source of bugs to repeat that in each IP version. 
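The intent of the refactor can be condensed to a few lines: the common submission path computes whether a context switch is needed exactly once and hands the result to the per-IP emit hook, so gfx, SDMA, UVD and VCE no longer duplicate the ring-state check. The sketch below is a simplified model of that flow, not the actual amdgpu structures:

    #include <stdbool.h>
    #include <stdint.h>

    typedef void (*emit_ib_fn)(void *ring, void *ib, bool ctx_switch);

    /* Common code decides; the hardware-specific backend only consumes the flag. */
    static void schedule_ibs(void *ring, void **ibs, unsigned num_ibs,
                             uint64_t *current_ctx, uint64_t submit_ctx,
                             emit_ib_fn emit_ib)
    {
        bool need_ctx_switch = (*current_ctx != submit_ctx);
        unsigned i;

        for (i = 0; i < num_ibs; ++i) {
            emit_ib(ring, ibs[i], need_ctx_switch);
            need_ctx_switch = false;   /* only the first IB of a submission switches */
        }
        *current_ctx = submit_ctx;
    }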
v2: rename parameter Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 2 +- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 9 ++++----- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 9 ++++----- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 2 +- 12 files changed, 25 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 781f0471ef06..db87edc72936 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -283,7 +283,7 @@ struct amdgpu_ring_funcs { int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, - struct amdgpu_ib *ib); + struct amdgpu_ib *ib, bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); @@ -2221,7 +2221,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) -#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib)) +#define amdgpu_ring_emit_ib(r, ib, c) (r)->funcs->emit_ib((r), (ib), (c)) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index d6f85923edcd..88b8fda7340f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,18 +121,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; - uint64_t ctx, old_ctx; struct fence *hwf; struct amdgpu_vm *vm = NULL; unsigned i, patch_offset = ~0; - bool skip_preamble; + bool skip_preamble, need_ctx_switch; int r = 0; if (num_ibs == 0) return -EINVAL; - ctx = ibs->ctx; if (job) /* for domain0 job like ring test, ibs->job is not assigned */ vm = job->vm; @@ -156,7 +154,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, patch_offset = amdgpu_ring_init_cond_exec(ring); if (vm) { - /* do context switch */ r = amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, ib->gds_base, ib->gds_size, ib->gws_base, ib->gws_size, @@ -173,16 +170,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* always set cond_exec_polling to CONTINUE */ *ring->cond_exe_cpu_addr = 1; - skip_preamble = ring->current_ctx == ctx; - old_ctx = ring->current_ctx; + skip_preamble = ring->current_ctx == ib->ctx; + need_ctx_switch = ring->current_ctx != ib->ctx; for (i = 0; i < num_ibs; ++i) { + ib = &ibs[i]; /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) 
continue; - amdgpu_ring_emit_ib(ring, ib); - ring->current_ctx = ctx; + amdgpu_ring_emit_ib(ring, ib, need_ctx_switch); + need_ctx_switch = false; } if (ring->funcs->emit_hdp_invalidate) @@ -191,7 +189,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_fence_emit(ring, &hwf); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); - ring->current_ctx = old_ctx; if (ib->vm_id) amdgpu_vm_reset_id(adev, ib->vm_id); amdgpu_ring_undo(ring); @@ -212,6 +209,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (patch_offset != ~0 && ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); + ring->current_ctx = ibs->ctx; amdgpu_ring_commit(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 7b7b0f64530a..ad91664a7649 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -762,7 +762,7 @@ out: * @ib: the IB to execute * */ -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ef99d2370182..40d0650e3a37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -34,7 +34,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, bool ctx_switch); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index df824f80ac07..6c2aa2b863b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -210,7 +210,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (CIK). */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 extra_bits = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d82fa9641f0e..189ef2b23668 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2030,13 +2030,12 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * on the gfx ring for execution by the GPU. 
*/ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { - bool need_ctx_switch = ring->current_ctx != ib->ctx; u32 header, control = 0; u32 next_rptr = ring->wptr + 5; - if (need_ctx_switch) + if (ctx_switch) next_rptr += 2; next_rptr += 4; @@ -2047,7 +2046,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (need_ctx_switch) { + if (ctx_switch) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -2070,7 +2069,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 9a0b6df210c1..0d556c907ab6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5646,13 +5646,12 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) } static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { - bool need_ctx_switch = ring->current_ctx != ib->ctx; u32 header, control = 0; u32 next_rptr = ring->wptr + 5; - if (need_ctx_switch) + if (ctx_switch) next_rptr += 2; next_rptr += 4; @@ -5663,7 +5662,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (need_ctx_switch) { + if (ctx_switch) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -5686,7 +5685,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 6be2c0faa1bc..de94adb2b19e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -242,7 +242,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). */ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 vmid = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index b3dab09205af..ca2aee3e88a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -400,7 +400,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). 
*/ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 vmid = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 46a397654837..a75ffb5b11b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -489,7 +489,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index b96486c09250..ecb81014d836 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -539,7 +539,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 892bdac4bb21..a43f1a7c58bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -631,7 +631,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); -- cgit v1.2.3 From 92f250989b7098f4b52d50183a7b2fc4e010731b Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 May 2016 15:57:42 +0200 Subject: drm/amdgpu: move the context from the IBs into the job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only have one context for all IBs. 
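Since every IB of a submission is owned by one job, the field only needs to exist once; a before/after sketch of the data layout, with abridged and purely illustrative names:

    #include <stdint.h>

    /* before: the context was duplicated in every IB */
    struct ib_before  { uint64_t gpu_addr; uint32_t length_dw; uint64_t ctx; };

    /* after: stored once in the job that owns all IBs of the submission */
    struct ib_after   { uint64_t gpu_addr; uint32_t length_dw; };
    struct job_after  { struct ib_after *ibs; uint32_t num_ibs; uint64_t ctx; };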
Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 22 +++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 +++---- 4 files changed, 24 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index db87edc72936..9b55ad351602 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -743,7 +743,6 @@ struct amdgpu_ib { struct amdgpu_user_fence *user; unsigned vm_id; uint64_t vm_pd_addr; - uint64_t ctx; uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; @@ -1262,6 +1261,7 @@ struct amdgpu_job { struct fence *fence; /* the hw fence */ uint32_t num_ibs; void *owner; + uint64_t ctx; struct amdgpu_user_fence uf; }; #define to_amdgpu_job(sched_job) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 87ec1136a0bb..2895d63c9979 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -741,7 +741,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ib->length_dw = chunk_ib->ib_bytes / 4; ib->flags = chunk_ib->flags; - ib->ctx = parser->ctx->rings[ring->idx].entity.fence_context; j++; } @@ -840,6 +839,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_ring *ring = p->job->ring; + struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; struct fence *fence; struct amdgpu_job *job; int r; @@ -848,16 +848,16 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, p->job = NULL; r = amd_sched_job_init(&job->base, &ring->sched, - &p->ctx->rings[ring->idx].entity, - amdgpu_job_timeout_func, - amdgpu_job_free_func, - p->filp, &fence); + entity, amdgpu_job_timeout_func, + amdgpu_job_free_func, + p->filp, &fence); if (r) { amdgpu_job_free(job); return r; } job->owner = p->filp; + job->ctx = entity->fence_context; p->fence = fence_get(fence); cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence); job->ibs[job->num_ibs - 1].sequence = cs->out.handle; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 88b8fda7340f..dacbd2e32072 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,18 +121,26 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; - struct fence *hwf; - struct amdgpu_vm *vm = NULL; - unsigned i, patch_offset = ~0; bool skip_preamble, need_ctx_switch; + unsigned patch_offset = ~0; + struct amdgpu_vm *vm; + struct fence *hwf; + uint64_t ctx; + unsigned i; int r = 0; if (num_ibs == 0) return -EINVAL; - if (job) /* for domain0 job like ring test, ibs->job is not assigned */ + /* ring tests don't use a job */ + if (job) { vm = job->vm; + ctx = job->ctx; + } else { + vm = NULL; + ctx = 0; + } if (!ring->ready) { dev_err(adev->dev, "couldn't schedule ib\n"); @@ -170,8 +178,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* always set cond_exec_polling to CONTINUE */ *ring->cond_exe_cpu_addr = 1; - skip_preamble = ring->current_ctx == ib->ctx; - need_ctx_switch = ring->current_ctx != ib->ctx; + skip_preamble = ring->current_ctx == ctx; + need_ctx_switch = 
ring->current_ctx != ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; @@ -209,7 +217,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (patch_offset != ~0 && ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); - ring->current_ctx = ibs->ctx; + ring->current_ctx = ctx; amdgpu_ring_commit(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 917c6f3bfa09..a0961f2a93d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -122,14 +122,13 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, return -EINVAL; r = amd_sched_job_init(&job->base, &ring->sched, - entity, - amdgpu_job_timeout_func, - amdgpu_job_free_func, - owner, &fence); + entity, amdgpu_job_timeout_func, + amdgpu_job_free_func, owner, &fence); if (r) return r; job->owner = owner; + job->ctx = entity->fence_context; *f = fence_get(fence); amd_sched_entity_push_job(&job->base); -- cgit v1.2.3 From d88bf583bd06eecb31f82871c90ef6a5a09b5766 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 May 2016 17:50:03 +0200 Subject: drm/amdgpu: move VM fields into job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They are the same for all IBs. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 15 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 36 +++++++++++++++------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 19 +++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 12 ++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 3 ++- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 10 +++++---- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 10 +++++---- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 3 ++- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 3 ++- 14 files changed, 66 insertions(+), 66 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9b55ad351602..d4c1eb7816f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -283,7 +283,8 @@ struct amdgpu_ring_funcs { int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch); + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); @@ -741,11 +742,6 @@ struct amdgpu_ib { uint64_t gpu_addr; uint32_t *ptr; struct amdgpu_user_fence *user; - unsigned vm_id; - uint64_t vm_pd_addr; - uint32_t gds_base, gds_size; - uint32_t gws_base, gws_size; - uint32_t oa_base, oa_size; uint32_t flags; /* resulting sequence number */ uint64_t sequence; @@ -1262,6 +1258,11 @@ struct amdgpu_job { uint32_t num_ibs; void *owner; uint64_t ctx; + unsigned vm_id; + uint64_t vm_pd_addr; + uint32_t gds_base, gds_size; + uint32_t gws_base, gws_size; + uint32_t oa_base, oa_size; struct amdgpu_user_fence uf; }; #define to_amdgpu_job(sched_job) \ @@ -2221,7 +2222,7 @@ 
amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) -#define amdgpu_ring_emit_ib(r, ib, c) (r)->funcs->emit_ib((r), (ib), (c)) +#define amdgpu_ring_emit_ib(r, ib, vm_id, c) (r)->funcs->emit_ib((r), (ib), (vm_id), (c)) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2895d63c9979..9ab2f0886a14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -473,6 +473,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_validate; if (p->bo_list) { + struct amdgpu_bo *gds = p->bo_list->gds_obj; + struct amdgpu_bo *gws = p->bo_list->gws_obj; + struct amdgpu_bo *oa = p->bo_list->oa_obj; struct amdgpu_vm *vm = &fpriv->vm; unsigned i; @@ -481,6 +484,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); } + + if (gds) { + p->job->gds_base = amdgpu_bo_gpu_offset(gds); + p->job->gds_size = amdgpu_bo_size(gds); + } + if (gws) { + p->job->gws_base = amdgpu_bo_gpu_offset(gws); + p->job->gws_size = amdgpu_bo_size(gws); + } + if (oa) { + p->job->oa_base = amdgpu_bo_gpu_offset(oa); + p->job->oa_size = amdgpu_bo_size(oa); + } } error_validate: @@ -744,26 +760,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - /* add GDS resources to first IB */ - if (parser->bo_list) { - struct amdgpu_bo *gds = parser->bo_list->gds_obj; - struct amdgpu_bo *gws = parser->bo_list->gws_obj; - struct amdgpu_bo *oa = parser->bo_list->oa_obj; - struct amdgpu_ib *ib = &parser->job->ibs[0]; - - if (gds) { - ib->gds_base = amdgpu_bo_gpu_offset(gds); - ib->gds_size = amdgpu_bo_size(gds); - } - if (gws) { - ib->gws_base = amdgpu_bo_gpu_offset(gws); - ib->gws_size = amdgpu_bo_size(gws); - } - if (oa) { - ib->oa_base = amdgpu_bo_gpu_offset(oa); - ib->oa_size = amdgpu_bo_size(oa); - } - } /* wrap the last IB with user fence */ if (parser->job->uf.bo) { struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index dacbd2e32072..201aceb01d8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -74,8 +74,6 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } - ib->vm_id = 0; - return 0; } @@ -147,7 +145,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } - if (vm && !ibs->vm_id) { + if (vm && !job->vm_id) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } @@ -162,10 +160,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, patch_offset = amdgpu_ring_init_cond_exec(ring); if (vm) { - r = amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, - ib->gds_base, ib->gds_size, - ib->gws_base, ib->gws_size, - ib->oa_base, ib->oa_size); + r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, + job->gds_base, job->gds_size, + job->gws_base, job->gws_size, + job->oa_base, job->oa_size); if (r) { amdgpu_ring_undo(ring); return r; @@ 
-187,7 +185,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) continue; - amdgpu_ring_emit_ib(ring, ib, need_ctx_switch); + amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, + need_ctx_switch); need_ctx_switch = false; } @@ -197,8 +196,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_fence_emit(ring, &hwf); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); - if (ib->vm_id) - amdgpu_vm_reset_id(adev, ib->vm_id); + if (job && job->vm_id) + amdgpu_vm_reset_id(adev, job->vm_id); amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index a0961f2a93d2..8ea68d0cfad6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -142,23 +142,15 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) struct fence *fence = amdgpu_sync_get_fence(&job->sync); - if (fence == NULL && vm && !job->ibs->vm_id) { + if (fence == NULL && vm && !job->vm_id) { struct amdgpu_ring *ring = job->ring; - unsigned i, vm_id; - uint64_t vm_pd_addr; int r; r = amdgpu_vm_grab_id(vm, ring, &job->sync, &job->base.s_fence->base, - &vm_id, &vm_pd_addr); + &job->vm_id, &job->vm_pd_addr); if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - else { - for (i = 0; i < job->num_ibs; ++i) { - job->ibs[i].vm_id = vm_id; - job->ibs[i].vm_pd_addr = vm_pd_addr; - } - } fence = amdgpu_sync_get_fence(&job->sync); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ad91664a7649..875626a2eccb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -762,7 +762,8 @@ out: * @ib: the IB to execute * */ -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, bool ctx_switch) +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 40d0650e3a37..f40cf761c66f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -34,7 +34,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, bool ctx_switch); +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 6c2aa2b863b2..518dca43b133 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -210,9 +210,10 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (CIK). 
*/ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - u32 extra_bits = ib->vm_id & 0xf; + u32 extra_bits = vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 4) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 189ef2b23668..7f18a53ab53a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2030,7 +2030,8 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * on the gfx ring for execution by the GPU. */ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; @@ -2056,7 +2057,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (ib->vm_id << 24); + control |= ib->length_dw | (vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -2069,7 +2070,8 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; @@ -2084,7 +2086,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (ib->vm_id << 24); + control |= ib->length_dw | (vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0d556c907ab6..92647fbf5b8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5646,7 +5646,8 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) } static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; @@ -5672,7 +5673,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (ib->vm_id << 24); + control |= ib->length_dw | (vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -5685,7 +5686,8 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; @@ -5701,7 +5703,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (ib->vm_id << 24); + control |= ib->length_dw | (vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index de94adb2b19e..f4c3130d3fdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -242,9 +242,10 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). 
*/ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - u32 vmid = ib->vm_id & 0xf; + u32 vmid = vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 2) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index ca2aee3e88a3..063f08a9957a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -400,9 +400,10 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). */ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - u32 vmid = ib->vm_id & 0xf; + u32 vmid = vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 2) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index a75ffb5b11b2..f07551476a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -489,7 +489,8 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index ecb81014d836..e0a76a883d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -539,7 +539,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index a43f1a7c58bc..c9929d665c01 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -631,7 +631,8 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); -- cgit v1.2.3 From 758ac17f963f3497aae4e767d3a9eb68fea71f71 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 May 2016 22:14:00 +0200 Subject: drm/amdgpu: fix and cleanup user fence handling v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We leaked the BO in the error pass, additional to that we only have one user fence for all IBs in a job. 
v2: remove white space changes Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 19 +++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 55 +++++++++++++++------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- 4 files changed, 38 insertions(+), 47 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d4c1eb7816f0..2a009c398dcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -368,13 +368,6 @@ struct amdgpu_fence_driver { #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) -struct amdgpu_user_fence { - /* write-back bo */ - struct amdgpu_bo *bo; - /* write-back address offset to bo start */ - uint32_t offset; -}; - int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); @@ -741,10 +734,7 @@ struct amdgpu_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; - struct amdgpu_user_fence *user; uint32_t flags; - /* resulting sequence number */ - uint64_t sequence; }; enum amdgpu_ring_type { @@ -1219,7 +1209,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring); struct amdgpu_cs_chunk { uint32_t chunk_id; uint32_t length_dw; - uint32_t *kdata; + void *kdata; }; struct amdgpu_cs_parser { @@ -1263,7 +1253,12 @@ struct amdgpu_job { uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; - struct amdgpu_user_fence uf; + + /* user fence handling */ + struct amdgpu_bo *uf_bo; + uint32_t uf_offset; + uint64_t uf_sequence; + }; #define to_amdgpu_job(sched_job) \ container_of((sched_job), struct amdgpu_job, base) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9ab2f0886a14..2bbeeb07c187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -87,33 +87,30 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, } static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, - struct amdgpu_user_fence *uf, - struct drm_amdgpu_cs_chunk_fence *fence_data) + struct drm_amdgpu_cs_chunk_fence *data, + uint32_t *offset) { struct drm_gem_object *gobj; - uint32_t handle; - handle = fence_data->handle; gobj = drm_gem_object_lookup(p->adev->ddev, p->filp, - fence_data->handle); + data->handle); if (gobj == NULL) return -EINVAL; - uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); - uf->offset = fence_data->offset; - - if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) { - drm_gem_object_unreference_unlocked(gobj); - return -EINVAL; - } - - p->uf_entry.robj = amdgpu_bo_ref(uf->bo); + p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; p->uf_entry.tv.shared = true; p->uf_entry.user_pages = NULL; + *offset = data->offset; drm_gem_object_unreference_unlocked(gobj); + + if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { + amdgpu_bo_unref(&p->uf_entry.robj); + return -EINVAL; + } + return 0; } @@ -124,8 +121,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array; - struct amdgpu_user_fence uf = {}; unsigned size, num_ibs = 0; + uint32_t uf_offset = 0; int i; int ret; @@ 
-200,7 +197,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_partial_kdata; } - ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata); + ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata, + &uf_offset); if (ret) goto free_partial_kdata; @@ -219,7 +217,10 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; - p->job->uf = uf; + if (p->uf_entry.robj) { + p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj); + p->job->uf_offset = uf_offset; + } kfree(chunk_array); return 0; @@ -377,7 +378,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - if (p->job->uf.bo) + if (p->uf_entry.robj) list_add(&p->uf_entry.tv.head, &p->validated); if (need_mmap_lock) @@ -760,17 +761,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - /* wrap the last IB with user fence */ - if (parser->job->uf.bo) { - struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1]; - - /* UVD & VCE fw doesn't support user fences */ - if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD || - parser->job->ring->type == AMDGPU_RING_TYPE_VCE) - return -EINVAL; - - ib->user = &parser->job->uf; - } + /* UVD & VCE fw doesn't support user fences */ + if (parser->job->uf_bo && ( + parser->job->ring->type == AMDGPU_RING_TYPE_UVD || + parser->job->ring->type == AMDGPU_RING_TYPE_VCE)) + return -EINVAL; return 0; } @@ -856,7 +851,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->ctx = entity->fence_context; p->fence = fence_get(fence); cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence); - job->ibs[job->num_ibs - 1].sequence = cs->out.handle; + job->uf_sequence = cs->out.handle; trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 201aceb01d8a..34e35423b78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -203,10 +203,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } /* wrap the last IB with fence */ - if (ib->user) { - uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); - addr += ib->user->offset; - amdgpu_ring_emit_fence(ring, addr, ib->sequence, + if (job && job->uf_bo) { + uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo); + + addr += job->uf_offset; + amdgpu_ring_emit_fence(ring, addr, job->uf_sequence, AMDGPU_FENCE_FLAG_64BIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 8ea68d0cfad6..f0dafa514fe4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -97,7 +97,7 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f); fence_put(job->fence); - amdgpu_bo_unref(&job->uf.bo); + amdgpu_bo_unref(&job->uf_bo); amdgpu_sync_free(&job->sync); if (!job->base.use_sched) -- cgit v1.2.3 From d573de2d00835e38cef1fb4bff7b49c174c68941 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 12 May 2016 13:27:28 +0800 Subject: drm/amdgpu: create fence slab once when amdgpu module init. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids problems with multiple GPUs. For example, if the first GPU failed before amdgpu_fence_init() was called, amdgpu_fence_slab_ref is still 0 and it will get decremented in amdgpu_fence_driver_fini(). 
This will lead to a crash during init of the second GPU since amdgpu_fence_slab_ref is not 0. v2: add functions for init/exit instead of moving the variables into the driver. Signed-off-by: Rex Zhu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 25 ++++++++++++++----------- 3 files changed, 20 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2a009c398dcb..992f00b65be4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -602,6 +602,8 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); void amdgpu_sync_fini(void); +int amdgpu_fence_slab_init(void); +void amdgpu_fence_slab_fini(void); /* * GART structures, functions & helpers diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0dee008bcc57..6ff587900cf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -565,9 +565,12 @@ static struct pci_driver amdgpu_kms_pci_driver = { .driver.pm = &amdgpu_pm_ops, }; + + static int __init amdgpu_init(void) { amdgpu_sync_init(); + amdgpu_fence_slab_init(); if (vgacon_text_force()) { DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); return -EINVAL; @@ -578,7 +581,6 @@ static int __init amdgpu_init(void) driver->driver_features |= DRIVER_MODESET; driver->num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); - /* let modprobe override vga console setting */ return drm_pci_init(driver, pdriver); } @@ -589,6 +591,7 @@ static void __exit amdgpu_exit(void) drm_pci_exit(driver, pdriver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); + amdgpu_fence_slab_fini(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ba9c04283d01..7eb2fca871e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -55,8 +55,21 @@ struct amdgpu_fence { }; static struct kmem_cache *amdgpu_fence_slab; -static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0); +int amdgpu_fence_slab_init(void) +{ + amdgpu_fence_slab = kmem_cache_create( + "amdgpu_fence", sizeof(struct amdgpu_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!amdgpu_fence_slab) + return -ENOMEM; + return 0; +} + +void amdgpu_fence_slab_fini(void) +{ + kmem_cache_destroy(amdgpu_fence_slab); +} /* * Cast helper */ @@ -396,13 +409,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, */ int amdgpu_fence_driver_init(struct amdgpu_device *adev) { - if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) { - amdgpu_fence_slab = kmem_cache_create( - "amdgpu_fence", sizeof(struct amdgpu_fence), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!amdgpu_fence_slab) - return -ENOMEM; - } if (amdgpu_debugfs_fence_init(adev)) dev_err(adev->dev, "fence debugfs file creation failed\n"); @@ -441,9 +447,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) kfree(ring->fence_drv.fences); ring->fence_drv.initialized = false; } - - if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) - kmem_cache_destroy(amdgpu_fence_slab); } /** -- cgit v1.2.3 From 2ba272d7bde27e1db2cf1c6cee49b01b7ea08989 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 27 Apr 2016 
18:07:41 +0800 Subject: drm/amdgpu: add pipeline sync while vmid switch in same ctx Since vmid-mgr supports vmid sharing in one vm, the same ctx could get different vmids for two emits without vm flush, vm_flush could be done in another ring. Signed-off-by: Chunming Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 992f00b65be4..01c36b8d6222 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -799,6 +799,7 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + int vmid; }; /* @@ -936,7 +937,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size); + uint32_t oa_base, uint32_t oa_size, + bool vmid_switch); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 34e35423b78e..7a0b1e50f293 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -122,6 +122,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; + int vmid = 0, old_vmid = ring->vmid; struct fence *hwf; uint64_t ctx; @@ -135,9 +136,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job) { vm = job->vm; ctx = job->ctx; + vmid = job->vm_id; } else { vm = NULL; ctx = 0; + vmid = 0; } if (!ring->ready) { @@ -163,7 +166,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, job->gds_base, job->gds_size, job->gws_base, job->gws_size, - job->oa_base, job->oa_size); + job->oa_base, job->oa_size, + (ring->current_ctx == ctx) && (old_vmid != vmid)); if (r) { amdgpu_ring_undo(ring); return r; @@ -180,7 +184,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; - /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) continue; @@ -188,6 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_ib(ring, ib, job ? 
job->vm_id : 0, need_ctx_switch); need_ctx_switch = false; + ring->vmid = vmid; } if (ring->funcs->emit_hdp_invalidate) @@ -198,6 +202,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vm_id) amdgpu_vm_reset_id(adev, job->vm_id); + ring->vmid = old_vmid; amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9f36ed30ba11..62a4c127620f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -298,7 +298,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) + uint32_t oa_base, uint32_t oa_size, + bool vmid_switch) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; @@ -312,8 +313,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, int r; if (ring->funcs->emit_pipeline_sync && ( - pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || - ring->type == AMDGPU_RING_TYPE_COMPUTE)) + pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch)) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && -- cgit v1.2.3 From 048765ad5af7c8939603b4c6cb96293ffa05e00d Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Sat, 11 Jun 2016 02:51:32 -0400 Subject: amdgpu: fix asic initialization for virtualized environments (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When executing in a PCI passthrough-based virtualization environment, the hypervisor will usually attempt to send a PCIe bus reset signal to the ASIC when the VM reboots. In this scenario, the card is not correctly initialized, but we still consider it to be posted. Therefore, in a passthrough-based environment we should always post the card to guarantee it is in a good state for driver initialization. However, if we are operating in SR-IOV mode it is up to the GIM driver to manage the asic state; therefore, we should not post the card (and shouldn't be able to do it either). 
v2: add missing semi-colon Reviewed-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++++++++++++++++- drivers/gpu/drm/amd/amdgpu/cik.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/vi.c | 15 +++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 01c36b8d6222..70af26d97d28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1822,6 +1822,8 @@ struct amdgpu_asic_funcs { /* MM block clocks */ int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk); int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk); + /* query virtual capabilities */ + u32 (*get_virtual_caps)(struct amdgpu_device *adev); }; /* @@ -1916,8 +1918,12 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); /* GPU virtualization */ +#define AMDGPU_VIRT_CAPS_SRIOV_EN (1 << 0) +#define AMDGPU_VIRT_CAPS_IS_VF (1 << 1) struct amdgpu_virtualization { bool supports_sr_iov; + bool is_virtual; + u32 caps; }; /* @@ -2206,6 +2212,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d)) #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) +#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev))) #define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev)) #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev)) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 964f31404f17..66482b429458 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1385,6 +1385,15 @@ static int amdgpu_resume(struct amdgpu_device *adev) return 0; } +static bool amdgpu_device_is_virtual(void) +{ +#ifdef CONFIG_X86 + return boot_cpu_has(X86_FEATURE_HYPERVISOR); +#else + return false; +#endif +} + /** * amdgpu_device_init - initialize the driver * @@ -1519,8 +1528,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->virtualization.supports_sr_iov = amdgpu_atombios_has_gpu_virtualization_table(adev); + /* Check if we are executing in a virtualized environment */ + adev->virtualization.is_virtual = amdgpu_device_is_virtual(); + adev->virtualization.caps = amdgpu_asic_get_virtual_caps(adev); + /* Post card if necessary */ - if (!amdgpu_card_posted(adev)) { + if (!amdgpu_card_posted(adev) || + (adev->virtualization.is_virtual && + !adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN)) { if (!adev->bios) { dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 07bc795a4ca9..910431808542 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -962,6 +962,12 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, return true; } +static u32 cik_get_virtual_caps(struct amdgpu_device *adev) +{ + /* CIK does not support 
SR-IOV */ + return 0; +} + static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { {mmGRBM_STATUS, false}, {mmGB_ADDR_CONFIG, false}, @@ -2007,6 +2013,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, .set_vce_clocks = &cik_set_vce_clocks, + .get_virtual_caps = &cik_get_virtual_caps, /* these should be moved to their own ip modules */ .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter, .wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 2c88d0b66cf3..a65c96029476 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -421,6 +421,20 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev, return true; } +static u32 vi_get_virtual_caps(struct amdgpu_device *adev) +{ + u32 caps = 0; + u32 reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); + + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE)) + caps |= AMDGPU_VIRT_CAPS_SRIOV_EN; + + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER)) + caps |= AMDGPU_VIRT_CAPS_IS_VF; + + return caps; +} + static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = { {mmGB_MACROTILE_MODE7, true}, }; @@ -1118,6 +1132,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, .set_vce_clocks = &vi_set_vce_clocks, + .get_virtual_caps = &vi_get_virtual_caps, /* these should be moved to their own ip modules */ .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, .wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle, -- cgit v1.2.3 From 7c4021d403ca72ce52d39c17d8154974521a82be Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2016 18:59:17 -0400 Subject: Revert "drm/amdgpu: add pipeline sync while vmid switch in same ctx" This reverts commit 2ba272d7bde27e1db2cf1c6cee49b01b7ea08989. The issue fixed by this patch is specific to compute rings, and the previous patch was enough. Additionally, this patch has been traced to strange behavior on some CZ systems, so we might as well drop it. 
--- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 ++------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 3 files changed, 6 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 70af26d97d28..e055d5be1c3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -799,7 +799,6 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; - int vmid; }; /* @@ -937,8 +936,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size, - bool vmid_switch); + uint32_t oa_base, uint32_t oa_size); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 7a0b1e50f293..34e35423b78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -122,7 +122,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; - int vmid = 0, old_vmid = ring->vmid; struct fence *hwf; uint64_t ctx; @@ -136,11 +135,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job) { vm = job->vm; ctx = job->ctx; - vmid = job->vm_id; } else { vm = NULL; ctx = 0; - vmid = 0; } if (!ring->ready) { @@ -166,8 +163,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, job->gds_base, job->gds_size, job->gws_base, job->gws_size, - job->oa_base, job->oa_size, - (ring->current_ctx == ctx) && (old_vmid != vmid)); + job->oa_base, job->oa_size); if (r) { amdgpu_ring_undo(ring); return r; @@ -184,6 +180,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; + /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) continue; @@ -191,7 +188,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_ib(ring, ib, job ? 
job->vm_id : 0, need_ctx_switch); need_ctx_switch = false; - ring->vmid = vmid; } if (ring->funcs->emit_hdp_invalidate) @@ -202,7 +198,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vm_id) amdgpu_vm_reset_id(adev, job->vm_id); - ring->vmid = old_vmid; amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 62a4c127620f..9f36ed30ba11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -298,8 +298,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size, - bool vmid_switch) + uint32_t oa_base, uint32_t oa_size) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; @@ -313,7 +312,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, int r; if (ring->funcs->emit_pipeline_sync && ( - pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch)) + pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || + ring->type == AMDGPU_RING_TYPE_COMPUTE)) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && -- cgit v1.2.3