summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2022-04-06 12:03:31 -0400
committerAlex Deucher <alexander.deucher@amd.com>2023-06-09 12:35:58 -0400
commit44b87bb0836c65d1b9d21b01503eb6e9b9297771 (patch)
treeb6cc3c68fa63ea1bff4ac649f2705b31514eec8f /drivers/gpu/drm/amd/amdkfd
parent69a8c3ae2dea84a6d571e4c1aad306f630f3ccfd (diff)
drm/amdkfd: add raise exception event function
Exception events can be generated from interrupts or queue activitity. The raise event function will save exception status of a queue, device or process then notify the debugger of the status change by writing to a debugger polled file descriptor that the debugger provides during debug attach. For memory violation exceptions, extra exception data will be saved. The debugger will be able to query the saved exception states by query operation that will be provided by follow up patches. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c104
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c2
4 files changed, 123 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 5e2ee2d1acc4..dccb27fc764b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -24,6 +24,107 @@
#include "kfd_device_queue_manager.h"
#include <linux/file.h>
+void debug_event_write_work_handler(struct work_struct *work)
+{
+ struct kfd_process *process;
+
+ static const char write_data = '.';
+ loff_t pos = 0;
+
+ process = container_of(work,
+ struct kfd_process,
+ debug_event_workarea);
+
+ kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
+}
+
+/* update process/device/queue exception status, write to descriptor
+ * only if exception_status is enabled.
+ */
+bool kfd_dbg_ev_raise(uint64_t event_mask,
+ struct kfd_process *process, struct kfd_node *dev,
+ unsigned int source_id, bool use_worker,
+ void *exception_data, size_t exception_data_size)
+{
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+ int i;
+ static const char write_data = '.';
+ loff_t pos = 0;
+ bool is_subscribed = true;
+
+ if (!(process && process->debug_trap_enabled))
+ return false;
+
+ mutex_lock(&process->event_mutex);
+
+ if (event_mask & KFD_EC_MASK_DEVICE) {
+ for (i = 0; i < process->n_pdds; i++) {
+ struct kfd_process_device *pdd = process->pdds[i];
+
+ if (pdd->dev != dev)
+ continue;
+
+ pdd->exception_status |= event_mask & KFD_EC_MASK_DEVICE;
+
+ if (event_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+ if (!pdd->vm_fault_exc_data) {
+ pdd->vm_fault_exc_data = kmemdup(
+ exception_data,
+ exception_data_size,
+ GFP_KERNEL);
+ if (!pdd->vm_fault_exc_data)
+ pr_debug("Failed to allocate exception data memory");
+ } else {
+ pr_debug("Debugger exception data not saved\n");
+ print_hex_dump_bytes("exception data: ",
+ DUMP_PREFIX_OFFSET,
+ exception_data,
+ exception_data_size);
+ }
+ }
+ break;
+ }
+ } else if (event_mask & KFD_EC_MASK_PROCESS) {
+ process->exception_status |= event_mask & KFD_EC_MASK_PROCESS;
+ } else {
+ pqm = &process->pqm;
+ list_for_each_entry(pqn, &pqm->queues,
+ process_queue_list) {
+ int target_id;
+
+ if (!pqn->q)
+ continue;
+
+ target_id = event_mask & KFD_EC_MASK(EC_QUEUE_NEW) ?
+ pqn->q->properties.queue_id :
+ pqn->q->doorbell_id;
+
+ if (pqn->q->device != dev || target_id != source_id)
+ continue;
+
+ pqn->q->properties.exception_status |= event_mask;
+ break;
+ }
+ }
+
+ if (process->exception_enable_mask & event_mask) {
+ if (use_worker)
+ schedule_work(&process->debug_event_workarea);
+ else
+ kernel_write(process->dbg_ev_file,
+ &write_data,
+ 1,
+ &pos);
+ } else {
+ is_subscribed = false;
+ }
+
+ mutex_unlock(&process->event_mutex);
+
+ return is_subscribed;
+}
+
static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
{
struct mqd_update_info minfo = {0};
@@ -99,6 +200,9 @@ static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int
{
int i;
+ if (!unwind)
+ cancel_work_sync(&target->debug_event_workarea);
+
for (i = 0; i < target->n_pdds; i++) {
struct kfd_process_device *pdd = target->pdds[i];
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 3e56225f6ef6..66ee7b95d08a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -25,6 +25,11 @@
#include "kfd_priv.h"
+bool kfd_dbg_ev_raise(uint64_t event_mask,
+ struct kfd_process *process, struct kfd_node *dev,
+ unsigned int source_id, bool use_worker,
+ void *exception_data,
+ size_t exception_data_size);
int kfd_dbg_trap_disable(struct kfd_process *target);
int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
void __user *runtime_info,
@@ -35,6 +40,8 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
}
+void debug_event_write_work_handler(struct work_struct *work);
+
/*
* If GFX off is enabled, chips that do not support RLC restore for the debug
* registers will disable GFX off temporarily for the entire debug session.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f0a45d184c8f..b18cd4bf76bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -529,6 +529,7 @@ struct queue_properties {
uint32_t ctl_stack_size;
uint64_t tba_addr;
uint64_t tma_addr;
+ uint64_t exception_status;
};
#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \
@@ -820,6 +821,11 @@ struct kfd_process_device {
uint64_t page_in;
uint64_t page_out;
+ /* Exception code status*/
+ uint64_t exception_status;
+ void *vm_fault_exc_data;
+ size_t vm_fault_exc_data_size;
+
/* Tracks debug per-vmid request settings */
uint32_t spi_dbg_override;
uint32_t spi_dbg_launch_mode;
@@ -955,12 +961,16 @@ struct kfd_process {
/* Exception code enable mask and status */
uint64_t exception_enable_mask;
+ uint64_t exception_status;
/* shared virtual memory registered by this process */
struct svm_range_list svms;
bool xnack_enabled;
+ /* Work area for debugger event writer worker. */
+ struct work_struct debug_event_workarea;
+
/* Tracks debug per-vmid request for debug flags */
bool dbg_flags;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index e77cadadb09b..f904d6d6e01c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1509,6 +1509,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
kfd_unref_process(process);
get_task_struct(process->lead_thread);
+ INIT_WORK(&process->debug_event_workarea, debug_event_write_work_handler);
+
return process;
err_register_notifier: