Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Kconfig                      |  14
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c                |  83
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c                   |   2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c                |   2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c                 |   6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c   |  23
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c                 |   5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c              |   4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c                |  55
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h                |   5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c            |  12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c        |   2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h                   |   5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c                |  48
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c  |  32
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c             | 134
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h             |  21
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c                    | 202
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.h                    |   5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c               | 323
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h               |   4
21 files changed, 848 insertions(+), 139 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 8cc0a76ddf9f..93bd4eda0d94 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -25,3 +25,17 @@ config HSA_AMD_SVM
preemptions and one based on page faults. To enable page fault
based memory management on most GFXv9 GPUs, set the module
parameter amdgpu.noretry=0.
+
+config HSA_AMD_P2P
+ bool "HSA kernel driver support for peer-to-peer for AMD GPU devices"
+ depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY
+ help
+ Enable peer-to-peer (P2P) communication between AMD GPUs over
+ the PCIe bus. This can improve performance of multi-GPU compute
+ applications and libraries by enabling GPUs to access data directly
+ in peer GPUs' memory without intermediate copies in system memory.
+
+ This P2P feature is only enabled on compatible chipsets, and between
+ GPUs with large memory BARs that expose the entire VRAM in PCIe bus
+ address space within the physical address limits of the GPUs.
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1c7016958d6d..2b3d8bc8f0aa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;
+static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
+{
+ struct kfd_process_device *pdd;
+
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, gpu_id);
+
+ if (pdd)
+ return pdd;
+
+ mutex_unlock(&p->mutex);
+ return NULL;
+}
+
+static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
+{
+ mutex_unlock(&pdd->process->mutex);
+}
+
int kfd_chardev_init(void)
{
int err = 0;
@@ -280,6 +299,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
struct kfd_process_device *pdd;
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
+ struct amdgpu_bo *wptr_bo = NULL;
memset(&q_properties, 0, sizeof(struct queue_properties));
@@ -307,12 +327,49 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
+ /* Starting with GFX11, wptr BOs must be mapped to GART so that MES can
+ * detect work on unmapped queues when usermode queues are oversubscribed
+ * (there is no aggregated doorbell).
+ */
+ if (dev->shared_resources.enable_mes &&
+ ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
+ >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
+ struct amdgpu_bo_va_mapping *wptr_mapping;
+ struct amdgpu_vm *wptr_vm;
+
+ wptr_vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+ if (err)
+ goto err_wptr_map_gart;
+
+ wptr_mapping = amdgpu_vm_bo_lookup_mapping(
+ wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
+ amdgpu_bo_unreserve(wptr_vm->root.bo);
+ if (!wptr_mapping) {
+ pr_err("Failed to lookup wptr bo\n");
+ err = -EINVAL;
+ goto err_wptr_map_gart;
+ }
+
+ wptr_bo = wptr_mapping->bo_va->base.bo;
+ if (wptr_bo->tbo.base.size > PAGE_SIZE) {
+ pr_err("Requested GART mapping for wptr bo larger than one page\n");
+ err = -EINVAL;
+ goto err_wptr_map_gart;
+ }
+
+ err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
+ if (err) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto err_wptr_map_gart;
+ }
+ }
+
pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
p->pasid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL,
- &doorbell_offset_in_process);
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
+ NULL, NULL, NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -344,6 +401,9 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
return 0;
err_create_queue:
+ if (wptr_bo)
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
+err_wptr_map_gart:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);
@@ -958,6 +1018,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
return false;
}
+static int kfd_ioctl_get_available_memory(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_get_available_memory_args *args = data;
+ struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
+
+ if (!pdd)
+ return -EINVAL;
+ args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
+ kfd_unlock_pdd(pdd);
+ return 0;
+}
+
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -2361,7 +2434,7 @@ static int criu_restore(struct file *filep,
* Set the process to evicted state to avoid running any new queues before all the memory
* mappings are ready.
*/
- ret = kfd_process_evict_queues(p);
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
if (ret)
goto exit_unlock;
@@ -2480,7 +2553,7 @@ static int criu_process_info(struct file *filep,
goto err_unlock;
}
- ret = kfd_process_evict_queues(p);
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
if (ret)
goto err_unlock;
@@ -2648,6 +2721,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
+ kfd_ioctl_get_available_memory, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
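
For reference, a minimal user-space sketch of the new AMDKFD_IOC_AVAILABLE_MEMORY ioctl. This is an illustration only, assuming the matching uapi additions to linux/kfd_ioctl.h from this series; the gpu_id value is a placeholder, real code would take it from the KFD topology.

/* Hedged sketch, not part of the patch: query available VRAM for one GPU. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	int fd = open("/dev/kfd", O_RDWR);
	struct kfd_ioctl_get_available_memory_args args = {
		.gpu_id = 0x1002,	/* placeholder: take from topology */
	};

	if (fd < 0)
		return 1;
	if (ioctl(fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args) == 0)
		printf("available: %llu bytes\n",
		       (unsigned long long)args.available);
	close(fd);
	return 0;
}

The ioctl itself only takes the per-process mutex via the new kfd_lock_pdd_by_id()/kfd_unlock_pdd() helpers, so it is a cheap query.
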
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cbfb32b3d235..a5409531a2fd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
- dev->io_link_count++;
dev->node_props.io_links_count++;
list_add_tail(&props->list, &dev->io_link_props);
break;
@@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
- to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(&props2->list, &to_dev->io_link_props);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 581c3a30fee1..ad5a40a685ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -101,6 +101,8 @@ void kfd_debugfs_init(void)
kfd_debugfs_rls_by_device, &kfd_debugfs_fops);
debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root,
kfd_debugfs_hang_hws_read, &kfd_debugfs_hang_hws_fops);
+ debugfs_create_file("mem_limit", S_IFREG | 0200, debugfs_root,
+ kfd_debugfs_kfd_mem_limits, &kfd_debugfs_fops);
}
void kfd_debugfs_fini(void)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index a08769c5e94b..f5853835f03a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -75,7 +75,6 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
- case IP_VERSION(6, 0, 1):
kfd->device_info.num_sdma_queues_per_engine = 2;
break;
case IP_VERSION(4, 2, 0):/* VEGA20 */
@@ -90,6 +89,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
kfd->device_info.num_sdma_queues_per_engine = 8;
break;
@@ -839,7 +839,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}
-int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
{
struct kfd_process *p;
int r;
@@ -853,7 +853,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
return -ESRCH;
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
- r = kfd_process_evict_queues(p);
+ r = kfd_process_evict_queues(p, trigger);
kfd_unref_process(p);
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e1797657b04c..e83725a28106 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -177,6 +177,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct mes_add_queue_input queue_input;
int r, queue_type;
+ uint64_t wptr_addr_off;
if (dqm->is_hws_hang)
return -EIO;
@@ -197,6 +198,14 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.doorbell_offset = q->properties.doorbell_off;
queue_input.mqd_addr = q->gart_mqd_addr;
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+
+ if (q->wptr_bo) {
+ wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
+ queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
+ }
+
+ queue_input.is_kfd_process = 1;
+
queue_input.paging = false;
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
@@ -811,7 +820,6 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
bool prev_active = false;
- bool add_queue = false;
dqm_lock(dqm);
pdd = kfd_get_process_device_data(q->device, q->process);
@@ -887,7 +895,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
if (!dqm->dev->shared_resources.enable_mes)
retval = map_queues_cpsch(dqm);
- else if (add_queue)
+ else if (q->properties.is_active)
retval = add_queue_mes(dqm, q, &pdd->qpd);
} else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
@@ -1666,14 +1674,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.is_active) {
increment_queue_count(dqm, qpd, q);
- if (!dqm->dev->shared_resources.enable_mes) {
+ if (!dqm->dev->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
- } else {
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ else
retval = add_queue_mes(dqm, q, qpd);
- if (retval)
- goto cleanup_queue;
- }
+ if (retval)
+ goto cleanup_queue;
}
/*
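
The wptr_mc_addr computation above re-applies the sub-page offset of the user write pointer on top of the page-granular GART mapping (the earlier chardev check guarantees the wptr BO is a single page). A standalone toy of the address math, with made-up values:

/* Toy illustration only; all addresses are hypothetical. */
#include <stdio.h>
#include <stdint.h>

#define TOY_PAGE_SHIFT 12

int main(void)
{
	uint64_t write_ptr = 0x7f2a00001040;	/* user VA of the wptr */
	uint64_t bo_va     = 0x7f2a00001000;	/* VA of the 4K wptr BO */
	uint64_t gart_pfn  = 0x200;		/* GART page frame of the BO */
	uint64_t off       = write_ptr - bo_va;	/* sub-page offset */

	printf("wptr_mc_addr = 0x%llx\n",
	       (unsigned long long)((gart_pfn << TOY_PAGE_SHIFT) + off));
	return 0;
}
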
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 4df9c36146ba..3942a56c28bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -377,8 +377,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
return -EINVAL;
}
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
- mem, &kern_addr, &size);
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size);
if (err) {
pr_err("Failed to map event page to kernel\n");
return err;
@@ -387,7 +386,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
if (err) {
pr_err("Failed to set event page\n");
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
return err;
}
return err;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index a9466d154395..34772fe74296 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -146,7 +146,7 @@ static void interrupt_wq(struct work_struct *work)
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
- long start_jiffies = jiffies;
+ unsigned long start_jiffies = jiffies;
if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
dev_err_once(dev->adev->dev, "Ring entry too small\n");
@@ -156,7 +156,7 @@ static void interrupt_wq(struct work_struct *work)
while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
dev->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
- if (jiffies - start_jiffies > HZ) {
+ if (time_is_before_jiffies(start_jiffies + HZ)) {
/* If we spent more than a second processing signals,
* reschedule the worker to avoid soft-lockup warnings
*/
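
Two fixes land here: start_jiffies becomes unsigned long (jiffies arithmetic on a signed long misbehaves near the counter wrap), and the open-coded comparison becomes time_is_before_jiffies(), which is true once its argument is in the past, i.e. time_after(jiffies, a). A userspace model of the signed-difference idiom behind it:

#include <stdio.h>
#include <stdint.h>

/* Model of the kernel's wrap-safe comparison: time_after(a, b) is true
 * when a is after b, even if the counter has wrapped in between. */
static int toy_time_after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

int main(void)
{
	uint32_t start = 0xfffffff0;	/* counter about to wrap */
	uint32_t now   = 0x00000010;	/* 0x20 ticks later, after the wrap */

	/* More than 16 ticks have elapsed, and the idiom still says so. */
	printf("toy_time_after(now, start + 16) = %d\n",
	       toy_time_after(now, start + 16));
	return 0;
}
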
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index e44376c2ecdc..373e5bfd4e91 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -33,6 +33,7 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
+#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
@@ -402,8 +403,9 @@ out:
static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
- uint64_t end)
+ uint64_t end, uint32_t trigger)
{
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
@@ -430,6 +432,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
+ kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, adev->kfd.dev->id, prange->prefetch_loc,
+ prange->preferred_loc, trigger);
+
r = migrate_vma_setup(&migrate);
if (r) {
dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
@@ -458,6 +465,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
+ kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, adev->kfd.dev->id, trigger);
+
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
@@ -479,6 +490,7 @@ out:
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: the process mm structure
+ * @trigger: reason for the migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
@@ -487,7 +499,7 @@ out:
*/
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ struct mm_struct *mm, uint32_t trigger)
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
@@ -524,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
break;
next = min(vma->vm_end, end);
- r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
+ r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
@@ -655,8 +667,10 @@ out_oom:
*/
static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start, uint64_t end)
+ struct vm_area_struct *vma, uint64_t start, uint64_t end,
+ uint32_t trigger)
{
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
unsigned long upages = npages;
unsigned long cpages = 0;
@@ -685,6 +699,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
+ kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ adev->kfd.dev->id, 0, prange->prefetch_loc,
+ prange->preferred_loc, trigger);
+
r = migrate_vma_setup(&migrate);
if (r) {
dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
@@ -715,6 +734,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
+
+ kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ adev->kfd.dev->id, 0, trigger);
+
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
out_free:
@@ -732,13 +756,15 @@ out:
* svm_migrate_vram_to_ram - migrate svm range from device to system
* @prange: range structure
* @mm: process mm, use current->mm if NULL
+ * @trigger: reason for the migration
*
* Context: Process context, caller hold mmap read lock, prange->migrate_mutex
*
* Return:
* 0 - OK, otherwise error code
*/
-int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ uint32_t trigger)
{
struct amdgpu_device *adev;
struct vm_area_struct *vma;
@@ -779,7 +805,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
}
next = min(vma->vm_end, end);
- r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
+ r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger);
if (r < 0) {
pr_debug("failed %ld to migrate prange %p\n", r, prange);
break;
@@ -802,6 +828,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: process mm, use current->mm if NULL
+ * @trigger: reason for the migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
@@ -810,7 +837,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
*/
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ struct mm_struct *mm, uint32_t trigger)
{
int r, retries = 3;
@@ -822,7 +849,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
do {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm, trigger);
if (r)
return r;
} while (prange->actual_loc && --retries);
@@ -830,17 +857,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
if (prange->actual_loc)
return -EDEADLK;
- return svm_migrate_ram_to_vram(prange, best_loc, mm);
+ return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
}
int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ struct mm_struct *mm, uint32_t trigger)
{
if (!prange->actual_loc)
- return svm_migrate_ram_to_vram(prange, best_loc, mm);
+ return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
else
- return svm_migrate_vram_to_vram(prange, best_loc, mm);
+ return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
}
@@ -909,7 +936,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
goto out_unlock_prange;
}
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
if (r)
pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
prange, prange->start, prange->last);
@@ -992,6 +1019,8 @@ int svm_migrate_init(struct amdgpu_device *adev)
amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
+ svm_range_set_max_pages(adev);
+
pr_info("HMM registered %ldMB device memory\n", size >> 20);
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 2f5b3394c9ed..b3f0754b32fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR {
};
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm);
-int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
+ struct mm_struct *mm, uint32_t trigger);
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ uint32_t trigger);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 49a283be6b57..623ccd227b7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -100,7 +100,9 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
{
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
- int i, se, sh, cu, cu_bitmap_sh_mul;
+ bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
+ uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
+ int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
@@ -167,13 +169,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
se_mask[i] = 0;
i = 0;
- for (cu = 0; cu < 16; cu++) {
+ for (cu = 0; cu < 16; cu += inc) {
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
for (se = 0; se < cu_info.num_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
- if (cu_mask[i / 32] & (1 << (i % 32)))
- se_mask[se] |= 1 << (cu + sh * 16);
- i++;
+ if (cu_mask[i / 32] & (en_mask << (i % 32)))
+ se_mask[se] |= en_mask << (cu + sh * 16);
+ i += inc;
if (i == cu_mask_count)
return;
}
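
On GFX10 and newer the compute units are grouped into WGPs, so the mask above is consumed two bits at a time with en_mask = 0x3, mirroring the KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0) test. A standalone toy of the pairwise indexing (not the real SE/SH distribution):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t cu_mask = 0x0000000f;	/* user enables CUs 0-3 = WGPs 0-1 */
	uint32_t en_mask = 0x3;		/* WGP mode: a pair of CUs per step */
	uint32_t se_mask = 0;
	int i;

	for (i = 0; i < 32; i += 2)
		if (cu_mask & (en_mask << i))
			se_mask |= en_mask << i;	/* enable the whole pair */

	printf("se_mask = 0x%08x\n", se_mask);	/* prints 0x0000000f */
	return 0;
}
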
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 4e0387f591be..b8e14c2cc295 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -377,6 +377,8 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
m->sdmax_rlcx_doorbell_offset =
q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 2585d6e61d42..d03a3b9c9c5d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -571,6 +571,8 @@ struct queue {
void *gang_ctx_bo;
uint64_t gang_ctx_gpu_addr;
void *gang_ctx_cpu_ptr;
+
+ struct amdgpu_bo *wptr_bo;
};
enum KFD_MQD_TYPE {
@@ -945,7 +947,7 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
}
void kfd_unref_process(struct kfd_process *p);
-int kfd_process_evict_queues(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
@@ -1206,6 +1208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index e3d64ec8c353..6c83a519b3a1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -43,6 +43,7 @@ struct mm_struct;
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_smi_events.h"
/*
* List of struct kfd_process (field kfd_process).
@@ -693,7 +694,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
struct kfd_dev *dev = pdd->dev;
if (kptr) {
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
kptr = NULL;
}
@@ -733,7 +734,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
}
if (kptr) {
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev,
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(
(struct kgd_mem *)*mem, kptr, NULL);
if (err) {
pr_debug("Map GTT BO to kernel failed\n");
@@ -999,7 +1000,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
if (!mem)
goto out;
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
out:
mutex_unlock(&p->mutex);
@@ -1114,6 +1115,15 @@ static void kfd_process_wq_release(struct work_struct *work)
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
+ kfd_process_dequeue_from_all_devices(p);
+ pqm_uninit(&p->pqm);
+
+ /* Signal the eviction fence after user mode queues are
+ * destroyed. This allows any BOs to be freed without
+ * triggering pointless evictions or waiting for fences.
+ */
+ dma_fence_signal(p->ef);
+
kfd_process_remove_sysfs(p);
kfd_iommu_unbind_process(p);
@@ -1178,20 +1188,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- mutex_lock(&p->mutex);
-
- kfd_process_dequeue_from_all_devices(p);
- pqm_uninit(&p->pqm);
-
/* Indicate to other users that MM is no longer valid */
p->mm = NULL;
- /* Signal the eviction fence after user mode queues are
- * destroyed. This allows any BOs to be freed without
- * triggering pointless evictions or waiting for fences.
- */
- dma_fence_signal(p->ef);
-
- mutex_unlock(&p->mutex);
mmu_notifier_put(&p->mmu_notifier);
}
@@ -1404,6 +1402,11 @@ static struct kfd_process *create_process(const struct task_struct *thread)
hash_add_rcu(kfd_processes_table, &process->kfd_processes,
(uintptr_t)process->mm);
+ /* Take a temporary reference so that free_notifier does not start
+ * kfd_process_wq_release if mmu_notifier_get fails because of a
+ * pending signal.
+ */
+ kref_get(&process->ref);
+
/* MMU notifier registration must be the last call that can fail
* because after this point we cannot unwind the process creation.
* After this point, mmu_notifier_put will trigger the cleanup by
@@ -1416,6 +1419,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
}
BUG_ON(mn != &process->mmu_notifier);
+ kfd_unref_process(process);
get_task_struct(process->lead_thread);
return process;
@@ -1736,7 +1740,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
-int kfd_process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
{
int r = 0;
int i;
@@ -1745,6 +1749,9 @@ int kfd_process_evict_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+ trigger);
+
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
&pdd->qpd);
/* evict return -EIO if HWS is hang or asic is resetting, in this case
@@ -1769,6 +1776,9 @@ fail:
if (n_evicted == 0)
break;
+
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
pr_err("Failed to restore queues\n");
@@ -1788,6 +1798,8 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
@@ -1849,7 +1861,7 @@ static void evict_process_worker(struct work_struct *work)
flush_delayed_work(&p->restore_work);
pr_debug("Started evicting pasid 0x%x\n", p->pasid);
- ret = kfd_process_evict_queues(p);
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
if (!ret) {
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -1916,7 +1928,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- if (kfd_process_evict_queues(p))
+ if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
pr_err("Failed to suspend process 0x%x\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
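
create_process() now pins the process across mmu_notifier_get(): if registration fails on a pending signal, free_notifier drops a reference asynchronously, and without the extra kref_get() that could be the last one. A userspace model of the temporary-pin idiom, with all names hypothetical:

#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

struct obj { atomic_int ref; };

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->ref, 1) == 1) {	/* dropped last ref */
		puts("released");
		free(o);
	}
}

/* Hypothetical registration that, on failure, asynchronously drops a
 * reference - the role free_notifier plays in the patch above. */
static int risky_register(struct obj *o, int fail)
{
	if (fail) {
		obj_put(o);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (!o)
		return 1;
	atomic_init(&o->ref, 1);

	atomic_fetch_add(&o->ref, 1);		/* temporary pin (kref_get) */
	if (risky_register(o, 1)) {
		obj_put(o);		/* drop pin: o is released here, and
					 * crucially was never freed earlier */
		return 1;
	}
	obj_put(o);				/* success: drop the pin */
	obj_put(o);				/* normal final teardown */
	return 0;
}
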
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index dc00484ff484..6e3e7f54381b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -180,7 +180,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, unsigned int qid)
+ struct file *f, struct amdgpu_bo *wptr_bo,
+ unsigned int qid)
{
int retval;
@@ -210,6 +211,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+ (*q)->wptr_bo = wptr_bo;
}
pr_debug("PQM After init queue");
@@ -226,6 +228,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -288,7 +291,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -309,7 +312,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -436,9 +439,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
pdd->qpd.num_gws = 0;
}
- if (dev->shared_resources.enable_mes)
+ if (dev->shared_resources.enable_mes) {
amdgpu_amdkfd_free_gtt_mem(dev->adev,
pqn->q->gang_ctx_bo);
+ if (pqn->q->wptr_bo)
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+
+ }
uninit_queue(pqn->q);
}
@@ -491,6 +498,21 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return -EFAULT;
}
+ /* ASICs that have WGPs must enforce pairwise enabled mask checks. */
+ if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr &&
+ KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
+ int i;
+
+ for (i = 0; i < minfo->cu_mask.count; i += 2) {
+ uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
+
+ if (cu_pair && cu_pair != 0x3) {
+ pr_debug("CUs must be adjacent pairwise enabled.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q, minfo);
if (retval != 0)
@@ -844,7 +866,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
+ ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index f2e1d506ba21..0472b56de245 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -38,6 +38,9 @@ struct kfd_smi_client {
uint64_t events;
struct kfd_dev *dev;
spinlock_t lock;
+ struct rcu_head rcu;
+ pid_t pid;
+ bool suser;
};
#define MAX_KFIFO_SIZE 1024
@@ -135,6 +138,14 @@ static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
return sizeof(events);
}
+static void kfd_smi_ev_client_free(struct rcu_head *p)
+{
+ struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu);
+
+ kfifo_free(&ev->fifo);
+ kfree(ev);
+}
+
static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
struct kfd_smi_client *client = filep->private_data;
@@ -144,23 +155,31 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
list_del_rcu(&client->list);
spin_unlock(&dev->smi_lock);
- synchronize_rcu();
- kfifo_free(&client->fifo);
- kfree(client);
-
+ call_rcu(&client->rcu, kfd_smi_ev_client_free);
return 0;
}
-static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
- char *event_msg, int len)
+static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
+ unsigned int event)
+{
+ uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
+ uint64_t events = READ_ONCE(client->events);
+
+ if (pid && client->pid != pid && !(client->suser && (events & all)))
+ return false;
+
+ return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
+}
+
+static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev,
+ unsigned int smi_event, char *event_msg, int len)
{
struct kfd_smi_client *client;
rcu_read_lock();
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
- if (!(READ_ONCE(client->events) &
- KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
+ if (!kfd_smi_ev_enabled(pid, client, smi_event))
continue;
spin_lock(&client->lock);
if (kfifo_avail(&client->fifo) >= len) {
@@ -176,9 +195,9 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
rcu_read_unlock();
}
-__printf(3, 4)
-static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
- char *fmt, ...)
+__printf(4, 5)
+static void kfd_smi_event_add(pid_t pid, struct kfd_dev *dev,
+ unsigned int event, char *fmt, ...)
{
char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
int len;
@@ -193,7 +212,7 @@ static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
va_end(args);
- add_event_to_kfifo(dev, event, fifo_in, len);
+ add_event_to_kfifo(pid, dev, event, fifo_in, len);
}
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
@@ -206,13 +225,13 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
event = KFD_SMI_EVENT_GPU_PRE_RESET;
++(dev->reset_seq_num);
}
- kfd_smi_event_add(dev, event, "%x\n", dev->reset_seq_num);
+ kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask)
{
- kfd_smi_event_add(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
throttle_bitmask,
amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
}
@@ -227,10 +246,93 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
if (!task_info.pid)
return;
- kfd_smi_event_add(dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
task_info.pid, task_info.task_name);
}
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool write_fault,
+ ktime_t ts)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START,
+ "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
+ address, dev->id, write_fault ? 'W' : 'R');
+}
+
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool migration)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END,
+ "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
+ pid, address, dev->id, migration ? 'M' : 'U');
+}
+
+void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to,
+ uint32_t prefetch_loc, uint32_t preferred_loc,
+ uint32_t trigger)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START,
+ "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
+ ktime_get_boottime_ns(), pid, start, end - start,
+ from, to, prefetch_loc, preferred_loc, trigger);
+}
+
+void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to, uint32_t trigger)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END,
+ "%lld -%d @%lx(%lx) %x->%x %d\n",
+ ktime_get_boottime_ns(), pid, start, end - start,
+ from, to, trigger);
+}
+
+void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
+ uint32_t trigger)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_EVICTION,
+ "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
+ dev->id, trigger);
+}
+
+void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_RESTORE,
+ "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
+ dev->id);
+}
+
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
+{
+ struct kfd_process *p;
+ int i;
+
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
+ KFD_SMI_EVENT_QUEUE_RESTORE,
+ "%lld -%d %x %c\n", ktime_get_boottime_ns(),
+ p->lead_thread->pid, pdd->dev->id, 'R');
+ }
+ kfd_unref_process(p);
+}
+
+void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, unsigned long last,
+ uint32_t trigger)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+ "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
+ pid, address, last - address + 1, dev->id, trigger);
+}
+
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
{
struct kfd_smi_client *client;
@@ -251,6 +353,8 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
spin_lock_init(&client->lock);
client->events = 0;
client->dev = dev;
+ client->pid = current->tgid;
+ client->suser = capable(CAP_SYS_ADMIN);
spin_lock(&dev->smi_lock);
list_add_rcu(&client->list, &dev->smi_clients);
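
A hedged consumer sketch for the per-process SMI events, assuming the existing AMDKFD_IOC_SMI_EVENTS open ioctl plus the new event indices added by the uapi half of this series (KFD_SMI_EVENT_MIGRATE_START/_END); the gpuid is a placeholder:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	int kfd = open("/dev/kfd", O_RDWR);
	struct kfd_ioctl_smi_events_args args = {
		.gpuid = 0x1002,	/* placeholder: take from topology */
	};
	uint64_t mask;
	char buf[512];
	ssize_t n;

	if (kfd < 0 || ioctl(kfd, AMDKFD_IOC_SMI_EVENTS, &args) < 0)
		return 1;

	mask = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_START) |
	       KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END);
	write(args.anon_fd, &mask, sizeof(mask));	/* subscribe: u64 mask */

	while ((n = read(args.anon_fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';		/* "event_id payload" text lines */
		fputs(buf, stdout);
	}
	return 0;
}

Per kfd_smi_ev_enabled() above, a client only sees other processes' events when it was opened with CAP_SYS_ADMIN and subscribed to KFD_SMI_EVENT_ALL_PROCESS.
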
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index dfe101c21166..76fe4e0ec2d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -29,5 +29,24 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask);
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
-
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool write_fault,
+ ktime_t ts);
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool migration);
+void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to,
+ uint32_t prefetch_loc, uint32_t preferred_loc,
+ uint32_t trigger);
+void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to, uint32_t trigger);
+void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
+ uint32_t trigger);
+void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid);
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
+void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, unsigned long last,
+ uint32_t trigger);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7b332246eda3..a67ba8879a56 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -32,6 +32,7 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
+#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
@@ -43,7 +44,13 @@
/* Long enough to ensure no retry fault comes after svm range is restored and
* page table is updated.
*/
-#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
+
+/* Giant svm ranges are split into smaller ranges based on this value: the
+ * minimum over all dGPUs/APUs of 1/32 of VRAM size, clamped between 2MB and
+ * 1GB, and rounded down to a power of two.
+ */
+static uint64_t max_svm_range_pages;
struct criu_svm_metadata {
struct list_head list;
@@ -259,13 +266,22 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
}
}
-static void svm_range_free(struct svm_range *prange)
+static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
{
+ uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
prange->start, prange->last);
svm_range_vram_node_free(prange);
svm_range_free_dma_mappings(prange);
+
+ if (update_mem_usage && !p->xnack_enabled) {
+ pr_debug("unreserve mem limit: %lld\n", size);
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ }
mutex_destroy(&prange->lock);
mutex_destroy(&prange->migrate_mutex);
kfree(prange);
@@ -284,7 +300,7 @@ svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
static struct
svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
- uint64_t last)
+ uint64_t last, bool update_mem_usage)
{
uint64_t size = last - start + 1;
struct svm_range *prange;
@@ -293,6 +309,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
prange = kzalloc(sizeof(*prange), GFP_KERNEL);
if (!prange)
return NULL;
+
+ p = container_of(svms, struct kfd_process, svms);
+ if (!p->xnack_enabled && update_mem_usage &&
+ amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
+ pr_info("SVM mapping failed, exceeds resident system memory limit\n");
+ kfree(prange);
+ return NULL;
+ }
prange->npages = size;
prange->svms = svms;
prange->start = start;
@@ -307,7 +332,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
mutex_init(&prange->migrate_mutex);
mutex_init(&prange->lock);
- p = container_of(svms, struct kfd_process, svms);
if (p->xnack_enabled)
bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
MAX_GPU_INSTANCE);
@@ -1000,9 +1024,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
svms = prange->svms;
if (old_start == start)
- *new = svm_range_new(svms, last + 1, old_last);
+ *new = svm_range_new(svms, last + 1, old_last, false);
else
- *new = svm_range_new(svms, old_start, start - 1);
+ *new = svm_range_new(svms, old_start, start - 1, false);
if (!*new)
return -ENOMEM;
@@ -1010,7 +1034,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
if (r) {
pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
r, old_start, old_last, start, last);
- svm_range_free(*new);
+ svm_range_free(*new, false);
*new = NULL;
}
@@ -1199,7 +1223,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
static int
svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
- unsigned long last)
+ unsigned long last, uint32_t trigger)
{
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_process_device *pdd;
@@ -1231,6 +1255,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
return -EINVAL;
}
+ kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
+ start, last, trigger);
+
r = svm_range_unmap_from_gpu(pdd->dev->adev,
drm_priv_to_vm(pdd->drm_priv),
start, last, &fence);
@@ -1617,7 +1644,7 @@ unreserve_out:
svm_range_unreserve_bos(&ctx);
if (!r)
- prange->validate_timestamp = ktime_to_us(ktime_get());
+ prange->validate_timestamp = ktime_get_boottime();
return r;
}
@@ -1729,14 +1756,16 @@ out_reschedule:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
mutex_unlock(&process_info->lock);
- mmput(mm);
/* If validation failed, reschedule another attempt */
if (evicted_ranges) {
pr_debug("reschedule to restore svm range\n");
schedule_delayed_work(&svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+ kfd_smi_event_queue_restore_rescheduled(mm);
}
+ mmput(mm);
}
/**
@@ -1756,7 +1785,8 @@ out_reschedule:
*/
static int
svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
- unsigned long start, unsigned long last)
+ unsigned long start, unsigned long last,
+ enum mmu_notifier_event event)
{
struct svm_range_list *svms = prange->svms;
struct svm_range *pchild;
@@ -1768,10 +1798,15 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
svms, prange->start, prange->last, start, last);
- if (!p->xnack_enabled) {
+ if (!p->xnack_enabled ||
+ (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
int evicted_ranges;
+ bool mapped = prange->mapped_to_gpu;
list_for_each_entry(pchild, &prange->child_list, child_list) {
+ if (!pchild->mapped_to_gpu)
+ continue;
+ mapped = true;
mutex_lock_nested(&pchild->lock, 1);
if (pchild->start <= last && pchild->last >= start) {
pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
@@ -1781,6 +1816,9 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
mutex_unlock(&pchild->lock);
}
+ if (!mapped)
+ return r;
+
if (prange->start <= last && prange->last >= start)
atomic_inc(&prange->invalid);
@@ -1792,7 +1830,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
prange->svms, prange->start, prange->last);
/* First eviction, stop the queues */
- r = kgd2kfd_quiesce_mm(mm);
+ r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
if (r)
pr_debug("failed to quiesce KFD\n");
@@ -1801,6 +1839,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
unsigned long s, l;
+ uint32_t trigger;
+
+ if (event == MMU_NOTIFY_MIGRATE)
+ trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
+ else
+ trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
prange->svms, start, last);
@@ -1809,13 +1853,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
- svm_range_unmap_from_gpus(pchild, s, l);
+ svm_range_unmap_from_gpus(pchild, s, l, trigger);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
- svm_range_unmap_from_gpus(prange, s, l);
+ svm_range_unmap_from_gpus(prange, s, l, trigger);
}
return r;
@@ -1825,7 +1869,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
{
struct svm_range *new;
- new = svm_range_new(old->svms, old->start, old->last);
+ new = svm_range_new(old->svms, old->start, old->last, false);
if (!new)
return NULL;
@@ -1849,6 +1893,46 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
return new;
}
+void svm_range_set_max_pages(struct amdgpu_device *adev)
+{
+ uint64_t max_pages;
+ uint64_t pages, _pages;
+
+ /* 1/32 VRAM size in pages */
+ pages = adev->gmc.real_vram_size >> 17;
+ pages = clamp(pages, 1ULL << 9, 1ULL << 18);
+ pages = rounddown_pow_of_two(pages);
+ do {
+ max_pages = READ_ONCE(max_svm_range_pages);
+ _pages = min_not_zero(max_pages, pages);
+ } while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages);
+}
+
+static int
+svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
+ uint64_t max_pages, struct list_head *insert_list,
+ struct list_head *update_list)
+{
+ struct svm_range *prange;
+ uint64_t l;
+
+ pr_debug("max_svm_range_pages 0x%llx adding [0x%llx 0x%llx]\n",
+ max_pages, start, last);
+
+ while (last >= start) {
+ l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1);
+
+ prange = svm_range_new(svms, start, l, true);
+ if (!prange)
+ return -ENOMEM;
+ list_add(&prange->list, insert_list);
+ list_add(&prange->update_list, update_list);
+
+ start = l + 1;
+ }
+ return 0;
+}
+
/**
* svm_range_add - add svm range and handle overlap
* @p: the range add to this process svms
@@ -1889,6 +1973,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
struct interval_tree_node *node;
struct svm_range *prange;
struct svm_range *tmp;
+ struct list_head new_list;
int r = 0;
pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
@@ -1896,6 +1981,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
INIT_LIST_HEAD(update_list);
INIT_LIST_HEAD(insert_list);
INIT_LIST_HEAD(remove_list);
+ INIT_LIST_HEAD(&new_list);
node = interval_tree_iter_first(&svms->objects, start, last);
while (node) {
@@ -1951,14 +2037,11 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
/* insert a new node if needed */
if (node->start > start) {
- prange = svm_range_new(svms, start, node->start - 1);
- if (!prange) {
- r = -ENOMEM;
+ r = svm_range_split_new(svms, start, node->start - 1,
+ READ_ONCE(max_svm_range_pages),
+ &new_list, update_list);
+ if (r)
goto out;
- }
-
- list_add(&prange->list, insert_list);
- list_add(&prange->update_list, update_list);
}
node = next;
@@ -1966,20 +2049,20 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
}
/* add a final range at the end if needed */
- if (start <= last) {
- prange = svm_range_new(svms, start, last);
- if (!prange) {
- r = -ENOMEM;
- goto out;
- }
- list_add(&prange->list, insert_list);
- list_add(&prange->update_list, update_list);
- }
+ if (start <= last)
+ r = svm_range_split_new(svms, start, last,
+ READ_ONCE(max_svm_range_pages),
+ &new_list, update_list);
out:
- if (r)
+ if (r) {
list_for_each_entry_safe(prange, tmp, insert_list, list)
- svm_range_free(prange);
+ svm_range_free(prange, false);
+ list_for_each_entry_safe(prange, tmp, &new_list, list)
+ svm_range_free(prange, true);
+ } else {
+ list_splice(&new_list, insert_list);
+ }
return r;
}
@@ -2026,7 +2109,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
svms, prange, prange->start, prange->last);
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, true);
break;
case SVM_OP_UPDATE_RANGE_NOTIFIER:
pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -2229,6 +2312,7 @@ static void
svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
unsigned long start, unsigned long last)
{
+ uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU;
struct svm_range_list *svms;
struct svm_range *pchild;
struct kfd_process *p;
@@ -2256,14 +2340,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
- svm_range_unmap_from_gpus(pchild, s, l);
+ svm_range_unmap_from_gpus(pchild, s, l, trigger);
svm_range_unmap_split(mm, prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
- svm_range_unmap_from_gpus(prange, s, l);
+ svm_range_unmap_from_gpus(prange, s, l, trigger);
svm_range_unmap_split(mm, prange, prange, start, last);
if (unmap_parent)
@@ -2330,7 +2414,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
svm_range_unmap_from_cpu(mni->mm, prange, start, last);
break;
default:
- svm_range_evict(prange, mni->mm, start, last);
+ svm_range_evict(prange, mni->mm, start, last, range->event);
break;
}
@@ -2588,14 +2672,14 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
last = addr;
}
- prange = svm_range_new(&p->svms, start, last);
+ prange = svm_range_new(&p->svms, start, last, true);
if (!prange) {
pr_debug("Failed to create prange in address [0x%llx]\n", addr);
return NULL;
}
if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {
pr_debug("failed to get gpuid from kgd\n");
- svm_range_free(prange);
+ svm_range_free(prange, true);
return NULL;
}
@@ -2694,11 +2778,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
- uint64_t timestamp;
+ ktime_t timestamp = ktime_get_boottime();
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
struct vm_area_struct *vma;
+ bool migration = false;
int r = 0;
if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2775,9 +2860,9 @@ retry_write_locked:
goto out_unlock_range;
}
- timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
/* skip duplicate vm fault on different pages of same range */
- if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+ if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
+ AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
svms, prange->start, prange->last);
r = 0;
@@ -2813,9 +2898,14 @@ retry_write_locked:
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
+ kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr,
+ write_fault, timestamp);
+
if (prange->actual_loc != best_loc) {
+ migration = true;
if (best_loc) {
- r = svm_migrate_to_vram(prange, best_loc, mm);
+ r = svm_migrate_to_vram(prange, best_loc, mm,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr);
@@ -2823,12 +2913,14 @@ retry_write_locked:
* VRAM failed
*/
if (prange->actual_loc)
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
else
r = 0;
}
} else {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
}
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
@@ -2842,6 +2934,9 @@ retry_write_locked:
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, prange->start, prange->last);
+ kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr,
+ migration);
+
out_unlock_range:
mutex_unlock(&prange->migrate_mutex);
out_unlock_svms:
@@ -2884,7 +2979,7 @@ void svm_range_list_fini(struct kfd_process *p)
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, true);
}
mutex_destroy(&p->svms.lock);
@@ -3148,12 +3243,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
return 0;
if (!best_loc) {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
return r;
}
- r = svm_migrate_to_vram(prange, best_loc, mm);
+ r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
return r;
@@ -3211,7 +3306,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
r = svm_migrate_vram_to_ram(prange,
- svm_bo->eviction_fence->mm);
+ svm_bo->eviction_fence->mm,
+ KFD_MIGRATE_TRIGGER_TTM_EVICTION);
} while (!r && prange->actual_loc && --retries);
if (!r && prange->actual_loc)
@@ -3299,7 +3395,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
prange->last);
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, false);
}
mmap_write_downgrade(mm);
@@ -3317,7 +3413,9 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
if (r)
goto out_unlock_range;
- if (migrated && !p->xnack_enabled) {
+ if (migrated && (!p->xnack_enabled ||
+ (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
+ prange->mapped_to_gpu) {
pr_debug("restore_work will update mappings of GPUs\n");
mutex_unlock(&prange->migrate_mutex);
continue;
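
The widened condition above defers the remap to restore_work only when the mapping must stay current: a migration happened, the range was actually mapped, and either XNACK is disabled or the range is flagged always-mapped. A hypothetical predicate restating it:

static bool svm_range_defer_remap_to_restore_work(struct kfd_process *p,
						  struct svm_range *prange,
						  bool migrated)
{
	return migrated &&
	       (!p->xnack_enabled ||
		(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
	       prange->mapped_to_gpu;
}
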
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 2d54147b4dda..9156b041ef17 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -125,7 +125,7 @@ struct svm_range {
uint32_t actual_loc;
uint8_t granularity;
atomic_t invalid;
- uint64_t validate_timestamp;
+ ktime_t validate_timestamp;
struct mmu_interval_notifier notifier;
struct svm_work_list_item work_item;
struct list_head deferred_list;
@@ -204,6 +204,9 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
+
+void svm_range_set_max_pages(struct amdgpu_device *adev);
+
#else
struct kfd_process;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 8d50d207cf66..25990bec600d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -40,6 +40,7 @@
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
+#include "amdgpu.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
@@ -148,6 +149,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
+ struct kfd_iolink_properties *p2plink;
struct kfd_perf_properties *perf;
list_del(&dev->list);
@@ -173,6 +175,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
kfree(iolink);
}
+ while (dev->p2p_link_props.next != &dev->p2p_link_props) {
+ p2plink = container_of(dev->p2p_link_props.next,
+ struct kfd_iolink_properties, list);
+ list_del(&p2plink->list);
+ kfree(p2plink);
+ }
+
while (dev->perf_props.next != &dev->perf_props) {
perf = container_of(dev->perf_props.next,
struct kfd_perf_properties, list);
@@ -214,6 +223,7 @@ struct kfd_topology_device *kfd_create_topology_device(
INIT_LIST_HEAD(&dev->mem_props);
INIT_LIST_HEAD(&dev->cache_props);
INIT_LIST_HEAD(&dev->io_link_props);
+ INIT_LIST_HEAD(&dev->p2p_link_props);
INIT_LIST_HEAD(&dev->perf_props);
list_add_tail(&dev->list, device_list);
@@ -465,6 +475,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.caches_count);
sysfs_show_32bit_prop(buffer, offs, "io_links_count",
dev->node_props.io_links_count);
+ sysfs_show_32bit_prop(buffer, offs, "p2p_links_count",
+ dev->node_props.p2p_links_count);
sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
dev->node_props.cpu_core_id_base);
sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
@@ -568,6 +580,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
{
+ struct kfd_iolink_properties *p2plink;
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
@@ -585,6 +598,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
dev->kobj_iolink = NULL;
}
+ if (dev->kobj_p2plink) {
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+ if (p2plink->kobj) {
+ kfd_remove_sysfs_file(p2plink->kobj,
+ &p2plink->attr);
+ p2plink->kobj = NULL;
+ }
+ kobject_del(dev->kobj_p2plink);
+ kobject_put(dev->kobj_p2plink);
+ dev->kobj_p2plink = NULL;
+ }
+
if (dev->kobj_cache) {
list_for_each_entry(cache, &dev->cache_props, list)
if (cache->kobj) {
@@ -631,6 +656,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
uint32_t id)
{
+ struct kfd_iolink_properties *p2plink;
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
@@ -668,6 +694,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
if (!dev->kobj_iolink)
return -ENOMEM;
+ dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node);
+ if (!dev->kobj_p2plink)
+ return -ENOMEM;
+
dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
if (!dev->kobj_perf)
return -ENOMEM;
@@ -757,6 +787,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
i++;
}
+ i = 0;
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
+ p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (!p2plink->kobj)
+ return -ENOMEM;
+ ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+ dev->kobj_p2plink, "%d", i);
+ if (ret < 0) {
+ kobject_put(p2plink->kobj);
+ return ret;
+ }
+
+ p2plink->attr.name = "properties";
+ p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(&p2plink->attr);
+ ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+ if (ret < 0)
+ return ret;
+ i++;
+ }
+
/* All hardware blocks have the same number of attributes. */
num_attrs = ARRAY_SIZE(perf_attr_iommu);
list_for_each_entry(perf, &dev->perf_props, list) {
@@ -1145,6 +1196,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
+ struct kfd_iolink_properties *p2plink;
down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list) {
@@ -1165,6 +1217,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
cache->gpu = dev->gpu;
list_for_each_entry(iolink, &dev->io_link_props, list)
iolink->gpu = dev->gpu;
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+ p2plink->gpu = dev->gpu;
break;
}
}
@@ -1287,6 +1341,253 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
}
}
+
+ /* Indirect (p2p) links were created as well, so apply the same flag settings to them */
+ list_for_each_entry(link, &dev->p2p_link_props, list) {
+ link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+ kfd_set_iolink_no_atomics(dev, NULL, link);
+ peer_dev = kfd_topology_device_by_proximity_domain(
+ link->node_to);
+
+ if (!peer_dev)
+ continue;
+
+ list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
+ list) {
+ if (inbound_link->node_to != link->node_from)
+ continue;
+
+ inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+ kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
+ kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+ }
+ }
+}
+
+static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
+ struct kfd_iolink_properties *p2plink)
+{
+ int ret;
+
+ p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (!p2plink->kobj)
+ return -ENOMEM;
+
+ ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+ dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
+ if (ret < 0) {
+ kobject_put(p2plink->kobj);
+ return ret;
+ }
+
+ p2plink->attr.name = "properties";
+ p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(&p2plink->attr);
+ ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
+{
+ struct kfd_iolink_properties *props = NULL, *props2 = NULL;
+ struct kfd_iolink_properties *gpu_link, *cpu_link, *tmp_link;
+ struct kfd_topology_device *cpu_dev;
+ int ret = 0;
+ int i, num_cpu;
+
+ num_cpu = 0;
+ list_for_each_entry(cpu_dev, &topology_device_list, list) {
+ if (cpu_dev->gpu)
+ break;
+ num_cpu++;
+ }
+
+ gpu_link = list_first_entry_or_null(&kdev->io_link_props,
+ struct kfd_iolink_properties, list);
+ if (!gpu_link)
+ return -ENOMEM;
+
+ for (i = 0; i < num_cpu; i++) {
+ /* CPU <--> GPU */
+ if (gpu_link->node_to == i)
+ continue;
+
+ /* find the CPU <--> CPU link that reaches the same CPU the GPU attaches to */
+ cpu_link = NULL;
+ cpu_dev = kfd_topology_device_by_proximity_domain(i);
+ if (cpu_dev) {
+ list_for_each_entry(tmp_link,
+ &cpu_dev->io_link_props, list) {
+ if (tmp_link->node_to == gpu_link->node_to) {
+ cpu_link = tmp_link;
+ break;
+ }
+ }
+ }
+
+ if (!cpu_link)
+ return -ENOMEM;
+
+ /* CPU <--> CPU <--> GPU, GPU node */
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
+ props->weight = gpu_link->weight + cpu_link->weight;
+ props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
+ props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
+ props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
+ props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
+
+ props->node_from = gpu_node;
+ props->node_to = i;
+ kdev->node_props.p2p_links_count++;
+ list_add_tail(&props->list, &kdev->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(kdev, props);
+ if (ret < 0)
+ return ret;
+
+ /* For small BAR systems, skip the CPU --> GPU indirect link */
+ if (kfd_dev_is_large_bar(kdev->gpu)) {
+ /* CPU <--> CPU <--> GPU, CPU node */
+ props2 = kfd_alloc_struct(props2);
+ if (!props2)
+ return -ENOMEM;
+
+ memcpy(props2, props, sizeof(struct kfd_iolink_properties));
+ props2->node_from = i;
+ props2->node_to = gpu_node;
+ props2->kobj = NULL;
+ cpu_dev->node_props.p2p_links_count++;
+ list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(cpu_dev, props2);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return ret;
+}
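+
The property math above is the same for every composed hop: weights and latencies accumulate, while bandwidth is capped by the narrower leg. A hypothetical helper expressing the rule once (not part of the patch):

static void kfd_compose_iolink_props(struct kfd_iolink_properties *out,
				     const struct kfd_iolink_properties *a,
				     const struct kfd_iolink_properties *b)
{
	out->weight = a->weight + b->weight;
	out->min_latency = a->min_latency + b->min_latency;
	out->max_latency = a->max_latency + b->max_latency;
	/* a chain is only as wide as its narrowest hop */
	out->min_bandwidth = min(a->min_bandwidth, b->min_bandwidth);
	out->max_bandwidth = min(a->max_bandwidth, b->max_bandwidth);
}
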
+
+#if defined(CONFIG_HSA_AMD_P2P)
+static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
+ struct kfd_topology_device *peer, int from, int to)
+{
+ struct kfd_iolink_properties *props = NULL;
+ struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
+ struct kfd_topology_device *cpu_dev;
+ int ret = 0;
+
+ if (!amdgpu_device_is_peer_accessible(
+ kdev->gpu->adev,
+ peer->gpu->adev))
+ return ret;
+
+ iolink1 = list_first_entry_or_null(&kdev->io_link_props,
+ struct kfd_iolink_properties, list);
+ if (!iolink1)
+ return -ENOMEM;
+
+ iolink2 = list_first_entry_or_null(&peer->io_link_props,
+ struct kfd_iolink_properties, list);
+ if (!iolink2)
+ return -ENOMEM;
+
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
+
+ props->weight = iolink1->weight + iolink2->weight;
+ props->min_latency = iolink1->min_latency + iolink2->min_latency;
+ props->max_latency = iolink1->max_latency + iolink2->max_latency;
+ props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
+ props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth);
+
+ if (iolink1->node_to != iolink2->node_to) {
+ /* CPU->CPU link */
+ cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
+ if (cpu_dev) {
+ list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
+ if (iolink3->node_to == iolink2->node_to)
+ break;
+
+ props->weight += iolink3->weight;
+ props->min_latency += iolink3->min_latency;
+ props->max_latency += iolink3->max_latency;
+ props->min_bandwidth = min(props->min_bandwidth,
+ iolink3->min_bandwidth);
+ props->max_bandwidth = min(props->max_bandwidth,
+ iolink3->max_bandwidth);
+ } else {
+ WARN(1, "CPU node not found");
+ }
+ }
+
+ props->node_from = from;
+ props->node_to = to;
+ peer->node_props.p2p_links_count++;
+ list_add_tail(&props->list, &peer->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(peer, props);
+
+ return ret;
+}
+#endif
+
+static int kfd_dev_create_p2p_links(void)
+{
+ struct kfd_topology_device *dev;
+ struct kfd_topology_device *new_dev;
+#if defined(CONFIG_HSA_AMD_P2P)
+ uint32_t i;
+#endif
+ uint32_t k;
+ int ret = 0;
+
+ k = 0;
+ list_for_each_entry(dev, &topology_device_list, list)
+ k++;
+ if (k < 2)
+ return 0;
+
+ new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
+ if (WARN_ON(!new_dev->gpu))
+ return 0;
+
+ k--;
+
+ /* create indirect links */
+ ret = kfd_create_indirect_link_prop(new_dev, k);
+ if (ret < 0)
+ goto out;
+
+ /* create p2p links */
+#if defined(CONFIG_HSA_AMD_P2P)
+ i = 0;
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (dev == new_dev)
+ break;
+ if (!dev->gpu || !dev->gpu->adev ||
+ (dev->gpu->hive_id &&
+ dev->gpu->hive_id == new_dev->gpu->hive_id))
+ goto next;
+
+ /* check whether the nodes are peer-accessible, in one direction or both */
+ ret = kfd_add_peer_prop(new_dev, dev, i, k);
+ if (ret < 0)
+ goto out;
+
+ ret = kfd_add_peer_prop(dev, new_dev, k, i);
+ if (ret < 0)
+ goto out;
+next:
+ i++;
+ }
+#endif
+
+out:
+ return ret;
}
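
The CONFIG_HSA_AMD_P2P loop above pairs the newly added GPU against every earlier device, skipping nodes that share an XGMI hive with it, since those already expose direct io_links. A hypothetical predicate restating the skip condition:

static bool kfd_pcie_p2p_candidate(struct kfd_dev *dev,
				   struct kfd_dev *new_dev)
{
	if (!dev || !dev->adev)
		return false;
	/* same hive: already reachable through XGMI io_links */
	if (dev->hive_id && dev->hive_id == new_dev->hive_id)
		return false;
	return true;
}
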
int kfd_topology_add_device(struct kfd_dev *gpu)
@@ -1305,7 +1606,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
INIT_LIST_HEAD(&temp_topology_device_list);
gpu_id = kfd_generate_gpu_id(gpu);
-
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
/* Check to see if this gpu device exists in the topology_device_list.
@@ -1362,6 +1662,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
+ kfd_dev_create_p2p_links();
+
/* TODO: Move the following lines to function
* kfd_add_non_crat_information
*/
@@ -1507,7 +1809,7 @@ err:
static void kfd_topology_update_io_links(int proximity_domain)
{
struct kfd_topology_device *dev;
- struct kfd_iolink_properties *iolink, *tmp;
+ struct kfd_iolink_properties *iolink, *p2plink, *tmp;
list_for_each_entry(dev, &topology_device_list, list) {
if (dev->proximity_domain > proximity_domain)
@@ -1520,7 +1822,6 @@ static void kfd_topology_update_io_links(int proximity_domain)
*/
if (iolink->node_to == proximity_domain) {
list_del(&iolink->list);
- dev->io_link_count--;
dev->node_props.io_links_count--;
} else {
if (iolink->node_from > proximity_domain)
@@ -1529,6 +1830,22 @@ static void kfd_topology_update_io_links(int proximity_domain)
iolink->node_to--;
}
}
+
+ list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
+ /*
+ * If there is a p2p link to the dev being deleted
+ * then remove that p2p link also.
+ */
+ if (p2plink->node_to == proximity_domain) {
+ list_del(&p2plink->list);
+ dev->node_props.p2p_links_count--;
+ } else {
+ if (p2plink->node_from > proximity_domain)
+ p2plink->node_from--;
+ if (p2plink->node_to > proximity_domain)
+ p2plink->node_to--;
+ }
+ }
}
}
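
As a worked example of the renumbering above: with proximity domains {0: CPU, 1: GPU-A, 2: GPU-B}, removing domain 1 deletes every p2p link whose node_to is 1 and shifts the surviving 0 <--> 2 pair down to 0 <--> 1. A hypothetical helper for the endpoint shift alone:

static void kfd_shift_link_endpoints(struct kfd_iolink_properties *link,
				     uint32_t removed_domain)
{
	/* e.g. removing domain 1 turns a 0 --> 2 link into 0 --> 1 */
	if (link->node_from > removed_domain)
		link->node_from--;
	if (link->node_to > removed_domain)
		link->node_to--;
}
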
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 4f80d2ea1000..9f6c949186c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -38,6 +38,7 @@ struct kfd_node_properties {
uint32_t mem_banks_count;
uint32_t caches_count;
uint32_t io_links_count;
+ uint32_t p2p_links_count;
uint32_t cpu_core_id_base;
uint32_t simd_id_base;
uint32_t capability;
@@ -129,14 +130,15 @@ struct kfd_topology_device {
struct list_head mem_props;
uint32_t cache_count;
struct list_head cache_props;
- uint32_t io_link_count;
struct list_head io_link_props;
+ struct list_head p2p_link_props;
struct list_head perf_props;
struct kfd_dev *gpu;
struct kobject *kobj_node;
struct kobject *kobj_mem;
struct kobject *kobj_cache;
struct kobject *kobj_iolink;
+ struct kobject *kobj_p2plink;
struct kobject *kobj_perf;
struct attribute attr_gpuid;
struct attribute attr_name;