Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
21 files changed, 848 insertions, 139 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index 8cc0a76ddf9f..93bd4eda0d94 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -25,3 +25,17 @@ config HSA_AMD_SVM preemptions and one based on page faults. To enable page fault based memory management on most GFXv9 GPUs, set the module parameter amdgpu.noretry=0. + +config HSA_AMD_P2P + bool "HSA kernel driver support for peer-to-peer for AMD GPU devices" + depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY + help + Enable peer-to-peer (P2P) communication between AMD GPUs over + the PCIe bus. This can improve performance of multi-GPU compute + applications and libraries by enabling GPUs to access data directly + in peer GPUs' memory without intermediate copies in system memory. + + This P2P feature is only enabled on compatible chipsets, and between + GPUs with large memory BARs that expose the entire VRAM in PCIe bus + address space within the physical address limits of the GPUs. + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1c7016958d6d..2b3d8bc8f0aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1; static struct class *kfd_class; struct device *kfd_device; +static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) +{ + struct kfd_process_device *pdd; + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, gpu_id); + + if (pdd) + return pdd; + + mutex_unlock(&p->mutex); + return NULL; +} + +static inline void kfd_unlock_pdd(struct kfd_process_device *pdd) +{ + mutex_unlock(&pdd->process->mutex); +} + int kfd_chardev_init(void) { int err = 0; @@ -280,6 +299,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, struct kfd_process_device *pdd; struct queue_properties q_properties; uint32_t doorbell_offset_in_process = 0; + struct amdgpu_bo *wptr_bo = NULL; memset(&q_properties, 0, sizeof(struct queue_properties)); @@ -307,12 +327,49 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work + * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) + */ + if (dev->shared_resources.enable_mes && + ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) + >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + + wptr_vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (err) + goto err_wptr_map_gart; + + wptr_mapping = amdgpu_vm_bo_lookup_mapping( + wptr_vm, args->write_pointer_address >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + pr_err("Failed to lookup wptr bo\n"); + err = -EINVAL; + goto err_wptr_map_gart; + } + + wptr_bo = wptr_mapping->bo_va->base.bo; + if (wptr_bo->tbo.base.size > PAGE_SIZE) { + pr_err("Requested GART mapping for wptr bo larger than one page\n"); + err = -EINVAL; + goto err_wptr_map_gart; + } + + err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo); + if (err) { + pr_err("Failed to map wptr bo to GART\n"); + goto err_wptr_map_gart; + } + } + pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, 
NULL, - &doorbell_offset_in_process); + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo, + NULL, NULL, NULL, &doorbell_offset_in_process); if (err != 0) goto err_create_queue; @@ -344,6 +401,9 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, return 0; err_create_queue: + if (wptr_bo) + amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); +err_wptr_map_gart: err_bind_process: err_pdd: mutex_unlock(&p->mutex); @@ -958,6 +1018,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) return false; } +static int kfd_ioctl_get_available_memory(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_available_memory_args *args = data; + struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id); + + if (!pdd) + return -EINVAL; + args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev); + kfd_unlock_pdd(pdd); + return 0; +} + static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, struct kfd_process *p, void *data) { @@ -2361,7 +2434,7 @@ static int criu_restore(struct file *filep, * Set the process to evicted state to avoid running any new queues before all the memory * mappings are ready. */ - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE); if (ret) goto exit_unlock; @@ -2480,7 +2553,7 @@ static int criu_process_info(struct file *filep, goto err_unlock; } - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT); if (ret) goto err_unlock; @@ -2648,6 +2721,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP, kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, + kfd_ioctl_get_available_memory, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cbfb32b3d235..a5409531a2fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props->rec_transfer_size = iolink->recommended_transfer_size; - dev->io_link_count++; dev->node_props.io_links_count++; list_add_tail(&props->list, &dev->io_link_props); break; @@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL; - to_dev->io_link_count++; to_dev->node_props.io_links_count++; list_add_tail(&props2->list, &to_dev->io_link_props); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 581c3a30fee1..ad5a40a685ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -101,6 +101,8 @@ void kfd_debugfs_init(void) kfd_debugfs_rls_by_device, &kfd_debugfs_fops); debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root, kfd_debugfs_hang_hws_read, &kfd_debugfs_hang_hws_fops); + debugfs_create_file("mem_limit", S_IFREG | 0200, debugfs_root, + kfd_debugfs_kfd_mem_limits, &kfd_debugfs_fops); } void kfd_debugfs_fini(void) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a08769c5e94b..f5853835f03a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -75,7 +75,6 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev 
*kfd) case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ case IP_VERSION(5, 2, 6):/* GC 10.3.6 */ case IP_VERSION(5, 2, 7):/* GC 10.3.7 */ - case IP_VERSION(6, 0, 1): kfd->device_info.num_sdma_queues_per_engine = 2; break; case IP_VERSION(4, 2, 0):/* VEGA20 */ @@ -90,6 +89,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */ case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */ case IP_VERSION(6, 0, 0): + case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): kfd->device_info.num_sdma_queues_per_engine = 8; break; @@ -839,7 +839,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock_irqrestore(&kfd->interrupt_lock, flags); } -int kgd2kfd_quiesce_mm(struct mm_struct *mm) +int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger) { struct kfd_process *p; int r; @@ -853,7 +853,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm) return -ESRCH; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); - r = kfd_process_evict_queues(p); + r = kfd_process_evict_queues(p, trigger); kfd_unref_process(p); return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e1797657b04c..e83725a28106 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -177,6 +177,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, struct kfd_process_device *pdd = qpd_to_pdd(qpd); struct mes_add_queue_input queue_input; int r, queue_type; + uint64_t wptr_addr_off; if (dqm->is_hws_hang) return -EIO; @@ -197,6 +198,14 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.doorbell_offset = q->properties.doorbell_off; queue_input.mqd_addr = q->gart_mqd_addr; queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; + + if (q->wptr_bo) { + wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va; + queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; + } + + queue_input.is_kfd_process = 1; + queue_input.paging = false; queue_input.tba_addr = qpd->tba_addr; queue_input.tma_addr = qpd->tma_addr; @@ -811,7 +820,6 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; bool prev_active = false; - bool add_queue = false; dqm_lock(dqm); pdd = kfd_get_process_device_data(q->device, q->process); @@ -887,7 +895,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { if (!dqm->dev->shared_resources.enable_mes) retval = map_queues_cpsch(dqm); - else if (add_queue) + else if (q->properties.is_active) retval = add_queue_mes(dqm, q, &pdd->qpd); } else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || @@ -1666,14 +1674,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.is_active) { increment_queue_count(dqm, qpd, q); - if (!dqm->dev->shared_resources.enable_mes) { + if (!dqm->dev->shared_resources.enable_mes) retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - } else { + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + else retval = add_queue_mes(dqm, q, qpd); - if (retval) - goto cleanup_queue; - } + if (retval) + goto cleanup_queue; } /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 4df9c36146ba..3942a56c28bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -377,8 +377,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset) return -EINVAL; } - err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev, - mem, &kern_addr, &size); + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size); if (err) { pr_err("Failed to map event page to kernel\n"); return err; @@ -387,7 +386,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset) err = kfd_event_page_set(p, kern_addr, size, event_page_offset); if (err) { pr_err("Failed to set event page\n"); - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); return err; } return err; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index a9466d154395..34772fe74296 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -146,7 +146,7 @@ static void interrupt_wq(struct work_struct *work) struct kfd_dev *dev = container_of(work, struct kfd_dev, interrupt_work); uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; - long start_jiffies = jiffies; + unsigned long start_jiffies = jiffies; if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { dev_err_once(dev->adev->dev, "Ring entry too small\n"); @@ -156,7 +156,7 @@ static void interrupt_wq(struct work_struct *work) while (dequeue_ih_ring_entry(dev, ih_ring_entry)) { dev->device_info.event_interrupt_class->interrupt_wq(dev, ih_ring_entry); - if (jiffies - start_jiffies > HZ) { + if (time_is_before_jiffies(start_jiffies + HZ)) { /* If we spent more than a second processing signals, * reschedule the worker to avoid soft-lockup warnings */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index e44376c2ecdc..373e5bfd4e91 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -33,6 +33,7 @@ #include "kfd_priv.h" #include "kfd_svm.h" #include "kfd_migrate.h" +#include "kfd_smi_events.h" #ifdef dev_fmt #undef dev_fmt @@ -402,8 +403,9 @@ out: static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, - uint64_t end) + uint64_t end, uint32_t trigger) { + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; @@ -430,6 +432,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate.dst = migrate.src + npages; scratch = (dma_addr_t *)(migrate.dst + npages); + kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + 0, adev->kfd.dev->id, prange->prefetch_loc, + prange->preferred_loc, trigger); + r = migrate_vma_setup(&migrate); if (r) { dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n", @@ -458,6 +465,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); + kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + 0, adev->kfd.dev->id, trigger); + svm_range_dma_unmap(adev->dev, scratch, 0, npages); 
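(Aside, not part of the patch.) The kfd_interrupt.c hunk above makes start_jiffies unsigned and replaces the open-coded "jiffies - start_jiffies > HZ" test with time_is_before_jiffies(), which compares through a signed difference and therefore stays correct when the jiffies counter wraps. A minimal sketch of that idiom; the worker body and the example_* names are placeholders:

	#include <linux/jiffies.h>	/* jiffies, HZ, time_is_before_jiffies() */
	#include <linux/workqueue.h>

	static bool example_dequeue_one_item(void);	/* placeholder for real work */

	/* Hypothetical worker that bounds its own runtime to roughly one second. */
	static void example_bounded_worker(struct work_struct *work)
	{
		unsigned long start_jiffies = jiffies;

		while (example_dequeue_one_item()) {
			/*
			 * "Has more than HZ jiffies (one second) passed since
			 * start_jiffies?" -- wraparound-safe, unlike a raw
			 * subtraction stored in a signed long.
			 */
			if (time_is_before_jiffies(start_jiffies + HZ)) {
				schedule_work(work);	/* reschedule instead of soft-locking */
				break;
			}
		}
	}
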
svm_range_free_dma_mappings(prange); @@ -479,6 +490,7 @@ out: * @prange: range structure * @best_loc: the device to migrate to * @mm: the process mm structure + * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * @@ -487,7 +499,7 @@ out: */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; struct vm_area_struct *vma; @@ -524,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; @@ -655,8 +667,10 @@ out_oom: */ static long svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, - struct vm_area_struct *vma, uint64_t start, uint64_t end) + struct vm_area_struct *vma, uint64_t start, uint64_t end, + uint32_t trigger) { + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; unsigned long upages = npages; unsigned long cpages = 0; @@ -685,6 +699,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate.dst = migrate.src + npages; scratch = (dma_addr_t *)(migrate.dst + npages); + kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + adev->kfd.dev->id, 0, prange->prefetch_loc, + prange->preferred_loc, trigger); + r = migrate_vma_setup(&migrate); if (r) { dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n", @@ -715,6 +734,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); + + kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + adev->kfd.dev->id, 0, trigger); + svm_range_dma_unmap(adev->dev, scratch, 0, npages); out_free: @@ -732,13 +756,15 @@ out: * svm_migrate_vram_to_ram - migrate svm range from device to system * @prange: range structure * @mm: process mm, use current->mm if NULL + * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, prange->migrate_mutex * * Return: * 0 - OK, otherwise error code */ -int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, + uint32_t trigger) { struct amdgpu_device *adev; struct vm_area_struct *vma; @@ -779,7 +805,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) } next = min(vma->vm_end, end); - r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next); + r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger); if (r < 0) { pr_debug("failed %ld to migrate prange %p\n", r, prange); break; @@ -802,6 +828,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) * @prange: range structure * @best_loc: the device to migrate to * @mm: process mm, use current->mm if NULL + * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * @@ -810,7 +837,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) */ static int svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct 
*mm) + struct mm_struct *mm, uint32_t trigger) { int r, retries = 3; @@ -822,7 +849,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); do { - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, trigger); if (r) return r; } while (prange->actual_loc && --retries); @@ -830,17 +857,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, if (prange->actual_loc) return -EDEADLK; - return svm_migrate_ram_to_vram(prange, best_loc, mm); + return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); } int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t trigger) { if (!prange->actual_loc) - return svm_migrate_ram_to_vram(prange, best_loc, mm); + return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); else - return svm_migrate_vram_to_vram(prange, best_loc, mm); + return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger); } @@ -909,7 +936,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) goto out_unlock_prange; } - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU); if (r) pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, prange, prange->start, prange->last); @@ -992,6 +1019,8 @@ int svm_migrate_init(struct amdgpu_device *adev) amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size)); + svm_range_set_max_pages(adev); + pr_info("HMM registered %ldMB device memory\n", size >> 20); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 2f5b3394c9ed..b3f0754b32fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR { }; int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm); -int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); + struct mm_struct *mm, uint32_t trigger); +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, + uint32_t trigger); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 49a283be6b57..623ccd227b7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -100,7 +100,9 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, { struct kfd_cu_info cu_info; uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; - int i, se, sh, cu, cu_bitmap_sh_mul; + bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0); + uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; + int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 
2 : 1; amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); @@ -167,13 +169,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, se_mask[i] = 0; i = 0; - for (cu = 0; cu < 16; cu++) { + for (cu = 0; cu < 16; cu += inc) { for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) { for (se = 0; se < cu_info.num_shader_engines; se++) { if (cu_per_sh[se][sh] > cu) { - if (cu_mask[i / 32] & (1 << (i % 32))) - se_mask[se] |= 1 << (cu + sh * 16); - i++; + if (cu_mask[i / 32] & (en_mask << (i % 32))) + se_mask[se] |= en_mask << (cu + sh * 16); + i += inc; if (i == cu_mask_count) return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 4e0387f591be..b8e14c2cc295 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -377,6 +377,8 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8); m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); m->sdmax_rlcx_doorbell_offset = q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2585d6e61d42..d03a3b9c9c5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -571,6 +571,8 @@ struct queue { void *gang_ctx_bo; uint64_t gang_ctx_gpu_addr; void *gang_ctx_cpu_ptr; + + struct amdgpu_bo *wptr_bo; }; enum KFD_MQD_TYPE { @@ -945,7 +947,7 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx( } void kfd_unref_process(struct kfd_process *p); -int kfd_process_evict_queues(struct kfd_process *p); +int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger); int kfd_process_restore_queues(struct kfd_process *p); void kfd_suspend_all_processes(void); int kfd_resume_all_processes(void); @@ -1206,6 +1208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, + struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index e3d64ec8c353..6c83a519b3a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -43,6 +43,7 @@ struct mm_struct; #include "kfd_device_queue_manager.h" #include "kfd_iommu.h" #include "kfd_svm.h" +#include "kfd_smi_events.h" /* * List of struct kfd_process (field kfd_process). 
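(Aside, not part of the patch.) On GFX10 and later the shader CUs are paired into WGPs, so the mqd_symmetrically_map_cu_mask() change above walks the user CU mask two bits at a time (inc = 2) and writes a two-bit enable field per WGP (en_mask = 0x3); a later hunk in kfd_process_queue_manager.c rejects masks that enable only one CU of a pair. A small stand-alone sketch of that pairwise check, illustrative only, with hypothetical names:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* True if every enabled CU has its WGP partner enabled as well. */
	static bool cu_mask_is_pairwise(const uint32_t *cu_mask, unsigned int cu_count)
	{
		unsigned int i;

		for (i = 0; i < cu_count; i += 2) {
			uint32_t pair = (cu_mask[i / 32] >> (i % 32)) & 0x3;

			if (pair != 0 && pair != 0x3)
				return false;	/* only one CU of the pair is enabled */
		}
		return true;
	}

	int main(void)
	{
		uint32_t good[1] = { 0x0000000f };	/* CUs 0-3: two complete WGPs */
		uint32_t bad[1]  = { 0x00000007 };	/* CU 2 enabled without CU 3 */

		printf("good: %d bad: %d\n", cu_mask_is_pairwise(good, 32),
		       cu_mask_is_pairwise(bad, 32));
		return 0;
	}
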
@@ -693,7 +694,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, struct kfd_dev *dev = pdd->dev; if (kptr) { - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); kptr = NULL; } @@ -733,7 +734,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, } if (kptr) { - err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev, + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel( (struct kgd_mem *)*mem, kptr, NULL); if (err) { pr_debug("Map GTT BO to kernel failed\n"); @@ -999,7 +1000,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p) if (!mem) goto out; - amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem); + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); out: mutex_unlock(&p->mutex); @@ -1114,6 +1115,15 @@ static void kfd_process_wq_release(struct work_struct *work) struct kfd_process *p = container_of(work, struct kfd_process, release_work); + kfd_process_dequeue_from_all_devices(p); + pqm_uninit(&p->pqm); + + /* Signal the eviction fence after user mode queues are + * destroyed. This allows any BOs to be freed without + * triggering pointless evictions or waiting for fences. + */ + dma_fence_signal(p->ef); + kfd_process_remove_sysfs(p); kfd_iommu_unbind_process(p); @@ -1178,20 +1188,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - mutex_lock(&p->mutex); - - kfd_process_dequeue_from_all_devices(p); - pqm_uninit(&p->pqm); - /* Indicate to other users that MM is no longer valid */ p->mm = NULL; - /* Signal the eviction fence after user mode queues are - * destroyed. This allows any BOs to be freed without - * triggering pointless evictions or waiting for fences. - */ - dma_fence_signal(p->ef); - - mutex_unlock(&p->mutex); mmu_notifier_put(&p->mmu_notifier); } @@ -1404,6 +1402,11 @@ static struct kfd_process *create_process(const struct task_struct *thread) hash_add_rcu(kfd_processes_table, &process->kfd_processes, (uintptr_t)process->mm); + /* Avoid free_notifier to start kfd_process_wq_release if + * mmu_notifier_get failed because of pending signal. + */ + kref_get(&process->ref); + /* MMU notifier registration must be the last call that can fail * because after this point we cannot unwind the process creation. * After this point, mmu_notifier_put will trigger the cleanup by @@ -1416,6 +1419,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) } BUG_ON(mn != &process->mmu_notifier); + kfd_unref_process(process); get_task_struct(process->lead_thread); return process; @@ -1736,7 +1740,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) * Eviction is reference-counted per process-device. This means multiple * evictions from different sources can be nested safely. 
*/ -int kfd_process_evict_queues(struct kfd_process *p) +int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) { int r = 0; int i; @@ -1745,6 +1749,9 @@ int kfd_process_evict_queues(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, + trigger); + r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, &pdd->qpd); /* evict return -EIO if HWS is hang or asic is resetting, in this case @@ -1769,6 +1776,9 @@ fail: if (n_evicted == 0) break; + + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd)) pr_err("Failed to restore queues\n"); @@ -1788,6 +1798,8 @@ int kfd_process_restore_queues(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd); if (r) { @@ -1849,7 +1861,7 @@ static void evict_process_worker(struct work_struct *work) flush_delayed_work(&p->restore_work); pr_debug("Started evicting pasid 0x%x\n", p->pasid); - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM); if (!ret) { dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -1916,7 +1928,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - if (kfd_process_evict_queues(p)) + if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND)) pr_err("Failed to suspend process 0x%x\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index dc00484ff484..6e3e7f54381b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -180,7 +180,8 @@ void pqm_uninit(struct process_queue_manager *pqm) static int init_user_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct queue **q, struct queue_properties *q_properties, - struct file *f, unsigned int qid) + struct file *f, struct amdgpu_bo *wptr_bo, + unsigned int qid) { int retval; @@ -210,6 +211,7 @@ static int init_user_queue(struct process_queue_manager *pqm, goto cleanup; } memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); + (*q)->wptr_bo = wptr_bo; } pr_debug("PQM After init queue"); @@ -226,6 +228,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, + struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, @@ -288,7 +291,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, * allocate_sdma_queue() in create_queue() has the * corresponding check logic. 
*/ - retval = init_user_queue(pqm, dev, &q, properties, f, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -309,7 +312,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = init_user_queue(pqm, dev, &q, properties, f, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -436,9 +439,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) pdd->qpd.num_gws = 0; } - if (dev->shared_resources.enable_mes) + if (dev->shared_resources.enable_mes) { amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo); + if (pqn->q->wptr_bo) + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + + } uninit_queue(pqn->q); } @@ -491,6 +498,21 @@ int pqm_update_mqd(struct process_queue_manager *pqm, return -EFAULT; } + /* ASICs that have WGPs must enforce pairwise enabled mask checks. */ + if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr && + KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) { + int i; + + for (i = 0; i < minfo->cu_mask.count; i += 2) { + uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3; + + if (cu_pair && cu_pair != 0x3) { + pr_debug("CUs must be adjacent pairwise enabled.\n"); + return -EINVAL; + } + } + } + retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, pqn->q, minfo); if (retval != 0) @@ -844,7 +866,7 @@ int kfd_criu_restore_queue(struct kfd_process *p, print_queue_properties(&qp); - ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack, + ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index f2e1d506ba21..0472b56de245 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -38,6 +38,9 @@ struct kfd_smi_client { uint64_t events; struct kfd_dev *dev; spinlock_t lock; + struct rcu_head rcu; + pid_t pid; + bool suser; }; #define MAX_KFIFO_SIZE 1024 @@ -135,6 +138,14 @@ static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user, return sizeof(events); } +static void kfd_smi_ev_client_free(struct rcu_head *p) +{ + struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu); + + kfifo_free(&ev->fifo); + kfree(ev); +} + static int kfd_smi_ev_release(struct inode *inode, struct file *filep) { struct kfd_smi_client *client = filep->private_data; @@ -144,23 +155,31 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep) list_del_rcu(&client->list); spin_unlock(&dev->smi_lock); - synchronize_rcu(); - kfifo_free(&client->fifo); - kfree(client); - + call_rcu(&client->rcu, kfd_smi_ev_client_free); return 0; } -static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event, - char *event_msg, int len) +static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client, + unsigned int event) +{ + uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS); + uint64_t events = READ_ONCE(client->events); + + if (pid && client->pid != pid && !(client->suser && (events & all))) + return false; + + return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event); +} + +static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev, + unsigned 
int smi_event, char *event_msg, int len) { struct kfd_smi_client *client; rcu_read_lock(); list_for_each_entry_rcu(client, &dev->smi_clients, list) { - if (!(READ_ONCE(client->events) & - KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event))) + if (!kfd_smi_ev_enabled(pid, client, smi_event)) continue; spin_lock(&client->lock); if (kfifo_avail(&client->fifo) >= len) { @@ -176,9 +195,9 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event, rcu_read_unlock(); } -__printf(3, 4) -static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event, - char *fmt, ...) +__printf(4, 5) +static void kfd_smi_event_add(pid_t pid, struct kfd_dev *dev, + unsigned int event, char *fmt, ...) { char fifo_in[KFD_SMI_EVENT_MSG_SIZE]; int len; @@ -193,7 +212,7 @@ static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event, len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args); va_end(args); - add_event_to_kfifo(dev, event, fifo_in, len); + add_event_to_kfifo(pid, dev, event, fifo_in, len); } void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) @@ -206,13 +225,13 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) event = KFD_SMI_EVENT_GPU_PRE_RESET; ++(dev->reset_seq_num); } - kfd_smi_event_add(dev, event, "%x\n", dev->reset_seq_num); + kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num); } void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, uint64_t throttle_bitmask) { - kfd_smi_event_add(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n", + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n", throttle_bitmask, amdgpu_dpm_get_thermal_throttling_counter(dev->adev)); } @@ -227,10 +246,93 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) if (!task_info.pid) return; - kfd_smi_event_add(dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n", + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n", task_info.pid, task_info.task_name); } +void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid, + unsigned long address, bool write_fault, + ktime_t ts) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START, + "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid, + address, dev->id, write_fault ? 'W' : 'R'); +} + +void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid, + unsigned long address, bool migration) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END, + "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(), + pid, address, dev->id, migration ? 
'M' : 'U'); +} + +void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid, + unsigned long start, unsigned long end, + uint32_t from, uint32_t to, + uint32_t prefetch_loc, uint32_t preferred_loc, + uint32_t trigger) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START, + "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", + ktime_get_boottime_ns(), pid, start, end - start, + from, to, prefetch_loc, preferred_loc, trigger); +} + +void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid, + unsigned long start, unsigned long end, + uint32_t from, uint32_t to, uint32_t trigger) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END, + "%lld -%d @%lx(%lx) %x->%x %d\n", + ktime_get_boottime_ns(), pid, start, end - start, + from, to, trigger); +} + +void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid, + uint32_t trigger) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_EVICTION, + "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid, + dev->id, trigger); +} + +void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_RESTORE, + "%lld -%d %x\n", ktime_get_boottime_ns(), pid, + dev->id); +} + +void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) +{ + struct kfd_process *p; + int i; + + p = kfd_lookup_process_by_mm(mm); + if (!p) + return; + + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + kfd_smi_event_add(p->lead_thread->pid, pdd->dev, + KFD_SMI_EVENT_QUEUE_RESTORE, + "%lld -%d %x %c\n", ktime_get_boottime_ns(), + p->lead_thread->pid, pdd->dev->id, 'R'); + } + kfd_unref_process(p); +} + +void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid, + unsigned long address, unsigned long last, + uint32_t trigger) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_UNMAP_FROM_GPU, + "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(), + pid, address, last - address + 1, dev->id, trigger); +} + int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) { struct kfd_smi_client *client; @@ -251,6 +353,8 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) spin_lock_init(&client->lock); client->events = 0; client->dev = dev; + client->pid = current->tgid; + client->suser = capable(CAP_SYS_ADMIN); spin_lock(&dev->smi_lock); list_add_rcu(&client->list, &dev->smi_clients); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index dfe101c21166..76fe4e0ec2d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -29,5 +29,24 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, uint64_t throttle_bitmask); void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset); - +void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid, + unsigned long address, bool write_fault, + ktime_t ts); +void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid, + unsigned long address, bool migration); +void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid, + unsigned long start, unsigned long end, + uint32_t from, uint32_t to, + uint32_t prefetch_loc, uint32_t preferred_loc, + uint32_t trigger); +void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid, + unsigned long start, unsigned long end, + uint32_t from, uint32_t to, uint32_t trigger); +void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid, + uint32_t trigger); +void 
kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid); +void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm); +void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid, + unsigned long address, unsigned long last, + uint32_t trigger); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 7b332246eda3..a67ba8879a56 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -32,6 +32,7 @@ #include "kfd_priv.h" #include "kfd_svm.h" #include "kfd_migrate.h" +#include "kfd_smi_events.h" #ifdef dev_fmt #undef dev_fmt @@ -43,7 +44,13 @@ /* Long enough to ensure no retry fault comes after svm range is restored and * page table is updated. */ -#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000 +#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC) + +/* Giant svm range split into smaller ranges based on this, it is decided using + * minimum of all dGPU/APU 1/32 VRAM size, between 2MB to 1GB and alignment to + * power of 2MB. + */ +static uint64_t max_svm_range_pages; struct criu_svm_metadata { struct list_head list; @@ -259,13 +266,22 @@ void svm_range_free_dma_mappings(struct svm_range *prange) } } -static void svm_range_free(struct svm_range *prange) +static void svm_range_free(struct svm_range *prange, bool update_mem_usage) { + uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT; + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); svm_range_vram_node_free(prange); svm_range_free_dma_mappings(prange); + + if (update_mem_usage && !p->xnack_enabled) { + pr_debug("unreserve mem limit: %lld\n", size); + amdgpu_amdkfd_unreserve_mem_limit(NULL, size, + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR); + } mutex_destroy(&prange->lock); mutex_destroy(&prange->migrate_mutex); kfree(prange); @@ -284,7 +300,7 @@ svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc, static struct svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, - uint64_t last) + uint64_t last, bool update_mem_usage) { uint64_t size = last - start + 1; struct svm_range *prange; @@ -293,6 +309,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, prange = kzalloc(sizeof(*prange), GFP_KERNEL); if (!prange) return NULL; + + p = container_of(svms, struct kfd_process, svms); + if (!p->xnack_enabled && update_mem_usage && + amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT, + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) { + pr_info("SVM mapping failed, exceeds resident system memory limit\n"); + kfree(prange); + return NULL; + } prange->npages = size; prange->svms = svms; prange->start = start; @@ -307,7 +332,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, mutex_init(&prange->migrate_mutex); mutex_init(&prange->lock); - p = container_of(svms, struct kfd_process, svms); if (p->xnack_enabled) bitmap_copy(prange->bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE); @@ -1000,9 +1024,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, svms = prange->svms; if (old_start == start) - *new = svm_range_new(svms, last + 1, old_last); + *new = svm_range_new(svms, last + 1, old_last, false); else - *new = svm_range_new(svms, old_start, start - 1); + *new = svm_range_new(svms, old_start, start - 1, false); if (!*new) return -ENOMEM; @@ -1010,7 +1034,7 @@ svm_range_split(struct svm_range *prange, 
uint64_t start, uint64_t last, if (r) { pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", r, old_start, old_last, start, last); - svm_range_free(*new); + svm_range_free(*new, false); *new = NULL; } @@ -1199,7 +1223,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, static int svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, - unsigned long last) + unsigned long last, uint32_t trigger) { DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); struct kfd_process_device *pdd; @@ -1231,6 +1255,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, return -EINVAL; } + kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid, + start, last, trigger); + r = svm_range_unmap_from_gpu(pdd->dev->adev, drm_priv_to_vm(pdd->drm_priv), start, last, &fence); @@ -1617,7 +1644,7 @@ unreserve_out: svm_range_unreserve_bos(&ctx); if (!r) - prange->validate_timestamp = ktime_to_us(ktime_get()); + prange->validate_timestamp = ktime_get_boottime(); return r; } @@ -1729,14 +1756,16 @@ out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); mutex_unlock(&process_info->lock); - mmput(mm); /* If validation failed, reschedule another attempt */ if (evicted_ranges) { pr_debug("reschedule to restore svm range\n"); schedule_delayed_work(&svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + + kfd_smi_event_queue_restore_rescheduled(mm); } + mmput(mm); } /** @@ -1756,7 +1785,8 @@ out_reschedule: */ static int svm_range_evict(struct svm_range *prange, struct mm_struct *mm, - unsigned long start, unsigned long last) + unsigned long start, unsigned long last, + enum mmu_notifier_event event) { struct svm_range_list *svms = prange->svms; struct svm_range *pchild; @@ -1768,10 +1798,15 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange->start, prange->last, start, last); - if (!p->xnack_enabled) { + if (!p->xnack_enabled || + (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) { int evicted_ranges; + bool mapped = prange->mapped_to_gpu; list_for_each_entry(pchild, &prange->child_list, child_list) { + if (!pchild->mapped_to_gpu) + continue; + mapped = true; mutex_lock_nested(&pchild->lock, 1); if (pchild->start <= last && pchild->last >= start) { pr_debug("increment pchild invalid [0x%lx 0x%lx]\n", @@ -1781,6 +1816,9 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, mutex_unlock(&pchild->lock); } + if (!mapped) + return r; + if (prange->start <= last && prange->last >= start) atomic_inc(&prange->invalid); @@ -1792,7 +1830,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, prange->svms, prange->start, prange->last); /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM); if (r) pr_debug("failed to quiesce KFD\n"); @@ -1801,6 +1839,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); } else { unsigned long s, l; + uint32_t trigger; + + if (event == MMU_NOTIFY_MIGRATE) + trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE; + else + trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY; pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", prange->svms, start, last); @@ -1809,13 +1853,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, s = max(start, pchild->start); l = min(last, pchild->last); if (l >= s) - 
svm_range_unmap_from_gpus(pchild, s, l); + svm_range_unmap_from_gpus(pchild, s, l, trigger); mutex_unlock(&pchild->lock); } s = max(start, prange->start); l = min(last, prange->last); if (l >= s) - svm_range_unmap_from_gpus(prange, s, l); + svm_range_unmap_from_gpus(prange, s, l, trigger); } return r; @@ -1825,7 +1869,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old) { struct svm_range *new; - new = svm_range_new(old->svms, old->start, old->last); + new = svm_range_new(old->svms, old->start, old->last, false); if (!new) return NULL; @@ -1849,6 +1893,46 @@ static struct svm_range *svm_range_clone(struct svm_range *old) return new; } +void svm_range_set_max_pages(struct amdgpu_device *adev) +{ + uint64_t max_pages; + uint64_t pages, _pages; + + /* 1/32 VRAM size in pages */ + pages = adev->gmc.real_vram_size >> 17; + pages = clamp(pages, 1ULL << 9, 1ULL << 18); + pages = rounddown_pow_of_two(pages); + do { + max_pages = READ_ONCE(max_svm_range_pages); + _pages = min_not_zero(max_pages, pages); + } while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages); +} + +static int +svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last, + uint64_t max_pages, struct list_head *insert_list, + struct list_head *update_list) +{ + struct svm_range *prange; + uint64_t l; + + pr_debug("max_svm_range_pages 0x%llx adding [0x%llx 0x%llx]\n", + max_pages, start, last); + + while (last >= start) { + l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1); + + prange = svm_range_new(svms, start, l, true); + if (!prange) + return -ENOMEM; + list_add(&prange->list, insert_list); + list_add(&prange->update_list, update_list); + + start = l + 1; + } + return 0; +} + /** * svm_range_add - add svm range and handle overlap * @p: the range add to this process svms @@ -1889,6 +1973,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, struct interval_tree_node *node; struct svm_range *prange; struct svm_range *tmp; + struct list_head new_list; int r = 0; pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last); @@ -1896,6 +1981,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, INIT_LIST_HEAD(update_list); INIT_LIST_HEAD(insert_list); INIT_LIST_HEAD(remove_list); + INIT_LIST_HEAD(&new_list); node = interval_tree_iter_first(&svms->objects, start, last); while (node) { @@ -1951,14 +2037,11 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, /* insert a new node if needed */ if (node->start > start) { - prange = svm_range_new(svms, start, node->start - 1); - if (!prange) { - r = -ENOMEM; + r = svm_range_split_new(svms, start, node->start - 1, + READ_ONCE(max_svm_range_pages), + &new_list, update_list); + if (r) goto out; - } - - list_add(&prange->list, insert_list); - list_add(&prange->update_list, update_list); } node = next; @@ -1966,20 +2049,20 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, } /* add a final range at the end if needed */ - if (start <= last) { - prange = svm_range_new(svms, start, last); - if (!prange) { - r = -ENOMEM; - goto out; - } - list_add(&prange->list, insert_list); - list_add(&prange->update_list, update_list); - } + if (start <= last) + r = svm_range_split_new(svms, start, last, + READ_ONCE(max_svm_range_pages), + &new_list, update_list); out: - if (r) + if (r) { list_for_each_entry_safe(prange, tmp, insert_list, list) - svm_range_free(prange); + svm_range_free(prange, false); + list_for_each_entry_safe(prange, tmp, &new_list, list) + 
svm_range_free(prange, true); + } else { + list_splice(&new_list, insert_list); + } return r; } @@ -2026,7 +2109,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange, svms, prange, prange->start, prange->last); svm_range_unlink(prange); svm_range_remove_notifier(prange); - svm_range_free(prange); + svm_range_free(prange, true); break; case SVM_OP_UPDATE_RANGE_NOTIFIER: pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n", @@ -2229,6 +2312,7 @@ static void svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, unsigned long start, unsigned long last) { + uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU; struct svm_range_list *svms; struct svm_range *pchild; struct kfd_process *p; @@ -2256,14 +2340,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, s = max(start, pchild->start); l = min(last, pchild->last); if (l >= s) - svm_range_unmap_from_gpus(pchild, s, l); + svm_range_unmap_from_gpus(pchild, s, l, trigger); svm_range_unmap_split(mm, prange, pchild, start, last); mutex_unlock(&pchild->lock); } s = max(start, prange->start); l = min(last, prange->last); if (l >= s) - svm_range_unmap_from_gpus(prange, s, l); + svm_range_unmap_from_gpus(prange, s, l, trigger); svm_range_unmap_split(mm, prange, prange, start, last); if (unmap_parent) @@ -2330,7 +2414,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, svm_range_unmap_from_cpu(mni->mm, prange, start, last); break; default: - svm_range_evict(prange, mni->mm, start, last); + svm_range_evict(prange, mni->mm, start, last, range->event); break; } @@ -2588,14 +2672,14 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, last = addr; } - prange = svm_range_new(&p->svms, start, last); + prange = svm_range_new(&p->svms, start, last, true); if (!prange) { pr_debug("Failed to create prange in address [0x%llx]\n", addr); return NULL; } if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) { pr_debug("failed to get gpuid from kgd\n"); - svm_range_free(prange); + svm_range_free(prange, true); return NULL; } @@ -2694,11 +2778,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, struct svm_range_list *svms; struct svm_range *prange; struct kfd_process *p; - uint64_t timestamp; + ktime_t timestamp = ktime_get_boottime(); int32_t best_loc; int32_t gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; struct vm_area_struct *vma; + bool migration = false; int r = 0; if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) { @@ -2775,9 +2860,9 @@ retry_write_locked: goto out_unlock_range; } - timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; /* skip duplicate vm fault on different pages of same range */ - if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { + if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp, + AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) { pr_debug("svms 0x%p [0x%lx %lx] already restored\n", svms, prange->start, prange->last); r = 0; @@ -2813,9 +2898,14 @@ retry_write_locked: svms, prange->start, prange->last, best_loc, prange->actual_loc); + kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr, + write_fault, timestamp); + if (prange->actual_loc != best_loc) { + migration = true; if (best_loc) { - r = svm_migrate_to_vram(prange, best_loc, mm); + r = svm_migrate_to_vram(prange, best_loc, mm, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU); if (r) { pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", r, addr); @@ 
-2823,12 +2913,14 @@ retry_write_locked: * VRAM failed */ if (prange->actual_loc) - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU); else r = 0; } } else { - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU); } if (r) { pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", @@ -2842,6 +2934,9 @@ retry_write_locked: pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", r, svms, prange->start, prange->last); + kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr, + migration); + out_unlock_range: mutex_unlock(&prange->migrate_mutex); out_unlock_svms: @@ -2884,7 +2979,7 @@ void svm_range_list_fini(struct kfd_process *p) list_for_each_entry_safe(prange, next, &p->svms.list, list) { svm_range_unlink(prange); svm_range_remove_notifier(prange); - svm_range_free(prange); + svm_range_free(prange, true); } mutex_destroy(&p->svms.lock); @@ -3148,12 +3243,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, return 0; if (!best_loc) { - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH); *migrated = !r; return r; } - r = svm_migrate_to_vram(prange, best_loc, mm); + r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH); *migrated = !r; return r; @@ -3211,7 +3306,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mutex_lock(&prange->migrate_mutex); do { r = svm_migrate_vram_to_ram(prange, - svm_bo->eviction_fence->mm); + svm_bo->eviction_fence->mm, + KFD_MIGRATE_TRIGGER_TTM_EVICTION); } while (!r && prange->actual_loc && --retries); if (!r && prange->actual_loc) @@ -3299,7 +3395,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, prange->last); svm_range_unlink(prange); svm_range_remove_notifier(prange); - svm_range_free(prange); + svm_range_free(prange, false); } mmap_write_downgrade(mm); @@ -3317,7 +3413,9 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, if (r) goto out_unlock_range; - if (migrated && !p->xnack_enabled) { + if (migrated && (!p->xnack_enabled || + (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) && + prange->mapped_to_gpu) { pr_debug("restore_work will update mappings of GPUs\n"); mutex_unlock(&prange->migrate_mutex); continue; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 2d54147b4dda..9156b041ef17 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -125,7 +125,7 @@ struct svm_range { uint32_t actual_loc; uint8_t granularity; atomic_t invalid; - uint64_t validate_timestamp; + ktime_t validate_timestamp; struct mmu_interval_notifier notifier; struct svm_work_list_item work_item; struct list_head deferred_list; @@ -204,6 +204,9 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s #define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0) void svm_range_bo_unref_async(struct svm_range_bo *svm_bo); + +void svm_range_set_max_pages(struct amdgpu_device *adev); + #else struct kfd_process; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 8d50d207cf66..25990bec600d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -40,6 +40,7 @@ #include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" +#include 
"amdgpu.h" /* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; @@ -148,6 +149,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) struct kfd_mem_properties *mem; struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; + struct kfd_iolink_properties *p2plink; struct kfd_perf_properties *perf; list_del(&dev->list); @@ -173,6 +175,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) kfree(iolink); } + while (dev->p2p_link_props.next != &dev->p2p_link_props) { + p2plink = container_of(dev->p2p_link_props.next, + struct kfd_iolink_properties, list); + list_del(&p2plink->list); + kfree(p2plink); + } + while (dev->perf_props.next != &dev->perf_props) { perf = container_of(dev->perf_props.next, struct kfd_perf_properties, list); @@ -214,6 +223,7 @@ struct kfd_topology_device *kfd_create_topology_device( INIT_LIST_HEAD(&dev->mem_props); INIT_LIST_HEAD(&dev->cache_props); INIT_LIST_HEAD(&dev->io_link_props); + INIT_LIST_HEAD(&dev->p2p_link_props); INIT_LIST_HEAD(&dev->perf_props); list_add_tail(&dev->list, device_list); @@ -465,6 +475,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.caches_count); sysfs_show_32bit_prop(buffer, offs, "io_links_count", dev->node_props.io_links_count); + sysfs_show_32bit_prop(buffer, offs, "p2p_links_count", + dev->node_props.p2p_links_count); sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base", dev->node_props.cpu_core_id_base); sysfs_show_32bit_prop(buffer, offs, "simd_id_base", @@ -568,6 +580,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr) static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) { + struct kfd_iolink_properties *p2plink; struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; @@ -585,6 +598,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) dev->kobj_iolink = NULL; } + if (dev->kobj_p2plink) { + list_for_each_entry(p2plink, &dev->p2p_link_props, list) + if (p2plink->kobj) { + kfd_remove_sysfs_file(p2plink->kobj, + &p2plink->attr); + p2plink->kobj = NULL; + } + kobject_del(dev->kobj_p2plink); + kobject_put(dev->kobj_p2plink); + dev->kobj_p2plink = NULL; + } + if (dev->kobj_cache) { list_for_each_entry(cache, &dev->cache_props, list) if (cache->kobj) { @@ -631,6 +656,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, uint32_t id) { + struct kfd_iolink_properties *p2plink; struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; @@ -668,6 +694,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, if (!dev->kobj_iolink) return -ENOMEM; + dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); + if (!dev->kobj_p2plink) + return -ENOMEM; + dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); if (!dev->kobj_perf) return -ENOMEM; @@ -757,6 +787,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, i++; } + i = 0; + list_for_each_entry(p2plink, &dev->p2p_link_props, list) { + p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (!p2plink->kobj) + return -ENOMEM; + ret = kobject_init_and_add(p2plink->kobj, &iolink_type, + dev->kobj_p2plink, "%d", i); + if (ret < 0) { + kobject_put(p2plink->kobj); + return ret; + } + + 
@@ -757,6 +787,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
		i++;
	}

+	i = 0;
+	list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
+		p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!p2plink->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+				dev->kobj_p2plink, "%d", i);
+		if (ret < 0) {
+			kobject_put(p2plink->kobj);
+			return ret;
+		}
+
+		p2plink->attr.name = "properties";
+		p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&p2plink->attr);
+		ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
	/* All hardware blocks have the same number of attributes. */
	num_attrs = ARRAY_SIZE(perf_attr_iommu);
	list_for_each_entry(perf, &dev->perf_props, list) {
@@ -1145,6 +1196,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
	struct kfd_mem_properties *mem;
	struct kfd_cache_properties *cache;
	struct kfd_iolink_properties *iolink;
+	struct kfd_iolink_properties *p2plink;

	down_write(&topology_lock);
	list_for_each_entry(dev, &topology_device_list, list) {
@@ -1165,6 +1217,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
				cache->gpu = dev->gpu;
			list_for_each_entry(iolink, &dev->io_link_props, list)
				iolink->gpu = dev->gpu;
+			list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+				p2plink->gpu = dev->gpu;
			break;
		}
	}
@@ -1287,6 +1341,253 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
			kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
		}
	}
+
+	/* Create indirect links so apply flags setting to all */
+	list_for_each_entry(link, &dev->p2p_link_props, list) {
+		link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+		kfd_set_iolink_no_atomics(dev, NULL, link);
+		peer_dev = kfd_topology_device_by_proximity_domain(
+				link->node_to);
+
+		if (!peer_dev)
+			continue;
+
+		list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
+				    list) {
+			if (inbound_link->node_to != link->node_from)
+				continue;
+
+			inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+			kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
+			kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+		}
+	}
+}
+
+static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
+				    struct kfd_iolink_properties *p2plink)
+{
+	int ret;
+
+	p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+	if (!p2plink->kobj)
+		return -ENOMEM;
+
+	ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+			dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
+	if (ret < 0) {
+		kobject_put(p2plink->kobj);
+		return ret;
+	}
+
+	p2plink->attr.name = "properties";
+	p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&p2plink->attr);
+	ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
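[Editor's aside] Both loops above repeat the same sysfs pattern: one kobject per link under the node's p2p_links directory, carrying a single "properties" attribute, with kobject_put() cleaning up after a failed kobject_init_and_add(). A generic, driver-agnostic sketch of that pattern follows; the function name and the 0444 mode (standing in for KFD_SYSFS_FILE_MODE) are illustrative assumptions.

#include <linux/kobject.h>
#include <linux/slab.h>
#include <linux/sysfs.h>

static int example_add_link_entry(struct kobject *parent,
				  struct kobj_type *ktype,
				  struct kobject **link_kobj,
				  struct attribute *link_attr,
				  int index)
{
	int ret;

	/* One kobject per link, named by its index under the parent dir. */
	*link_kobj = kzalloc(sizeof(**link_kobj), GFP_KERNEL);
	if (!*link_kobj)
		return -ENOMEM;

	ret = kobject_init_and_add(*link_kobj, ktype, parent, "%d", index);
	if (ret < 0) {
		kobject_put(*link_kobj);	/* drops the allocation too */
		return ret;
	}

	/* Single read-only "properties" file inside the link directory. */
	link_attr->name = "properties";
	link_attr->mode = 0444;
	sysfs_attr_init(link_attr);
	return sysfs_create_file(*link_kobj, link_attr);
}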
+static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
+{
+	struct kfd_iolink_properties *props = NULL, *props2 = NULL;
+	struct kfd_iolink_properties *gpu_link, *cpu_link;
+	struct kfd_topology_device *cpu_dev;
+	int ret = 0;
+	int i, num_cpu;
+
+	num_cpu = 0;
+	list_for_each_entry(cpu_dev, &topology_device_list, list) {
+		if (cpu_dev->gpu)
+			break;
+		num_cpu++;
+	}
+
+	gpu_link = list_first_entry(&kdev->io_link_props,
+				    struct kfd_iolink_properties, list);
+	if (!gpu_link)
+		return -ENOMEM;
+
+	for (i = 0; i < num_cpu; i++) {
+		/* CPU <--> GPU */
+		if (gpu_link->node_to == i)
+			continue;
+
+		/* find CPU <--> CPU links */
+		cpu_dev = kfd_topology_device_by_proximity_domain(i);
+		if (cpu_dev) {
+			list_for_each_entry(cpu_link,
+					&cpu_dev->io_link_props, list) {
+				if (cpu_link->node_to == gpu_link->node_to)
+					break;
+			}
+		}
+
+		if (cpu_link->node_to != gpu_link->node_to)
+			return -ENOMEM;
+
+		/* CPU <--> CPU <--> GPU, GPU node*/
+		props = kfd_alloc_struct(props);
+		if (!props)
+			return -ENOMEM;
+
+		memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
+		props->weight = gpu_link->weight + cpu_link->weight;
+		props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
+		props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
+		props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
+		props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
+
+		props->node_from = gpu_node;
+		props->node_to = i;
+		kdev->node_props.p2p_links_count++;
+		list_add_tail(&props->list, &kdev->p2p_link_props);
+		ret = kfd_build_p2p_node_entry(kdev, props);
+		if (ret < 0)
+			return ret;
+
+		/* for small Bar, no CPU --> GPU in-direct links */
+		if (kfd_dev_is_large_bar(kdev->gpu)) {
+			/* CPU <--> CPU <--> GPU, CPU node*/
+			props2 = kfd_alloc_struct(props2);
+			if (!props2)
+				return -ENOMEM;
+
+			memcpy(props2, props, sizeof(struct kfd_iolink_properties));
+			props2->node_from = i;
+			props2->node_to = gpu_node;
+			props2->kobj = NULL;
+			cpu_dev->node_props.p2p_links_count++;
+			list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
+			ret = kfd_build_p2p_node_entry(cpu_dev, props2);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return ret;
+}
+
+#if defined(CONFIG_HSA_AMD_P2P)
+static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
+		struct kfd_topology_device *peer, int from, int to)
+{
+	struct kfd_iolink_properties *props = NULL;
+	struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
+	struct kfd_topology_device *cpu_dev;
+	int ret = 0;
+
+	if (!amdgpu_device_is_peer_accessible(
+				kdev->gpu->adev,
+				peer->gpu->adev))
+		return ret;
+
+	iolink1 = list_first_entry(&kdev->io_link_props,
+				   struct kfd_iolink_properties, list);
+	if (!iolink1)
+		return -ENOMEM;
+
+	iolink2 = list_first_entry(&peer->io_link_props,
+				   struct kfd_iolink_properties, list);
+	if (!iolink2)
+		return -ENOMEM;
+
+	props = kfd_alloc_struct(props);
+	if (!props)
+		return -ENOMEM;
+
+	memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
+
+	props->weight = iolink1->weight + iolink2->weight;
+	props->min_latency = iolink1->min_latency + iolink2->min_latency;
+	props->max_latency = iolink1->max_latency + iolink2->max_latency;
+	props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
+	props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth);
+
+	if (iolink1->node_to != iolink2->node_to) {
+		/* CPU->CPU  link*/
+		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
+		if (cpu_dev) {
+			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
+				if (iolink3->node_to == iolink2->node_to)
+					break;
+
+			props->weight += iolink3->weight;
+			props->min_latency += iolink3->min_latency;
+			props->max_latency += iolink3->max_latency;
+			props->min_bandwidth = min(props->min_bandwidth,
+						   iolink3->min_bandwidth);
+			props->max_bandwidth = min(props->max_bandwidth,
+						   iolink3->max_bandwidth);
+		} else {
+			WARN(1, "CPU node not found");
+		}
+	}
+
+	props->node_from = from;
+	props->node_to = to;
+	peer->node_props.p2p_links_count++;
+	list_add_tail(&props->list, &peer->p2p_link_props);
+	ret = kfd_build_p2p_node_entry(peer, props);
+
+	return ret;
+}
+#endif
+
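[Editor's aside] Both kfd_create_indirect_link_prop() and kfd_add_peer_prop() above compose a multi-hop link from individual hops with the same rule: weights and latencies accumulate, while bandwidth is limited by the slowest hop. (The second min() argument in kfd_add_peer_prop() has been corrected from iolink2 to iolink1; the quoted text compared iolink2 with itself.) The rule, written out as a standalone sketch with a stand-in struct rather than kfd_iolink_properties:

#include <linux/minmax.h>
#include <linux/types.h>

struct link_metrics {
	uint32_t weight;
	uint32_t min_latency;
	uint32_t max_latency;
	uint32_t min_bandwidth;
	uint32_t max_bandwidth;
};

/* Compose two hops a and b into one indirect link: additive cost metrics,
 * bandwidth bounded by the narrower hop.
 */
static void compose_link(struct link_metrics *out,
			 const struct link_metrics *a,
			 const struct link_metrics *b)
{
	out->weight        = a->weight + b->weight;
	out->min_latency   = a->min_latency + b->min_latency;
	out->max_latency   = a->max_latency + b->max_latency;
	out->min_bandwidth = min(a->min_bandwidth, b->min_bandwidth);
	out->max_bandwidth = min(a->max_bandwidth, b->max_bandwidth);
}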
+static int kfd_dev_create_p2p_links(void)
+{
+	struct kfd_topology_device *dev;
+	struct kfd_topology_device *new_dev;
+#if defined(CONFIG_HSA_AMD_P2P)
+	uint32_t i;
+#endif
+	uint32_t k;
+	int ret = 0;
+
+	k = 0;
+	list_for_each_entry(dev, &topology_device_list, list)
+		k++;
+	if (k < 2)
+		return 0;
+
+	new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
+	if (WARN_ON(!new_dev->gpu))
+		return 0;
+
+	k--;
+
+	/* create in-direct links */
+	ret = kfd_create_indirect_link_prop(new_dev, k);
+	if (ret < 0)
+		goto out;
+
+	/* create p2p links */
+#if defined(CONFIG_HSA_AMD_P2P)
+	i = 0;
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (dev == new_dev)
+			break;
+		if (!dev->gpu || !dev->gpu->adev ||
+		    (dev->gpu->hive_id &&
+		     dev->gpu->hive_id == new_dev->gpu->hive_id))
+			goto next;
+
+		/* check if node(s) is/are peer accessible in one direction or bi-direction */
+		ret = kfd_add_peer_prop(new_dev, dev, i, k);
+		if (ret < 0)
+			goto out;
+
+		ret = kfd_add_peer_prop(dev, new_dev, k, i);
+		if (ret < 0)
+			goto out;
+next:
+		i++;
+	}
+#endif
+
+out:
+	return ret;
 }

 int kfd_topology_add_device(struct kfd_dev *gpu)
@@ -1305,7 +1606,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
	INIT_LIST_HEAD(&temp_topology_device_list);

	gpu_id = kfd_generate_gpu_id(gpu);
-	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);

	/* Check to see if this gpu device exists in the topology_device_list.
@@ -1362,6 +1662,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
	dev->gpu_id = gpu_id;
	gpu->id = gpu_id;

+	kfd_dev_create_p2p_links();
+
	/* TODO: Move the following lines to function
	 *	kfd_add_non_crat_information
	 */
@@ -1507,7 +1809,7 @@ err:
 static void kfd_topology_update_io_links(int proximity_domain)
 {
	struct kfd_topology_device *dev;
-	struct kfd_iolink_properties *iolink, *tmp;
+	struct kfd_iolink_properties *iolink, *p2plink, *tmp;

	list_for_each_entry(dev, &topology_device_list, list) {
		if (dev->proximity_domain > proximity_domain)
@@ -1520,7 +1822,6 @@ static void kfd_topology_update_io_links(int proximity_domain)
			 */
			if (iolink->node_to == proximity_domain) {
				list_del(&iolink->list);
-				dev->io_link_count--;
				dev->node_props.io_links_count--;
			} else {
				if (iolink->node_from > proximity_domain)
					iolink->node_from--;
				if (iolink->node_to > proximity_domain)
					iolink->node_to--;
			}
		}

+		list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
+			/*
+			 * If there is a p2p link to the dev being deleted
+			 * then remove that p2p link also.
+			 */
+			if (p2plink->node_to == proximity_domain) {
+				list_del(&p2plink->list);
+				dev->node_props.p2p_links_count--;
+			} else {
+				if (p2plink->node_from > proximity_domain)
+					p2plink->node_from--;
+				if (p2plink->node_to > proximity_domain)
+					p2plink->node_to--;
+			}
+		}
	}
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 4f80d2ea1000..9f6c949186c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -38,6 +38,7 @@ struct kfd_node_properties {
	uint32_t mem_banks_count;
	uint32_t caches_count;
	uint32_t io_links_count;
+	uint32_t p2p_links_count;
	uint32_t cpu_core_id_base;
	uint32_t simd_id_base;
	uint32_t capability;
@@ -129,14 +130,15 @@ struct kfd_topology_device {
	struct list_head mem_props;
	uint32_t cache_count;
	struct list_head cache_props;
-	uint32_t io_link_count;
	struct list_head io_link_props;
+	struct list_head p2p_link_props;
	struct list_head perf_props;
	struct kfd_dev *gpu;
	struct kobject *kobj_node;
	struct kobject *kobj_mem;
	struct kobject *kobj_cache;
	struct kobject *kobj_iolink;
+	struct kobject *kobj_p2plink;
	struct kobject *kobj_perf;
	struct attribute attr_gpuid;
	struct attribute attr_name;
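[Editor's aside] kfd_topology_update_io_links() above applies the same renumbering rule to io_links and to the new p2p_links when a proximity domain disappears: links pointing at the removed domain are dropped, and higher-numbered endpoints shift down so the remaining domains stay densely numbered. A standalone sketch of that rule (helper name invented, not driver code):

#include <linux/types.h>

/* Returns false when the link targets the removed domain and should be
 * deleted by the caller; otherwise renumbers both endpoints in place.
 */
static bool update_link_for_removed_domain(uint32_t removed,
					   uint32_t *node_from,
					   uint32_t *node_to)
{
	if (*node_to == removed)
		return false;

	if (*node_from > removed)
		(*node_from)--;
	if (*node_to > removed)
		(*node_to)--;
	return true;
}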