summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common
diff options
context:
space:
mode:
author Ofir Bitton <obitton@habana.ai> 2020-07-13 13:36:55 +0300
committer Oded Gabbay <oded.gabbay@gmail.com> 2020-07-24 20:31:37 +0300
commit a04b7cd97eef13a489ca44c979cf91e24cfa7b55 (patch)
tree 5e887df646106fcb7d155cec600b76b8dc5f5903 /drivers/misc/habanalabs/common
parent eb8b293e794bbbafa9d615ea939982a19bf92867 (diff)
habanalabs: create internal CB pool
Create a device MMU-mapped internal command buffer pool, in order to allow the driver to allocate CBs for the signal/wait operations that are fetched by the queues when they are configured with the user's address space ID.

We must pre-map this internal pool due to performance issues.

This pool is needed for future ASIC support and it is currently unused in GOYA and GAUDI.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r-- drivers/misc/habanalabs/common/command_buffer.c 82
-rw-r--r-- drivers/misc/habanalabs/common/command_submission.c 13
-rw-r--r-- drivers/misc/habanalabs/common/context.c 8
-rw-r--r-- drivers/misc/habanalabs/common/habanalabs.h 18
4 files changed, 83 insertions, 38 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 02d13f71b1df..7c38c4f7f9c0 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -10,12 +10,18 @@
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/genalloc.h>
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
- hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
- (void *) (uintptr_t) cb->kernel_address,
- cb->bus_address);
+ if (cb->is_internal)
+ gen_pool_free(hdev->internal_cb_pool,
+ cb->kernel_address, cb->size);
+ else
+ hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
+ (void *) (uintptr_t) cb->kernel_address,
+ cb->bus_address);
+
kfree(cb);
}
@@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)
}
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
- int ctx_id)
+ int ctx_id, bool internal_cb)
{
struct hl_cb *cb;
+ u32 cb_offset;
void *p;
/*
@@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
if (!cb)
return NULL;
- if (ctx_id == HL_KERNEL_ASID_ID)
+ if (internal_cb) {
+ p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
+ if (!p) {
+ kfree(cb);
+ return NULL;
+ }
+
+ cb_offset = p - hdev->internal_cb_pool_virt_addr;
+ cb->is_internal = true;
+ cb->bus_address = hdev->internal_cb_va_base + cb_offset;
+ } else if (ctx_id == HL_KERNEL_ASID_ID) {
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address, GFP_ATOMIC);
- else
+ } else {
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address,
GFP_USER | __GFP_ZERO);
+ }
+
if (!p) {
dev_err(hdev->dev,
"failed to allocate %d of dma memory for CB\n",
@@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
}
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u32 cb_size, u64 *handle, int ctx_id)
+ u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
{
struct hl_cb *cb;
bool alloc_new_cb = true;
@@ -112,28 +131,30 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
goto out_err;
}
- /* Minimum allocation must be PAGE SIZE */
- if (cb_size < PAGE_SIZE)
- cb_size = PAGE_SIZE;
-
- if (ctx_id == HL_KERNEL_ASID_ID &&
- cb_size <= hdev->asic_prop.cb_pool_cb_size) {
-
- spin_lock(&hdev->cb_pool_lock);
- if (!list_empty(&hdev->cb_pool)) {
- cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
- pool_list);
- list_del(&cb->pool_list);
- spin_unlock(&hdev->cb_pool_lock);
- alloc_new_cb = false;
- } else {
- spin_unlock(&hdev->cb_pool_lock);
- dev_dbg(hdev->dev, "CB pool is empty\n");
+ if (!internal_cb) {
+ /* Minimum allocation must be PAGE SIZE */
+ if (cb_size < PAGE_SIZE)
+ cb_size = PAGE_SIZE;
+
+ if (ctx_id == HL_KERNEL_ASID_ID &&
+ cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+
+ spin_lock(&hdev->cb_pool_lock);
+ if (!list_empty(&hdev->cb_pool)) {
+ cb = list_first_entry(&hdev->cb_pool,
+ typeof(*cb), pool_list);
+ list_del(&cb->pool_list);
+ spin_unlock(&hdev->cb_pool_lock);
+ alloc_new_cb = false;
+ } else {
+ spin_unlock(&hdev->cb_pool_lock);
+ dev_dbg(hdev->dev, "CB pool is empty\n");
+ }
}
}
if (alloc_new_cb) {
- cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+ cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
if (!cb) {
rc = -ENOMEM;
goto out_err;
@@ -229,8 +250,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
rc = -EINVAL;
} else {
rc = hl_cb_create(hdev, &hpriv->cb_mgr,
- args->in.cb_size, &handle,
- hpriv->ctx->asid);
+ args->in.cb_size, &handle,
+ hpriv->ctx->asid, false);
}
memset(args, 0, sizeof(*args));
@@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
idr_destroy(&mgr->cb_handles);
}
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+ bool internal_cb)
{
u64 cb_handle;
struct hl_cb *cb;
int rc;
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
- HL_KERNEL_ASID_ID);
+ HL_KERNEL_ASID_ID, internal_cb);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
- HL_KERNEL_ASID_ID);
+ HL_KERNEL_ASID_ID, false);
if (cb) {
cb->is_pool = true;
list_add(&cb->pool_list, &hdev->cb_pool);
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index c605be89f764..e096532c0e48 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -919,7 +919,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
goto put_cs;
}
- cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ if (cs->type == CS_TYPE_WAIT)
+ cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
+ else
+ cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
+
+ cb = hl_cb_kernel_create(hdev, cb_size,
+ q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
if (!cb) {
ctx->cs_counters.out_of_mem_drop_cnt++;
kfree(job);
@@ -927,11 +933,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
goto put_cs;
}
- if (cs->type == CS_TYPE_WAIT)
- cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
- else
- cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
-
job->id = 0;
job->cs = cs;
job->user_cb = cb;
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 1e3e5b19ecd9..b75a20364fad 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -153,10 +153,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
rc = -ENOMEM;
goto mem_ctx_err;
}
+
+ rc = hdev->asic_funcs->ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "ctx_init failed\n");
+ goto ctx_init_err;
+ }
}
return 0;
+ctx_init_err:
+ hl_vm_ctx_fini(ctx);
mem_ctx_err:
if (ctx->asid != HL_KERNEL_ASID_ID)
hl_asid_free(hdev, ctx->asid);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 82532f1f94cb..bf9abfa47b7a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -392,6 +392,7 @@ struct hl_cb_mgr {
* @ctx_id: holds the ID of the owner's context.
* @mmap: true if the CB is currently mmaped to user.
* @is_pool: true if CB was acquired from the pool, false otherwise.
+ * @is_internal: true if the CB was allocated from the internal CB pool, false otherwise.
*/
struct hl_cb {
struct kref refcount;
@@ -408,6 +409,7 @@ struct hl_cb {
u32 ctx_id;
u8 mmap;
u8 is_pool;
+ u8 is_internal;
};
@@ -643,6 +645,7 @@ enum div_select_defs {
* @rreg: Read a register. Needed for simulator support.
* @wreg: Write a register. Needed for simulator support.
* @halt_coresight: stop the ETF and ETR traces.
+ * @ctx_init: context dependent initialization.
* @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
* @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
* @read_device_fw_version: read the device's firmware versions that are
@@ -745,6 +748,7 @@ struct hl_asic_funcs {
u32 (*rreg)(struct hl_device *hdev, u32 reg);
void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
void (*halt_coresight)(struct hl_device *hdev);
+ int (*ctx_init)(struct hl_ctx *ctx);
int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
void (*read_device_fw_version)(struct hl_device *hdev,
@@ -1432,6 +1436,10 @@ struct hl_device_idle_busy_ts {
* @hl_debugfs: device's debugfs manager.
* @cb_pool: list of preallocated CBs.
* @cb_pool_lock: protects the CB pool.
+ * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
+ * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
+ * @internal_cb_pool: internal command buffer memory pool.
+ * @internal_cb_va_base: internal cb pool mmu virtual address base
* @fpriv_list: list of file private data structures. Each structure is created
* when a user opens the device
* @fpriv_list_lock: protects the fpriv_list
@@ -1531,6 +1539,11 @@ struct hl_device {
struct list_head cb_pool;
spinlock_t cb_pool_lock;
+ void *internal_cb_pool_virt_addr;
+ dma_addr_t internal_cb_pool_dma_addr;
+ struct gen_pool *internal_cb_pool;
+ u64 internal_cb_va_base;
+
struct list_head fpriv_list;
struct mutex fpriv_list_lock;
@@ -1741,7 +1754,7 @@ int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
- u64 *handle, int ctx_id);
+ u64 *handle, int ctx_id, bool internal_cb);
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
@@ -1749,7 +1762,8 @@ struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
void hl_cb_put(struct hl_cb *cb);
void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+ bool internal_cb);
int hl_cb_pool_init(struct hl_device *hdev);
int hl_cb_pool_fini(struct hl_device *hdev);