summaryrefslogtreecommitdiff
path: root/drivers/accel/habanalabs/common/habanalabs.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel/habanalabs/common/habanalabs.h')
-rw-r--r--drivers/accel/habanalabs/common/habanalabs.h139
1 files changed, 109 insertions, 30 deletions
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index fa05e76d3d21..eaae69a9f817 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -155,18 +155,12 @@ enum hl_mmu_enablement {
#define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \
hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__)
-#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \
- hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__)
-
#define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \
hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__)
#define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \
hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__)
-#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \
- hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__)
-
#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
@@ -378,6 +372,7 @@ enum hl_cs_type {
CS_RESERVE_SIGNALS,
CS_UNRESERVE_SIGNALS,
CS_TYPE_ENGINE_CORE,
+ CS_TYPE_ENGINES,
CS_TYPE_FLUSH_PCI_HBW_WRITES,
};
@@ -592,6 +587,8 @@ struct hl_hints_range {
* @host_base_address: host physical start address for host DMA from device
* @host_end_address: host physical end address for host DMA from device
* @max_freq_value: current max clk frequency.
+ * @engine_core_interrupt_reg_addr: interrupt register address for engine core to use
+ * in order to raise events toward FW.
* @clk_pll_index: clock PLL index that specify which PLL determines the clock
* we display to the user
* @mmu_pgt_size: MMU page tables total size.
@@ -612,8 +609,8 @@ struct hl_hints_range {
* @cb_pool_cb_cnt: number of CBs in the CB pool.
* @cb_pool_cb_size: size of each CB in the CB pool.
* @decoder_enabled_mask: which decoders are enabled.
- * @decoder_binning_mask: which decoders are binned, 0 means usable and 1
- * means binned (at most one binned decoder per dcore).
+ * @decoder_binning_mask: which decoders are binned, 0 means usable and 1 means binned.
+ * @rotator_enabled_mask: which rotators are enabled.
* @edma_enabled_mask: which EDMAs are enabled.
* @edma_binning_mask: which EDMAs are binned, 0 means usable and 1 means
* binned (at most one binned DMA).
@@ -648,7 +645,8 @@ struct hl_hints_range {
* which the property supports_user_set_page_size is true
* (i.e. the DRAM supports multiple page sizes), otherwise
* it will shall be equal to dram_page_size.
- * @num_engine_cores: number of engine cpu cores
+ * @num_engine_cores: number of engine cpu cores.
+ * @max_num_of_engines: maximum number of all engines in the ASIC.
* @num_of_special_blocks: special_blocks array size.
* @glbl_err_cause_num: global err cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
@@ -663,6 +661,8 @@ struct hl_hints_range {
* @first_available_cq: first available CQ for the user.
* @user_interrupt_count: number of user interrupts.
* @user_dec_intr_count: number of decoder interrupts exposed to user.
+ * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
+ * @eq_interrupt_id: interrupt id for EQ, uses to synchronize EQ interrupts in hard-reset.
* @cache_line_size: device cache line size.
* @server_type: Server type that the ASIC is currently installed in.
* The value is according to enum hl_server_type in uapi file.
@@ -698,6 +698,7 @@ struct hl_hints_range {
* @supports_user_set_page_size: true if user can set the allocation page size.
* @dma_mask: the dma mask to be set for this device
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
+ * @supports_engine_modes: true if changing engines/engine_cores modes is supported.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -739,6 +740,7 @@ struct asic_fixed_properties {
u64 host_base_address;
u64 host_end_address;
u64 max_freq_value;
+ u64 engine_core_interrupt_reg_addr;
u32 clk_pll_index;
u32 mmu_pgt_size;
u32 mmu_pte_size;
@@ -759,6 +761,7 @@ struct asic_fixed_properties {
u32 cb_pool_cb_size;
u32 decoder_enabled_mask;
u32 decoder_binning_mask;
+ u32 rotator_enabled_mask;
u32 edma_enabled_mask;
u32 edma_binning_mask;
u32 max_pending_cs;
@@ -775,6 +778,7 @@ struct asic_fixed_properties {
u32 xbar_edge_enabled_mask;
u32 device_mem_alloc_default_page_size;
u32 num_engine_cores;
+ u32 max_num_of_engines;
u32 num_of_special_blocks;
u32 glbl_err_cause_num;
u32 hbw_flush_reg;
@@ -788,6 +792,8 @@ struct asic_fixed_properties {
u16 first_available_cq[HL_MAX_DCORES];
u16 user_interrupt_count;
u16 user_dec_intr_count;
+ u16 tpc_interrupt_id;
+ u16 eq_interrupt_id;
u16 cache_line_size;
u16 server_type;
u8 completion_queues_count;
@@ -811,6 +817,7 @@ struct asic_fixed_properties {
u8 supports_user_set_page_size;
u8 dma_mask;
u8 supports_advanced_cpucp_rc;
+ u8 supports_engine_modes;
};
/**
@@ -1096,6 +1103,8 @@ struct hl_cq {
enum hl_user_interrupt_type {
HL_USR_INTERRUPT_CQ = 0,
HL_USR_INTERRUPT_DECODER,
+ HL_USR_INTERRUPT_TPC,
+ HL_USR_INTERRUPT_UNEXPECTED
};
/**
@@ -1104,6 +1113,7 @@ enum hl_user_interrupt_type {
* @type: user interrupt type
* @wait_list_head: head to the list of user threads pending on this interrupt
* @wait_list_lock: protects wait_list_head
+ * @timestamp: last timestamp taken upon interrupt
* @interrupt_id: msix interrupt id
*/
struct hl_user_interrupt {
@@ -1111,6 +1121,7 @@ struct hl_user_interrupt {
enum hl_user_interrupt_type type;
struct list_head wait_list_head;
spinlock_t wait_list_lock;
+ ktime_t timestamp;
u32 interrupt_id;
};
@@ -1200,15 +1211,15 @@ struct hl_eq {
/**
* struct hl_dec - describes a decoder sw instance.
* @hdev: pointer to the device structure.
- * @completion_abnrm_work: workqueue object to run when decoder generates an error interrupt
+ * @abnrm_intr_work: workqueue work item to run when decoder generates an error interrupt.
* @core_id: ID of the decoder.
* @base_addr: base address of the decoder.
*/
struct hl_dec {
- struct hl_device *hdev;
- struct work_struct completion_abnrm_work;
- u32 core_id;
- u32 base_addr;
+ struct hl_device *hdev;
+ struct work_struct abnrm_intr_work;
+ u32 core_id;
+ u32 base_addr;
};
/**
@@ -1562,6 +1573,7 @@ struct engines_data {
* @access_dev_mem: access device memory
* @set_dram_bar_base: set the base of the DRAM BAR
* @set_engine_cores: set a config command to engine cores
+ * @set_engines: set a config command to user engines
* @send_device_activity: indication to FW about device availability
* @set_dram_properties: set DRAM related properties.
* @set_binning_masks: set binning/enable masks for all relevant components.
@@ -1574,7 +1586,7 @@ struct hl_asic_funcs {
int (*sw_init)(struct hl_device *hdev);
int (*sw_fini)(struct hl_device *hdev);
int (*hw_init)(struct hl_device *hdev);
- void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
+ int (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
int (*suspend)(struct hl_device *hdev);
int (*resume)(struct hl_device *hdev);
@@ -1701,6 +1713,8 @@ struct hl_asic_funcs {
u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
u32 num_cores, u32 core_command);
+ int (*set_engines)(struct hl_device *hdev, u32 *engine_ids,
+ u32 num_engines, u32 engine_command);
int (*send_device_activity)(struct hl_device *hdev, bool open);
int (*set_dram_properties)(struct hl_device *hdev);
int (*set_binning_masks)(struct hl_device *hdev);
@@ -1824,7 +1838,7 @@ struct hl_cs_outcome_store {
* @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
* @hdev: pointer to the device structure.
* @refcount: reference counter for the context. Context is released only when
- * this hits 0l. It is incremented on CS and CS_WAIT.
+ * this hits 0. It is incremented on CS and CS_WAIT.
* @cs_pending: array of hl fence objects representing pending CS.
* @outcome_store: storage data structure used to remember outcomes of completed
* command submissions for a long time after CS id wraparound.
@@ -2318,7 +2332,7 @@ struct hl_debugfs_entry {
* @userptr_list: list of available userptrs (virtual memory chunk descriptor).
* @userptr_spinlock: protects userptr_list.
* @ctx_mem_hash_list: list of available contexts with MMU mappings.
- * @ctx_mem_hash_spinlock: protects cb_list.
+ * @ctx_mem_hash_mutex: protects list of available contexts with MMU mappings.
* @data_dma_blob_desc: data DMA descriptor of blob.
* @mon_dump_blob_desc: monitor dump descriptor of blob.
* @state_dump: data of the system states in case of a bad cs.
@@ -2349,7 +2363,7 @@ struct hl_dbg_device_entry {
struct list_head userptr_list;
spinlock_t userptr_spinlock;
struct list_head ctx_mem_hash_list;
- spinlock_t ctx_mem_hash_spinlock;
+ struct mutex ctx_mem_hash_mutex;
struct debugfs_blob_wrapper data_dma_blob_desc;
struct debugfs_blob_wrapper mon_dump_blob_desc;
char *state_dump[HL_STATE_DUMP_HIST_LEN];
@@ -2974,8 +2988,8 @@ struct cs_timeout_info {
* @cq_addr: the address of the current handled command buffer
* @cq_size: the size of the current handled command buffer
* @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array.
- * should be equal to 1 incase of undefined opcode
- * in Upper-CP (specific stream) and equal to 4 incase
+ * should be equal to 1 in case of undefined opcode
+ * in Upper-CP (specific stream) and equal to 4 in case
* of undefined opcode in Lower-CP.
* @engine_id: engine-id that the error occurred on
* @stream_id: the stream id the error occurred on. In case the stream equals to
@@ -3032,17 +3046,55 @@ struct razwi_info {
};
/**
+ * struct hw_err_info - HW error information.
+ * @event: holds information on the event.
+ * @event_detected: if set as 1, then a HW event was discovered for the
+ * first time after the driver has finished booting-up.
+ * currently we assume that only fatal events (that require hard-reset) are
+ * reported so we don't care of the others that might follow it.
+ * so once changed to 1, it will remain that way.
+ * TODO: support multiple events.
+ * @event_info_available: indicates that a HW event info is now available.
+ */
+struct hw_err_info {
+ struct hl_info_hw_err_event event;
+ atomic_t event_detected;
+ bool event_info_available;
+};
+
+/**
+ * struct fw_err_info - FW error information.
+ * @event: holds information on the event.
+ * @event_detected: if set as 1, then a FW event was discovered for the
+ * first time after the driver has finished booting-up.
+ * currently we assume that only fatal events (that require hard-reset) are
+ * reported so we don't care of the others that might follow it.
+ * so once changed to 1, it will remain that way.
+ * TODO: support multiple events.
+ * @event_info_available: indicates that a HW event info is now available.
+ */
+struct fw_err_info {
+ struct hl_info_fw_err_event event;
+ atomic_t event_detected;
+ bool event_info_available;
+};
+
+/**
* struct hl_error_info - holds information collected during an error.
* @cs_timeout: CS timeout error information.
* @razwi_info: RAZWI information.
* @undef_opcode: undefined opcode information.
* @page_fault_info: page fault information.
+ * @hw_err: (fatal) hardware error information.
+ * @fw_err: firmware error information.
*/
struct hl_error_info {
struct cs_timeout_info cs_timeout;
struct razwi_info razwi_info;
struct undefined_opcode_info undef_opcode;
struct page_fault_info page_fault_info;
+ struct hw_err_info hw_err;
+ struct fw_err_info fw_err;
};
/**
@@ -3090,6 +3142,7 @@ struct hl_reset_info {
* (required only for PCI address match mode)
* @pcie_bar: array of available PCIe bars virtual addresses.
* @rmmio: configuration area address on SRAM.
+ * @hclass: pointer to the habanalabs class.
* @cdev: related char device.
* @cdev_ctrl: char device for control operations only (INFO IOCTL)
* @dev: related kernel basic device structure.
@@ -3104,6 +3157,8 @@ struct hl_reset_info {
* @user_interrupt: array of hl_user_interrupt. upon the corresponding user
* interrupt, driver will monitor the list of fences
* registered to this interrupt.
+ * @tpc_interrupt: single TPC interrupt for all TPCs.
+ * @unexpected_error_interrupt: single interrupt for unexpected user error indication.
* @common_user_cq_interrupt: common user CQ interrupt for all user CQ interrupts.
* upon any user CQ interrupt, driver will monitor the
* list of fences registered to this common structure.
@@ -3199,6 +3254,7 @@ struct hl_reset_info {
* drams are binned-out
* @tpc_binning: contains mask of tpc engines that is received from the f/w which indicates which
* tpc engines are binned-out
+ * @dmabuf_export_cnt: number of dma-buf exporting.
* @card_type: Various ASICs have several card types. This indicates the card
* type of the current device.
* @major: habanalabs kernel driver major.
@@ -3253,6 +3309,8 @@ struct hl_reset_info {
* @supports_mmu_prefetch: true if prefetch is supported, otherwise false.
* @reset_upon_device_release: reset the device when the user closes the file descriptor of the
* device.
+ * @supports_ctx_switch: true if a ctx switch is required upon first submission.
+ * @support_preboot_binning: true if we support read binning info from preboot.
* @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing.
* @fw_components: Controls which f/w components to load to the device. There are multiple f/w
* stages and sometimes we want to stop at a certain stage. Used only for testing.
@@ -3266,14 +3324,13 @@ struct hl_reset_info {
* Used only for testing.
* @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies
* that the f/w is always alive. Used only for testing.
- * @supports_ctx_switch: true if a ctx switch is required upon first submission.
- * @support_preboot_binning: true if we support read binning info from preboot.
*/
struct hl_device {
struct pci_dev *pdev;
u64 pcie_bar_phys[HL_PCI_NUM_BARS];
void __iomem *pcie_bar[HL_PCI_NUM_BARS];
void __iomem *rmmio;
+ struct class *hclass;
struct cdev cdev;
struct cdev cdev_ctrl;
struct device *dev;
@@ -3286,6 +3343,8 @@ struct hl_device {
enum hl_asic_type asic_type;
struct hl_cq *completion_queue;
struct hl_user_interrupt *user_interrupt;
+ struct hl_user_interrupt tpc_interrupt;
+ struct hl_user_interrupt unexpected_error_interrupt;
struct hl_user_interrupt common_user_cq_interrupt;
struct hl_user_interrupt common_decoder_interrupt;
struct hl_cs **shadow_cs_queue;
@@ -3369,7 +3428,7 @@ struct hl_device {
u64 fw_comms_poll_interval_usec;
u64 dram_binning;
u64 tpc_binning;
-
+ atomic_t dmabuf_export_cnt;
enum cpucp_card_types card_type;
u32 major;
u32 high_pll;
@@ -3412,7 +3471,7 @@ struct hl_device {
u8 supports_ctx_switch;
u8 support_preboot_binning;
- /* Parameters for bring-up */
+ /* Parameters for bring-up to be upstreamed */
u64 nic_ports_mask;
u64 fw_components;
u8 mmu_enable;
@@ -3450,6 +3509,20 @@ struct hl_cs_encaps_sig_handle {
u32 count;
};
+/**
+ * struct hl_info_fw_err_info - firmware error information structure
+ * @err_type: The type of error detected (or reported).
+ * @event_mask: Pointer to the event mask to be modified with the detected error flag
+ * (can be NULL)
+ * @event_id: The id of the event that reported the error
+ * (applicable when err_type is HL_INFO_FW_REPORTED_ERR).
+ */
+struct hl_info_fw_err_info {
+ enum hl_info_fw_err_type err_type;
+ u64 *event_mask;
+ u16 event_id;
+};
+
/*
* IOCTLs
*/
@@ -3474,6 +3547,10 @@ struct hl_ioctl_desc {
hl_ioctl_t *func;
};
+static inline bool hl_is_fw_ver_below_1_9(struct hl_device *hdev)
+{
+ return (hdev->fw_major_version < 42);
+}
/*
* Kernel module functions that can be accessed by entire module
@@ -3537,14 +3614,12 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
}
uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
+void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle);
+void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr);
void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
gfp_t flag, const char *caller);
void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
dma_addr_t dma_handle, const char *caller);
-void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
- dma_addr_t *dma_handle, const char *caller);
-void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
- const char *caller);
void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
dma_addr_t *dma_handle, const char *caller);
void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
@@ -3591,7 +3666,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
-irqreturn_t hl_irq_handler_default(int irq, void *arg);
+irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg);
u32 hl_cq_inc_ptr(u32 ptr);
int hl_asid_init(struct hl_device *hdev);
@@ -3612,7 +3687,7 @@ int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
-int hl_device_init(struct hl_device *hdev, struct class *hclass);
+int hl_device_init(struct hl_device *hdev);
void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev);
int hl_device_resume(struct hl_device *hdev);
@@ -3662,6 +3737,7 @@ bool cs_needs_timeout(struct hl_cs *cs);
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
void hl_multi_cs_completion_init(struct hl_device *hdev);
+u32 hl_get_active_cs_num(struct hl_device *hdev);
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -3861,6 +3937,7 @@ const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg);
void hl_mem_mgr_fini(struct hl_mem_mgr *mmg);
+void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg);
int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
void *args);
struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg,
@@ -3879,6 +3956,8 @@ void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_o
void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu);
void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
u64 *event_mask);
+void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask);
+void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
#ifdef CONFIG_DEBUG_FS