diff options
Diffstat (limited to 'drivers/accel/habanalabs/common/habanalabs.h')
-rw-r--r-- | drivers/accel/habanalabs/common/habanalabs.h | 139 |
1 files changed, 109 insertions, 30 deletions
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index fa05e76d3d21..eaae69a9f817 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -155,18 +155,12 @@ enum hl_mmu_enablement { #define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \ hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__) -#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \ - hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__) - #define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \ hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__) #define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \ hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__) -#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \ - hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__) - #define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \ hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__) @@ -378,6 +372,7 @@ enum hl_cs_type { CS_RESERVE_SIGNALS, CS_UNRESERVE_SIGNALS, CS_TYPE_ENGINE_CORE, + CS_TYPE_ENGINES, CS_TYPE_FLUSH_PCI_HBW_WRITES, }; @@ -592,6 +587,8 @@ struct hl_hints_range { * @host_base_address: host physical start address for host DMA from device * @host_end_address: host physical end address for host DMA from device * @max_freq_value: current max clk frequency. + * @engine_core_interrupt_reg_addr: interrupt register address for engine core to use + * in order to raise events toward FW. * @clk_pll_index: clock PLL index that specify which PLL determines the clock * we display to the user * @mmu_pgt_size: MMU page tables total size. @@ -612,8 +609,8 @@ struct hl_hints_range { * @cb_pool_cb_cnt: number of CBs in the CB pool. * @cb_pool_cb_size: size of each CB in the CB pool. * @decoder_enabled_mask: which decoders are enabled. 
- * @decoder_binning_mask: which decoders are binned, 0 means usable and 1 - * means binned (at most one binned decoder per dcore). + * @decoder_binning_mask: which decoders are binned, 0 means usable and 1 means binned. + * @rotator_enabled_mask: which rotators are enabled. * @edma_enabled_mask: which EDMAs are enabled. * @edma_binning_mask: which EDMAs are binned, 0 means usable and 1 means * binned (at most one binned DMA). @@ -648,7 +645,8 @@ struct hl_hints_range { * which the property supports_user_set_page_size is true * (i.e. the DRAM supports multiple page sizes), otherwise * it will shall be equal to dram_page_size. - * @num_engine_cores: number of engine cpu cores + * @num_engine_cores: number of engine cpu cores. + * @max_num_of_engines: maximum number of all engines in the ASIC. * @num_of_special_blocks: special_blocks array size. * @glbl_err_cause_num: global err cause number. * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is @@ -663,6 +661,8 @@ struct hl_hints_range { * @first_available_cq: first available CQ for the user. * @user_interrupt_count: number of user interrupts. * @user_dec_intr_count: number of decoder interrupts exposed to user. + * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host. + * @eq_interrupt_id: interrupt id for EQ, uses to synchronize EQ interrupts in hard-reset. * @cache_line_size: device cache line size. * @server_type: Server type that the ASIC is currently installed in. * The value is according to enum hl_server_type in uapi file. @@ -698,6 +698,7 @@ struct hl_hints_range { * @supports_user_set_page_size: true if user can set the allocation page size. * @dma_mask: the dma mask to be set for this device * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. + * @supports_engine_modes: true if changing engines/engine_cores modes is supported. 
*/ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -739,6 +740,7 @@ struct asic_fixed_properties { u64 host_base_address; u64 host_end_address; u64 max_freq_value; + u64 engine_core_interrupt_reg_addr; u32 clk_pll_index; u32 mmu_pgt_size; u32 mmu_pte_size; @@ -759,6 +761,7 @@ struct asic_fixed_properties { u32 cb_pool_cb_size; u32 decoder_enabled_mask; u32 decoder_binning_mask; + u32 rotator_enabled_mask; u32 edma_enabled_mask; u32 edma_binning_mask; u32 max_pending_cs; @@ -775,6 +778,7 @@ struct asic_fixed_properties { u32 xbar_edge_enabled_mask; u32 device_mem_alloc_default_page_size; u32 num_engine_cores; + u32 max_num_of_engines; u32 num_of_special_blocks; u32 glbl_err_cause_num; u32 hbw_flush_reg; @@ -788,6 +792,8 @@ struct asic_fixed_properties { u16 first_available_cq[HL_MAX_DCORES]; u16 user_interrupt_count; u16 user_dec_intr_count; + u16 tpc_interrupt_id; + u16 eq_interrupt_id; u16 cache_line_size; u16 server_type; u8 completion_queues_count; @@ -811,6 +817,7 @@ struct asic_fixed_properties { u8 supports_user_set_page_size; u8 dma_mask; u8 supports_advanced_cpucp_rc; + u8 supports_engine_modes; }; /** @@ -1096,6 +1103,8 @@ struct hl_cq { enum hl_user_interrupt_type { HL_USR_INTERRUPT_CQ = 0, HL_USR_INTERRUPT_DECODER, + HL_USR_INTERRUPT_TPC, + HL_USR_INTERRUPT_UNEXPECTED }; /** @@ -1104,6 +1113,7 @@ enum hl_user_interrupt_type { * @type: user interrupt type * @wait_list_head: head to the list of user threads pending on this interrupt * @wait_list_lock: protects wait_list_head + * @timestamp: last timestamp taken upon interrupt * @interrupt_id: msix interrupt id */ struct hl_user_interrupt { @@ -1111,6 +1121,7 @@ struct hl_user_interrupt { enum hl_user_interrupt_type type; struct list_head wait_list_head; spinlock_t wait_list_lock; + ktime_t timestamp; u32 interrupt_id; }; @@ -1200,15 +1211,15 @@ struct hl_eq { /** * struct hl_dec - describes a decoder sw instance. * @hdev: pointer to the device structure. 
- * @completion_abnrm_work: workqueue object to run when decoder generates an error interrupt + * @abnrm_intr_work: workqueue work item to run when decoder generates an error interrupt. * @core_id: ID of the decoder. * @base_addr: base address of the decoder. */ struct hl_dec { - struct hl_device *hdev; - struct work_struct completion_abnrm_work; - u32 core_id; - u32 base_addr; + struct hl_device *hdev; + struct work_struct abnrm_intr_work; + u32 core_id; + u32 base_addr; }; /** @@ -1562,6 +1573,7 @@ struct engines_data { * @access_dev_mem: access device memory * @set_dram_bar_base: set the base of the DRAM BAR * @set_engine_cores: set a config command to engine cores + * @set_engines: set a config command to user engines * @send_device_activity: indication to FW about device availability * @set_dram_properties: set DRAM related properties. * @set_binning_masks: set binning/enable masks for all relevant components. @@ -1574,7 +1586,7 @@ struct hl_asic_funcs { int (*sw_init)(struct hl_device *hdev); int (*sw_fini)(struct hl_device *hdev); int (*hw_init)(struct hl_device *hdev); - void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset); + int (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset); void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset); int (*suspend)(struct hl_device *hdev); int (*resume)(struct hl_device *hdev); @@ -1701,6 +1713,8 @@ struct hl_asic_funcs { u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, u32 num_cores, u32 core_command); + int (*set_engines)(struct hl_device *hdev, u32 *engine_ids, + u32 num_engines, u32 engine_command); int (*send_device_activity)(struct hl_device *hdev, bool open); int (*set_dram_properties)(struct hl_device *hdev); int (*set_binning_masks)(struct hl_device *hdev); @@ -1824,7 +1838,7 @@ struct hl_cs_outcome_store { * @hpriv: pointer to the private (Kernel Driver) data of the process (fd). 
* @hdev: pointer to the device structure. * @refcount: reference counter for the context. Context is released only when - * this hits 0l. It is incremented on CS and CS_WAIT. + * this hits 0. It is incremented on CS and CS_WAIT. * @cs_pending: array of hl fence objects representing pending CS. * @outcome_store: storage data structure used to remember outcomes of completed * command submissions for a long time after CS id wraparound. @@ -2318,7 +2332,7 @@ struct hl_debugfs_entry { * @userptr_list: list of available userptrs (virtual memory chunk descriptor). * @userptr_spinlock: protects userptr_list. * @ctx_mem_hash_list: list of available contexts with MMU mappings. - * @ctx_mem_hash_spinlock: protects cb_list. + * @ctx_mem_hash_mutex: protects list of available contexts with MMU mappings. * @data_dma_blob_desc: data DMA descriptor of blob. * @mon_dump_blob_desc: monitor dump descriptor of blob. * @state_dump: data of the system states in case of a bad cs. @@ -2349,7 +2363,7 @@ struct hl_dbg_device_entry { struct list_head userptr_list; spinlock_t userptr_spinlock; struct list_head ctx_mem_hash_list; - spinlock_t ctx_mem_hash_spinlock; + struct mutex ctx_mem_hash_mutex; struct debugfs_blob_wrapper data_dma_blob_desc; struct debugfs_blob_wrapper mon_dump_blob_desc; char *state_dump[HL_STATE_DUMP_HIST_LEN]; @@ -2974,8 +2988,8 @@ struct cs_timeout_info { * @cq_addr: the address of the current handled command buffer * @cq_size: the size of the current handled command buffer * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. - * should be equal to 1 incase of undefined opcode - * in Upper-CP (specific stream) and equal to 4 incase + * should be equal to 1 in case of undefined opcode + * in Upper-CP (specific stream) and equal to 4 in case * of undefined opcode in Lower-CP. * @engine_id: engine-id that the error occurred on * @stream_id: the stream id the error occurred on. 
In case the stream equals to @@ -3032,17 +3046,55 @@ struct razwi_info { }; /** + * struct hw_err_info - HW error information. + * @event: holds information on the event. + * @event_detected: if set as 1, then a HW event was discovered for the + * first time after the driver has finished booting-up. + * currently we assume that only fatal events (that require hard-reset) are + * reported so we don't care of the others that might follow it. + * so once changed to 1, it will remain that way. + * TODO: support multiple events. + * @event_info_available: indicates that a HW event info is now available. + */ +struct hw_err_info { + struct hl_info_hw_err_event event; + atomic_t event_detected; + bool event_info_available; +}; + +/** + * struct fw_err_info - FW error information. + * @event: holds information on the event. + * @event_detected: if set as 1, then a FW event was discovered for the + * first time after the driver has finished booting-up. + * currently we assume that only fatal events (that require hard-reset) are + * reported so we don't care of the others that might follow it. + * so once changed to 1, it will remain that way. + * TODO: support multiple events. + * @event_info_available: indicates that a FW event info is now available. + */ +struct fw_err_info { + struct hl_info_fw_err_event event; + atomic_t event_detected; + bool event_info_available; +}; + +/** * struct hl_error_info - holds information collected during an error. * @cs_timeout: CS timeout error information. * @razwi_info: RAZWI information. * @undef_opcode: undefined opcode information. * @page_fault_info: page fault information. + * @hw_err: (fatal) hardware error information. + * @fw_err: firmware error information. 
*/ struct hl_error_info { struct cs_timeout_info cs_timeout; struct razwi_info razwi_info; struct undefined_opcode_info undef_opcode; struct page_fault_info page_fault_info; + struct hw_err_info hw_err; + struct fw_err_info fw_err; }; /** @@ -3090,6 +3142,7 @@ struct hl_reset_info { * (required only for PCI address match mode) * @pcie_bar: array of available PCIe bars virtual addresses. * @rmmio: configuration area address on SRAM. + * @hclass: pointer to the habanalabs class. * @cdev: related char device. * @cdev_ctrl: char device for control operations only (INFO IOCTL) * @dev: related kernel basic device structure. @@ -3104,6 +3157,8 @@ struct hl_reset_info { * @user_interrupt: array of hl_user_interrupt. upon the corresponding user * interrupt, driver will monitor the list of fences * registered to this interrupt. + * @tpc_interrupt: single TPC interrupt for all TPCs. + * @unexpected_error_interrupt: single interrupt for unexpected user error indication. * @common_user_cq_interrupt: common user CQ interrupt for all user CQ interrupts. * upon any user CQ interrupt, driver will monitor the * list of fences registered to this common structure. @@ -3199,6 +3254,7 @@ struct hl_reset_info { * drams are binned-out * @tpc_binning: contains mask of tpc engines that is received from the f/w which indicates which * tpc engines are binned-out + * @dmabuf_export_cnt: number of dma-buf exporting. * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. * @major: habanalabs kernel driver major. @@ -3253,6 +3309,8 @@ struct hl_reset_info { * @supports_mmu_prefetch: true if prefetch is supported, otherwise false. * @reset_upon_device_release: reset the device when the user closes the file descriptor of the * device. + * @supports_ctx_switch: true if a ctx switch is required upon first submission. + * @support_preboot_binning: true if we support read binning info from preboot. 
* @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing. * @fw_components: Controls which f/w components to load to the device. There are multiple f/w * stages and sometimes we want to stop at a certain stage. Used only for testing. @@ -3266,14 +3324,13 @@ struct hl_reset_info { * Used only for testing. * @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies * that the f/w is always alive. Used only for testing. - * @supports_ctx_switch: true if a ctx switch is required upon first submission. - * @support_preboot_binning: true if we support read binning info from preboot. */ struct hl_device { struct pci_dev *pdev; u64 pcie_bar_phys[HL_PCI_NUM_BARS]; void __iomem *pcie_bar[HL_PCI_NUM_BARS]; void __iomem *rmmio; + struct class *hclass; struct cdev cdev; struct cdev cdev_ctrl; struct device *dev; @@ -3286,6 +3343,8 @@ struct hl_device { enum hl_asic_type asic_type; struct hl_cq *completion_queue; struct hl_user_interrupt *user_interrupt; + struct hl_user_interrupt tpc_interrupt; + struct hl_user_interrupt unexpected_error_interrupt; struct hl_user_interrupt common_user_cq_interrupt; struct hl_user_interrupt common_decoder_interrupt; struct hl_cs **shadow_cs_queue; @@ -3369,7 +3428,7 @@ struct hl_device { u64 fw_comms_poll_interval_usec; u64 dram_binning; u64 tpc_binning; - + atomic_t dmabuf_export_cnt; enum cpucp_card_types card_type; u32 major; u32 high_pll; @@ -3412,7 +3471,7 @@ struct hl_device { u8 supports_ctx_switch; u8 support_preboot_binning; - /* Parameters for bring-up */ + /* Parameters for bring-up to be upstreamed */ u64 nic_ports_mask; u64 fw_components; u8 mmu_enable; @@ -3450,6 +3509,20 @@ struct hl_cs_encaps_sig_handle { u32 count; }; +/** + * struct hl_info_fw_err_info - firmware error information structure + * @err_type: The type of error detected (or reported). 
+ * @event_mask: Pointer to the event mask to be modified with the detected error flag + * (can be NULL) + * @event_id: The id of the event that reported the error + * (applicable when err_type is HL_INFO_FW_REPORTED_ERR). + */ +struct hl_info_fw_err_info { + enum hl_info_fw_err_type err_type; + u64 *event_mask; + u16 event_id; +}; + /* * IOCTLs */ @@ -3474,6 +3547,10 @@ struct hl_ioctl_desc { hl_ioctl_t *func; }; +static inline bool hl_is_fw_ver_below_1_9(struct hl_device *hdev) +{ + return (hdev->fw_major_version < 42); +} /* * Kernel module functions that can be accessed by entire module @@ -3537,14 +3614,12 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, } uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr); +void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle); +void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, gfp_t flag, const char *caller); void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, dma_addr_t dma_handle, const char *caller); -void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, - dma_addr_t *dma_handle, const char *caller); -void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, - const char *caller); void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, dma_addr_t *dma_handle, const char *caller); void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, @@ -3591,7 +3666,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg); irqreturn_t hl_irq_handler_eq(int irq, void *arg); irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg); irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg); -irqreturn_t hl_irq_handler_default(int irq, void *arg); 
+irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg); u32 hl_cq_inc_ptr(u32 ptr); int hl_asid_init(struct hl_device *hdev); @@ -3612,7 +3687,7 @@ int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr, void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr); void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr); -int hl_device_init(struct hl_device *hdev, struct class *hclass); +int hl_device_init(struct hl_device *hdev); void hl_device_fini(struct hl_device *hdev); int hl_device_suspend(struct hl_device *hdev); int hl_device_resume(struct hl_device *hdev); @@ -3662,6 +3737,7 @@ bool cs_needs_timeout(struct hl_cs *cs); bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs); struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq); void hl_multi_cs_completion_init(struct hl_device *hdev); +u32 hl_get_active_cs_num(struct hl_device *hdev); void goya_set_asic_funcs(struct hl_device *hdev); void gaudi_set_asic_funcs(struct hl_device *hdev); @@ -3861,6 +3937,7 @@ const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type); void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg); void hl_mem_mgr_fini(struct hl_mem_mgr *mmg); +void hl_mem_mgr_idr_destroy(struct hl_mem_mgr *mmg); int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma, void *args); struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg, @@ -3879,6 +3956,8 @@ void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_o void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu); void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, u64 *event_mask); +void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask); +void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info); #ifdef CONFIG_DEBUG_FS |