summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-10 11:31:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-10 11:31:47 -0700
commit5ffc06ebeaab65d8e08df6953caf8155df2c45f8 (patch)
treeba9e297c32ff79c630c55994b65db1369c61e27d /include
parenta668acb8f01fc0d1e3877cddecbe319ef2ef651c (diff)
parent4cd67adc44a3ccdb3b8526c9f932f905284e028f (diff)
Merge tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull habanalabs updates from Greg KH: "Here is another round of misc driver patches for 5.15-rc1. In here is only updates for the Habanalabs driver. This request is late because the previously-objected-to dma-buf patches are all removed and some fixes that you and others found are now included in here as well. All of these have been in linux-next for well over a week with no reports of problems, and they are all self-contained to only this one driver. Full details are in the shortlog" * tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (61 commits) habanalabs/gaudi: hwmon default card name habanalabs: add support for f/w reset habanalabs/gaudi: block ICACHE_BASE_ADDERESS_HIGH in TPC habanalabs: cannot sleep while holding spinlock habanalabs: never copy_from_user inside spinlock habanalabs: remove unnecessary device status check habanalabs: disable IRQ in user interrupts spinlock habanalabs: add "in device creation" status habanalabs/gaudi: invalidate PMMU mem cache on init habanalabs/gaudi: size should be printed in decimal habanalabs/gaudi: define DC POWER for secured PMC habanalabs/gaudi: unmask out of bounds SLM access interrupt habanalabs: add userptr_lookup node in debugfs habanalabs/gaudi: fetch TPC/MME ECC errors from F/W habanalabs: modify multi-CS to wait on stream masters habanalabs/gaudi: add monitored SOBs to state dump habanalabs/gaudi: restore user registers when context opens habanalabs/gaudi: increase boot fit timeout habanalabs: update to latest firmware headers habanalabs/gaudi: minimize number of register reads ...
Diffstat (limited to 'include')
-rw-r--r--include/uapi/misc/habanalabs.h186
1 files changed, 166 insertions, 20 deletions
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a47a731e4527..7cc2a0f3f2f5 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -276,7 +276,17 @@ enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET,
HL_DEVICE_STATUS_MALFUNCTION,
- HL_DEVICE_STATUS_NEEDS_RESET
+ HL_DEVICE_STATUS_NEEDS_RESET,
+ HL_DEVICE_STATUS_IN_DEVICE_CREATION,
+ HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
+};
+
+enum hl_server_type {
+ HL_SERVER_TYPE_UNKNOWN = 0,
+ HL_SERVER_GAUDI_HLS1 = 1,
+ HL_SERVER_GAUDI_HLS1H = 2,
+ HL_SERVER_GAUDI_TYPE1 = 3,
+ HL_SERVER_GAUDI_TYPE2 = 4
};
/* Opcode for management ioctl
@@ -337,17 +347,49 @@ enum hl_device_status {
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
+/**
+ * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
+ * @sram_base_address: The first SRAM physical base address that is free to be
+ * used by the user.
+ * @dram_base_address: The first DRAM virtual or physical base address that is
+ * free to be used by the user.
+ * @dram_size: The DRAM size that is available to the user.
+ * @sram_size: The SRAM size that is available to the user.
+ * @num_of_events: The number of events that can be received from the f/w. This
+ * is needed so the user can what is the size of the h/w events
+ * array he needs to pass to the kernel when he wants to fetch
+ * the event counters.
+ * @device_id: PCI device ID of the ASIC.
+ * @module_id: Module ID of the ASIC for mezzanine cards in servers
+ * (From OCP spec).
+ * @first_available_interrupt_id: The first available interrupt ID for the user
+ * to be used when it works with user interrupts.
+ * @server_type: Server type that the Gaudi ASIC is currently installed in.
+ * The value is according to enum hl_server_type
+ * @cpld_version: CPLD version on the board.
+ * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
+ * in some ASICs.
+ * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
+ * for Goya/Gaudi only.
+ * @dram_enabled: Whether the DRAM is enabled.
+ * @cpucp_version: The CPUCP f/w version.
+ * @card_name: The card name as passed by the f/w.
+ * @dram_page_size: The DRAM physical page size.
+ */
struct hl_info_hw_ip_info {
__u64 sram_base_address;
__u64 dram_base_address;
__u64 dram_size;
__u32 sram_size;
__u32 num_of_events;
- __u32 device_id; /* PCI Device ID */
- __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
+ __u32 device_id;
+ __u32 module_id;
__u32 reserved;
__u16 first_available_interrupt_id;
- __u16 reserved2;
+ __u16 server_type;
__u32 cpld_version;
__u32 psoc_pci_pll_nr;
__u32 psoc_pci_pll_nf;
@@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
__u8 pad[2];
__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
- __u64 reserved3;
+ __u64 reserved2;
__u64 dram_page_size;
};
@@ -628,12 +670,21 @@ struct hl_cs_chunk {
__u64 cb_handle;
/* Relevant only when HL_CS_FLAGS_WAIT or
- * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set
* This holds address of array of u64 values that contain
- * signal CS sequence numbers. The wait described by this job
- * will listen on all those signals (wait event per signal)
+ * signal CS sequence numbers. The wait described by
+ * this job will listen on all those signals
+ * (wait event per signal)
*/
__u64 signal_seq_arr;
+
+ /*
+ * Relevant only when HL_CS_FLAGS_WAIT or
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set
+ * along with HL_CS_FLAGS_ENCAP_SIGNALS.
+ * This is the CS sequence which has the encapsulated signals.
+ */
+ __u64 encaps_signal_seq;
};
/* Index of queue to put the CB on */
@@ -651,6 +702,17 @@ struct hl_cs_chunk {
* Number of entries in signal_seq_arr
*/
__u32 num_signal_seq_arr;
+
+ /* Relevant only when HL_CS_FLAGS_WAIT or
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set along
+ * with HL_CS_FLAGS_ENCAP_SIGNALS
+ * This set the signals range that the user want to wait for
+ * out of the whole reserved signals range.
+ * e.g if the signals range is 20, and user don't want
+ * to wait for signal 8, so he set this offset to 7, then
+ * he call the API again with 9 and so on till 20.
+ */
+ __u32 encaps_signal_offset;
};
/* HL_CS_CHUNK_FLAGS_* */
@@ -678,6 +740,28 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200
#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400
+/*
+ * The encapsulated signals CS is merged into the existing CS ioctls.
+ * In order to use this feature need to follow the below procedure:
+ * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY
+ * the output of this API will be the SOB offset from CFG_BASE.
+ * this address will be used to patch CB cmds to do the signaling for this
+ * SOB by incrementing it's value.
+ * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
+ * CS type, note that this might fail if out-of-sync happened to the SOB
+ * value, in case other signaling request to the same SOB occurred between
+ * reserve-unreserve calls.
+ * 2. Use the staged CS to do the encapsulated signaling jobs.
+ * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset
+ * field. This offset allows app to wait on part of the reserved signals.
+ * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag
+ * to wait for the encapsulated signals.
+ */
+#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800
+#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000
+#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000
+
#define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512
@@ -690,10 +774,35 @@ struct hl_cs_in {
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
- /* Sequence number of a staged submission CS
- * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
- */
- __u64 seq;
+ union {
+ /*
+ * Sequence number of a staged submission CS
+ * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
+ * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
+ */
+ __u64 seq;
+
+ /*
+ * Encapsulated signals handle id
+ * Valid for two flows:
+ * 1. CS with encapsulated signals:
+ * when HL_CS_FLAGS_STAGED_SUBMISSION and
+ * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ * and HL_CS_FLAGS_ENCAP_SIGNALS are set.
+ * 2. unreserve signals:
+ * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
+ */
+ __u32 encaps_sig_handle_id;
+
+ /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+ struct {
+ /* Encapsulated signals number */
+ __u32 encaps_signals_count;
+
+ /* Encapsulated signals queue index (stream) */
+ __u32 encaps_signals_q_idx;
+ };
+ };
/* Number of chunks in restore phase array. Maximum number is
* HL_MAX_JOBS_PER_CS
@@ -718,14 +827,31 @@ struct hl_cs_in {
};
struct hl_cs_out {
+ union {
+ /*
+ * seq holds the sequence number of the CS to pass to wait
+ * ioctl. All values are valid except for 0 and ULLONG_MAX
+ */
+ __u64 seq;
+
+ /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+ struct {
+ /* This is the resereved signal handle id */
+ __u32 handle_id;
+
+ /* This is the signals count */
+ __u32 count;
+ };
+ };
+
+ /* HL_CS_STATUS */
+ __u32 status;
+
/*
- * seq holds the sequence number of the CS to pass to wait ioctl. All
- * values are valid except for 0 and ULLONG_MAX
+ * SOB base address offset
+ * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
*/
- __u64 seq;
- /* HL_CS_STATUS_* */
- __u32 status;
- __u32 pad;
+ __u32 sob_base_addr_offset;
};
union hl_cs_args {
@@ -735,11 +861,18 @@ union hl_cs_args {
#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
+
+#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
struct hl_wait_cs_in {
union {
struct {
- /* Command submission sequence number */
+ /*
+ * In case of wait_cs holds the CS sequence number.
+ * In case of wait for multi CS hold a user pointer to
+ * an array of CS sequence numbers
+ */
__u64 seq;
/* Absolute timeout to wait for command submission
* in microseconds
@@ -767,12 +900,17 @@ struct hl_wait_cs_in {
/* Context ID - Currently not in use */
__u32 ctx_id;
+
/* HL_WAIT_CS_FLAGS_*
* If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
* interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order
* not to specify an interrupt id ,set mask to all 1s.
*/
__u32 flags;
+
+ /* Multi CS API info- valid entries in multi-CS array */
+ __u8 seq_arr_len;
+ __u8 pad[7];
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
@@ -789,8 +927,15 @@ struct hl_wait_cs_out {
__u32 status;
/* HL_WAIT_CS_STATUS_FLAG* */
__u32 flags;
- /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
+ /*
+ * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
+ * for wait_cs: timestamp of CS completion
+ * for wait_multi_cs: timestamp of FIRST CS completion
+ */
__s64 timestamp_nsec;
+ /* multi CS completion bitmap */
+ __u32 cs_completion_map;
+ __u32 pad;
};
union hl_wait_cs_args {
@@ -813,6 +958,7 @@ union hl_wait_cs_args {
#define HL_MEM_CONTIGUOUS 0x1
#define HL_MEM_SHARED 0x2
#define HL_MEM_USERPTR 0x4
+#define HL_MEM_FORCE_HINT 0x8
struct hl_mem_in {
union {