Diffstat (limited to 'drivers/infiniband/hw/hns')
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_common.h |  26
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c     | 116
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h |  82
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c    |   9
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |  33
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |  43
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 791
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 141
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c   |  30
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c     | 458
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c     |  38
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c    | 331
12 files changed, 1134 insertions, 964 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 5afee04fb02c..23c438cef40d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -32,6 +32,7 @@
#ifndef _HNS_ROCE_COMMON_H
#define _HNS_ROCE_COMMON_H
+#include <linux/bitfield.h>
#define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg))
#define roce_read(dev, reg) readl((dev)->reg_base + (reg))
@@ -65,6 +66,27 @@
#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ *((__le32 *)_ptr + (field_h) / 32) &= \
+ cpu_to_le32( \
+ ~GENMASK((field_h) % 32, (field_l) % 32)) + \
+ BUILD_BUG_ON_ZERO(((field_h) / 32) != \
+ ((field_l) / 32)); \
+ })
+
+#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field)
+
+#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \
+ GENMASK((field_h) % 32, (field_l) % 32), val)); \
+ })
+
+#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val)
+
#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
@@ -342,8 +364,8 @@
#define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000
#define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004
#define ROCEE_TX_CMQ_DEPTH_REG 0x07008
-#define ROCEE_TX_CMQ_TAIL_REG 0x07010
-#define ROCEE_TX_CMQ_HEAD_REG 0x07014
+#define ROCEE_TX_CMQ_HEAD_REG 0x07010
+#define ROCEE_TX_CMQ_TAIL_REG 0x07014
#define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018
#define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c
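
The new _hr_reg_clear()/_hr_reg_write() helpers above apply a clear-then-OR pattern to the little-endian dword that holds a field: mask the old bits out, then FIELD_PREP() the new value in. Below is a minimal standalone userspace sketch of that pattern; it uses plain uint32_t words instead of __le32 (so no cpu_to_le32()) and omits the BUILD_BUG_ON check that the field does not cross a dword boundary. FIELD_MASK and reg_write are illustrative names, not driver symbols.

#include <stdint.h>
#include <stdio.h>

#define FIELD_MASK(h, l)  (((~0u) >> (31 - (h))) & ((~0u) << (l)))

static void reg_write(uint32_t *ctx, int field_h, int field_l, uint32_t val)
{
	uint32_t *dw = ctx + field_h / 32;             /* dword holding the field */
	uint32_t mask = FIELD_MASK(field_h % 32, field_l % 32);

	*dw &= ~mask;                                  /* the hr_reg_clear() step */
	*dw |= (val << (field_l % 32)) & mask;         /* FIELD_PREP() + OR step  */
}

int main(void)
{
	uint32_t ctx[4] = { 0 };

	/* a field at bits 44..40 of the context lands in dword 1, bits 12..8 */
	reg_write(ctx, 44, 40, 0x15);
	printf("dword1 = 0x%08x\n", (unsigned)ctx[1]); /* prints 0x00001500 */
	return 0;
}
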
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 8533fc2d8df2..74fc4940b03a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -38,11 +38,74 @@
#include "hns_roce_hem.h"
#include "hns_roce_common.h"
+static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank)
+{
+ u32 least_load = bank[0].inuse;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
+ }
+
+ return bankid;
+}
+
+static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+ u8 bankid;
+ int id;
+
+ mutex_lock(&cq_table->bank_mutex);
+ bankid = get_least_load_bankid_for_cq(cq_table->bank);
+ bank = &cq_table->bank[bankid];
+
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ mutex_unlock(&cq_table->bank_mutex);
+ return id;
+ }
+
+ /* the lower 2 bits is bankid */
+ hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid;
+ bank->inuse++;
+ mutex_unlock(&cq_table->bank_mutex);
+
+ return 0;
+}
+
+static inline u8 get_cq_bankid(unsigned long cqn)
+{
+ /* The lower 2 bits of CQN are used to hash to different banks */
+ return (u8)(cqn & GENMASK(1, 0));
+}
+
+static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+
+ bank = &cq_table->bank[get_cq_bankid(cqn)];
+
+ ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT);
+
+ mutex_lock(&cq_table->bank_mutex);
+ bank->inuse--;
+ mutex_unlock(&cq_table->bank_mutex);
+}
+
static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_cq_table *cq_table;
u64 mtts[MTT_MIN_COUNT] = { 0 };
dma_addr_t dma_handle;
int ret;
@@ -54,13 +117,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
return -EINVAL;
}
- cq_table = &hr_dev->cq_table;
- ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret);
- return ret;
- }
-
/* Get CQC memory HEM(Hardware Entry Memory) table */
ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
if (ret) {
@@ -110,7 +166,6 @@ err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
err_out:
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
return ret;
}
@@ -138,7 +193,6 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
wait_for_completion(&hr_cq->free);
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
@@ -152,7 +206,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT,
@@ -298,11 +351,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
goto err_cq_buf;
}
+ ret = alloc_cqn(hr_dev, hr_cq);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
+ goto err_cq_db;
+ }
+
ret = alloc_cqc(hr_dev, hr_cq);
if (ret) {
ibdev_err(ibdev,
"failed to alloc CQ context, ret = %d.\n", ret);
- goto err_cq_db;
+ goto err_cqn;
}
/*
@@ -326,6 +385,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
err_cqc:
free_cqc(hr_dev, hr_cq);
+err_cqn:
+ free_cqn(hr_dev, hr_cq->cqn);
err_cq_db:
free_cq_db(hr_dev, hr_cq, udata);
err_cq_buf:
@@ -341,9 +402,11 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
if (hr_dev->hw->destroy_cq)
hr_dev->hw->destroy_cq(ib_cq, udata);
- free_cq_buf(hr_dev, hr_cq);
- free_cq_db(hr_dev, hr_cq, udata);
free_cqc(hr_dev, hr_cq);
+ free_cqn(hr_dev, hr_cq->cqn);
+ free_cq_db(hr_dev, hr_cq, udata);
+ free_cq_buf(hr_dev, hr_cq);
+
return 0;
}
@@ -402,18 +465,33 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
complete(&hr_cq->free);
}
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ unsigned int reserved_from_bot;
+ unsigned int i;
+ mutex_init(&cq_table->bank_mutex);
xa_init(&cq_table->array);
- return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs,
- hr_dev->caps.num_cqs - 1,
- hr_dev->caps.reserved_cqs, 0);
+ reserved_from_bot = hr_dev->caps.reserved_cqs;
+
+ for (i = 0; i < reserved_from_bot; i++) {
+ cq_table->bank[get_cq_bankid(i)].inuse++;
+ cq_table->bank[get_cq_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ ida_init(&cq_table->bank[i].ida);
+ cq_table->bank[i].max = hr_dev->caps.num_cqs /
+ HNS_ROCE_CQ_BANK_NUM - 1;
+ }
}
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap);
+ int i;
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
+ ida_destroy(&hr_dev->cq_table.bank[i].ida);
}
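
The CQ rework above replaces the global CQN bitmap with per-bank IDAs: the low CQ_BANKID_SHIFT bits of a CQN select one of HNS_ROCE_CQ_BANK_NUM banks, the least-loaded bank is picked under bank_mutex, and the id allocated inside that bank fills the remaining bits. A simplified userspace sketch of that encoding follows; a plain next-free counter stands in for ida_alloc_range(), locking is omitted, and struct bank, least_loaded_bank() and alloc_cqn() here are illustrative stand-ins rather than the driver's definitions.

#include <stdint.h>
#include <stdio.h>

#define CQ_BANK_NUM	4
#define CQ_BANKID_SHIFT	2

struct bank {
	uint32_t next_id;	/* stand-in for ida_alloc_range()       */
	uint32_t inuse;		/* load counter used for bank selection */
};

static int least_loaded_bank(const struct bank *b)
{
	int i, best = 0;

	for (i = 1; i < CQ_BANK_NUM; i++)
		if (b[i].inuse < b[best].inuse)
			best = i;
	return best;
}

static uint32_t alloc_cqn(struct bank *b)
{
	int bankid = least_loaded_bank(b);
	uint32_t id = b[bankid].next_id++;

	b[bankid].inuse++;
	return (id << CQ_BANKID_SHIFT) | bankid;	/* low bits carry the bank */
}

int main(void)
{
	struct bank banks[CQ_BANK_NUM] = { { 0, 0 } };
	int i;

	/* spreads consecutive CQs round-robin across the four banks */
	for (i = 0; i < 6; i++)
		printf("cqn = %u\n", (unsigned)alloc_cqn(banks));
	return 0;
}
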
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index ad8253245a85..3d6b7a2db496 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -54,6 +54,7 @@
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MIN_CQE_NUM 0x40
#define HNS_ROCE_MIN_WQE_NUM 0x20
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
@@ -65,6 +66,8 @@
#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
#define HNS_ROCE_MIN_CQE_CNT 16
+#define HNS_ROCE_RESERVED_SGE 1
+
#define HNS_ROCE_MAX_IRQ_NUM 128
#define HNS_ROCE_SGE_IN_WQE 2
@@ -90,6 +93,7 @@
#define HNS_ROCE_MAX_PORTS 6
#define HNS_ROCE_GID_SIZE 16
#define HNS_ROCE_SGE_SIZE 16
+#define HNS_ROCE_DWQE_SIZE 65536
#define HNS_ROCE_HOP_NUM_0 0xff
@@ -119,6 +123,9 @@
#define SRQ_DB_REG 0x230
#define HNS_ROCE_QP_BANK_NUM 8
+#define HNS_ROCE_CQ_BANK_NUM 4
+
+#define CQ_BANKID_SHIFT 2
/* The chip implementation of the consumer index is calculated
* according to twice the actual EQ depth
@@ -163,44 +170,6 @@ enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_FLR = 0x15,
};
-/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
-enum {
- HNS_ROCE_LWQCE_QPC_ERROR = 1,
- HNS_ROCE_LWQCE_MTU_ERROR = 2,
- HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3,
- HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4,
- HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5,
- HNS_ROCE_LWQCE_SL_ERROR = 6,
- HNS_ROCE_LWQCE_PORT_ERROR = 7,
-};
-
-/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */
-enum {
- HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1,
- HNS_ROCE_LAVWQE_LENGTH_ERROR = 2,
- HNS_ROCE_LAVWQE_VA_ERROR = 3,
- HNS_ROCE_LAVWQE_PD_ERROR = 4,
- HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5,
- HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6,
- HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7,
-};
-
-/* DOORBELL overflow subtype */
-enum {
- HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2,
- HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6,
-};
-
-enum {
- /* RQ&SRQ related operations */
- HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06,
- HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07,
-};
-
#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
enum {
@@ -253,9 +222,6 @@ enum {
#define HNS_ROCE_CMD_SUCCESS 1
-#define HNS_ROCE_PORT_DOWN 0
-#define HNS_ROCE_PORT_UP 1
-
/* The minimum page size is 4K for hardware */
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
@@ -332,7 +298,6 @@ struct hns_roce_buf_attr {
} region[HNS_ROCE_MAX_BT_REGION];
unsigned int region_count; /* valid region count */
unsigned int page_shift; /* buffer page shift */
- bool fixed_page; /* decide page shift is fixed-size or maximum size */
unsigned int user_access; /* umem access flag */
bool mtt_only; /* only alloc buffer-required MTT memory */
};
@@ -393,6 +358,7 @@ struct hns_roce_wq {
spinlock_t lock;
u32 wqe_cnt; /* WQE num */
u32 max_gs;
+ u32 rsv_sge;
int offset;
int wqe_shift; /* WQE size */
u32 head;
@@ -489,6 +455,8 @@ struct hns_roce_idx_que {
struct hns_roce_mtr mtr;
int entry_shift;
unsigned long *bitmap;
+ u32 head;
+ u32 tail;
};
struct hns_roce_srq {
@@ -496,7 +464,9 @@ struct hns_roce_srq {
unsigned long srqn;
u32 wqe_cnt;
int max_gs;
+ u32 rsv_sge;
int wqe_shift;
+ u32 cqn;
void __iomem *db_reg_l;
atomic_t refcount;
@@ -507,8 +477,6 @@ struct hns_roce_srq {
u64 *wrid;
struct hns_roce_idx_que idx_que;
spinlock_t lock;
- u16 head;
- u16 tail;
struct mutex mutex;
void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
};
@@ -536,9 +504,10 @@ struct hns_roce_qp_table {
};
struct hns_roce_cq_table {
- struct hns_roce_bitmap bitmap;
struct xarray array;
struct hns_roce_hem_table table;
+ struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM];
+ struct mutex bank_mutex;
};
struct hns_roce_srq_table {
@@ -640,6 +609,10 @@ struct hns_roce_work {
u32 queue_num;
};
+enum {
+ HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5),
+};
+
struct hns_roce_qp {
struct ib_qp ibqp;
struct hns_roce_wq rq;
@@ -647,7 +620,7 @@ struct hns_roce_qp {
struct hns_roce_db sdb;
unsigned long en_flags;
u32 doorbell_qpn;
- u32 sq_signal_bits;
+ enum ib_sig_type sq_signal_bits;
struct hns_roce_wq sq;
struct hns_roce_mtr mtr;
@@ -779,7 +752,7 @@ struct hns_roce_caps {
u32 max_cqes;
u32 min_cqes;
u32 min_wqes;
- int reserved_cqs;
+ u32 reserved_cqs;
int reserved_srqs;
int num_aeq_vectors;
int num_comp_vectors;
@@ -911,8 +884,7 @@ struct hns_roce_hw {
int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
struct hns_roce_mr *mr, unsigned long mtpt_idx);
int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr, int flags, u32 pdn,
- int mr_access_flags, u64 iova, u64 size,
+ struct hns_roce_mr *mr, int flags,
void *mb_buf);
int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
struct hns_roce_mr *mr);
@@ -945,11 +917,7 @@ struct hns_roce_hw {
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
- void (*write_srqc)(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
- void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
- dma_addr_t dma_handle_wqe,
- dma_addr_t dma_handle_idx);
+ int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
@@ -982,6 +950,7 @@ struct hns_roce_dev {
struct mutex pgdir_mutex;
int irq[HNS_ROCE_MAX_IRQ_NUM];
u8 __iomem *reg_base;
+ void __iomem *mem_base;
struct hns_roce_caps caps;
struct xarray qp_table_xa;
@@ -1067,7 +1036,7 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
{
- __raw_writeq(*(u64 *) val, dest);
+ writeq(*(u64 *)val, dest);
}
static inline struct hns_roce_qp
@@ -1164,7 +1133,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
@@ -1281,7 +1250,6 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
-
int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
struct ib_cq *ib_cq);
#endif /* _HNS_ROCE_DEVICE_H */
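
Among the structure changes above, hns_roce_idx_que gains free-running head and tail counters (and struct hns_roce_srq loses its masked u16 pair). The fullness test used with such counters relies on unsigned wraparound: both indices only ever increase, so head - tail equals the number of outstanding entries even across a 32-bit overflow. A small sketch of that check is below; ring_full() is an illustrative name for what the v2 code implements as hns_roce_srqwq_overflow().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool ring_full(uint32_t head, uint32_t tail, uint32_t depth)
{
	/* unsigned subtraction is well defined modulo 2^32 */
	return head - tail >= depth;
}

int main(void)
{
	uint32_t depth = 8;

	/* near the 32-bit wrap point: 7 entries outstanding, then 8 */
	printf("%d\n", ring_full(UINT32_MAX, UINT32_MAX - 7, depth));	/* 0 */
	printf("%d\n", ring_full(0, UINT32_MAX - 7, depth));		/* 1 */
	return 0;
}
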
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index edc9d6b98d95..cfd2e1b60c7f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -1075,9 +1075,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev,
return NULL;
if (exist_bt) {
- hem->addr = dma_alloc_coherent(hr_dev->dev,
- count * BA_BYTE_LEN,
- &hem->dma_addr, GFP_KERNEL);
+ hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN,
+ &hem->dma_addr, GFP_KERNEL);
if (!hem->addr) {
kfree(hem);
return NULL;
@@ -1336,6 +1335,10 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
if (ba_num < 1)
return -ENOMEM;
+ if (ba_num > unit)
+ return -ENOBUFS;
+
+ ba_num = min_t(int, ba_num, unit);
INIT_LIST_HEAD(&temp_root);
offset = r->offset;
/* indicate to last region */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index f68585ff8e8a..5346fdca9473 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -43,6 +43,22 @@
#include "hns_roce_hem.h"
#include "hns_roce_hw_v1.h"
+/**
+ * hns_get_gid_index - Get gid index.
+ * @hr_dev: pointer to structure hns_roce_dev.
+ * @port: port, value range: 0 ~ MAX
+ * @gid_index: gid_index, value range: 0 ~ MAX
+ * Description:
+ * N ports shared gids, allocation method as follow:
+ * GID[0][0], GID[1][0],.....GID[N - 1][0],
+ * GID[0][0], GID[1][0],.....GID[N - 1][0],
+ * And so on
+ */
+u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
+{
+ return gid_index * hr_dev->caps.num_ports + port;
+}
+
static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg)
{
dseg->lkey = cpu_to_le32(sg->lkey);
@@ -314,8 +330,6 @@ out:
/* Set DB return */
if (likely(nreq)) {
qp->sq.head += nreq;
- /* Memory barrier */
- wmb();
roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
SQ_DOORBELL_U32_4_SQ_HEAD_S,
@@ -395,8 +409,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
out:
if (likely(nreq)) {
hr_qp->rq.head += nreq;
- /* Memory barrier */
- wmb();
if (ibqp->qp_type == IB_QPT_GSI) {
__le32 tmp;
@@ -1391,7 +1403,7 @@ static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev)
/**
* hns_roce_v1_reset - reset RoCE
* @hr_dev: RoCE device struct pointer
- * @enable: true -- drop reset, false -- reset
+ * @dereset: true -- drop reset, false -- reset
* return 0 - success , negative --fail
*/
static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
@@ -1968,12 +1980,6 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
if (nfreed) {
hr_cq->cons_index += nfreed;
- /*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
- wmb();
-
hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
}
}
@@ -2314,8 +2320,6 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
*hr_cq->tptr_addr = hr_cq->cons_index &
((hr_cq->cq_depth << 1) - 1);
- /* Memroy barrier */
- wmb();
hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
}
@@ -3204,9 +3208,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
* need to hw to flash RQ HEAD by DB again
*/
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- /* Memory barrier */
- wmb();
-
roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M,
RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head);
roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M,
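
hns_get_gid_index(), now defined in the v1 code above, spreads one GID table shared by all ports so that entry (port, gid_index) lives at gid_index * num_ports + port, i.e. the table is filled one gid_index row at a time across the ports. A tiny standalone sketch of that layout (gid_table_index() is an illustrative name):

#include <stdio.h>

static int gid_table_index(int num_ports, int port, int gid_index)
{
	/* mirrors: gid_index * hr_dev->caps.num_ports + port */
	return gid_index * num_ports + port;
}

int main(void)
{
	int num_ports = 2, port, gid_index;

	for (gid_index = 0; gid_index < 3; gid_index++)
		for (port = 0; port < num_ports; port++)
			printf("port %d, gid %d -> entry %d\n", port, gid_index,
			       gid_table_index(num_ports, port, gid_index));
	return 0;
}
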
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 46ab0a321d21..84383236e47d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -193,6 +193,49 @@
#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0)
+/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
+enum {
+ HNS_ROCE_LWQCE_QPC_ERROR = 1,
+ HNS_ROCE_LWQCE_MTU_ERROR,
+ HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR,
+ HNS_ROCE_LWQCE_WQE_ADDR_ERROR,
+ HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR,
+ HNS_ROCE_LWQCE_SL_ERROR,
+ HNS_ROCE_LWQCE_PORT_ERROR,
+};
+
+/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */
+enum {
+ HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1,
+ HNS_ROCE_LAVWQE_LENGTH_ERROR,
+ HNS_ROCE_LAVWQE_VA_ERROR,
+ HNS_ROCE_LAVWQE_PD_ERROR,
+ HNS_ROCE_LAVWQE_RW_ACC_ERROR,
+ HNS_ROCE_LAVWQE_KEY_STATE_ERROR,
+ HNS_ROCE_LAVWQE_MR_OPERATION_ERROR,
+};
+
+/* DOORBELL overflow subtype */
+enum {
+ HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1,
+ HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF,
+ HNS_ROCE_DB_SUBTYPE_ODB_OVF,
+ HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF,
+ HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP,
+ HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP,
+};
+
+enum {
+ /* RQ&SRQ related operations */
+ HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06,
+ HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE,
+};
+
+enum {
+ HNS_ROCE_PORT_DOWN = 0,
+ HNS_ROCE_PORT_UP,
+};
+
struct hns_roce_cq_context {
__le32 cqc_byte_4;
__le32 cq_bt_l;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 833e1f259936..c3934abeb260 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -48,8 +48,8 @@
#include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"
-static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
- struct ib_sge *sg)
+static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct ib_sge *sg)
{
dseg->lkey = cpu_to_le32(sg->lkey);
dseg->addr = cpu_to_le64(sg->addr);
@@ -99,16 +99,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
u64 pbl_ba;
/* use ib_access_flags */
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
- wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
- wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S,
- wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S,
- wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S,
- wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S,
+ !!(wr->access & IB_ACCESS_MW_BIND));
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S,
+ !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RR_S,
+ !!(wr->access & IB_ACCESS_REMOTE_READ));
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RW_S,
+ !!(wr->access & IB_ACCESS_REMOTE_WRITE));
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_LW_S,
+ !!(wr->access & IB_ACCESS_LOCAL_WRITE));
/* Data structure reuse may lead to confusion */
pbl_ba = mr->pbl_mtr.hem_cfg.root_ba;
@@ -121,12 +121,10 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
fseg->pbl_size = cpu_to_le32(mr->npages);
- roce_set_field(fseg->mode_buf_pg_sz,
- V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
+ roce_set_field(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
- roce_set_bit(fseg->mode_buf_pg_sz,
- V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
+ roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
}
static void set_atomic_seg(const struct ib_send_wr *wr,
@@ -361,7 +359,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,
} else if (unlikely(hr_qp->state == IB_QPS_RESET ||
hr_qp->state == IB_QPS_INIT ||
hr_qp->state == IB_QPS_RTR)) {
- ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n",
+ ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
hr_qp->state);
return -EINVAL;
} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
@@ -469,7 +467,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
- memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
ret = set_ud_opcode(ud_sq_wqe, wr);
if (WARN_ON(ret))
@@ -503,6 +500,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
if (ret)
return ret;
+ qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl;
+
set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge);
/*
@@ -521,10 +520,12 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
return 0;
}
-static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+static int set_rc_opcode(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
const struct ib_send_wr *wr)
{
u32 ib_op = wr->opcode;
+ int ret = 0;
rc_sq_wqe->immtdata = get_immtdata(wr);
@@ -544,7 +545,10 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
break;
case IB_WR_REG_MR:
- set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ else
+ ret = -EOPNOTSUPP;
break;
case IB_WR_LOCAL_INV:
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
@@ -553,19 +557,23 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
break;
default:
- return -EINVAL;
+ ret = -EINVAL;
}
+ if (unlikely(ret))
+ return ret;
+
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
- return 0;
+ return ret;
}
static inline int set_rc_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
unsigned int owner_bit)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
unsigned int curr_idx = *sge_idx;
unsigned int valid_num_sge;
@@ -573,11 +581,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
- memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
- ret = set_rc_opcode(rc_sq_wqe, wr);
+ ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr);
if (WARN_ON(ret))
return ret;
@@ -635,6 +642,8 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev,
V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn);
roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M,
V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB);
+ /* indicates data on new BAR, 0 : SQ doorbell, 1 : DWQE */
+ roce_set_bit(sq_db.byte_4, V2_DB_FLAG_S, 0);
roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M,
V2_DB_PARAMETER_IDX_S, qp->sq.head);
roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
@@ -644,6 +653,38 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev,
}
}
+static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val,
+ u64 __iomem *dest)
+{
+#define HNS_ROCE_WRITE_TIMES 8
+ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ int i;
+
+ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
+ for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++)
+ writeq_relaxed(*(val + i), dest + i);
+}
+
+static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ void *wqe)
+{
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+
+ /* All kinds of DirectWQE have the same header field layout */
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M,
+ V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M,
+ V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M,
+ V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
+
+ hns_roce_write512(hr_dev, wqe, hr_dev->mem_base +
+ HNS_ROCE_DWQE_SIZE * qp->ibqp.qp_num);
+}
+
static int hns_roce_v2_post_send(struct ib_qp *ibqp,
const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
@@ -708,9 +749,12 @@ out:
if (likely(nreq)) {
qp->sq.head += nreq;
qp->next_sge = sge_idx;
- /* Memory barrier */
- wmb();
- update_sq_db(hr_dev, qp);
+
+ if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 &&
+ (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ write_dwqe(hr_dev, qp, wqe);
+ else
+ update_sq_db(hr_dev, qp);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -721,14 +765,74 @@ out:
static int check_recv_valid(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+
+ if (unlikely(ibqp->qp_type != IB_QPT_RC &&
+ ibqp->qp_type != IB_QPT_GSI &&
+ ibqp->qp_type != IB_QPT_UD)) {
+ ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n",
+ ibqp->qp_type);
+ return -EOPNOTSUPP;
+ }
+
if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
return -EIO;
- else if (hr_qp->state == IB_QPS_RESET)
+
+ if (hr_qp->state == IB_QPS_RESET)
return -EINVAL;
return 0;
}
+static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe,
+ u32 max_sge, bool rsv)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ u32 i, cnt;
+
+ for (i = 0, cnt = 0; i < wr->num_sge; i++) {
+ /* Skip zero-length sge */
+ if (!wr->sg_list[i].length)
+ continue;
+ set_data_seg_v2(dseg + cnt, wr->sg_list + i);
+ cnt++;
+ }
+
+ /* Fill a reserved sge to make hw stop reading remaining segments */
+ if (rsv) {
+ dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+ dseg[cnt].addr = 0;
+ dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+ } else {
+ /* Clear remaining segments to make ROCEE ignore sges */
+ if (cnt < max_sge)
+ memset(dseg + cnt, 0,
+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
+ }
+}
+
+static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
+ u32 wqe_idx, u32 max_sge)
+{
+ struct hns_roce_rinl_sge *sge_list;
+ void *wqe = NULL;
+ u32 i;
+
+ wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
+
+ /* rq support inline data */
+ if (hr_qp->rq_inl_buf.wqe_cnt) {
+ sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
+ hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++) {
+ sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
+ sge_list[i].len = wr->sg_list[i].length;
+ }
+ }
+}
+
static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
@@ -736,14 +840,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_v2_wqe_data_seg *dseg;
- struct hns_roce_rinl_sge *sge_list;
+ u32 wqe_idx, nreq, max_sge;
unsigned long flags;
- void *wqe = NULL;
- u32 wqe_idx;
- int nreq;
int ret;
- int i;
spin_lock_irqsave(&hr_qp->rq.lock, flags);
@@ -754,6 +853,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
+ max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
hr_qp->ibqp.recv_cq))) {
@@ -762,50 +862,22 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
- wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
-
- if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
+ if (unlikely(wr->num_sge > max_sge)) {
ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
- wr->num_sge, hr_qp->rq.max_gs);
+ wr->num_sge, max_sge);
ret = -EINVAL;
*bad_wr = wr;
goto out;
}
- wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
- for (i = 0; i < wr->num_sge; i++) {
- if (!wr->sg_list[i].length)
- continue;
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
-
- if (wr->num_sge < hr_qp->rq.max_gs) {
- dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
- dseg->addr = 0;
- }
-
- /* rq support inline data */
- if (hr_qp->rq_inl_buf.wqe_cnt) {
- sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
- hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
- (u32)wr->num_sge;
- for (i = 0; i < wr->num_sge; i++) {
- sge_list[i].addr =
- (void *)(u64)wr->sg_list[i].addr;
- sge_list[i].len = wr->sg_list[i].length;
- }
- }
-
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+ fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge);
hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
}
out:
if (likely(nreq)) {
hr_qp->rq.head += nreq;
- /* Memory barrier */
- wmb();
/*
* Hip08 hardware cannot flush the WQEs in RQ if the QP state
@@ -829,41 +901,82 @@ out:
return ret;
}
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
{
return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
}
-static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n)
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n)
{
return hns_roce_buf_offset(idx_que->mtr.kmem,
n << idx_que->entry_shift);
}
-static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
{
/* always called with interrupts disabled. */
spin_lock(&srq->lock);
bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
- srq->tail++;
+ srq->idx_que.tail++;
spin_unlock(&srq->lock);
}
-static int find_empty_entry(struct hns_roce_idx_que *idx_que,
- unsigned long size)
+static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq)
{
- int wqe_idx;
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
- if (unlikely(bitmap_full(idx_que->bitmap, size)))
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
+}
+
+static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge,
+ const struct ib_recv_wr *wr)
+{
+ struct ib_device *ib_dev = srq->ibsrq.device;
+
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(ib_dev,
+ "failed to check sge, wr->num_sge = %d, max_sge = %u.\n",
+ wr->num_sge, max_sge);
+ return -EINVAL;
+ }
+
+ if (unlikely(hns_roce_srqwq_overflow(srq))) {
+ ibdev_err(ib_dev,
+ "failed to check srqwq status, srqwq is full.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 pos;
+
+ pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt);
+ if (unlikely(pos == srq->wqe_cnt))
return -ENOSPC;
- wqe_idx = find_first_zero_bit(idx_que->bitmap, size);
+ bitmap_set(idx_que->bitmap, pos, 1);
+ *wqe_idx = pos;
+ return 0;
+}
- bitmap_set(idx_que->bitmap, wqe_idx, 1);
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int head;
+ __le32 *buf;
- return wqe_idx;
+ head = idx_que->head & (srq->wqe_cnt - 1);
+
+ buf = get_idx_buf(idx_que, head);
+ *buf = cpu_to_le32(wqe_idx);
+
+ idx_que->head++;
}
static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
@@ -872,77 +985,42 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_v2_wqe_data_seg *dseg;
struct hns_roce_v2_db srq_db;
unsigned long flags;
- unsigned int ind;
- __le32 *srq_idx;
int ret = 0;
- int wqe_idx;
+ u32 max_sge;
+ u32 wqe_idx;
void *wqe;
- int nreq;
- int i;
+ u32 nreq;
spin_lock_irqsave(&srq->lock, flags);
- ind = srq->head & (srq->wqe_cnt - 1);
-
+ max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(wr->num_sge >= srq->max_gs)) {
- ret = -EINVAL;
- *bad_wr = wr;
- break;
- }
-
- if (unlikely(srq->head == srq->tail)) {
- ret = -ENOMEM;
+ ret = check_post_srq_valid(srq, max_sge, wr);
+ if (ret) {
*bad_wr = wr;
break;
}
- wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
- if (unlikely(wqe_idx < 0)) {
- ret = -ENOMEM;
+ ret = get_srq_wqe_idx(srq, &wqe_idx);
+ if (unlikely(ret)) {
*bad_wr = wr;
break;
}
- wqe = get_srq_wqe(srq, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-
- for (i = 0; i < wr->num_sge; ++i) {
- dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
- dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
- dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
- }
-
- if (wr->num_sge < srq->max_gs) {
- dseg[i].len = 0;
- dseg[i].lkey = cpu_to_le32(0x100);
- dseg[i].addr = 0;
- }
-
- srq_idx = get_idx_buf(&srq->idx_que, ind);
- *srq_idx = cpu_to_le32(wqe_idx);
-
+ wqe = get_srq_wqe_buf(srq, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
+ fill_wqe_idx(srq, wqe_idx);
srq->wrid[wqe_idx] = wr->wr_id;
- ind = (ind + 1) & (srq->wqe_cnt - 1);
}
if (likely(nreq)) {
- srq->head += nreq;
-
- /*
- * Make sure that descriptors are written before
- * doorbell record.
- */
- wmb();
-
srq_db.byte_4 =
cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
(srq->srqn & V2_DB_BYTE_4_TAG_M));
srq_db.parameter =
- cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M);
+ cpu_to_le32(srq->idx_que.head & V2_DB_PARAMETER_IDX_M);
hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
}
@@ -1059,15 +1137,6 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev)
return 0;
}
-static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring)
-{
- int ntu = ring->next_to_use;
- int ntc = ring->next_to_clean;
- int used = (ntu - ntc + ring->desc_num) % ring->desc_num;
-
- return ring->desc_num - used - 1;
-}
-
static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_cmq_ring *ring)
{
@@ -1107,8 +1176,7 @@ static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type)
&priv->cmq.csq : &priv->cmq.crq;
ring->flag = ring_type;
- ring->next_to_clean = 0;
- ring->next_to_use = 0;
+ ring->head = 0;
return hns_roce_alloc_cmq_desc(hr_dev, ring);
}
@@ -1207,34 +1275,10 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc,
static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
{
- u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG);
+ u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG);
struct hns_roce_v2_priv *priv = hr_dev->priv;
- return head == priv->cmq.csq.next_to_use;
-}
-
-static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v2_priv *priv = hr_dev->priv;
- struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
- struct hns_roce_cmq_desc *desc;
- u16 ntc = csq->next_to_clean;
- u32 head;
- int clean = 0;
-
- desc = &csq->desc[ntc];
- head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG);
- while (head != ntc) {
- memset(desc, 0, sizeof(*desc));
- ntc++;
- if (ntc == csq->desc_num)
- ntc = 0;
- desc = &csq->desc[ntc];
- clean++;
- }
- csq->next_to_clean = ntc;
-
- return clean;
+ return tail == priv->cmq.csq.head;
}
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
@@ -1242,42 +1286,26 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
- struct hns_roce_cmq_desc *desc_to_use;
- bool complete = false;
u32 timeout = 0;
- int handle = 0;
u16 desc_ret;
- int ret = 0;
- int ntc;
+ u32 tail;
+ int ret;
+ int i;
spin_lock_bh(&csq->lock);
- if (num > hns_roce_cmq_space(csq)) {
- spin_unlock_bh(&csq->lock);
- return -EBUSY;
- }
-
- /*
- * Record the location of desc in the cmq for this time
- * which will be use for hardware to write back
- */
- ntc = csq->next_to_use;
+ tail = csq->head;
- while (handle < num) {
- desc_to_use = &csq->desc[csq->next_to_use];
- *desc_to_use = desc[handle];
- dev_dbg(hr_dev->dev, "set cmq desc:\n");
- csq->next_to_use++;
- if (csq->next_to_use == csq->desc_num)
- csq->next_to_use = 0;
- handle++;
+ for (i = 0; i < num; i++) {
+ csq->desc[csq->head++] = desc[i];
+ if (csq->head == csq->desc_num)
+ csq->head = 0;
}
/* Write to hardware */
- roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, csq->next_to_use);
+ roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, csq->head);
- /*
- * If the command is sync, wait for the firmware to write back,
+ /* If the command is sync, wait for the firmware to write back,
* if multi descriptors to be sent, use the first one to check
*/
if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
@@ -1285,39 +1313,34 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
if (hns_roce_cmq_csq_done(hr_dev))
break;
udelay(1);
- timeout++;
- } while (timeout < priv->cmq.tx_timeout);
+ } while (++timeout < priv->cmq.tx_timeout);
}
if (hns_roce_cmq_csq_done(hr_dev)) {
- complete = true;
- handle = 0;
- while (handle < num) {
- /* get the result of hardware write back */
- desc_to_use = &csq->desc[ntc];
- desc[handle] = *desc_to_use;
- dev_dbg(hr_dev->dev, "Get cmq desc:\n");
- desc_ret = le16_to_cpu(desc[handle].retval);
- if (desc_ret == CMD_EXEC_SUCCESS)
- ret = 0;
- else
- ret = -EIO;
- priv->cmq.last_status = desc_ret;
- ntc++;
- handle++;
- if (ntc == csq->desc_num)
- ntc = 0;
+ for (ret = 0, i = 0; i < num; i++) {
+ /* check the result of hardware write back */
+ desc[i] = csq->desc[tail++];
+ if (tail == csq->desc_num)
+ tail = 0;
+
+ desc_ret = le16_to_cpu(desc[i].retval);
+ if (likely(desc_ret == CMD_EXEC_SUCCESS))
+ continue;
+
+ dev_err_ratelimited(hr_dev->dev,
+ "Cmdq IO error, opcode = %x, return = %x\n",
+ desc->opcode, desc_ret);
+ ret = -EIO;
}
- }
+ } else {
+ /* FW/HW reset or incorrect number of desc */
+ tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG);
+ dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n",
+ csq->head, tail);
+ csq->head = tail;
- if (!complete)
ret = -EAGAIN;
-
- /* clean the command send queue */
- handle = hns_roce_cmq_csq_clean(hr_dev);
- if (handle != num)
- dev_warn(hr_dev->dev, "Cleaned %d, need to clean %d\n",
- handle, num);
+ }
spin_unlock_bh(&csq->lock);
@@ -1530,7 +1553,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S, 0x3e8);
roce_set_field(req->time_cfg_udp_port,
CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M,
- CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, 0x12b7);
+ CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S,
+ ROCE_V2_UDP_DPORT);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
@@ -1541,17 +1565,13 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
struct hns_roce_pf_res_a *req_a;
struct hns_roce_pf_res_b *req_b;
int ret;
- int i;
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i],
- HNS_ROCE_OPC_QUERY_PF_RES, true);
+ hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_QUERY_PF_RES,
+ true);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- }
+ hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_QUERY_PF_RES,
+ true);
ret = hns_roce_cmq_send(hr_dev, desc, 2);
if (ret)
@@ -1644,19 +1664,16 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
struct hns_roce_cmq_desc desc[2];
struct hns_roce_vf_res_a *req_a;
struct hns_roce_vf_res_b *req_b;
- int i;
req_a = (struct hns_roce_vf_res_a *)desc[0].data;
req_b = (struct hns_roce_vf_res_b *)desc[1].data;
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i],
- HNS_ROCE_OPC_ALLOC_VF_RES, false);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- }
+ hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_ALLOC_VF_RES,
+ false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_ALLOC_VF_RES,
+ false);
roce_set_field(req_a->vf_qpc_bt_idx_num,
VF_RES_A_DATA_1_VF_QPC_BT_IDX_M,
@@ -1866,7 +1883,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
- HNS_ROCE_CAP_FLAG_RQ_INLINE |
HNS_ROCE_CAP_FLAG_RECORD_DB |
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
@@ -1999,10 +2015,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
+ caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
+ caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
caps->num_aeq_vectors = resp_a->num_aeq_vectors;
caps->num_other_vectors = resp_a->num_other_vectors;
caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
@@ -2336,7 +2354,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
struct hns_roce_link_table_entry *entry;
enum hns_roce_opcode_type opcode;
u32 page_num;
- int i;
switch (type) {
case TSQ_LINK_TABLE:
@@ -2354,14 +2371,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
page_num = link_tbl->npages;
entry = link_tbl->table.buf;
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false);
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- }
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff);
req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32);
@@ -2880,36 +2893,20 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
- V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num ==
- HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, mr->pd);
-
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
- (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
- mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
- (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
- (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
- (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
-
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
- mr->type == MR_TYPE_MR ? 0 : 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
- 1);
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+ hr_reg_enable(mpt_entry, MPT_L_INV_EN);
+
+ hr_reg_write(mpt_entry, MPT_BIND_EN,
+ !!(mr->access & IB_ACCESS_MW_BIND));
+ hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
+ !!(mr->access & IB_ACCESS_REMOTE_ATOMIC));
+ hr_reg_write(mpt_entry, MPT_RR_EN,
+ !!(mr->access & IB_ACCESS_REMOTE_READ));
+ hr_reg_write(mpt_entry, MPT_RW_EN,
+ !!(mr->access & IB_ACCESS_REMOTE_WRITE));
+ hr_reg_write(mpt_entry, MPT_LW_EN,
+ !!((mr->access & IB_ACCESS_LOCAL_WRITE)));
mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
@@ -2917,9 +2914,19 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ if (mr->type != MR_TYPE_MR)
+ hr_reg_enable(mpt_entry, MPT_PA);
+
if (mr->type == MR_TYPE_DMA)
return 0;
+ if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0)
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num);
+
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD);
+
ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
return ret;
@@ -2927,20 +2934,17 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int flags,
- u32 pdn, int mr_access_flags, u64 iova,
- u64 size, void *mb_buf)
+ void *mb_buf)
{
struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf;
+ u32 mr_access_flags = mr->access;
int ret = 0;
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
- if (flags & IB_MR_REREG_PD) {
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, pdn);
- mr->pd = pdn;
- }
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+ V2_MPT_BYTE_4_PD_S, mr->pd);
if (flags & IB_MR_REREG_ACCESS) {
roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
@@ -2958,13 +2962,10 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
}
if (flags & IB_MR_REREG_TRANS) {
- mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova));
- mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova));
- mpt_entry->len_l = cpu_to_le32(lower_32_bits(size));
- mpt_entry->len_h = cpu_to_le32(upper_32_bits(size));
-
- mr->iova = iova;
- mr->size = size;
+ mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
+ mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
+ mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
}
@@ -3126,11 +3127,6 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
if (nfreed) {
hr_cq->cons_index += nfreed;
- /*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
- wmb();
hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
}
}
@@ -3639,11 +3635,8 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
break;
}
- if (npolled) {
- /* Memory barrier */
- wmb();
+ if (npolled)
hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
out:
spin_unlock_irqrestore(&hr_cq->lock, flags);
@@ -4235,7 +4228,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
- const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct ib_device *ibdev = &hr_dev->ib_dev;
@@ -4243,7 +4235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
dma_addr_t irrl_ba;
enum ib_mtu mtu;
u8 lp_pktn_ini;
- u8 port_num;
u64 *mtts;
u8 *dmac;
u8 *smac;
@@ -4324,15 +4315,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
}
- /* Configure GID index */
- port_num = rdma_ah_get_port_num(&attr->ah_attr);
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S,
- hns_get_gid_index(hr_dev, port_num - 1,
- grh->sgid_index));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0);
-
memcpy(&(context->dmac), dmac, sizeof(u32));
roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4])));
@@ -5083,7 +5065,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
if (!ibqp->uobject) {
qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
@@ -5174,6 +5156,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc desc;
int ret, i;
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
mutex_lock(&hr_dev->qp_table.scc_mutex);
/* set scc ctx clear done flag */
@@ -5220,98 +5205,96 @@ out:
return ret;
}
-static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
- u32 cqn, void *mb_buf, u64 *mtts_wqe,
- u64 *mtts_idx, dma_addr_t dma_handle_wqe,
- dma_addr_t dma_handle_idx)
+#define DMA_IDX_SHIFT 3
+#define DMA_WQE_SHIFT 3
+
+static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
+ struct hns_roce_srq_context *ctx)
{
- struct hns_roce_srq_context *srq_context;
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ u64 mtts_idx[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_idx = 0;
+ int ret;
+
+ /* Get physical address of idx que buf */
+ ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
+ ARRAY_SIZE(mtts_idx), &dma_handle_idx);
+ if (ret < 1) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
+ ret);
+ return -ENOBUFS;
+ }
+
+ hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT);
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_H,
+ upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT));
+
+ hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift));
+
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[0]));
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
+
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[1]));
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
+
+ return 0;
+}
- srq_context = mb_buf;
- memset(srq_context, 0, sizeof(*srq_context));
-
- roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
- SRQC_BYTE_4_SRQ_ST_S, 1);
-
- roce_set_field(srq_context->byte_4_srqn_srqst,
- SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
- SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
- to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
- srq->wqe_cnt));
- roce_set_field(srq_context->byte_4_srqn_srqst,
- SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
- ilog2(srq->wqe_cnt));
-
- roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
- SRQC_BYTE_4_SRQN_S, srq->srqn);
-
- roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
-
- roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
- SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
-
- srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
-
- roce_set_field(srq_context->byte_24_wqe_bt_ba,
- SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
- SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
- dma_handle_wqe >> 35);
-
- roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
- SRQC_BYTE_28_PD_S, pdn);
- roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
- SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
- fls(srq->max_gs - 1));
-
- srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3);
- roce_set_field(srq_context->rsv_idx_bt_ba,
- SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
- SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
- dma_handle_idx >> 35);
-
- srq_context->idx_cur_blk_addr =
- cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0]));
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
- SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
- upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
- SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
- to_hr_hem_hopnum(hr_dev->caps.idx_hop_num,
- srq->wqe_cnt));
-
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
- SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
- to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift));
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
- SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
- to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift));
-
- srq_context->idx_nxt_blk_addr =
- cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1]));
- roce_set_field(srq_context->rsv_idxnxtblkaddr,
- SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
- SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
- upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
- cqn);
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
- SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
- to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
- SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
- to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
-
- roce_set_bit(srq_context->db_record_addr_record_en,
- SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
+{
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_srq_context *ctx = mb_buf;
+ u64 mtts_wqe[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_wqe = 0;
+ int ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ /* Get the physical address of srq buf */
+ ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
+ ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
+ if (ret < 1) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
+ ret);
+ return -ENOBUFS;
+ }
+
+ hr_reg_write(ctx, SRQC_SRQ_ST, 1);
+ hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn);
+ hr_reg_write(ctx, SRQC_SRQN, srq->srqn);
+ hr_reg_write(ctx, SRQC_XRCD, 0);
+ hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn);
+ hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt));
+ hr_reg_write(ctx, SRQC_RQWS,
+ srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1));
+
+ hr_reg_write(ctx, SRQC_WQE_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
+ srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT);
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_H,
+ upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT));
+
+ hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
+
+ return hns_roce_v2_write_srqc_index_queue(srq, ctx);
}
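The helper above packs the WQE base-address-table address as a low/high pair in 8-byte units; the removed ">> 3" / ">> 35" pair suggests DMA_WQE_SHIFT is 3. A minimal user-space sketch of that split, with a made-up DMA address:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t ba = 0x0000004123456000ULL;	/* hypothetical BT base address */
	uint32_t lo = (uint32_t)(ba >> 3);	/* goes into SRQC_WQE_BT_BA_L */
	uint32_t hi = (uint32_t)(ba >> 35);	/* goes into SRQC_WQE_BT_BA_H */

	printf("lo=0x%08" PRIx32 " hi=0x%" PRIx32 "\n", lo, hi);
	return 0;
}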
static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
@@ -5331,7 +5314,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
return -EINVAL;
if (srq_attr_mask & IB_SRQ_LIMIT) {
- if (srq_attr->srq_limit >= srq->wqe_cnt)
+ if (srq_attr->srq_limit > srq->wqe_cnt)
return -EINVAL;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
@@ -5394,8 +5377,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
SRQC_BYTE_8_SRQ_LIMIT_WL_S);
attr->srq_limit = limit_wl;
- attr->max_wr = srq->wqe_cnt - 1;
- attr->max_sge = srq->max_gs;
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
out:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -5626,9 +5609,6 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
++eq->cons_index;
aeqe_found = 1;
- if (eq->cons_index > (2 * eq->entries - 1))
- eq->cons_index = 0;
-
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq);
@@ -5671,9 +5651,6 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
++eq->cons_index;
ceqe_found = 1;
- if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1))
- eq->cons_index = 0;
-
ceqe = next_ceqe_sw_v2(eq);
}
@@ -5948,7 +5925,6 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
buf_attr.region[0].size = eq->entries * eq->eqe_size;
buf_attr.region[0].hopnum = eq->hop_num;
buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr,
hr_dev->caps.eqe_ba_pg_sz +
@@ -6286,6 +6262,7 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
/* Get info from NIC driver. */
hr_dev->reg_base = handle->rinfo.roce_io_base;
+ hr_dev->mem_base = handle->rinfo.roce_mem_base;
hr_dev->caps.num_ports = 1;
hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;
hr_dev->iboe.phy_port[0] = 0;
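The two EQ hunks above drop the manual reset of eq->cons_index once it exceeds a multiple of the queue depth. The EQE-fetch helpers are not shown in this diff, but the usual reason such a reset can go away is that a 32-bit index is allowed to run free and is masked when the slot is computed, so unsigned wraparound needs no special handling. A generic sketch under that assumption:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EQ_ENTRIES 64U			/* assumed power-of-two depth */

static uint32_t eqe_slot(uint32_t cons_index)
{
	return cons_index & (EQ_ENTRIES - 1);
}

int main(void)
{
	uint32_t ci = UINT32_MAX - 1;	/* free-running index about to wrap */
	int i;

	for (i = 0; i < 4; i++, ci++)
		printf("ci=%" PRIu32 " slot=%" PRIu32 "\n", ci, eqe_slot(ci));
	return 0;
}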
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index bdaccf86460d..39621fb6ec16 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -96,7 +96,8 @@
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
-#define HNS_ROCE_INVALID_LKEY 0x100
+#define HNS_ROCE_INVALID_LKEY 0x0
+#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
@@ -366,24 +367,61 @@ struct hns_roce_v2_cq_context {
#define CQC_STASH CQC_FIELD_LOC(63, 63)
struct hns_roce_srq_context {
- __le32 byte_4_srqn_srqst;
- __le32 byte_8_limit_wl;
- __le32 byte_12_xrcd;
- __le32 byte_16_pi_ci;
- __le32 wqe_bt_ba;
- __le32 byte_24_wqe_bt_ba;
- __le32 byte_28_rqws_pd;
- __le32 idx_bt_ba;
- __le32 rsv_idx_bt_ba;
- __le32 idx_cur_blk_addr;
- __le32 byte_44_idxbufpgsz_addr;
- __le32 idx_nxt_blk_addr;
- __le32 rsv_idxnxtblkaddr;
- __le32 byte_56_xrc_cqn;
- __le32 db_record_addr_record_en;
- __le32 db_record_addr;
+ __le32 byte_4_srqn_srqst;
+ __le32 byte_8_limit_wl;
+ __le32 byte_12_xrcd;
+ __le32 byte_16_pi_ci;
+ __le32 wqe_bt_ba;
+ __le32 byte_24_wqe_bt_ba;
+ __le32 byte_28_rqws_pd;
+ __le32 idx_bt_ba;
+ __le32 rsv_idx_bt_ba;
+ __le32 idx_cur_blk_addr;
+ __le32 byte_44_idxbufpgsz_addr;
+ __le32 idx_nxt_blk_addr;
+ __le32 rsv_idxnxtblkaddr;
+ __le32 byte_56_xrc_cqn;
+ __le32 db_record_addr_record_en;
+ __le32 db_record_addr;
};
+#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l)
+
+#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0)
+#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2)
+#define SRQC_SHIFT SRQC_FIELD_LOC(7, 4)
+#define SRQC_SRQN SRQC_FIELD_LOC(31, 8)
+#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32)
+#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48)
+#define SRQC_XRCD SRQC_FIELD_LOC(87, 64)
+#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88)
+#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96)
+#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112)
+#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128)
+#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160)
+#define SRQC_RSV2 SRQC_FIELD_LOC(191, 189)
+#define SRQC_PD SRQC_FIELD_LOC(215, 192)
+#define SRQC_RQWS SRQC_FIELD_LOC(219, 216)
+#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220)
+#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224)
+#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256)
+#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285)
+#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288)
+#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320)
+#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340)
+#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342)
+#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344)
+#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348)
+#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352)
+#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384)
+#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404)
+#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416)
+#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440)
+#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444)
+#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448)
+#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449)
+#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480)
+
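Each SRQC_FIELD_LOC(h, l) above records a field's absolute bit positions inside the 512-bit SRQ context, so the target 32-bit word and mask follow from h/32, h%32 and l%32. A user-space sketch of that mapping (not the driver's helpers; it assumes a field never crosses a 32-bit boundary and ignores the __le32 conversion):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define CTX_DWORDS 16	/* 16 x 32 bits = 512-bit context */

static void ctx_write_field(uint32_t *ctx, int high, int low, uint32_t val)
{
	int dword = high / 32;
	int shift = low % 32;
	int width = high % 32 - shift + 1;
	uint32_t mask = (uint32_t)(((1ULL << width) - 1) << shift);

	ctx[dword] = (ctx[dword] & ~mask) | ((val << shift) & mask);
}

int main(void)
{
	uint32_t ctx[CTX_DWORDS] = { 0 };

	ctx_write_field(ctx, 31, 8, 0x1234);	/* SRQC_SRQN: dword 0, bits 8..31 */
	ctx_write_field(ctx, 219, 216, 5);	/* SRQC_RQWS: dword 6, bits 24..27 */
	printf("dword0=0x%08" PRIx32 " dword6=0x%08" PRIx32 "\n", ctx[0], ctx[6]);
	return 0;
}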
#define SRQC_BYTE_4_SRQ_ST_S 0
#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)
@@ -993,6 +1031,45 @@ struct hns_roce_v2_mpt_entry {
__le32 byte_64_buf_pa1;
};
+#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l)
+
+#define MPT_ST MPT_FIELD_LOC(1, 0)
+#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2)
+#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4)
+#define MPT_PD MPT_FIELD_LOC(31, 8)
+#define MPT_RA_EN MPT_FIELD_LOC(32, 32)
+#define MPT_R_INV_EN MPT_FIELD_LOC(33, 33)
+#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34)
+#define MPT_BIND_EN MPT_FIELD_LOC(35, 35)
+#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36)
+#define MPT_RR_EN MPT_FIELD_LOC(37, 37)
+#define MPT_RW_EN MPT_FIELD_LOC(38, 38)
+#define MPT_LW_EN MPT_FIELD_LOC(39, 39)
+#define MPT_MW_CNT MPT_FIELD_LOC(63, 40)
+#define MPT_FRE MPT_FIELD_LOC(64, 64)
+#define MPT_PA MPT_FIELD_LOC(65, 65)
+#define MPT_ZBVA MPT_FIELD_LOC(66, 66)
+#define MPT_SHARE MPT_FIELD_LOC(67, 67)
+#define MPT_MR_MW MPT_FIELD_LOC(68, 68)
+#define MPT_BPD MPT_FIELD_LOC(69, 69)
+#define MPT_BQP MPT_FIELD_LOC(70, 70)
+#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71)
+#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72)
+#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96)
+#define MPT_LEN MPT_FIELD_LOC(191, 128)
+#define MPT_LKEY MPT_FIELD_LOC(223, 192)
+#define MPT_VA MPT_FIELD_LOC(287, 224)
+#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
+#define MPT_PBL_BA MPT_FIELD_LOC(380, 320)
+#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
+#define MPT_RSV0 MPT_FIELD_LOC(383, 382)
+#define MPT_PA0 MPT_FIELD_LOC(441, 384)
+#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
+#define MPT_PA1 MPT_FIELD_LOC(505, 448)
+#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
+#define MPT_RSV2 MPT_FIELD_LOC(507, 507)
+#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
+
#define V2_MPT_BYTE_4_MPT_ST_S 0
#define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0)
@@ -1059,6 +1136,8 @@ struct hns_roce_v2_mpt_entry {
#define V2_DB_BYTE_4_CMD_S 24
#define V2_DB_BYTE_4_CMD_M GENMASK(27, 24)
+#define V2_DB_FLAG_S 31
+
#define V2_DB_PARAMETER_IDX_S 0
#define V2_DB_PARAMETER_IDX_M GENMASK(15, 0)
@@ -1155,6 +1234,15 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
+
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
+
+#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15
+#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
+
#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7
#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8
@@ -1167,15 +1255,17 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
-#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
+#define V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S 10
+
+#define V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S 11
-#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
+#define V2_RC_FRMR_WQE_BYTE_40_RR_S 12
-#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
+#define V2_RC_FRMR_WQE_BYTE_40_RW_S 13
-#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
+#define V2_RC_FRMR_WQE_BYTE_40_LW_S 14
-#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
+#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
@@ -1190,7 +1280,7 @@ struct hns_roce_v2_rc_send_wqe {
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
- __le32 mode_buf_pg_sz;
+ __le32 byte_40;
};
#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
@@ -1786,12 +1876,8 @@ struct hns_roce_v2_cmq_ring {
dma_addr_t desc_dma_addr;
struct hns_roce_cmq_desc *desc;
u32 head;
- u32 tail;
-
u16 buf_size;
u16 desc_num;
- int next_to_use;
- int next_to_clean;
u8 flag;
spinlock_t lock; /* command queue lock */
};
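The command-queue ring above now keeps only a software head; the mirrored tail and the next_to_use/next_to_clean counters are gone. The polling path is outside this hunk, so the sketch below only shows the generic producer-side bookkeeping such a layout implies: software advances head as it posts descriptors and learns the hardware's position by reading it back (stubbed here) rather than tracking a tail of its own.

#include <stdint.h>
#include <stdio.h>

#define CSQ_DESC_NUM 128U		/* assumed power-of-two ring size */

static uint32_t read_hw_consumer(void)
{
	return 3;			/* stub for a register read */
}

static int csq_free_desc(uint32_t head)
{
	uint32_t used = head - read_hw_consumer();	/* unsigned wrap is fine */

	return (int)(CSQ_DESC_NUM - used);
}

int main(void)
{
	uint32_t head = 5;		/* software producer index */

	printf("free descriptors: %d\n", csq_free_desc(head));
	return 0;
}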
@@ -1800,7 +1886,6 @@ struct hns_roce_v2_cmq {
struct hns_roce_v2_cmq_ring csq;
struct hns_roce_v2_cmq_ring crq;
u16 tx_timeout;
- u16 last_status;
};
enum hns_roce_link_table_type {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index d9179bae4989..c9c0836394a2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -42,22 +42,6 @@
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
-/**
- * hns_get_gid_index - Get gid index.
- * @hr_dev: pointer to structure hns_roce_dev.
- * @port: port, value range: 0 ~ MAX
- * @gid_index: gid_index, value range: 0 ~ MAX
- * Description:
- * N ports shared gids, allocation method as follow:
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * And so on
- */
-u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
-{
- return gid_index * hr_dev->caps.num_ports + port;
-}
-
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
{
u8 phy_port;
@@ -217,7 +201,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_srq_sge = hr_dev->caps.max_srq_sges;
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR &&
+ hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
}
@@ -748,11 +733,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
goto err_pd_table_free;
}
- ret = hns_roce_init_cq_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to init completion queue table.\n");
- goto err_mr_table_free;
- }
+ hns_roce_init_cq_table(hr_dev);
ret = hns_roce_init_qp_table(hr_dev);
if (ret) {
@@ -772,13 +753,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
return 0;
err_qp_table_free:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
- hns_roce_cleanup_qp_table(hr_dev);
+ hns_roce_cleanup_qp_table(hr_dev);
err_cq_table_free:
hns_roce_cleanup_cq_table(hr_dev);
-
-err_mr_table_free:
hns_roce_cleanup_mr_table(hr_dev);
err_pd_table_free:
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 1bcffd93ff3e..79b3c3023fe7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -66,8 +66,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
- u32 pd, u64 iova, u64 size, u32 access)
+static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
unsigned long obj = 0;
@@ -82,11 +81,6 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
return -ENOMEM;
}
- mr->iova = iova; /* MR va starting addr */
- mr->size = size; /* MR addr range */
- mr->pd = pd; /* MR num */
- mr->access = access; /* MR access permit */
- mr->enabled = 0; /* MR active status */
mr->key = hw_index_to_key(obj); /* MR key */
err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
@@ -110,8 +104,7 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
}
static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
- size_t length, struct ib_udata *udata, u64 start,
- int access)
+ struct ib_udata *udata, u64 start)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
bool is_fast = mr->type == MR_TYPE_FRMR;
@@ -121,11 +114,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
buf_attr.page_shift = is_fast ? PAGE_SHIFT :
hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
- buf_attr.region[0].size = length;
+ buf_attr.region[0].size = mr->size;
buf_attr.region[0].hopnum = mr->pbl_hop_num;
buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
- buf_attr.user_access = access;
+ buf_attr.user_access = mr->access;
/* fast MR's buffer is alloced before mapping, not at creation */
buf_attr.mtt_only = is_fast;
@@ -197,9 +189,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
}
mr->enabled = 1;
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return 0;
err_page:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -237,14 +226,16 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_DMA;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = acc;
/* Allocate memory region key */
hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
- ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc);
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
goto err_free;
- ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
+ ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
goto err_mr;
@@ -271,13 +262,17 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->iova = virt_addr;
+ mr->size = length;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = access_flags;
mr->type = MR_TYPE_MR;
- ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length,
- access_flags);
+
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
goto err_alloc_mr;
- ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags);
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
if (ret)
goto err_alloc_key;
@@ -299,35 +294,6 @@ err_alloc_mr:
return ERR_PTR(ret);
}
-static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
- u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct hns_roce_cmd_mailbox *mailbox,
- u32 pdn, struct ib_udata *udata)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_mr *mr = to_hr_mr(ibmr);
- int ret;
-
- free_mr_pbl(hr_dev, mr);
- ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags);
- if (ret) {
- ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret);
- return ret;
- }
-
- ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
- mr_access_flags, virt_addr,
- length, mailbox->buf);
- if (ret) {
- ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret);
- free_mr_pbl(hr_dev, mr);
- }
-
- return ret;
-}
-
struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
u64 length, u64 virt_addr,
int mr_access_flags, struct ib_pd *pd,
@@ -338,7 +304,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_cmd_mailbox *mailbox;
unsigned long mtpt_idx;
- u32 pdn = 0;
int ret;
if (!mr->enabled)
@@ -360,23 +325,29 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
mr->enabled = 0;
+ mr->iova = virt_addr;
+ mr->size = length;
if (flags & IB_MR_REREG_PD)
- pdn = to_hr_pd(pd)->pdn;
+ mr->pd = to_hr_pd(pd)->pdn;
+
+ if (flags & IB_MR_REREG_ACCESS)
+ mr->access = mr_access_flags;
if (flags & IB_MR_REREG_TRANS) {
- ret = rereg_mr_trans(ibmr, flags,
- start, length,
- virt_addr, mr_access_flags,
- mailbox, pdn, udata);
- if (ret)
- goto free_cmd_mbox;
- } else {
- ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
- mr_access_flags, virt_addr,
- length, mailbox->buf);
- if (ret)
+ free_mr_pbl(hr_dev, mr);
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
+ ret);
goto free_cmd_mbox;
+ }
+ }
+
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
+ goto free_cmd_mbox;
}
ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
@@ -386,12 +357,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
}
mr->enabled = 1;
- if (flags & IB_MR_REREG_ACCESS)
- mr->access = mr_access_flags;
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return NULL;
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -421,7 +386,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr;
- u64 length;
int ret;
if (mr_type != IB_MR_TYPE_MEM_REG)
@@ -438,14 +402,15 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_FRMR;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->size = max_num_sg * (1 << PAGE_SHIFT);
/* Allocate memory region key */
- length = max_num_sg * (1 << PAGE_SHIFT);
- ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0);
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
goto err_free;
- ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0);
+ ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
if (ret)
goto err_key;
@@ -454,7 +419,7 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
goto err_pbl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
- mr->ibmr.length = length;
+ mr->ibmr.length = mr->size;
return &mr->ibmr;
@@ -631,30 +596,26 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw)
}
static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- dma_addr_t *pages, struct hns_roce_buf_region *region)
+ struct hns_roce_buf_region *region, dma_addr_t *pages,
+ int max_count)
{
+ int count, npage;
+ int offset, end;
__le64 *mtts;
- int offset;
- int count;
- int npage;
u64 addr;
- int end;
int i;
- /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */
- if (!region->hopnum)
- return 0;
-
offset = region->offset;
end = offset + region->count;
npage = 0;
- while (offset < end) {
+ while (offset < end && npage < max_count) {
+ count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
offset, &count, NULL);
if (!mtts)
return -ENOBUFS;
- for (i = 0; i < count; i++) {
+ for (i = 0; i < count && npage < max_count; i++) {
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
addr = to_hr_hw_page_addr(pages[npage]);
else
@@ -666,7 +627,7 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
offset += count;
}
- return 0;
+ return npage;
}
static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
@@ -729,25 +690,15 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
}
static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- struct hns_roce_buf_attr *buf_attr, bool is_direct,
+ struct hns_roce_buf_attr *buf_attr,
struct ib_udata *udata, unsigned long user_addr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- unsigned int best_pg_shift;
- int all_pg_count = 0;
size_t total_size;
- int ret;
total_size = mtr_bufs_size(buf_attr);
- if (total_size < 1) {
- ibdev_err(ibdev, "failed to check mtr size\n.");
- return -EINVAL;
- }
if (udata) {
- unsigned long pgsz_bitmap;
- unsigned long page_size;
-
mtr->kmem = NULL;
mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
buf_attr->user_access);
@@ -756,76 +707,67 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
PTR_ERR(mtr->umem));
return -ENOMEM;
}
- if (buf_attr->fixed_page)
- pgsz_bitmap = 1 << buf_attr->page_shift;
- else
- pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT);
-
- page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap,
- user_addr);
- if (!page_size)
- return -EINVAL;
- best_pg_shift = order_base_2(page_size);
- all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size);
- ret = 0;
} else {
mtr->umem = NULL;
- mtr->kmem =
- hns_roce_buf_alloc(hr_dev, total_size,
- buf_attr->page_shift,
- is_direct ? HNS_ROCE_BUF_DIRECT : 0);
+ mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
+ buf_attr->page_shift,
+ mtr->hem_cfg.is_direct ?
+ HNS_ROCE_BUF_DIRECT : 0);
if (IS_ERR(mtr->kmem)) {
ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
PTR_ERR(mtr->kmem));
return PTR_ERR(mtr->kmem);
}
-
- best_pg_shift = buf_attr->page_shift;
- all_pg_count = mtr->kmem->npages;
- }
-
- /* must bigger than minimum hardware page shift */
- if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) {
- ret = -EINVAL;
- ibdev_err(ibdev,
- "failed to check mtr, page shift = %u count = %d.\n",
- best_pg_shift, all_pg_count);
- goto err_alloc_mem;
}
- mtr->hem_cfg.buf_pg_shift = best_pg_shift;
- mtr->hem_cfg.buf_pg_count = all_pg_count;
-
return 0;
-err_alloc_mem:
- mtr_free_bufs(hr_dev, mtr);
- return ret;
}
-static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- dma_addr_t *pages, int count, unsigned int page_shift)
+static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ int page_count, unsigned int page_shift)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t *pages;
int npage;
- int err;
+ int ret;
+
+ /* alloc a tmp array to store buffer's dma address */
+ pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
if (mtr->umem)
- npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0,
+ npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0,
mtr->umem, page_shift);
else
- npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0,
+ npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0,
mtr->kmem);
+ if (npage != page_count) {
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
+ page_count);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
+ }
+
if (mtr->hem_cfg.is_direct && npage > 1) {
- err = mtr_check_direct_pages(pages, npage, page_shift);
- if (err) {
- ibdev_err(ibdev, "Failed to check %s direct page-%d\n",
- mtr->umem ? "user" : "kernel", err);
- npage = err;
+ ret = mtr_check_direct_pages(pages, npage, page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n",
+ mtr->umem ? "user" : "kernel", ret);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
}
}
- return npage;
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
+ if (ret)
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
+
+err_alloc_list:
+ kvfree(pages);
+
+ return ret;
}
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
@@ -833,8 +775,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_buf_region *r;
- unsigned int i;
- int err;
+ unsigned int i, mapped_cnt;
+ int ret;
/*
* Only use the first page address as root ba when hopnum is 0, this
@@ -845,26 +787,42 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return 0;
}
- for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
+ mapped_cnt < page_cnt; i++) {
r = &mtr->hem_cfg.region[i];
+ /* if hopnum is 0, no need to map pages in this region */
+ if (!r->hopnum) {
+ mapped_cnt += r->count;
+ continue;
+ }
+
if (r->offset + r->count > page_cnt) {
- err = -EINVAL;
+ ret = -EINVAL;
ibdev_err(ibdev,
"failed to check mtr%u end %u + %u, max %u.\n",
i, r->offset, r->count, page_cnt);
- return err;
+ return ret;
}
- err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r);
- if (err) {
+ ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
+ page_cnt - mapped_cnt);
+ if (ret < 0) {
ibdev_err(ibdev,
"failed to map mtr%u offset %u, ret = %d.\n",
- i, r->offset, err);
- return err;
+ i, r->offset, ret);
+ return ret;
}
+ mapped_cnt += ret;
+ ret = 0;
}
- return 0;
+ if (mapped_cnt < page_cnt) {
+ ret = -ENOBUFS;
+ ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
+ mapped_cnt, page_cnt);
+ }
+
+ return ret;
}
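The reworked mapping loop in user-space terms: a region whose hopnum is 0 only contributes its page count, since its pages need no MTT entries, and the walk stops once every page of the caller's list has been accounted for. The sizes below are hypothetical and map_region() stands in for mtr_map_region():

#include <stdio.h>

struct region {
	int offset;
	int count;
	int hopnum;
};

/* stand-in for mtr_map_region(): pretend every page in range is written */
static int map_region(const struct region *r, int max_count)
{
	return r->count < max_count ? r->count : max_count;
}

static int map_all(const struct region *r, int nregion, int page_cnt)
{
	int mapped = 0;
	int i;

	for (i = 0; i < nregion && mapped < page_cnt; i++) {
		if (!r[i].hopnum) {		/* no MTT entries needed */
			mapped += r[i].count;
			continue;
		}
		if (r[i].offset + r[i].count > page_cnt)
			return -1;
		mapped += map_region(&r[i], page_cnt - mapped);
	}
	return mapped == page_cnt ? 0 : -1;
}

int main(void)
{
	struct region r[] = { { 0, 4, 0 }, { 4, 12, 2 } };

	printf("map result: %d\n", map_all(r, 2, 16));
	return 0;
}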
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
@@ -928,68 +886,92 @@ done:
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
struct hns_roce_buf_attr *attr,
struct hns_roce_hem_cfg *cfg,
- unsigned int *buf_page_shift)
+ unsigned int *buf_page_shift, int unaligned_size)
{
struct hns_roce_buf_region *r;
+ int first_region_padding;
+ int page_cnt, region_cnt;
unsigned int page_shift;
- int page_cnt = 0;
size_t buf_size;
- int region_cnt;
+ /* If mtt is disabled, all pages must be within a continuous range */
+ cfg->is_direct = !mtr_has_mtt(attr);
+ buf_size = mtr_bufs_size(attr);
if (cfg->is_direct) {
- buf_size = cfg->buf_pg_count << cfg->buf_pg_shift;
- page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE);
- /*
- * When HEM buffer use level-0 addressing, the page size equals
- * the buffer size, and the the page size = 4K * 2^N.
+ /* When HEM buffer uses 0-level addressing, the page size is
+ * equal to the whole buffer size. The buffer is split into
+ * small pages, each fixed to 4K as required by the hns ROCEE,
+ * so that adjacent units can be checked for being in one
+ * continuous address range.
*/
- cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt);
- if (attr->region_count > 1) {
- cfg->buf_pg_count = page_cnt;
- page_shift = HNS_HW_PAGE_SHIFT;
- } else {
- cfg->buf_pg_count = 1;
- page_shift = cfg->buf_pg_shift;
- if (buf_size != 1 << page_shift) {
- ibdev_err(&hr_dev->ib_dev,
- "failed to check direct size %zu shift %d.\n",
- buf_size, page_shift);
- return -EINVAL;
- }
- }
+ page_shift = HNS_HW_PAGE_SHIFT;
+
+ /* The ROCEE requires the page size to be 4K * 2 ^ N. */
+ cfg->buf_pg_count = 1;
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
+ order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
+ first_region_padding = 0;
} else {
- page_shift = cfg->buf_pg_shift;
+ page_shift = attr->page_shift;
+ cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unaligned_size,
+ 1 << page_shift);
+ cfg->buf_pg_shift = page_shift;
+ first_region_padding = unaligned_size;
}
- /* convert buffer size to page index and page count */
- for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count &&
- region_cnt < attr->region_count &&
+ /* Convert buffer size to page index and page count for each region and
+ * the buffer's offset needs to be appended to the first region.
+ */
+ for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
r = &cfg->region[region_cnt];
r->offset = page_cnt;
- buf_size = hr_hw_page_align(attr->region[region_cnt].size);
+ buf_size = hr_hw_page_align(attr->region[region_cnt].size +
+ first_region_padding);
r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
+ first_region_padding = 0;
page_cnt += r->count;
r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
r->count);
}
- if (region_cnt < 1) {
- ibdev_err(&hr_dev->ib_dev,
- "failed to check mtr region count, pages = %d.\n",
- cfg->buf_pg_count);
- return -ENOBUFS;
- }
-
cfg->region_count = region_cnt;
*buf_page_shift = page_shift;
return page_cnt;
}
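A numeric sketch of the accounting in mtr_init_buf_cfg() for the non-direct case (hypothetical region sizes, 4K hardware pages): the buffer's byte offset inside its first page is charged to the first region only, and each region is rounded up to whole pages to get its count and starting offset.

#include <stdio.h>

#define HW_PAGE_SHIFT 12		/* 4K hardware pages */

static int div_round_up(int a, int b)
{
	return (a + b - 1) / b;
}

int main(void)
{
	int region_size[2] = { 24576, 4096 };	/* hypothetical byte sizes */
	int unaligned_size = 512;		/* user_addr & ~PAGE_MASK */
	int page_cnt = 0;
	int i;

	for (i = 0; i < 2; i++) {
		int bytes = region_size[i] + (i == 0 ? unaligned_size : 0);
		int count = div_round_up(bytes, 1 << HW_PAGE_SHIFT);

		printf("region %d: offset %d, count %d\n", i, page_cnt, count);
		page_cnt += count;
	}
	printf("total pages: %d\n", page_cnt);
	return 0;
}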
+static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ unsigned int ba_page_shift)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int ret;
+
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+ if (ret)
+ return ret;
+ cfg->root_ba = mtr->hem_list.root_ba;
+ cfg->ba_pg_shift = ba_page_shift;
+ } else {
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
+ }
+
+ return 0;
+}
+
+static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+}
+
/**
* hns_roce_mtr_create - Create hns memory translate region.
*
+ * @hr_dev: RoCE device struct pointer
* @mtr: memory translate region
* @buf_attr: buffer attribute for creating mtr
* @ba_page_shift: page shift for multi-hop base address table
@@ -1001,95 +983,51 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
unsigned int ba_page_shift, struct ib_udata *udata,
unsigned long user_addr)
{
- struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
struct ib_device *ibdev = &hr_dev->ib_dev;
unsigned int buf_page_shift = 0;
- dma_addr_t *pages = NULL;
- int all_pg_cnt;
- int get_pg_cnt;
- int ret = 0;
-
- /* if disable mtt, all pages must in a continuous address range */
- cfg->is_direct = !mtr_has_mtt(buf_attr);
-
- /* if buffer only need mtt, just init the hem cfg */
- if (buf_attr->mtt_only) {
- cfg->buf_pg_shift = buf_attr->page_shift;
- cfg->buf_pg_count = mtr_bufs_size(buf_attr) >>
- buf_attr->page_shift;
- mtr->umem = NULL;
- mtr->kmem = NULL;
- } else {
- ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct,
- udata, user_addr);
- if (ret) {
- ibdev_err(ibdev,
- "failed to alloc mtr bufs, ret = %d.\n", ret);
- return ret;
- }
- }
+ int buf_page_cnt;
+ int ret;
- all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift);
- if (all_pg_cnt < 1) {
- ret = -ENOBUFS;
- ibdev_err(ibdev, "failed to init mtr buf cfg.\n");
- goto err_alloc_bufs;
+ buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
+ &buf_page_shift,
+ udata ? user_addr & ~PAGE_MASK : 0);
+ if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
+ ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %d.\n",
+ buf_page_cnt, buf_page_shift);
+ return -EINVAL;
}
- hns_roce_hem_list_init(&mtr->hem_list);
- if (!cfg->is_direct) {
- ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
- cfg->region, cfg->region_count,
- ba_page_shift);
- if (ret) {
- ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n",
- ret);
- goto err_alloc_bufs;
- }
- cfg->root_ba = mtr->hem_list.root_ba;
- cfg->ba_pg_shift = ba_page_shift;
- } else {
- cfg->ba_pg_shift = cfg->buf_pg_shift;
+ ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
+ return ret;
}
- /* no buffer to map */
- if (buf_attr->mtt_only)
+ /* The caller has its own buffer list and invokes the hns_roce_mtr_map()
+ * to finish the MTT configuration.
+ */
+ if (buf_attr->mtt_only) {
+ mtr->umem = NULL;
+ mtr->kmem = NULL;
return 0;
-
- /* alloc a tmp array to store buffer's dma address */
- pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- ibdev_err(ibdev, "failed to alloc mtr page list %d.\n",
- all_pg_cnt);
- goto err_alloc_hem_list;
- }
-
- get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
- buf_page_shift);
- if (get_pg_cnt != all_pg_cnt) {
- ibdev_err(ibdev, "failed to get mtr page %d != %d.\n",
- get_pg_cnt, all_pg_cnt);
- ret = -ENOBUFS;
- goto err_alloc_page_list;
}
- /* write buffer's dma address to BA table */
- ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
if (ret) {
- ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
- goto err_alloc_page_list;
+ ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
+ goto err_alloc_mtt;
}
- /* drop tmp array */
- kvfree(pages);
- return 0;
-err_alloc_page_list:
- kvfree(pages);
-err_alloc_hem_list:
- hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
-err_alloc_bufs:
+ /* Write buffer's dma address to MTT */
+ ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
+ if (ret)
+ ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
+ else
+ return 0;
+
mtr_free_bufs(hr_dev, mtr);
+err_alloc_mtt:
+ mtr_free_mtt(hr_dev, mtr);
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 1116371adf74..004aca9086ab 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
mutex_unlock(&hr_dev->qp_table.bank_mutex);
}
+static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_rq_sg;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel space; the userspace driver
+ * already accounts for the reserved SGEs when allocating the WQE
+ * buffer, so there is no need to do it again in the kernel. However,
+ * the resulting number may exceed the SGE capacity recorded in the
+ * firmware, so the kernel driver adapts the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_qp->rq.rsv_sge = 1;
+
+ return max_sge;
+}
+
static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
- struct hns_roce_qp *hr_qp, int has_rq)
+ struct hns_roce_qp *hr_qp, int has_rq, bool user)
{
+ u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
u32 cnt;
/* If srq exist, set zero for relative number of rq */
@@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
/* Check the validity of QP support capacity */
if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
- cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
- ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n",
+ cap->max_recv_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "RQ config error, depth = %u, sge = %u\n",
cap->max_recv_wr, cap->max_recv_sge);
return -EINVAL;
}
@@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
return -EINVAL;
}
- hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
+ hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+ hr_qp->rq.rsv_sge);
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
@@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
hr_qp->rq_inl_buf.wqe_cnt = 0;
cap->max_recv_wr = cnt;
- cap->max_recv_sge = hr_qp->rq.max_gs;
+ cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
return 0;
}
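A numeric sketch of the HIP08 SGE reservation above: a kernel-space QP sets rq.rsv_sge to 1, rq.max_gs is rounded up to a power of two with that reserved entry included, and the value handed back in cap->max_recv_sge excludes it again. The requested value is an assumption:

#include <stdio.h>

static unsigned int roundup_pow_of_two_u(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int max_recv_sge = 5;	/* assumed application request */
	unsigned int rsv_sge = 1;	/* kernel QP on HIP08 */
	unsigned int max_gs;

	if (max_recv_sge < 1)
		max_recv_sge = 1;
	max_gs = roundup_pow_of_two_u(max_recv_sge + rsv_sge);
	printf("max_gs=%u, reported max_recv_sge=%u\n", max_gs, max_gs - rsv_sge);
	return 0;
}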
@@ -599,7 +624,6 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
return -EINVAL;
buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
- buf_attr->fixed_page = true;
buf_attr->region_count = idx;
return 0;
@@ -919,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
- hns_roce_qp_has_rq(init_attr));
+ hns_roce_qp_has_rq(init_attr), !!udata);
if (ret) {
ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
ret);
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index c4ae57e4173a..d5a6de0e7095 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -3,6 +3,7 @@
* Copyright (c) 2018 Hisilicon Limited.
*/
+#include <linux/pci.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
@@ -76,40 +77,16 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
- u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr)
+static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
- u64 mtts_wqe[MTT_MIN_COUNT] = { 0 };
- u64 mtts_idx[MTT_MIN_COUNT] = { 0 };
- dma_addr_t dma_handle_wqe = 0;
- dma_addr_t dma_handle_idx = 0;
int ret;
- /* Get the physical address of srq buf */
- ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
- ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
- if (ret < 1) {
- ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
- ret);
- return -ENOBUFS;
- }
-
- /* Get physical address of idx que buf */
- ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx,
- ARRAY_SIZE(mtts_idx), &dma_handle_idx);
- if (ret < 1) {
- ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
- ret);
- return -ENOBUFS;
- }
-
ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
if (ret) {
- ibdev_err(ibdev,
- "failed to alloc SRQ number, ret = %d.\n", ret);
+ ibdev_err(ibdev, "failed to alloc SRQ number.\n");
return -ENOMEM;
}
@@ -127,34 +104,36 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR_OR_NULL(mailbox)) {
- ret = -ENOMEM;
ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n");
+ ret = -ENOMEM;
goto err_xa;
}
- hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
- mtts_wqe, mtts_idx, dma_handle_wqe,
- dma_handle_idx);
+ ret = hr_dev->hw->write_srqc(srq, mailbox->buf);
+ if (ret) {
+ ibdev_err(ibdev, "failed to write SRQC.\n");
+ goto err_mbox;
+ }
ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret) {
ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret);
- goto err_xa;
+ goto err_mbox;
}
- atomic_set(&srq->refcount, 1);
- init_completion(&srq->free);
- return ret;
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+err_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
err_xa:
xa_erase(&srq_table->xa, srq->srqn);
-
err_put:
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
-
err_out:
hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+
return ret;
}
@@ -178,46 +157,13 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
}
-static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
- struct ib_udata *udata, unsigned long addr)
-{
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_buf_attr buf_attr = {};
- int err;
-
- srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
- HNS_ROCE_SGE_SIZE *
- srq->max_gs)));
-
- buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
- buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
- srq->wqe_shift);
- buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
- buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
-
- err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
- hr_dev->caps.srqwqe_ba_pg_sz +
- HNS_HW_PAGE_SHIFT, udata, addr);
- if (err)
- ibdev_err(ibdev,
- "failed to alloc SRQ buf mtr, ret = %d.\n", err);
-
- return err;
-}
-
-static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
-{
- hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
-}
-
static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
struct ib_udata *udata, unsigned long addr)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_buf_attr buf_attr = {};
- int err;
+ int ret;
srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);
@@ -226,31 +172,33 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
srq->idx_que.entry_shift);
buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
buf_attr.region_count = 1;
- buf_attr.fixed_page = true;
- err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
+ ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT,
udata, addr);
- if (err) {
+ if (ret) {
ibdev_err(ibdev,
- "failed to alloc SRQ idx mtr, ret = %d.\n", err);
- return err;
+ "failed to alloc SRQ idx mtr, ret = %d.\n", ret);
+ return ret;
}
if (!udata) {
idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
if (!idx_que->bitmap) {
ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n");
- err = -ENOMEM;
+ ret = -ENOMEM;
goto err_idx_mtr;
}
}
+ idx_que->head = 0;
+ idx_que->tail = 0;
+
return 0;
err_idx_mtr:
hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
- return err;
+ return ret;
}
static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
@@ -262,10 +210,42 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
}
+static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
+ HNS_ROCE_SGE_SIZE *
+ srq->max_gs)));
+
+ buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->wqe_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
+ hr_dev->caps.srqwqe_ba_pg_sz +
+ HNS_HW_PAGE_SHIFT, udata, addr);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to alloc SRQ buf mtr, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+}
+
static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
- srq->head = 0;
- srq->tail = srq->wqe_cnt - 1;
srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
if (!srq->wrid)
return -ENOMEM;
@@ -279,96 +259,171 @@ static void free_srq_wrid(struct hns_roce_srq *srq)
srq->wrid = NULL;
}
-int hns_roce_create_srq(struct ib_srq *ib_srq,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_srq_sges;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel space; the userspace driver
+ * already accounts for the reserved SGEs when allocating the WQE
+ * buffer, so there is no need to do it again in the kernel. However,
+ * the resulting number may exceed the SGE capacity recorded in the
+ * firmware, so the kernel driver adapts the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_srq->rsv_sge = 1;
+
+ return max_sge;
+}
+
+static int set_srq_basic_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq_attr *attr = &init_attr->attr;
+ u32 max_sge;
+
+ max_sge = proc_srq_sge(hr_dev, srq, !!udata);
+ if (attr->max_wr > hr_dev->caps.max_srq_wrs ||
+ attr->max_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid SRQ attr, depth = %u, sge = %u.\n",
+ attr->max_wr, attr->max_sge);
+ return -EINVAL;
+ }
+
+ attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM);
+ srq->wqe_cnt = roundup_pow_of_two(attr->max_wr);
+ srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
+
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
+ attr->srq_limit = 0;
+
+ return 0;
+}
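set_srq_basic_param() applies the same normalization to SRQ attributes: the requested depth is clamped to a minimum, depth and SGE count are rounded up to powers of two (the SGE count with the reserved entry included), and the attributes written back to the caller exclude the reservation. A small sketch; the minimum below is a placeholder, not the driver's HNS_ROCE_MIN_SRQ_WQE_NUM value:

#include <stdio.h>

#define MIN_SRQ_WQE_NUM 64U		/* placeholder minimum depth */

static unsigned int roundup_pow_of_two_u(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int max_wr = 100, max_sge = 3;	/* assumed request */
	unsigned int rsv_sge = 1;		/* HIP08, kernel consumer */
	unsigned int wqe_cnt, max_gs;

	if (max_wr < MIN_SRQ_WQE_NUM)
		max_wr = MIN_SRQ_WQE_NUM;
	wqe_cnt = roundup_pow_of_two_u(max_wr);
	max_gs = roundup_pow_of_two_u(max_sge + rsv_sge);
	printf("wqe_cnt=%u, reported max_wr=%u max_sge=%u\n",
	       wqe_cnt, wqe_cnt, max_gs - rsv_sge);
	return 0;
}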
+
+static void set_srq_ext_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr)
+{
+ srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
+ to_hr_cq(init_attr->ext.cq)->cqn : 0;
+}
+
+static int set_srq_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
- struct hns_roce_ib_create_srq_resp resp = {};
- struct hns_roce_srq *srq = to_hr_srq(ib_srq);
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_ib_create_srq ucmd = {};
int ret;
- u32 cqn;
- if (init_attr->srq_type != IB_SRQT_BASIC &&
- init_attr->srq_type != IB_SRQT_XRC)
- return -EOPNOTSUPP;
+ ret = set_srq_basic_param(srq, init_attr, udata);
+ if (ret)
+ return ret;
- /* Check the actual SRQ wqe and SRQ sge num */
- if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
- init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
- return -EINVAL;
+ set_srq_ext_param(srq, init_attr);
- mutex_init(&srq->mutex);
- spin_lock_init(&srq->lock);
+ return 0;
+}
- srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
- srq->max_gs = init_attr->attr.max_sge;
+static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ib_create_srq ucmd = {};
+ int ret;
if (udata) {
ret = ib_copy_from_udata(&ucmd, udata,
min(udata->inlen, sizeof(ucmd)));
if (ret) {
- ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n",
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to copy SRQ udata, ret = %d.\n",
ret);
return ret;
}
}
- ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr);
- if (ret) {
- ibdev_err(ibdev,
- "failed to alloc SRQ buffer, ret = %d.\n", ret);
+ ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
+ if (ret)
return ret;
- }
- ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret);
- goto err_buf_alloc;
- }
+ ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr);
+ if (ret)
+ goto err_idx;
if (!udata) {
ret = alloc_srq_wrid(hr_dev, srq);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n",
- ret);
- goto err_idx_alloc;
- }
+ if (ret)
+ goto err_wqe_buf;
}
- cqn = ib_srq_has_cq(init_attr->srq_type) ?
- to_hr_cq(init_attr->ext.cq)->cqn : 0;
- srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+ return 0;
- ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0);
- if (ret) {
- ibdev_err(ibdev,
- "failed to alloc SRQ context, ret = %d.\n", ret);
- goto err_wrid_alloc;
- }
+err_wqe_buf:
+ free_srq_wqe_buf(hr_dev, srq);
+err_idx:
+ free_srq_idx(hr_dev, srq);
- srq->event = hns_roce_ib_srq_event;
- resp.srqn = srq->srqn;
+ return ret;
+}
+
+static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ free_srq_wrid(srq);
+ free_srq_wqe_buf(hr_dev, srq);
+ free_srq_idx(hr_dev, srq);
+}
+
+int hns_roce_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+ struct hns_roce_ib_create_srq_resp resp = {};
+ struct hns_roce_srq *srq = to_hr_srq(ib_srq);
+ int ret;
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+
+ ret = set_srq_param(srq, init_attr, udata);
+ if (ret)
+ return ret;
+
+ ret = alloc_srq_buf(hr_dev, srq, udata);
+ if (ret)
+ return ret;
+
+ ret = alloc_srqc(hr_dev, srq);
+ if (ret)
+ goto err_srq_buf;
if (udata) {
- ret = ib_copy_to_udata(udata, &resp,
- min(udata->outlen, sizeof(resp)));
- if (ret)
- goto err_srqc_alloc;
+ resp.srqn = srq->srqn;
+ if (ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)))) {
+ ret = -EFAULT;
+ goto err_srqc;
+ }
}
+ srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+ srq->event = hns_roce_ib_srq_event;
+ atomic_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
return 0;
-err_srqc_alloc:
+err_srqc:
free_srqc(hr_dev, srq);
-err_wrid_alloc:
- free_srq_wrid(srq);
-err_idx_alloc:
- free_srq_idx(hr_dev, srq);
-err_buf_alloc:
+err_srq_buf:
free_srq_buf(hr_dev, srq);
+
return ret;
}
@@ -378,8 +433,6 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
free_srqc(hr_dev, srq);
- free_srq_idx(hr_dev, srq);
- free_srq_wrid(srq);
free_srq_buf(hr_dev, srq);
return 0;
}