diff --git a/contrib/ofed/infiniband-diags/src/ibdiag_common.c b/contrib/ofed/infiniband-diags/src/ibdiag_common.c index ddaee8c5b55..28dedc19b7f 100644 --- a/contrib/ofed/infiniband-diags/src/ibdiag_common.c +++ b/contrib/ofed/infiniband-diags/src/ibdiag_common.c @@ -120,6 +120,7 @@ static inline int val_str_true(const char *val_str) void read_ibdiag_config(const char *file) { char buf[1024]; + char orig_buf[1024]; FILE *config_fd = NULL; char *p_prefix, *p_last; char *name; @@ -142,8 +143,14 @@ void read_ibdiag_config(const char *file) if (*p_prefix == '#') continue; /* ignore comment lines */ + strlcpy(orig_buf, buf, sizeof(orig_buf)); name = strtok_r(p_prefix, "=", &p_last); val_str = strtok_r(NULL, "\n", &p_last); + if (!name || !val_str) { + fprintf(stderr, "%s: malformed line in \"%s\":\n%s\n", + prog_name, file, orig_buf); + continue; + } if (strncmp(name, "CA", strlen("CA")) == 0) { free(ibd_ca); @@ -165,6 +172,7 @@ void read_ibdiag_config(const char *file) ibd_sakey = strtoull(val_str, 0, 0); } else if (strncmp(name, "nd_format", strlen("nd_format")) == 0) { + free(ibd_nd_format); ibd_nd_format = strdup(val_str); } } diff --git a/contrib/ofed/infiniband-diags/src/ibdiag_sa.c b/contrib/ofed/infiniband-diags/src/ibdiag_sa.c index ea272a976d7..7154ac1b7d2 100644 --- a/contrib/ofed/infiniband-diags/src/ibdiag_sa.c +++ b/contrib/ofed/infiniband-diags/src/ibdiag_sa.c @@ -222,7 +222,7 @@ static const char *ib_mad_inv_field_str[] = { "MAD Reserved", "MAD Reserved", "MAD Reserved", - "MAD Invalid value in Attribute field(s) or Attribute Modifier" + "MAD Invalid value in Attribute field(s) or Attribute Modifier", "MAD UNKNOWN ERROR" }; #define MAD_ERR_UNKNOWN (ARR_SIZE(ib_mad_inv_field_str) - 1) diff --git a/contrib/ofed/infiniband-diags/src/iblinkinfo.c b/contrib/ofed/infiniband-diags/src/iblinkinfo.c index 40e012d4476..13ac3e2eabc 100644 --- a/contrib/ofed/infiniband-diags/src/iblinkinfo.c +++ b/contrib/ofed/infiniband-diags/src/iblinkinfo.c @@ -293,7 +293,8 @@ void print_node_header(ibnd_node_t *node, int *out_header_flag, printf("%s%s: %s:\n", out_prefix ? 
out_prefix : "", nodetype_str(node), remap); - (*out_header_flag)++; + if (out_header_flag) + (*out_header_flag)++; free(remap); } } @@ -397,7 +398,7 @@ void diff_node_ports(ibnd_node_t * fabric1_node, ibnd_node_t * fabric2_node, } if (output_diff && fabric2_port) { - print_node_header(fabric1_node, + print_node_header(fabric2_node, head_print, NULL); print_port(fabric2_node, diff --git a/contrib/ofed/infiniband-diags/src/ibportstate.c b/contrib/ofed/infiniband-diags/src/ibportstate.c index 06bd5d21755..7bea3398fb7 100644 --- a/contrib/ofed/infiniband-diags/src/ibportstate.c +++ b/contrib/ofed/infiniband-diags/src/ibportstate.c @@ -564,6 +564,7 @@ int main(int argc, char **argv) printf("Port is already in enable state\n"); goto close_port; } + /* FALLTHROUGH */ case ENABLE: case RESET: /* Polling */ diff --git a/contrib/ofed/infiniband-diags/src/ibqueryerrors.c b/contrib/ofed/infiniband-diags/src/ibqueryerrors.c index 2329f914771..3efaf135e62 100644 --- a/contrib/ofed/infiniband-diags/src/ibqueryerrors.c +++ b/contrib/ofed/infiniband-diags/src/ibqueryerrors.c @@ -130,6 +130,7 @@ static void set_thres(char *name, uint32_t val) static void set_thresholds(char *threshold_file) { char buf[1024]; + char orig_buf[1024]; int val = 0; FILE *thresf = fopen(threshold_file, "r"); char *p_prefix, *p_last; @@ -156,8 +157,14 @@ static void set_thresholds(char *threshold_file) if (*p_prefix == '#') continue; /* ignore comment lines */ + strlcpy(orig_buf, buf, sizeof(orig_buf)); name = strtok_r(p_prefix, "=", &p_last); val_str = strtok_r(NULL, "\n", &p_last); + if (!name || !val_str) { + fprintf(stderr, "malformed line in \"%s\":\n%s\n", + threshold_file, orig_buf); + continue; + } val = strtoul(val_str, NULL, 0); set_thres(name, val); diff --git a/contrib/ofed/infiniband-diags/src/ibroute.c b/contrib/ofed/infiniband-diags/src/ibroute.c index 8e4544edb6e..464c34b2597 100644 --- a/contrib/ofed/infiniband-diags/src/ibroute.c +++ b/contrib/ofed/infiniband-diags/src/ibroute.c @@ -222,6 +222,7 @@ char *dump_multicast_tables(ib_portid_t * portid, unsigned startlid, fprintf(stderr, "SubnGet() failed" "; MAD status 0x%x AM 0x%x\n", status, mod); + free(mapnd); return NULL; } } @@ -354,6 +355,8 @@ char *dump_unicast_tables(ib_portid_t * portid, int startlid, int endlid) " (%s):\n", startlid, endlid, portid2str(portid), nodeguid, mapnd); + free(mapnd); + DEBUG("Switch top is 0x%x\n", top); printf(" Lid Out Destination\n"); @@ -390,7 +393,6 @@ char *dump_unicast_tables(ib_portid_t * portid, int startlid, int endlid) } printf("%d %slids dumped \n", n, dump_all ? 
"" : "valid "); - free(mapnd); return 0; } diff --git a/contrib/ofed/libibumad/umad_str.c b/contrib/ofed/libibumad/umad_str.c index 0a014f7e202..412441375a2 100644 --- a/contrib/ofed/libibumad/umad_str.c +++ b/contrib/ofed/libibumad/umad_str.c @@ -246,7 +246,6 @@ static const char * umad_sm_attr_str(__be16 attr_id) default: return (umad_common_attr_str(attr_id)); } - return (""); } static const char * umad_sa_attr_str(__be16 attr_id) @@ -301,7 +300,6 @@ static const char * umad_sa_attr_str(__be16 attr_id) default: return (umad_common_attr_str(attr_id)); } - return (""); } static const char * umad_cm_attr_str(__be16 attr_id) @@ -336,7 +334,6 @@ static const char * umad_cm_attr_str(__be16 attr_id) default: return (umad_common_attr_str(attr_id)); } - return (""); } const char * umad_attribute_str(uint8_t mgmt_class, __be16 attr_id) diff --git a/contrib/ofed/libibverbs/device.c b/contrib/ofed/libibverbs/device.c index 8b52dd51306..d5cd2173cd8 100644 --- a/contrib/ofed/libibverbs/device.c +++ b/contrib/ofed/libibverbs/device.c @@ -264,7 +264,6 @@ int __ibv_close_device(struct ibv_context *context) { int async_fd = context->async_fd; int cmd_fd = context->cmd_fd; - int cq_fd = -1; struct verbs_context *context_ex; struct verbs_device *verbs_device = verbs_get_device(context->device); @@ -279,8 +278,6 @@ int __ibv_close_device(struct ibv_context *context) close(async_fd); close(cmd_fd); - if (abi_ver <= 2) - close(cq_fd); return 0; } diff --git a/contrib/ofed/libibverbs/examples/rc_pingpong.c b/contrib/ofed/libibverbs/examples/rc_pingpong.c index 1e260161274..2c86be2550e 100644 --- a/contrib/ofed/libibverbs/examples/rc_pingpong.c +++ b/contrib/ofed/libibverbs/examples/rc_pingpong.c @@ -273,7 +273,11 @@ static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, return NULL; } - listen(sockfd, 1); + if (listen(sockfd, 1) < 0) { + perror("listen() failed"); + close(sockfd); + return NULL; + } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { diff --git a/contrib/ofed/libibverbs/examples/srq_pingpong.c b/contrib/ofed/libibverbs/examples/srq_pingpong.c index 676572b559c..36373cebe8f 100644 --- a/contrib/ofed/libibverbs/examples/srq_pingpong.c +++ b/contrib/ofed/libibverbs/examples/srq_pingpong.c @@ -283,7 +283,11 @@ static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, return NULL; } - listen(sockfd, 1); + if (listen(sockfd, 1) < 0) { + perror("listen() failed"); + close(sockfd); + return NULL; + } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { diff --git a/contrib/ofed/libibverbs/examples/uc_pingpong.c b/contrib/ofed/libibverbs/examples/uc_pingpong.c index 6225d6cda70..0f362a424fe 100644 --- a/contrib/ofed/libibverbs/examples/uc_pingpong.c +++ b/contrib/ofed/libibverbs/examples/uc_pingpong.c @@ -247,7 +247,11 @@ static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, return NULL; } - listen(sockfd, 1); + if (listen(sockfd, 1) < 0) { + perror("listen() failed"); + close(sockfd); + return NULL; + } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { diff --git a/contrib/ofed/libibverbs/examples/ud_pingpong.c b/contrib/ofed/libibverbs/examples/ud_pingpong.c index 266b5964a41..07d14ccef71 100644 --- a/contrib/ofed/libibverbs/examples/ud_pingpong.c +++ b/contrib/ofed/libibverbs/examples/ud_pingpong.c @@ -245,7 +245,11 @@ static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, return NULL; } - listen(sockfd, 1); + if (listen(sockfd, 1) < 0) { + 
perror("listen() failed"); + close(sockfd); + return NULL; + } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { diff --git a/contrib/ofed/libibverbs/examples/xsrq_pingpong.c b/contrib/ofed/libibverbs/examples/xsrq_pingpong.c index 83abaf7780d..c8b8a8ebf34 100644 --- a/contrib/ofed/libibverbs/examples/xsrq_pingpong.c +++ b/contrib/ofed/libibverbs/examples/xsrq_pingpong.c @@ -630,7 +630,11 @@ static int pp_server_connect(int port) return 1; } - listen(sockfd, ctx.num_clients); + if (listen(sockfd, ctx.num_clients) < 0) { + perror("listen() failed"); + close(sockfd); + return 1; + } for (i = 0; i < ctx.num_clients; i++) { connfd = accept(sockfd, NULL, NULL); diff --git a/contrib/ofed/libibverbs/libibverbs.map b/contrib/ofed/libibverbs/libibverbs.map index 540124110e1..b49c09a06ce 100644 --- a/contrib/ofed/libibverbs/libibverbs.map +++ b/contrib/ofed/libibverbs/libibverbs.map @@ -10,7 +10,6 @@ IBVERBS_1.0 { ibv_get_async_event; ibv_ack_async_event; ibv_query_device; - ibv_query_device_ex; ibv_query_port; ibv_query_gid; ibv_query_pkey; @@ -65,7 +64,6 @@ IBVERBS_1.1 { ibv_fork_init; ibv_dontfork_range; ibv_dofork_range; - ibv_register_driver; ibv_node_type_str; ibv_port_state_str; @@ -136,7 +134,6 @@ IBVERBS_PRIVATE_14 { ibv_cmd_rereg_mr; ibv_cmd_resize_cq; ibv_query_gid_type; - ibv_register_driver; verbs_register_driver; verbs_init_cq; }; diff --git a/contrib/ofed/libirdma/abi.h b/contrib/ofed/libirdma/abi.h new file mode 100644 index 00000000000..a3e159828be --- /dev/null +++ b/contrib/ofed/libirdma/abi.h @@ -0,0 +1,195 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (C) 2019 - 2023 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef PROVIDER_IRDMA_ABI_H
+#define PROVIDER_IRDMA_ABI_H
+
+#include "irdma.h"
+#include
+#include "irdma-abi.h"
+
+#define IRDMA_MIN_ABI_VERSION 0
+#define IRDMA_MAX_ABI_VERSION 5
+
+struct irdma_ualloc_pd_resp {
+	struct ibv_alloc_pd_resp ibv_resp;
+	__u32 pd_id;
+	__u8 rsvd[4];
+
+};
+struct irdma_ucreate_cq {
+	struct ibv_create_cq ibv_cmd;
+	__aligned_u64 user_cq_buf;
+	__aligned_u64 user_shadow_area;
+
+};
+struct irdma_ucreate_cq_resp {
+	struct ibv_create_cq_resp ibv_resp;
+	__u32 cq_id;
+	__u32 cq_size;
+
+};
+struct irdma_ucreate_cq_ex {
+	struct ibv_create_cq_ex ibv_cmd;
+	__aligned_u64 user_cq_buf;
+	__aligned_u64 user_shadow_area;
+
+};
+struct irdma_ucreate_cq_ex_resp {
+	struct ibv_create_cq_resp_ex ibv_resp;
+	__u32 cq_id;
+	__u32 cq_size;
+
+};
+struct irdma_uresize_cq {
+	struct ibv_resize_cq ibv_cmd;
+	__aligned_u64 user_cq_buffer;
+
+};
+struct irdma_uresize_cq_resp {
+	struct ibv_resize_cq_resp ibv_resp;
+
+};
+struct irdma_ucreate_qp {
+	struct ibv_create_qp ibv_cmd;
+	__aligned_u64 user_wqe_bufs;
+	__aligned_u64 user_compl_ctx;
+	__aligned_u64 comp_mask;
+
+};
+struct irdma_ucreate_qp_resp {
+	struct ibv_create_qp_resp ibv_resp;
+	__u32 qp_id;
+	__u32 actual_sq_size;
+	__u32 actual_rq_size;
+	__u32 irdma_drv_opt;
+	__u16 push_idx;
+	__u8 lsmm;
+	__u8 rsvd;
+	__u32 qp_caps;
+	__aligned_u64 comp_mask;
+	__u8 start_wqe_idx;
+	__u8 rsvd2[7];
+
+};
+struct irdma_umodify_qp_resp {
+	struct ibv_modify_qp_resp_ex ibv_resp;
+	__aligned_u64 push_wqe_mmap_key;
+	__aligned_u64 push_db_mmap_key;
+	__u16 push_offset;
+	__u8 push_valid;
+	__u8 rd_fence_rate;
+	__u8 rsvd[4];
+
+};
+struct irdma_get_context {
+	struct ibv_get_context ibv_cmd;
+	__u32 rsvd32;
+	__u8 userspace_ver;
+	__u8 rsvd8[3];
+	__aligned_u64 comp_mask;
+
+};
+struct irdma_get_context_resp {
+	struct ibv_get_context_resp ibv_resp;
+	__u32 max_pds;
+	__u32 max_qps;
+	__u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */
+	__u8 kernel_ver;
+	__u8 rsvd[3];
+	__aligned_u64 feature_flags;
+	__aligned_u64 db_mmap_key;
+	__u32 max_hw_wq_frags;
+	__u32 max_hw_read_sges;
+	__u32 max_hw_inline;
+	__u32 max_hw_rq_quanta;
+	__u32 max_hw_wq_quanta;
+	__u32 min_hw_cq_size;
+	__u32 max_hw_cq_size;
+	__u16 max_hw_sq_chunk;
+	__u8 hw_rev;
+	__u8 rsvd2;
+	__aligned_u64 comp_mask;
+	__u16 min_hw_wq_size;
+	__u8 rsvd3[6];
+
+};
+struct irdma_ureg_mr {
+	struct ibv_reg_mr ibv_cmd;
+	__u16 reg_type; /* enum irdma_memreg_type */
+	__u16 cq_pages;
+	__u16 rq_pages;
+	__u16 sq_pages;
+
+};
+struct irdma_urereg_mr {
+	struct ibv_rereg_mr ibv_cmd;
+	__u16 reg_type; /* enum irdma_memreg_type */
+	__u16 cq_pages;
+	__u16 rq_pages;
+	__u16 sq_pages;
+
+};
+struct irdma_ucreate_ah_resp {
+	struct ibv_create_ah_resp ibv_resp;
+	__u32 ah_id;
+	__u8 rsvd[4];
+
+};
+
+struct irdma_modify_qp_cmd {
+	struct ibv_modify_qp_ex ibv_cmd;
+	__u8 sq_flush;
+	__u8 rq_flush;
+	__u8 rsvd[6];
+};
+
+struct irdma_query_device_ex {
+	struct ibv_query_device_ex ibv_cmd;
+};
+
+struct irdma_query_device_ex_resp {
+	struct ibv_query_device_resp_ex ibv_resp;
+	__u32 comp_mask;
+	__u32 response_length;
+	struct ibv_odp_caps_resp odp_caps;
+	__u64 timestamp_mask;
+	__u64 hca_core_clock;
+	__u64 device_cap_flags_ex;
+	struct ibv_rss_caps_resp rss_caps;
+	__u32 max_wq_type_rq;
+	__u32 raw_packet_caps;
+	struct ibv_tso_caps tso_caps;
+};
+#endif /* PROVIDER_IRDMA_ABI_H */
diff --git a/contrib/ofed/libirdma/i40e_devids.h b/contrib/ofed/libirdma/i40e_devids.h
new file mode 100644
index 00000000000..e775a75bade
--- /dev/null
+++
b/contrib/ofed/libirdma/i40e_devids.h @@ -0,0 +1,68 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2015 - 2019 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef I40E_DEVIDS_H +#define I40E_DEVIDS_H + +/* Vendor ID */ +#define I40E_INTEL_VENDOR_ID 0x8086 + +/* Device IDs */ +#define I40E_DEV_ID_SFP_XL710 0x1572 +#define I40E_DEV_ID_QEMU 0x1574 +#define I40E_DEV_ID_KX_B 0x1580 +#define I40E_DEV_ID_KX_C 0x1581 +#define I40E_DEV_ID_QSFP_A 0x1583 +#define I40E_DEV_ID_QSFP_B 0x1584 +#define I40E_DEV_ID_QSFP_C 0x1585 +#define I40E_DEV_ID_10G_BASE_T 0x1586 +#define I40E_DEV_ID_20G_KR2 0x1587 +#define I40E_DEV_ID_20G_KR2_A 0x1588 +#define I40E_DEV_ID_10G_BASE_T4 0x1589 +#define I40E_DEV_ID_25G_B 0x158A +#define I40E_DEV_ID_25G_SFP28 0x158B +#define I40E_DEV_ID_VF 0x154C +#define I40E_DEV_ID_VF_HV 0x1571 +#define I40E_DEV_ID_X722_A0 0x374C +#define I40E_DEV_ID_X722_A0_VF 0x374D +#define I40E_DEV_ID_KX_X722 0x37CE +#define I40E_DEV_ID_QSFP_X722 0x37CF +#define I40E_DEV_ID_SFP_X722 0x37D0 +#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 +#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 +#define I40E_DEV_ID_SFP_I_X722 0x37D3 +#define I40E_DEV_ID_X722_VF 0x37CD +#define I40E_DEV_ID_X722_VF_HV 0x37D9 + +#endif /* I40E_DEVIDS_H */ diff --git a/contrib/ofed/libirdma/i40iw_hw.h b/contrib/ofed/libirdma/i40iw_hw.h new file mode 100644 index 00000000000..fcbfea8dfe0 --- /dev/null +++ b/contrib/ofed/libirdma/i40iw_hw.h @@ -0,0 +1,63 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2015 - 2023 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef I40IW_HW_H +#define I40IW_HW_H + +enum i40iw_device_caps_const { + I40IW_MAX_WQ_FRAGMENT_COUNT = 3, + I40IW_MAX_SGE_RD = 1, + I40IW_MAX_PUSH_PAGE_COUNT = 0, + I40IW_MAX_INLINE_DATA_SIZE = 48, + I40IW_MAX_IRD_SIZE = 64, + I40IW_MAX_ORD_SIZE = 64, + I40IW_MAX_WQ_ENTRIES = 2048, + I40IW_MAX_WQE_SIZE_RQ = 128, + I40IW_MAX_PDS = 32768, + I40IW_MAX_STATS_COUNT = 16, + I40IW_MAX_CQ_SIZE = 1048575, + I40IW_MAX_OUTBOUND_MSG_SIZE = 2147483647, + I40IW_MAX_INBOUND_MSG_SIZE = 2147483647, + I40IW_MIN_WQ_SIZE = 4 /* WQEs */, +}; + +#define I40IW_QP_WQE_MIN_SIZE 32 +#define I40IW_QP_WQE_MAX_SIZE 128 +#define I40IW_MAX_RQ_WQE_SHIFT 2 +#define I40IW_MAX_QUANTA_PER_WR 2 + +#define I40IW_QP_SW_MAX_SQ_QUANTA 2048 +#define I40IW_QP_SW_MAX_RQ_QUANTA 16384 +#define I40IW_QP_SW_MAX_WQ_QUANTA 2048 +#endif /* I40IW_HW_H */ diff --git a/contrib/ofed/libirdma/ice_devids.h b/contrib/ofed/libirdma/ice_devids.h new file mode 100644 index 00000000000..57a7f2f7c2a --- /dev/null +++ b/contrib/ofed/libirdma/ice_devids.h @@ -0,0 +1,91 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2019 - 2020 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ICE_DEVIDS_H +#define ICE_DEVIDS_H + +#define PCI_VENDOR_ID_INTEL 0x8086 + +/* Device IDs */ +/* Intel(R) Ethernet Connection E823-L for backplane */ +#define ICE_DEV_ID_E823L_BACKPLANE 0x124C +/* Intel(R) Ethernet Connection E823-L for SFP */ +#define ICE_DEV_ID_E823L_SFP 0x124D +/* Intel(R) Ethernet Connection E823-L/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E823L_10G_BASE_T 0x124E +/* Intel(R) Ethernet Connection E823-L 1GbE */ +#define ICE_DEV_ID_E823L_1GBE 0x124F +/* Intel(R) Ethernet Connection E823-L for QSFP */ +#define ICE_DEV_ID_E823L_QSFP 0x151D +/* Intel(R) Ethernet Controller E810-C for backplane */ +#define ICE_DEV_ID_E810C_BACKPLANE 0x1591 +/* Intel(R) Ethernet Controller E810-C for QSFP */ +#define ICE_DEV_ID_E810C_QSFP 0x1592 +/* Intel(R) Ethernet Controller E810-C for SFP */ +#define ICE_DEV_ID_E810C_SFP 0x1593 +/* Intel(R) Ethernet Controller E810-XXV for backplane */ +#define ICE_DEV_ID_E810_XXV_BACKPLANE 0x1599 +/* Intel(R) Ethernet Controller E810-XXV for QSFP */ +#define ICE_DEV_ID_E810_XXV_QSFP 0x159A +/* Intel(R) Ethernet Controller E810-XXV for SFP */ +#define ICE_DEV_ID_E810_XXV_SFP 0x159B +/* Intel(R) Ethernet Connection E823-C for backplane */ +#define ICE_DEV_ID_E823C_BACKPLANE 0x188A +/* Intel(R) Ethernet Connection E823-C for QSFP */ +#define ICE_DEV_ID_E823C_QSFP 0x188B +/* Intel(R) Ethernet Connection E823-C for SFP */ +#define ICE_DEV_ID_E823C_SFP 0x188C +/* Intel(R) Ethernet Connection E823-C/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E823C_10G_BASE_T 0x188D +/* Intel(R) Ethernet Connection E823-C 1GbE */ +#define ICE_DEV_ID_E823C_SGMII 0x188E +/* Intel(R) Ethernet Connection C822N for backplane */ +#define ICE_DEV_ID_C822N_BACKPLANE 0x1890 +/* Intel(R) Ethernet Connection C822N for QSFP */ +#define ICE_DEV_ID_C822N_QSFP 0x1891 +/* Intel(R) Ethernet Connection C822N for SFP */ +#define ICE_DEV_ID_C822N_SFP 0x1892 +/* Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E822C_10G_BASE_T 0x1893 +/* Intel(R) Ethernet Connection E822-C 1GbE */ +#define ICE_DEV_ID_E822C_SGMII 0x1894 +/* Intel(R) Ethernet Connection E822-L for backplane */ +#define ICE_DEV_ID_E822L_BACKPLANE 0x1897 +/* Intel(R) Ethernet Connection E822-L for SFP */ +#define ICE_DEV_ID_E822L_SFP 0x1898 +/* Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E822L_10G_BASE_T 0x1899 +/* Intel(R) Ethernet Connection E822-L 1GbE */ +#define ICE_DEV_ID_E822L_SGMII 0x189A +#endif /* ICE_DEVIDS_H */ diff --git a/contrib/ofed/libirdma/irdma-abi.h b/contrib/ofed/libirdma/irdma-abi.h new file mode 100644 index 00000000000..ae805919ea5 --- /dev/null +++ b/contrib/ofed/libirdma/irdma-abi.h @@ -0,0 +1,159 @@ +/*- + * SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) + * + * + * Copyright (c) 2006 - 2022 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IRDMA_ABI_H +#define IRDMA_ABI_H + +#include + +/* irdma must support legacy GEN_1 i40iw kernel + * and user-space whose last ABI ver is 5 + */ +#define IRDMA_ABI_VER 5 + +enum irdma_memreg_type { + IRDMA_MEMREG_TYPE_MEM = 0, + IRDMA_MEMREG_TYPE_QP = 1, + IRDMA_MEMREG_TYPE_CQ = 2, +}; + +enum { + IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, + IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1, +}; + +enum { + IRDMA_CREATE_QP_USE_START_WQE_IDX = 1 << 0, +}; + +struct irdma_alloc_ucontext_req { + __u32 rsvd32; + __u8 userspace_ver; + __u8 rsvd8[3]; + __aligned_u64 comp_mask; +}; + +struct irdma_alloc_ucontext_resp { + __u32 max_pds; + __u32 max_qps; + __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */ + __u8 kernel_ver; + __u8 rsvd[3]; + __aligned_u64 feature_flags; + __aligned_u64 db_mmap_key; + __u32 max_hw_wq_frags; + __u32 max_hw_read_sges; + __u32 max_hw_inline; + __u32 max_hw_rq_quanta; + __u32 max_hw_wq_quanta; + __u32 min_hw_cq_size; + __u32 max_hw_cq_size; + __u16 max_hw_sq_chunk; + __u8 hw_rev; + __u8 rsvd2; + __aligned_u64 comp_mask; + __u16 min_hw_wq_size; + __u8 rsvd3[6]; +}; + +struct irdma_alloc_pd_resp { + __u32 pd_id; + __u8 rsvd[4]; +}; + +struct irdma_resize_cq_req { + __aligned_u64 user_cq_buffer; +}; + +struct irdma_create_cq_req { + __aligned_u64 user_cq_buf; + __aligned_u64 user_shadow_area; +}; + +struct irdma_create_qp_req { + __aligned_u64 user_wqe_bufs; + __aligned_u64 user_compl_ctx; + __aligned_u64 comp_mask; +}; + +struct irdma_mem_reg_req { + __u16 reg_type; /* enum irdma_memreg_type */ + __u16 cq_pages; + __u16 rq_pages; + __u16 sq_pages; +}; + +struct irdma_modify_qp_req { + __u8 sq_flush; + __u8 rq_flush; + __u8 rsvd[6]; +}; + +struct irdma_create_cq_resp { + __u32 cq_id; + __u32 cq_size; +}; + +struct irdma_create_qp_resp { + __u32 qp_id; + __u32 actual_sq_size; + __u32 actual_rq_size; + __u32 irdma_drv_opt; + __u16 push_idx; + __u8 lsmm; + __u8 rsvd; + __u32 qp_caps; + __aligned_u64 comp_mask; + __u8 start_wqe_idx; + __u8 rsvd2[7]; +}; + +struct irdma_modify_qp_resp { + __aligned_u64 push_wqe_mmap_key; + __aligned_u64 push_db_mmap_key; + __u16 push_offset; + __u8 push_valid; + __u8 rd_fence_rate; + __u8 rsvd[4]; +}; + +struct irdma_create_ah_resp { + __u32 ah_id; + __u8 rsvd[4]; +}; +#endif /* IRDMA_ABI_H */ diff 
--git a/contrib/ofed/libirdma/irdma.h b/contrib/ofed/libirdma/irdma.h new file mode 100644 index 00000000000..f4a5a4796f8 --- /dev/null +++ b/contrib/ofed/libirdma/irdma.h @@ -0,0 +1,93 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2017 - 2022 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IRDMA_H +#define IRDMA_H + +#define RDMA_BIT2(type, a) ((u##type) 1UL << a) +#define RDMA_MASK3(type, mask, shift) ((u##type) mask << shift) +#define MAKEMASK(m, s) ((m) << (s)) + +#define IRDMA_WQEALLOC_WQE_DESC_INDEX_S 20 +#define IRDMA_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20) + +enum irdma_vers { + IRDMA_GEN_RSVD = 0, + IRDMA_GEN_1 = 1, + IRDMA_GEN_2 = 2, +}; + +struct irdma_uk_attrs { + u64 feature_flags; + u32 max_hw_wq_frags; + u32 max_hw_read_sges; + u32 max_hw_inline; + u32 max_hw_rq_quanta; + u32 max_hw_wq_quanta; + u32 min_hw_cq_size; + u32 max_hw_cq_size; + u16 max_hw_sq_chunk; + u16 min_hw_wq_size; + u8 hw_rev; +}; + +struct irdma_hw_attrs { + struct irdma_uk_attrs uk_attrs; + u64 max_hw_outbound_msg_size; + u64 max_hw_inbound_msg_size; + u64 max_mr_size; + u64 page_size_cap; + u32 min_hw_qp_id; + u32 min_hw_aeq_size; + u32 max_hw_aeq_size; + u32 min_hw_ceq_size; + u32 max_hw_ceq_size; + u32 max_hw_device_pages; + u32 max_hw_vf_fpm_id; + u32 first_hw_vf_fpm_id; + u32 max_hw_ird; + u32 max_hw_ord; + u32 max_hw_wqes; + u32 max_hw_pds; + u32 max_hw_ena_vf_count; + u32 max_qp_wr; + u32 max_pe_ready_count; + u32 max_done_count; + u32 max_sleep_count; + u32 max_cqp_compl_wait_time_ms; + u16 max_stat_inst; + u16 max_stat_idx; +}; + +#endif /* IRDMA_H*/ diff --git a/contrib/ofed/libirdma/irdma_defs.h b/contrib/ofed/libirdma/irdma_defs.h new file mode 100644 index 00000000000..39d4e7772c3 --- /dev/null +++ b/contrib/ofed/libirdma/irdma_defs.h @@ -0,0 +1,502 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2015 - 2023 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IRDMA_DEFS_H +#define IRDMA_DEFS_H + +#define IRDMA_BYTE_0 0 +#define IRDMA_BYTE_8 8 +#define IRDMA_BYTE_16 16 +#define IRDMA_BYTE_24 24 +#define IRDMA_BYTE_32 32 +#define IRDMA_BYTE_40 40 +#define IRDMA_BYTE_48 48 +#define IRDMA_BYTE_56 56 +#define IRDMA_BYTE_64 64 +#define IRDMA_BYTE_72 72 +#define IRDMA_BYTE_80 80 +#define IRDMA_BYTE_88 88 +#define IRDMA_BYTE_96 96 +#define IRDMA_BYTE_104 104 +#define IRDMA_BYTE_112 112 +#define IRDMA_BYTE_120 120 +#define IRDMA_BYTE_128 128 +#define IRDMA_BYTE_136 136 +#define IRDMA_BYTE_144 144 +#define IRDMA_BYTE_152 152 +#define IRDMA_BYTE_160 160 +#define IRDMA_BYTE_168 168 +#define IRDMA_BYTE_176 176 +#define IRDMA_BYTE_184 184 +#define IRDMA_BYTE_192 192 +#define IRDMA_BYTE_200 200 +#define IRDMA_BYTE_208 208 +#define IRDMA_BYTE_216 216 + +#define IRDMA_QP_TYPE_IWARP 1 +#define IRDMA_QP_TYPE_UDA 2 +#define IRDMA_QP_TYPE_ROCE_RC 3 +#define IRDMA_QP_TYPE_ROCE_UD 4 + +#define IRDMA_HW_PAGE_SIZE 4096 +#define IRDMA_HW_PAGE_SHIFT 12 +#define IRDMA_CQE_QTYPE_RQ 0 +#define IRDMA_CQE_QTYPE_SQ 1 + +#define IRDMA_QP_SW_MIN_WQSIZE 8 /* in WRs*/ +#define IRDMA_QP_WQE_MIN_SIZE 32 +#define IRDMA_QP_WQE_MAX_SIZE 256 +#define IRDMA_QP_WQE_MIN_QUANTA 1 +#define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2 +#define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3 + +#define IRDMA_SQ_RSVD 258 +#define IRDMA_RQ_RSVD 1 + +#define IRDMA_FEATURE_RTS_AE BIT_ULL(0) +#define IRDMA_FEATURE_CQ_RESIZE BIT_ULL(1) +#define IRDMA_FEATURE_RELAX_RQ_ORDER BIT_ULL(2) +#define IRDMA_FEATURE_64_BYTE_CQE BIT_ULL(5) + +#define IRDMAQP_OP_RDMA_WRITE 0x00 +#define IRDMAQP_OP_RDMA_READ 0x01 +#define IRDMAQP_OP_RDMA_SEND 0x03 +#define IRDMAQP_OP_RDMA_SEND_INV 0x04 +#define IRDMAQP_OP_RDMA_SEND_SOL_EVENT 0x05 +#define IRDMAQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06 +#define IRDMAQP_OP_BIND_MW 0x08 +#define IRDMAQP_OP_FAST_REGISTER 0x09 +#define IRDMAQP_OP_LOCAL_INVALIDATE 0x0a +#define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b +#define IRDMAQP_OP_NOP 0x0c + +#ifndef LS_64_1 +#define LS_64_1(val, bits) ((u64)(uintptr_t)(val) << (bits)) +#define RS_64_1(val, bits) ((u64)(uintptr_t)(val) >> (bits)) +#define LS_32_1(val, bits) ((u32)((val) << (bits))) +#define RS_32_1(val, bits) ((u32)((val) >> (bits))) +#endif +#ifndef GENMASK_ULL +#define GENMASK_ULL(high, low) ((0xFFFFFFFFFFFFFFFFULL >> (64ULL - 
((high) - (low) + 1ULL))) << (low)) +#endif /* GENMASK_ULL */ +#ifndef GENMASK +#define GENMASK(high, low) ((0xFFFFFFFFUL >> (32UL - ((high) - (low) + 1UL))) << (low)) +#endif /* GENMASK */ +#ifndef FIELD_PREP +#define FIELD_PREP(mask, val) (((u64)(val) << mask##_S) & (mask)) +#define FIELD_GET(mask, val) (((val) & mask) >> mask##_S) +#endif /* FIELD_PREP */ + +#define IRDMA_CQPHC_QPCTX_S 0 +#define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0) +#define IRDMA_QP_DBSA_HW_SQ_TAIL_S 0 +#define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0) +#define IRDMA_CQ_DBSA_CQEIDX_S 0 +#define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0) +#define IRDMA_CQ_DBSA_SW_CQ_SELECT_S 0 +#define IRDMA_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(13, 0) +#define IRDMA_CQ_DBSA_ARM_NEXT_S 14 +#define IRDMA_CQ_DBSA_ARM_NEXT BIT_ULL(14) +#define IRDMA_CQ_DBSA_ARM_NEXT_SE_S 15 +#define IRDMA_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15) +#define IRDMA_CQ_DBSA_ARM_SEQ_NUM_S 16 +#define IRDMA_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(17, 16) + +/* CQP and iWARP Completion Queue */ +#define IRDMA_CQ_QPCTX_S IRDMA_CQPHC_QPCTX_S +#define IRDMA_CQ_QPCTX IRDMA_CQPHC_QPCTX + +#define IRDMA_CQ_MINERR_S 0 +#define IRDMA_CQ_MINERR GENMASK_ULL(15, 0) +#define IRDMA_CQ_MAJERR_S 16 +#define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16) +#define IRDMA_CQ_WQEIDX_S 32 +#define IRDMA_CQ_WQEIDX GENMASK_ULL(46, 32) +#define IRDMA_CQ_EXTCQE_S 50 +#define IRDMA_CQ_EXTCQE BIT_ULL(50) +#define IRDMA_OOO_CMPL_S 54 +#define IRDMA_OOO_CMPL BIT_ULL(54) +#define IRDMA_CQ_ERROR_S 55 +#define IRDMA_CQ_ERROR BIT_ULL(55) +#define IRDMA_CQ_SQ_S 62 +#define IRDMA_CQ_SQ BIT_ULL(62) + +#define IRDMA_CQ_VALID_S 63 +#define IRDMA_CQ_VALID BIT_ULL(63) +#define IRDMA_CQ_IMMVALID BIT_ULL(62) +#define IRDMA_CQ_UDSMACVALID_S 61 +#define IRDMA_CQ_UDSMACVALID BIT_ULL(61) +#define IRDMA_CQ_UDVLANVALID_S 60 +#define IRDMA_CQ_UDVLANVALID BIT_ULL(60) +#define IRDMA_CQ_UDSMAC_S 0 +#define IRDMA_CQ_UDSMAC GENMASK_ULL(47, 0) +#define IRDMA_CQ_UDVLAN_S 48 +#define IRDMA_CQ_UDVLAN GENMASK_ULL(63, 48) + +#define IRDMA_CQ_IMMDATA_S 0 +#define IRDMA_CQ_IMMVALID_S 62 +#define IRDMA_CQ_IMMDATA GENMASK_ULL(125, 62) +#define IRDMA_CQ_IMMDATALOW32_S 0 +#define IRDMA_CQ_IMMDATALOW32 GENMASK_ULL(31, 0) +#define IRDMA_CQ_IMMDATAUP32_S 32 +#define IRDMA_CQ_IMMDATAUP32 GENMASK_ULL(63, 32) +#define IRDMACQ_PAYLDLEN_S 0 +#define IRDMACQ_PAYLDLEN GENMASK_ULL(31, 0) +#define IRDMACQ_TCPSQN_ROCEPSN_RTT_TS_S 32 +#define IRDMACQ_TCPSQN_ROCEPSN_RTT_TS GENMASK_ULL(63, 32) +#define IRDMACQ_INVSTAG_S 0 +#define IRDMACQ_INVSTAG GENMASK_ULL(31, 0) +#define IRDMACQ_QPID_S 32 +#define IRDMACQ_QPID GENMASK_ULL(55, 32) + +#define IRDMACQ_UDSRCQPN_S 0 +#define IRDMACQ_UDSRCQPN GENMASK_ULL(31, 0) +#define IRDMACQ_PSHDROP_S 51 +#define IRDMACQ_PSHDROP BIT_ULL(51) +#define IRDMACQ_STAG_S 53 +#define IRDMACQ_STAG BIT_ULL(53) +#define IRDMACQ_IPV4_S 53 +#define IRDMACQ_IPV4 BIT_ULL(53) +#define IRDMACQ_SOEVENT_S 54 +#define IRDMACQ_SOEVENT BIT_ULL(54) +#define IRDMACQ_OP_S 56 +#define IRDMACQ_OP GENMASK_ULL(61, 56) + +/* Manage Push Page - MPP */ +#define IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff +#define IRDMA_INVALID_PUSH_PAGE_INDEX 0xffffffff + +#define IRDMAQPSQ_OPCODE_S 32 +#define IRDMAQPSQ_OPCODE GENMASK_ULL(37, 32) +#define IRDMAQPSQ_COPY_HOST_PBL_S 43 +#define IRDMAQPSQ_COPY_HOST_PBL BIT_ULL(43) +#define IRDMAQPSQ_ADDFRAGCNT_S 38 +#define IRDMAQPSQ_ADDFRAGCNT GENMASK_ULL(41, 38) +#define IRDMAQPSQ_PUSHWQE_S 56 +#define IRDMAQPSQ_PUSHWQE BIT_ULL(56) +#define IRDMAQPSQ_STREAMMODE_S 58 +#define IRDMAQPSQ_STREAMMODE BIT_ULL(58) +#define IRDMAQPSQ_WAITFORRCVPDU_S 59 
+#define IRDMAQPSQ_WAITFORRCVPDU BIT_ULL(59) +#define IRDMAQPSQ_READFENCE_S 60 +#define IRDMAQPSQ_READFENCE BIT_ULL(60) +#define IRDMAQPSQ_LOCALFENCE_S 61 +#define IRDMAQPSQ_LOCALFENCE BIT_ULL(61) +#define IRDMAQPSQ_UDPHEADER_S 61 +#define IRDMAQPSQ_UDPHEADER BIT_ULL(61) +#define IRDMAQPSQ_L4LEN_S 42 +#define IRDMAQPSQ_L4LEN GENMASK_ULL(45, 42) +#define IRDMAQPSQ_SIGCOMPL_S 62 +#define IRDMAQPSQ_SIGCOMPL BIT_ULL(62) +#define IRDMAQPSQ_VALID_S 63 +#define IRDMAQPSQ_VALID BIT_ULL(63) + +#define IRDMAQPSQ_FRAG_TO_S IRDMA_CQPHC_QPCTX_S +#define IRDMAQPSQ_FRAG_TO IRDMA_CQPHC_QPCTX +#define IRDMAQPSQ_FRAG_VALID_S 63 +#define IRDMAQPSQ_FRAG_VALID BIT_ULL(63) +#define IRDMAQPSQ_FRAG_LEN_S 32 +#define IRDMAQPSQ_FRAG_LEN GENMASK_ULL(62, 32) +#define IRDMAQPSQ_FRAG_STAG_S 0 +#define IRDMAQPSQ_FRAG_STAG GENMASK_ULL(31, 0) +#define IRDMAQPSQ_GEN1_FRAG_LEN_S 0 +#define IRDMAQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0) +#define IRDMAQPSQ_GEN1_FRAG_STAG_S 32 +#define IRDMAQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32) +#define IRDMAQPSQ_REMSTAGINV_S 0 +#define IRDMAQPSQ_REMSTAGINV GENMASK_ULL(31, 0) +#define IRDMAQPSQ_DESTQKEY_S 0 +#define IRDMAQPSQ_DESTQKEY GENMASK_ULL(31, 0) +#define IRDMAQPSQ_DESTQPN_S 32 +#define IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32) +#define IRDMAQPSQ_AHID_S 0 +#define IRDMAQPSQ_AHID GENMASK_ULL(16, 0) +#define IRDMAQPSQ_INLINEDATAFLAG_S 57 +#define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57) + +#define IRDMA_INLINE_VALID_S 7 +#define IRDMAQPSQ_INLINEDATALEN_S 48 +#define IRDMAQPSQ_INLINEDATALEN GENMASK_ULL(55, 48) +#define IRDMAQPSQ_IMMDATAFLAG_S 47 +#define IRDMAQPSQ_IMMDATAFLAG BIT_ULL(47) +#define IRDMAQPSQ_REPORTRTT_S 46 +#define IRDMAQPSQ_REPORTRTT BIT_ULL(46) + +#define IRDMAQPSQ_IMMDATA_S 0 +#define IRDMAQPSQ_IMMDATA GENMASK_ULL(63, 0) +#define IRDMAQPSQ_REMSTAG_S 0 +#define IRDMAQPSQ_REMSTAG GENMASK_ULL(31, 0) + +#define IRDMAQPSQ_REMTO_S IRDMA_CQPHC_QPCTX_S +#define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX + +#define IRDMAQPSQ_STAGRIGHTS_S 48 +#define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48) +#define IRDMAQPSQ_VABASEDTO_S 53 +#define IRDMAQPSQ_VABASEDTO BIT_ULL(53) +#define IRDMAQPSQ_MEMWINDOWTYPE_S 54 +#define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54) + +#define IRDMAQPSQ_MWLEN_S IRDMA_CQPHC_QPCTX_S +#define IRDMAQPSQ_MWLEN IRDMA_CQPHC_QPCTX +#define IRDMAQPSQ_PARENTMRSTAG_S 32 +#define IRDMAQPSQ_PARENTMRSTAG GENMASK_ULL(63, 32) +#define IRDMAQPSQ_MWSTAG_S 0 +#define IRDMAQPSQ_MWSTAG GENMASK_ULL(31, 0) + +#define IRDMAQPSQ_BASEVA_TO_FBO_S IRDMA_CQPHC_QPCTX_S +#define IRDMAQPSQ_BASEVA_TO_FBO IRDMA_CQPHC_QPCTX + +#define IRDMAQPSQ_LOCSTAG_S 0 +#define IRDMAQPSQ_LOCSTAG GENMASK_ULL(31, 0) + +/* iwarp QP RQ WQE common fields */ +#define IRDMAQPRQ_ADDFRAGCNT_S IRDMAQPSQ_ADDFRAGCNT_S +#define IRDMAQPRQ_ADDFRAGCNT IRDMAQPSQ_ADDFRAGCNT + +#define IRDMAQPRQ_VALID_S IRDMAQPSQ_VALID_S +#define IRDMAQPRQ_VALID IRDMAQPSQ_VALID + +#define IRDMAQPRQ_COMPLCTX_S IRDMA_CQPHC_QPCTX_S +#define IRDMAQPRQ_COMPLCTX IRDMA_CQPHC_QPCTX + +#define IRDMAQPRQ_FRAG_LEN_S IRDMAQPSQ_FRAG_LEN_S +#define IRDMAQPRQ_FRAG_LEN IRDMAQPSQ_FRAG_LEN + +#define IRDMAQPRQ_STAG_S IRDMAQPSQ_FRAG_STAG_S +#define IRDMAQPRQ_STAG IRDMAQPSQ_FRAG_STAG + +#define IRDMAQPRQ_TO_S IRDMAQPSQ_FRAG_TO_S +#define IRDMAQPRQ_TO IRDMAQPSQ_FRAG_TO + +#define IRDMAPFINT_OICR_HMC_ERR_M BIT(26) +#define IRDMAPFINT_OICR_PE_PUSH_M BIT(27) +#define IRDMAPFINT_OICR_PE_CRITERR_M BIT(28) + +#define IRDMA_GET_RING_OFFSET(_ring, _i) \ + ( \ + ((_ring).head + (_i)) % (_ring).size \ + ) + +#define IRDMA_GET_CQ_ELEM_AT_OFFSET(_cq, _i, _cqe) \ + { \ + __u32 offset; \ + offset = 
IRDMA_GET_RING_OFFSET((_cq)->cq_ring, _i); \ + (_cqe) = (_cq)->cq_base[offset].buf; \ + } +#define IRDMA_GET_CURRENT_CQ_ELEM(_cq) \ + ( \ + (_cq)->cq_base[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \ + ) +#define IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \ + ( \ + ((struct irdma_extended_cqe *) \ + ((_cq)->cq_base))[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \ + ) + +#define IRDMA_RING_INIT(_ring, _size) \ + { \ + (_ring).head = 0; \ + (_ring).tail = 0; \ + (_ring).size = (_size); \ + } +#define IRDMA_RING_SIZE(_ring) ((_ring).size) +#define IRDMA_RING_CURRENT_HEAD(_ring) ((_ring).head) +#define IRDMA_RING_CURRENT_TAIL(_ring) ((_ring).tail) + +#define IRDMA_RING_MOVE_HEAD(_ring, _retcode) \ + { \ + u32 size; \ + size = (_ring).size; \ + if (!IRDMA_RING_FULL_ERR(_ring)) { \ + (_ring).head = ((_ring).head + 1) % size; \ + (_retcode) = 0; \ + } else { \ + (_retcode) = ENOSPC; \ + } \ + } +#define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ + { \ + u32 size; \ + size = (_ring).size; \ + if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < size) { \ + (_ring).head = ((_ring).head + (_count)) % size; \ + (_retcode) = 0; \ + } else { \ + (_retcode) = ENOSPC; \ + } \ + } +#define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \ + { \ + u32 size; \ + size = (_ring).size; \ + if (!IRDMA_SQ_RING_FULL_ERR(_ring)) { \ + (_ring).head = ((_ring).head + 1) % size; \ + (_retcode) = 0; \ + } else { \ + (_retcode) = ENOSPC; \ + } \ + } +#define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ + { \ + u32 size; \ + size = (_ring).size; \ + if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < (size - 256)) { \ + (_ring).head = ((_ring).head + (_count)) % size; \ + (_retcode) = 0; \ + } else { \ + (_retcode) = ENOSPC; \ + } \ + } +#define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ + (_ring).head = ((_ring).head + (_count)) % (_ring).size + +#define IRDMA_RING_MOVE_TAIL(_ring) \ + (_ring).tail = ((_ring).tail + 1) % (_ring).size + +#define IRDMA_RING_MOVE_HEAD_NOCHECK(_ring) \ + (_ring).head = ((_ring).head + 1) % (_ring).size + +#define IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ + (_ring).tail = ((_ring).tail + (_count)) % (_ring).size + +#define IRDMA_RING_SET_TAIL(_ring, _pos) \ + (_ring).tail = (_pos) % (_ring).size + +#define IRDMA_RING_FULL_ERR(_ring) \ + ( \ + (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \ + ) + +#define IRDMA_ERR_RING_FULL2(_ring) \ + ( \ + (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 2)) \ + ) + +#define IRDMA_ERR_RING_FULL3(_ring) \ + ( \ + (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 3)) \ + ) + +#define IRDMA_SQ_RING_FULL_ERR(_ring) \ + ( \ + (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 257)) \ + ) + +#define IRDMA_ERR_SQ_RING_FULL2(_ring) \ + ( \ + (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 258)) \ + ) +#define IRDMA_ERR_SQ_RING_FULL3(_ring) \ + ( \ + (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 259)) \ + ) +#define IRDMA_RING_MORE_WORK(_ring) \ + ( \ + (IRDMA_RING_USED_QUANTA(_ring) != 0) \ + ) + +#define IRDMA_RING_USED_QUANTA(_ring) \ + ( \ + (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \ + ) + +#define IRDMA_RING_FREE_QUANTA(_ring) \ + ( \ + ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 1) \ + ) + +#define IRDMA_SQ_RING_FREE_QUANTA(_ring) \ + ( \ + ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 257) \ + ) + +#define IRDMA_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ + { \ + index = IRDMA_RING_CURRENT_HEAD(_ring); \ + IRDMA_RING_MOVE_HEAD(_ring, _retcode); \ + } + +enum irdma_qp_wqe_size { + 
IRDMA_WQE_SIZE_32 = 32, + IRDMA_WQE_SIZE_64 = 64, + IRDMA_WQE_SIZE_96 = 96, + IRDMA_WQE_SIZE_128 = 128, + IRDMA_WQE_SIZE_256 = 256, +}; + +/** + * set_64bit_val - set 64 bit value to hw wqe + * @wqe_words: wqe addr to write + * @byte_index: index in wqe + * @val: value to write + **/ +static inline void set_64bit_val(__le64 *wqe_words, u32 byte_index, u64 val) +{ + wqe_words[byte_index >> 3] = htole64(val); +} + +/** + * set_32bit_val - set 32 bit value to hw wqe + * @wqe_words: wqe addr to write + * @byte_index: index in wqe + * @val: value to write + **/ +static inline void set_32bit_val(__le32 *wqe_words, u32 byte_index, u32 val) +{ + wqe_words[byte_index >> 2] = htole32(val); +} + +/** + * get_64bit_val - read 64 bit value from wqe + * @wqe_words: wqe addr + * @byte_index: index to read from + * @val: read value + **/ +static inline void get_64bit_val(__le64 *wqe_words, u32 byte_index, u64 *val) +{ + *val = le64toh(wqe_words[byte_index >> 3]); +} + +/** + * get_32bit_val - read 32 bit value from wqe + * @wqe_words: wqe addr + * @byte_index: index to reaad from + * @val: return 32 bit value + **/ +static inline void get_32bit_val(__le32 *wqe_words, u32 byte_index, u32 *val) +{ + *val = le32toh(wqe_words[byte_index >> 2]); +} +#endif /* IRDMA_DEFS_H */ diff --git a/contrib/ofed/libirdma/irdma_uk.c b/contrib/ofed/libirdma/irdma_uk.c new file mode 100644 index 00000000000..115c5f0a27f --- /dev/null +++ b/contrib/ofed/libirdma/irdma_uk.c @@ -0,0 +1,1916 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2015 - 2023 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "osdep.h" +#include "irdma_defs.h" +#include "irdma_user.h" +#include "irdma.h" + +/** + * irdma_set_fragment - set fragment in wqe + * @wqe: wqe for setting fragment + * @offset: offset value + * @sge: sge length and stag + * @valid: The wqe valid + */ +static void +irdma_set_fragment(__le64 * wqe, u32 offset, struct ibv_sge *sge, + u8 valid) +{ + if (sge) { + set_64bit_val(wqe, offset, + FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr)); + set_64bit_val(wqe, offset + IRDMA_BYTE_8, + FIELD_PREP(IRDMAQPSQ_VALID, valid) | + FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->length) | + FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->lkey)); + } else { + set_64bit_val(wqe, offset, 0); + set_64bit_val(wqe, offset + IRDMA_BYTE_8, + FIELD_PREP(IRDMAQPSQ_VALID, valid)); + } +} + +/** + * irdma_set_fragment_gen_1 - set fragment in wqe + * @wqe: wqe for setting fragment + * @offset: offset value + * @sge: sge length and stag + * @valid: wqe valid flag + */ +static void +irdma_set_fragment_gen_1(__le64 * wqe, u32 offset, + struct ibv_sge *sge, u8 valid) +{ + if (sge) { + set_64bit_val(wqe, offset, + FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr)); + set_64bit_val(wqe, offset + IRDMA_BYTE_8, + FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->length) | + FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->lkey)); + } else { + set_64bit_val(wqe, offset, 0); + set_64bit_val(wqe, offset + IRDMA_BYTE_8, 0); + } +} + +/** + * irdma_nop_hdr - Format header section of noop WQE + * @qp: hw qp ptr + */ +static inline u64 irdma_nop_hdr(struct irdma_qp_uk *qp){ + return FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, false) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); +} + +/** + * irdma_nop_1 - insert a NOP wqe + * @qp: hw qp ptr + */ +static int +irdma_nop_1(struct irdma_qp_uk *qp) +{ + __le64 *wqe; + u32 wqe_idx; + + if (!qp->sq_ring.head) + return EINVAL; + + wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); + wqe = qp->sq_base[wqe_idx].elem; + + qp->sq_wrtrk_array[wqe_idx].quanta = IRDMA_QP_WQE_MIN_QUANTA; + + set_64bit_val(wqe, IRDMA_BYTE_0, 0); + set_64bit_val(wqe, IRDMA_BYTE_8, 0); + set_64bit_val(wqe, IRDMA_BYTE_16, 0); + + /* make sure WQE is written before valid bit is set */ + udma_to_device_barrier(); + + set_64bit_val(wqe, IRDMA_BYTE_24, irdma_nop_hdr(qp)); + + return 0; +} + +/** + * irdma_clr_wqes - clear next 128 sq entries + * @qp: hw qp ptr + * @qp_wqe_idx: wqe_idx + */ +void +irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx) +{ + __le64 *wqe; + u32 wqe_idx; + + if (!(qp_wqe_idx & 0x7F)) { + wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size; + wqe = qp->sq_base[wqe_idx].elem; + if (wqe_idx) + memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000); + else + memset(wqe, qp->swqe_polarity ? 
0xFF : 0, 0x1000); + } +} + +/** + * irdma_uk_qp_post_wr - ring doorbell + * @qp: hw qp ptr + */ +void +irdma_uk_qp_post_wr(struct irdma_qp_uk *qp) +{ + u64 temp; + u32 hw_sq_tail; + u32 sw_sq_head; + + /* valid bit is written and loads completed before reading shadow */ + atomic_thread_fence(memory_order_seq_cst); + + /* read the doorbell shadow area */ + get_64bit_val(qp->shadow_area, IRDMA_BYTE_0, &temp); + + hw_sq_tail = (u32)FIELD_GET(IRDMA_QP_DBSA_HW_SQ_TAIL, temp); + sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); + if (sw_sq_head != qp->initial_ring.head) { + if (qp->push_dropped) { + db_wr32(qp->qp_id, qp->wqe_alloc_db); + qp->push_dropped = false; + } else if (sw_sq_head != hw_sq_tail) { + if (sw_sq_head > qp->initial_ring.head) { + if (hw_sq_tail >= qp->initial_ring.head && + hw_sq_tail < sw_sq_head) + db_wr32(qp->qp_id, qp->wqe_alloc_db); + } else { + if (hw_sq_tail >= qp->initial_ring.head || + hw_sq_tail < sw_sq_head) + db_wr32(qp->qp_id, qp->wqe_alloc_db); + } + } + } + + qp->initial_ring.head = qp->sq_ring.head; +} + +/** + * irdma_qp_ring_push_db - ring qp doorbell + * @qp: hw qp ptr + * @wqe_idx: wqe index + */ +static void +irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx) +{ + set_32bit_val(qp->push_db, 0, + FIELD_PREP(IRDMA_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | qp->qp_id); + qp->initial_ring.head = qp->sq_ring.head; + qp->push_mode = true; + qp->push_dropped = false; +} + +void +irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 * wqe, u16 quanta, + u32 wqe_idx, bool post_sq) +{ + __le64 *push; + + if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) != + IRDMA_RING_CURRENT_TAIL(qp->sq_ring) && + !qp->push_mode) { + irdma_uk_qp_post_wr(qp); + } else { + push = (__le64 *) ((uintptr_t)qp->push_wqe + + (wqe_idx & 0x7) * 0x20); + irdma_memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); + irdma_qp_ring_push_db(qp, wqe_idx); + } +} + +/** + * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go + * @qp: hw qp ptr + * @wqe_idx: return wqe index + * @quanta: (in/out) ptr to size of WR in quanta. 
Modified in case pad is needed + * @total_size: size of WR in bytes + * @info: info on WR + */ +__le64 * +irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, + u16 *quanta, u32 total_size, + struct irdma_post_sq_info *info) +{ + __le64 *wqe; + __le64 *wqe_0 = NULL; + u32 nop_wqe_idx; + u16 avail_quanta, wqe_quanta = *quanta; + u16 i; + + avail_quanta = qp->uk_attrs->max_hw_sq_chunk - + (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) % + qp->uk_attrs->max_hw_sq_chunk); + + if (*quanta <= avail_quanta) { + /* WR fits in current chunk */ + if (*quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) + return NULL; + } else { + /* Need to pad with NOP */ + if (*quanta + avail_quanta > + IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) + return NULL; + + nop_wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); + for (i = 0; i < avail_quanta; i++) { + irdma_nop_1(qp); + IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); + } + if (qp->push_db && info->push_wqe) + irdma_qp_push_wqe(qp, qp->sq_base[nop_wqe_idx].elem, + avail_quanta, nop_wqe_idx, true); + } + + *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); + if (!*wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + + IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, *quanta); + + irdma_clr_wqes(qp, *wqe_idx); + + wqe = qp->sq_base[*wqe_idx].elem; + if (qp->uk_attrs->hw_rev == IRDMA_GEN_1 && wqe_quanta == 1 && + (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) & 1)) { + wqe_0 = qp->sq_base[IRDMA_RING_CURRENT_HEAD(qp->sq_ring)].elem; + wqe_0[3] = htole64(FIELD_PREP(IRDMAQPSQ_VALID, + qp->swqe_polarity ? 0 : 1)); + } + qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id; + qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; + qp->sq_wrtrk_array[*wqe_idx].quanta = wqe_quanta; + qp->sq_wrtrk_array[*wqe_idx].signaled = info->signaled; + + return wqe; +} + +/** + * irdma_qp_get_next_recv_wqe - get next qp's rcv wqe + * @qp: hw qp ptr + * @wqe_idx: return wqe index + */ +__le64 * +irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) +{ + __le64 *wqe; + int ret_code; + + if (IRDMA_RING_FULL_ERR(qp->rq_ring)) + return NULL; + + IRDMA_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code); + if (ret_code) + return NULL; + + if (!*wqe_idx) + qp->rwqe_polarity = !qp->rwqe_polarity; + /* rq_wqe_size_multiplier is no of 32 byte quanta in one rq wqe */ + wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem; + + return wqe; +} + +/** + * irdma_uk_rdma_write - rdma write operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int +irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq) +{ + u64 hdr; + __le64 *wqe; + struct irdma_rdma_write *op_info; + u32 i, wqe_idx; + u32 total_size = 0, byte_off; + int ret_code; + u32 frag_cnt, addl_frag_cnt; + bool read_fence = false; + u16 quanta; + + info->push_wqe = qp->push_db ? true : false; + + op_info = &info->op.rdma_write; + if (op_info->num_lo_sges > qp->max_sq_frag_cnt) + return EINVAL; + + for (i = 0; i < op_info->num_lo_sges; i++) + total_size += op_info->lo_sg_list[i].length; + + read_fence |= info->read_fence; + + if (info->imm_data_valid) + frag_cnt = op_info->num_lo_sges + 1; + else + frag_cnt = op_info->num_lo_sges; + addl_frag_cnt = frag_cnt > 1 ? 
(frag_cnt - 1) : 0; + ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta); + if (ret_code) + return ret_code; + + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); + if (!wqe) + return ENOSPC; + + qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; + set_64bit_val(wqe, IRDMA_BYTE_16, + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); + + if (info->imm_data_valid) { + set_64bit_val(wqe, IRDMA_BYTE_0, + FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); + i = 0; + } else { + qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, + op_info->lo_sg_list, + qp->swqe_polarity); + i = 1; + } + + for (byte_off = IRDMA_BYTE_32; i < op_info->num_lo_sges; i++) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->lo_sg_list[i], + qp->swqe_polarity); + byte_off += 16; + } + + /* if not an odd number set valid bit in next fragment */ + if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) && + frag_cnt) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + qp->swqe_polarity); + if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) + ++addl_frag_cnt; + } + + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | + FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | + FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) | + FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) | + FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | + FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, IRDMA_BYTE_24, hdr); + if (info->push_wqe) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_uk_rdma_read - rdma read command + * @qp: hw qp ptr + * @info: post sq information + * @inv_stag: flag for inv_stag + * @post_sq: flag to post sq + */ +int +irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool inv_stag, bool post_sq) +{ + struct irdma_rdma_read *op_info; + int ret_code; + u32 i, byte_off, total_size = 0; + bool local_fence = false; + bool ord_fence = false; + u32 addl_frag_cnt; + __le64 *wqe; + u32 wqe_idx; + u16 quanta; + u64 hdr; + + info->push_wqe = qp->push_db ? true : false; + + op_info = &info->op.rdma_read; + if (qp->max_sq_frag_cnt < op_info->num_lo_sges) + return EINVAL; + + for (i = 0; i < op_info->num_lo_sges; i++) + total_size += op_info->lo_sg_list[i].length; + + ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta); + if (ret_code) + return ret_code; + + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); + if (!wqe) + return ENOSPC; + + if (qp->rd_fence_rate && (qp->ord_cnt++ == qp->rd_fence_rate)) { + ord_fence = true; + qp->ord_cnt = 0; + } + + qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; + addl_frag_cnt = op_info->num_lo_sges > 1 ? 
+ (op_info->num_lo_sges - 1) : 0; + local_fence |= info->local_fence; + + qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, op_info->lo_sg_list, + qp->swqe_polarity); + for (i = 1, byte_off = IRDMA_BYTE_32; i < op_info->num_lo_sges; ++i) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->lo_sg_list[i], + qp->swqe_polarity); + byte_off += IRDMA_BYTE_16; + } + + /* if not an odd number set valid bit in next fragment */ + if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && + !(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + qp->swqe_polarity); + if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) + ++addl_frag_cnt; + } + set_64bit_val(wqe, IRDMA_BYTE_16, + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | + FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | + FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(IRDMAQPSQ_OPCODE, + (inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ)) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | + FIELD_PREP(IRDMAQPSQ_READFENCE, + info->read_fence || ord_fence ? 1 : 0) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, IRDMA_BYTE_24, hdr); + if (info->push_wqe) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_uk_send - rdma send command + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int +irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq) +{ + __le64 *wqe; + struct irdma_post_send *op_info; + u64 hdr; + u32 i, wqe_idx, total_size = 0, byte_off; + int ret_code; + u32 frag_cnt, addl_frag_cnt; + bool read_fence = false; + u16 quanta; + + info->push_wqe = qp->push_db ? true : false; + + op_info = &info->op.send; + if (qp->max_sq_frag_cnt < op_info->num_sges) + return EINVAL; + + for (i = 0; i < op_info->num_sges; i++) + total_size += op_info->sg_list[i].length; + + if (info->imm_data_valid) + frag_cnt = op_info->num_sges + 1; + else + frag_cnt = op_info->num_sges; + ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta); + if (ret_code) + return ret_code; + + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); + if (!wqe) + return ENOSPC; + + read_fence |= info->read_fence; + addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0; + if (info->imm_data_valid) { + set_64bit_val(wqe, IRDMA_BYTE_0, + FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); + i = 0; + } else { + qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, + frag_cnt ? 
op_info->sg_list : NULL, + qp->swqe_polarity); + i = 1; + } + + for (byte_off = IRDMA_BYTE_32; i < op_info->num_sges; i++) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->sg_list[i], + qp->swqe_polarity); + byte_off += IRDMA_BYTE_16; + } + + /* if not an odd number set valid bit in next fragment */ + if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) && + frag_cnt) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + qp->swqe_polarity); + if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) + ++addl_frag_cnt; + } + + set_64bit_val(wqe, IRDMA_BYTE_16, + FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) | + FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp)); + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | + FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | + FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, + (info->imm_data_valid ? 1 : 0)) | + FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | + FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | + FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | + FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) | + FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, IRDMA_BYTE_24, hdr); + if (info->push_wqe) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_set_mw_bind_wqe_gen_1 - set mw bind wqe + * @wqe: wqe for setting fragment + * @op_info: info for setting bind wqe values + */ +static void +irdma_set_mw_bind_wqe_gen_1(__le64 * wqe, + struct irdma_bind_window *op_info) +{ + set_64bit_val(wqe, IRDMA_BYTE_0, (uintptr_t)op_info->va); + set_64bit_val(wqe, IRDMA_BYTE_8, + FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mw_stag) | + FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mr_stag)); + set_64bit_val(wqe, IRDMA_BYTE_16, op_info->bind_len); +} + +/** + * irdma_copy_inline_data_gen_1 - Copy inline data to wqe + * @wqe: pointer to wqe + * @sge_list: table of pointers to inline data + * @num_sges: Total inline data length + * @polarity: compatibility parameter + */ +static void +irdma_copy_inline_data_gen_1(u8 *wqe, struct ibv_sge *sge_list, + u32 num_sges, u8 polarity) +{ + u32 quanta_bytes_remaining = 16; + u32 i; + + for (i = 0; i < num_sges; i++) { + u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr; + u32 sge_len = sge_list[i].length; + + while (sge_len) { + u32 bytes_copied; + + bytes_copied = min(sge_len, quanta_bytes_remaining); + irdma_memcpy(wqe, cur_sge, bytes_copied); + wqe += bytes_copied; + cur_sge += bytes_copied; + quanta_bytes_remaining -= bytes_copied; + sge_len -= bytes_copied; + + if (!quanta_bytes_remaining) { + /* Remaining inline bytes reside after hdr */ + wqe += 16; + quanta_bytes_remaining = 32; + } + } + } +} + +/** + * irdma_inline_data_size_to_quanta_gen_1 - based on inline data, quanta + * @data_size: data size for inline + * + * Gets the quanta based on inline and immediate data. + */ +static inline u16 irdma_inline_data_size_to_quanta_gen_1(u32 data_size) { + return data_size <= 16 ? 
IRDMA_QP_WQE_MIN_QUANTA : 2; +} + +/** + * irdma_set_mw_bind_wqe - set mw bind in wqe + * @wqe: wqe for setting mw bind + * @op_info: info for setting wqe values + */ +static void +irdma_set_mw_bind_wqe(__le64 * wqe, + struct irdma_bind_window *op_info) +{ + set_64bit_val(wqe, IRDMA_BYTE_0, (uintptr_t)op_info->va); + set_64bit_val(wqe, IRDMA_BYTE_8, + FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mr_stag) | + FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mw_stag)); + set_64bit_val(wqe, IRDMA_BYTE_16, op_info->bind_len); +} + +/** + * irdma_copy_inline_data - Copy inline data to wqe + * @wqe: pointer to wqe + * @sge_list: table of pointers to inline data + * @num_sges: number of SGE's + * @polarity: polarity of wqe valid bit + */ +static void +irdma_copy_inline_data(u8 *wqe, struct ibv_sge *sge_list, + u32 num_sges, u8 polarity) +{ + u8 inline_valid = polarity << IRDMA_INLINE_VALID_S; + u32 quanta_bytes_remaining = 8; + u32 i; + bool first_quanta = true; + + wqe += 8; + + for (i = 0; i < num_sges; i++) { + u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr; + u32 sge_len = sge_list[i].length; + + while (sge_len) { + u32 bytes_copied; + + bytes_copied = min(sge_len, quanta_bytes_remaining); + irdma_memcpy(wqe, cur_sge, bytes_copied); + wqe += bytes_copied; + cur_sge += bytes_copied; + quanta_bytes_remaining -= bytes_copied; + sge_len -= bytes_copied; + + if (!quanta_bytes_remaining) { + quanta_bytes_remaining = 31; + + /* Remaining inline bytes reside after hdr */ + if (first_quanta) { + first_quanta = false; + wqe += 16; + } else { + *wqe = inline_valid; + wqe++; + } + } + } + } + if (!first_quanta && quanta_bytes_remaining < 31) + *(wqe + quanta_bytes_remaining) = inline_valid; +} + +/** + * irdma_inline_data_size_to_quanta - based on inline data, quanta + * @data_size: data size for inline + * + * Gets the quanta based on inline and immediate data. + */ +static u16 irdma_inline_data_size_to_quanta(u32 data_size) { + if (data_size <= 8) + return IRDMA_QP_WQE_MIN_QUANTA; + else if (data_size <= 39) + return 2; + else if (data_size <= 70) + return 3; + else if (data_size <= 101) + return 4; + else if (data_size <= 132) + return 5; + else if (data_size <= 163) + return 6; + else if (data_size <= 194) + return 7; + else + return 8; +} + +/** + * irdma_uk_inline_rdma_write - inline rdma write operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int +irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) +{ + __le64 *wqe; + struct irdma_rdma_write *op_info; + u64 hdr = 0; + u32 wqe_idx; + bool read_fence = false; + u16 quanta; + u32 i, total_size = 0; + + info->push_wqe = qp->push_db ? 
true : false; + op_info = &info->op.rdma_write; + + if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) + return EINVAL; + + for (i = 0; i < op_info->num_lo_sges; i++) + total_size += op_info->lo_sg_list[i].length; + + if (unlikely(total_size > qp->max_inline_data)) + return EINVAL; + + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); + if (!wqe) + return ENOSPC; + + qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; + read_fence |= info->read_fence; + set_64bit_val(wqe, IRDMA_BYTE_16, + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); + + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | + FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | + FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | + FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | + FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | + FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe ? 1 : 0) | + FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + if (info->imm_data_valid) + set_64bit_val(wqe, IRDMA_BYTE_0, + FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); + + qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list, + op_info->num_lo_sges, qp->swqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, IRDMA_BYTE_24, hdr); + + if (info->push_wqe) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_uk_inline_send - inline send operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int +irdma_uk_inline_send(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) +{ + __le64 *wqe; + struct irdma_post_send *op_info; + u64 hdr; + u32 wqe_idx; + bool read_fence = false; + u16 quanta; + u32 i, total_size = 0; + + info->push_wqe = qp->push_db ? true : false; + op_info = &info->op.send; + + if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) + return EINVAL; + + for (i = 0; i < op_info->num_sges; i++) + total_size += op_info->sg_list[i].length; + + if (unlikely(total_size > qp->max_inline_data)) + return EINVAL; + + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); + if (!wqe) + return ENOSPC; + + set_64bit_val(wqe, IRDMA_BYTE_16, + FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) | + FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp)); + + read_fence |= info->read_fence; + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | + FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | + FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | + FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | + FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, + (info->imm_data_valid ? 1 : 0)) | + FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 
1 : 0)) | + FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | + FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) | + FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + if (info->imm_data_valid) + set_64bit_val(wqe, IRDMA_BYTE_0, + FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); + qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list, + op_info->num_sges, qp->swqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, IRDMA_BYTE_24, hdr); + + if (info->push_wqe) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_uk_stag_local_invalidate - stag invalidate operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int +irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, + bool post_sq) +{ + __le64 *wqe; + struct irdma_inv_local_stag *op_info; + u64 hdr; + u32 wqe_idx; + bool local_fence = false; + struct ibv_sge sge = {0}; + u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; + + info->push_wqe = qp->push_db ? true : false; + op_info = &info->op.inv_local_stag; + local_fence = info->local_fence; + + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, info); + if (!wqe) + return ENOSPC; + + sge.lkey = op_info->target_stag; + qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, &sge, 0); + + set_64bit_val(wqe, IRDMA_BYTE_16, 0); + + hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | + FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, IRDMA_BYTE_24, hdr); + + if (info->push_wqe) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_uk_mw_bind - bind Memory Window + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int +irdma_uk_mw_bind(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq) +{ + __le64 *wqe; + struct irdma_bind_window *op_info; + u64 hdr; + u32 wqe_idx; + bool local_fence; + u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; + + info->push_wqe = qp->push_db ? true : false; + op_info = &info->op.bind_window; + local_fence = info->local_fence; + + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, info); + if (!wqe) + return ENOSPC; + + qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info); + + hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_BIND_MW) | + FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, + ((op_info->ena_reads << 2) | (op_info->ena_writes << 3))) | + FIELD_PREP(IRDMAQPSQ_VABASEDTO, + (op_info->addressing_type == IRDMA_ADDR_TYPE_VA_BASED ? 1 : 0)) | + FIELD_PREP(IRDMAQPSQ_MEMWINDOWTYPE, + (op_info->mem_window_type_1 ? 
1 : 0)) | + FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | + FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, IRDMA_BYTE_24, hdr); + + if (info->push_wqe) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + else if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_uk_post_receive - post receive wqe + * @qp: hw qp ptr + * @info: post rq information + */ +int +irdma_uk_post_receive(struct irdma_qp_uk *qp, + struct irdma_post_rq_info *info) +{ + u32 wqe_idx, i, byte_off; + u32 addl_frag_cnt; + __le64 *wqe; + u64 hdr; + + if (qp->max_rq_frag_cnt < info->num_sges) + return EINVAL; + + wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); + if (!wqe) + return ENOSPC; + + qp->rq_wrid_array[wqe_idx] = info->wr_id; + addl_frag_cnt = info->num_sges > 1 ? (info->num_sges - 1) : 0; + qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, info->sg_list, + qp->rwqe_polarity); + + for (i = 1, byte_off = IRDMA_BYTE_32; i < info->num_sges; i++) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i], + qp->rwqe_polarity); + byte_off += 16; + } + + /* if not an odd number set valid bit in next fragment */ + if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) && + info->num_sges) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + qp->rwqe_polarity); + if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) + ++addl_frag_cnt; + } + + set_64bit_val(wqe, IRDMA_BYTE_16, 0); + hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->rwqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, IRDMA_BYTE_24, hdr); + + return 0; +} + +/** + * irdma_uk_cq_resize - reset the cq buffer info + * @cq: cq to resize + * @cq_base: new cq buffer addr + * @cq_size: number of cqes + */ +void +irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int cq_size) +{ + cq->cq_base = cq_base; + cq->cq_size = cq_size; + IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); + cq->polarity = 1; +} + +/** + * irdma_uk_cq_set_resized_cnt - record the count of the resized buffers + * @cq: cq to resize + * @cq_cnt: the count of the resized cq buffers + */ +void +irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *cq, u16 cq_cnt) +{ + u64 temp_val; + u16 sw_cq_sel; + u8 arm_next_se; + u8 arm_next; + u8 arm_seq_num; + + get_64bit_val(cq->shadow_area, 32, &temp_val); + + sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val); + sw_cq_sel += cq_cnt; + + arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val); + arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val); + arm_next = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT, temp_val); + + temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | + FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | + FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) | + FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next); + + set_64bit_val(cq->shadow_area, 32, temp_val); +} + +/** + * irdma_uk_cq_request_notification - cq notification request (door bell) + * @cq: hw cq + * @cq_notify: notification type + */ +void +irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, + enum irdma_cmpl_notify cq_notify) +{ + u64 temp_val; + u16 sw_cq_sel; + u8 arm_next_se = 0; + u8 arm_next = 0; + u8 arm_seq_num; + + 
get_64bit_val(cq->shadow_area, IRDMA_BYTE_32, &temp_val); + arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val); + arm_seq_num++; + sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val); + arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val); + arm_next_se |= 1; + if (cq_notify == IRDMA_CQ_COMPL_EVENT) + arm_next = 1; + temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | + FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | + FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) | + FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next); + + set_64bit_val(cq->shadow_area, IRDMA_BYTE_32, temp_val); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + db_wr32(cq->cq_id, cq->cqe_alloc_db); +} + +static void +irdma_copy_quanta(__le64 * dst, __le64 * src, u32 offset, bool flip, + bool barrier) +{ + __le64 val; + + get_64bit_val(src, offset, &val); + set_64bit_val(dst, offset, val); + + get_64bit_val(src, offset + 8, &val); + if (flip) + val ^= IRDMAQPSQ_VALID; + set_64bit_val(dst, offset + 8, val); + + get_64bit_val(src, offset + 24, &val); + if (flip) + val ^= IRDMAQPSQ_VALID; + if (barrier) + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + set_64bit_val(dst, offset + 24, val); +} + +static void +irdma_copy_wqe(__le64 * dst, __le64 * src, u8 wqe_quanta, + bool flip_polarity) +{ + u32 offset; + + offset = 32; + while (--wqe_quanta) { + irdma_copy_quanta(dst, src, offset, flip_polarity, false); + offset += 32; + } + + irdma_copy_quanta(dst, src, 0, flip_polarity, true); +} + +static void +irdma_repost_rq_wqes(struct irdma_qp_uk *qp, u32 start_idx, + u32 end_idx) +{ + __le64 *dst_wqe, *src_wqe; + u32 wqe_idx = 0; + u8 wqe_quanta = qp->rq_wqe_size_multiplier; + bool flip_polarity; + u64 val; + + libirdma_debug("reposting_wqes: from start_idx=%d to end_idx = %d\n", start_idx, end_idx); + if (pthread_spin_lock(qp->lock)) + return; + while (start_idx != end_idx) { + IRDMA_RING_SET_TAIL(qp->rq_ring, start_idx + 1); + src_wqe = qp->rq_base[start_idx * qp->rq_wqe_size_multiplier].elem; + dst_wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); + + /* Check to see if polarity has changed */ + get_64bit_val(src_wqe, 24, &val); + if (FIELD_GET(IRDMAQPSQ_VALID, val) != qp->rwqe_polarity) + flip_polarity = true; + else + flip_polarity = false; + + qp->rq_wrid_array[wqe_idx] = qp->rq_wrid_array[start_idx]; + irdma_copy_wqe(dst_wqe, src_wqe, wqe_quanta, flip_polarity); + + start_idx = (start_idx + 1) % qp->rq_size; + } + + pthread_spin_unlock(qp->lock); +} + +static int +irdma_check_rq_cqe(struct irdma_qp_uk *qp, u32 *array_idx) +{ + u32 exp_idx = (qp->last_rx_cmpl_idx + 1) % qp->rq_size; + + if (*array_idx != exp_idx) { + if (qp->uk_attrs->feature_flags & IRDMA_FEATURE_RELAX_RQ_ORDER) { + irdma_repost_rq_wqes(qp, exp_idx, *array_idx); + qp->last_rx_cmpl_idx = *array_idx; + + return 0; + } + + *array_idx = exp_idx; + qp->last_rx_cmpl_idx = exp_idx; + + return -1; + } + + qp->last_rx_cmpl_idx = *array_idx; + + return 0; +} + +/** + * irdma_skip_duplicate_flush_cmpl - check last cmpl and update wqe if needed + * + * @ring: sq/rq ring + * @flush_seen: information if flush for specific ring was already seen + * @comp_status: completion status + * @wqe_idx: new value of WQE index returned if there is more work on ring + */ +static inline int +irdma_skip_duplicate_flush_cmpl(struct irdma_ring ring, u8 flush_seen, + enum irdma_cmpl_status comp_status, + u32 *wqe_idx) +{ + if (flush_seen) { + if 
(IRDMA_RING_MORE_WORK(ring)) + *wqe_idx = ring.tail; + else + return ENOENT; + } + + return 0; +} + +/** + * irdma_uk_cq_poll_cmpl - get cq completion info + * @cq: hw cq + * @info: cq poll information returned + */ +int +irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + struct irdma_cq_poll_info *info) +{ + u64 comp_ctx, qword0, qword2, qword3; + __le64 *cqe; + struct irdma_qp_uk *qp; + struct irdma_ring *pring = NULL; + u32 wqe_idx; + int ret_code; + bool move_cq_head = true; + u8 polarity; + bool ext_valid; + __le64 *ext_cqe; + + if (cq->avoid_mem_cflct) + cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq); + else + cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq); + + get_64bit_val(cqe, IRDMA_BYTE_24, &qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + if (polarity != cq->polarity) + return ENOENT; + + /* Ensure CQE contents are read after valid bit is checked */ + udma_from_device_barrier(); + + ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3); + if (ext_valid) { + u64 qword6, qword7; + u32 peek_head; + + if (cq->avoid_mem_cflct) { + ext_cqe = (__le64 *) ((u8 *)cqe + 32); + get_64bit_val(ext_cqe, IRDMA_BYTE_24, &qword7); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); + } else { + peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size; + ext_cqe = cq->cq_base[peek_head].buf; + get_64bit_val(ext_cqe, IRDMA_BYTE_24, &qword7); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); + if (!peek_head) + polarity ^= 1; + } + if (polarity != cq->polarity) + return ENOENT; + + /* Ensure ext CQE contents are read after ext valid bit is checked */ + udma_from_device_barrier(); + + info->imm_valid = (bool)FIELD_GET(IRDMA_CQ_IMMVALID, qword7); + if (info->imm_valid) { + u64 qword4; + + get_64bit_val(ext_cqe, IRDMA_BYTE_0, &qword4); + info->imm_data = (u32)FIELD_GET(IRDMA_CQ_IMMDATALOW32, qword4); + } + info->ud_smac_valid = (bool)FIELD_GET(IRDMA_CQ_UDSMACVALID, qword7); + info->ud_vlan_valid = (bool)FIELD_GET(IRDMA_CQ_UDVLANVALID, qword7); + if (info->ud_smac_valid || info->ud_vlan_valid) { + get_64bit_val(ext_cqe, IRDMA_BYTE_16, &qword6); + if (info->ud_vlan_valid) + info->ud_vlan = (u16)FIELD_GET(IRDMA_CQ_UDVLAN, qword6); + if (info->ud_smac_valid) { + info->ud_smac[5] = qword6 & 0xFF; + info->ud_smac[4] = (qword6 >> 8) & 0xFF; + info->ud_smac[3] = (qword6 >> 16) & 0xFF; + info->ud_smac[2] = (qword6 >> 24) & 0xFF; + info->ud_smac[1] = (qword6 >> 32) & 0xFF; + info->ud_smac[0] = (qword6 >> 40) & 0xFF; + } + } + } else { + info->imm_valid = false; + info->ud_smac_valid = false; + info->ud_vlan_valid = false; + } + + info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); + info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); + info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3); + info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); + get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); + qp = (struct irdma_qp_uk *)(irdma_uintptr) comp_ctx; + if (info->error) { + info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); + info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3); + switch (info->major_err) { + case IRDMA_FLUSH_MAJOR_ERR: + /* Set the min error to standard flush error code for remaining cqes */ + if (info->minor_err != FLUSH_GENERAL_ERR) { + qword3 &= ~IRDMA_CQ_MINERR; + qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR); + set_64bit_val(cqe, IRDMA_BYTE_24, qword3); + } + info->comp_status = IRDMA_COMPL_STATUS_FLUSHED; + break; + default: + info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; + break; + } + } else { + info->comp_status = IRDMA_COMPL_STATUS_SUCCESS; + } + + get_64bit_val(cqe, 
IRDMA_BYTE_0, &qword0); + get_64bit_val(cqe, IRDMA_BYTE_16, &qword2); + + info->stat.raw = (u32)FIELD_GET(IRDMACQ_TCPSQN_ROCEPSN_RTT_TS, qword0); + info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2); + info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2); + + info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3); + if (!qp || qp->destroy_pending) { + ret_code = EFAULT; + goto exit; + } + wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); + info->qp_handle = (irdma_qp_handle) (irdma_uintptr) qp; + info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); + + if (info->q_type == IRDMA_CQE_QTYPE_RQ) { + u32 array_idx; + + ret_code = irdma_skip_duplicate_flush_cmpl(qp->rq_ring, + qp->rq_flush_seen, + info->comp_status, + &wqe_idx); + if (ret_code != 0) + goto exit; + + array_idx = wqe_idx / qp->rq_wqe_size_multiplier; + + if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED || + info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) { + if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) { + ret_code = ENOENT; + goto exit; + } + + info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail]; + info->signaled = 1; + array_idx = qp->rq_ring.tail; + } else { + info->wr_id = qp->rq_wrid_array[array_idx]; + info->signaled = 1; + if (irdma_check_rq_cqe(qp, &array_idx)) { + info->wr_id = qp->rq_wrid_array[array_idx]; + info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; + IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); + return 0; + } + } + + info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); + + if (qword3 & IRDMACQ_STAG) { + info->stag_invalid_set = true; + info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); + } else { + info->stag_invalid_set = false; + } + IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); + if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { + qp->rq_flush_seen = true; + if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) + qp->rq_flush_complete = true; + else + move_cq_head = false; + } + pring = &qp->rq_ring; + } else { /* q_type is IRDMA_CQE_QTYPE_SQ */ + if (qp->first_sq_wq) { + if (wqe_idx + 1 >= qp->conn_wqes) + qp->first_sq_wq = false; + + if (wqe_idx < qp->conn_wqes && qp->sq_ring.head == qp->sq_ring.tail) { + IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); + IRDMA_RING_MOVE_TAIL(cq->cq_ring); + set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, + IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); + memset(info, 0, sizeof(*info)); + return irdma_uk_cq_poll_cmpl(cq, info); + } + } + /* cease posting push mode on push drop */ + if (info->push_dropped) { + qp->push_mode = false; + qp->push_dropped = true; + } + ret_code = irdma_skip_duplicate_flush_cmpl(qp->sq_ring, + qp->sq_flush_seen, + info->comp_status, + &wqe_idx); + if (ret_code != 0) + goto exit; + if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) { + info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; + info->signaled = qp->sq_wrtrk_array[wqe_idx].signaled; + if (!info->comp_status) + info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len; + info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); + IRDMA_RING_SET_TAIL(qp->sq_ring, + wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); + } else { + if (pthread_spin_lock(qp->lock)) { + ret_code = ENOENT; + goto exit; + } + if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) { + pthread_spin_unlock(qp->lock); + ret_code = ENOENT; + goto exit; + } + + do { + __le64 *sw_wqe; + u64 wqe_qword; + u32 tail; + + tail = qp->sq_ring.tail; + sw_wqe = qp->sq_base[tail].elem; + get_64bit_val(sw_wqe, IRDMA_BYTE_24, + &wqe_qword); + info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, + wqe_qword); + IRDMA_RING_SET_TAIL(qp->sq_ring, + tail + 
qp->sq_wrtrk_array[tail].quanta); + if (info->op_type != IRDMAQP_OP_NOP) { + info->wr_id = qp->sq_wrtrk_array[tail].wrid; + info->signaled = qp->sq_wrtrk_array[tail].signaled; + info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len; + break; + } + } while (1); + + if (info->op_type == IRDMA_OP_TYPE_BIND_MW && + info->minor_err == FLUSH_PROT_ERR) + info->minor_err = FLUSH_MW_BIND_ERR; + qp->sq_flush_seen = true; + if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) + qp->sq_flush_complete = true; + pthread_spin_unlock(qp->lock); + } + pring = &qp->sq_ring; + } + + ret_code = 0; + +exit: + if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { + if (pring && IRDMA_RING_MORE_WORK(*pring)) + move_cq_head = false; + } + if (move_cq_head) { + IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); + if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring)) + cq->polarity ^= 1; + + if (ext_valid && !cq->avoid_mem_cflct) { + IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); + if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring)) + cq->polarity ^= 1; + } + + IRDMA_RING_MOVE_TAIL(cq->cq_ring); + if (!cq->avoid_mem_cflct && ext_valid) + IRDMA_RING_MOVE_TAIL(cq->cq_ring); + set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, + IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); + } else { + qword3 &= ~IRDMA_CQ_WQEIDX; + qword3 |= FIELD_PREP(IRDMA_CQ_WQEIDX, pring->tail); + set_64bit_val(cqe, IRDMA_BYTE_24, qword3); + } + + return ret_code; +} + +/** + * irdma_round_up_wq - return round up qp wq depth + * @wqdepth: wq depth in quanta to round up + */ +static int +irdma_round_up_wq(u32 wqdepth) +{ + int scount = 1; + + for (wqdepth--; scount <= 16; scount *= 2) + wqdepth |= wqdepth >> scount; + + return ++wqdepth; +} + +/** + * irdma_get_wqe_shift - get shift count for maximum wqe size + * @uk_attrs: qp HW attributes + * @sge: Maximum Scatter Gather Elements wqe + * @inline_data: Maximum inline data size + * @shift: Returns the shift needed based on sge + * + * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size. + * For 1 SGE or inline data <= 8, shift = 0 (wqe size of 32 + * bytes). For 2 or 3 SGEs or inline data <= 39, shift = 1 (wqe + * size of 64 bytes). + * For 4-7 SGE's and inline <= 101 Shift of 2 otherwise (wqe + * size of 256 bytes). + */ +void +irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, + u32 inline_data, u8 *shift) +{ + *shift = 0; + if (uk_attrs->hw_rev >= IRDMA_GEN_2) { + if (sge > 1 || inline_data > 8) { + if (sge < 4 && inline_data <= 39) + *shift = 1; + else if (sge < 8 && inline_data <= 101) + *shift = 2; + else + *shift = 3; + } + } else if (sge > 1 || inline_data > 16) { + *shift = (sge < 4 && inline_data <= 48) ? 
1 : 2; + } +} + +/* + * irdma_get_sqdepth - get SQ depth (quanta) @uk_attrs: qp HW attributes @sq_size: SQ size @shift: shift which + * determines size of WQE @sqdepth: depth of SQ + */ +int +irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) +{ + u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + + *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); + + if (*sqdepth < min_size) + *sqdepth = min_size; + else if (*sqdepth > uk_attrs->max_hw_wq_quanta) + return EINVAL; + + return 0; +} + +/* + * irdma_get_rqdepth - get RQ depth (quanta) @uk_attrs: qp HW attributes @rq_size: SRQ size @shift: shift which + * determines size of WQE @rqdepth: depth of RQ/SRQ + */ +int +irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) +{ + u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + + *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); + + if (*rqdepth < min_size) + *rqdepth = min_size; + else if (*rqdepth > uk_attrs->max_hw_rq_quanta) + return EINVAL; + + return 0; +} + +static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = { + .iw_copy_inline_data = irdma_copy_inline_data, + .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta, + .iw_set_fragment = irdma_set_fragment, + .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe, +}; + +static const struct irdma_wqe_uk_ops iw_wqe_uk_ops_gen_1 = { + .iw_copy_inline_data = irdma_copy_inline_data_gen_1, + .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta_gen_1, + .iw_set_fragment = irdma_set_fragment_gen_1, + .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe_gen_1, +}; + +/** + * irdma_setup_connection_wqes - setup WQEs necessary to complete + * connection. + * @qp: hw qp (user and kernel) + * @info: qp initialization info + */ +static void +irdma_setup_connection_wqes(struct irdma_qp_uk *qp, + struct irdma_qp_uk_init_info *info) +{ + u16 move_cnt = 1; + + if (info->start_wqe_idx) + move_cnt = info->start_wqe_idx; + else if (qp->uk_attrs->feature_flags & IRDMA_FEATURE_RTS_AE) + move_cnt = 3; + qp->conn_wqes = move_cnt; + IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, move_cnt); + IRDMA_RING_MOVE_TAIL_BY_COUNT(qp->sq_ring, move_cnt); + IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); +} + +/** + * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. + * @ukinfo: qp initialization info + * @sq_depth: Returns depth of SQ + * @sq_shift: Returns shift of SQ + */ +int +irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2 ? true : false; + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size, + *sq_shift, sq_depth); + + return status; +} + +/** + * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size. 
+ * @ukinfo: qp initialization info + * @rq_depth: Returns depth of RQ + * @rq_shift: Returns shift of RQ + */ +int +irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift) +{ + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } + + status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size, + *rq_shift, rq_depth); + + return status; +} + +/** + * irdma_uk_qp_init - initialize shared qp + * @qp: hw qp (user and kernel) + * @info: qp initialization info + * + * initializes the vars used in both user and kernel mode. + * size of the wqe depends on numbers of max. fragements + * allowed. Then size of wqe * the number of wqes should be the + * amount of memory allocated for sq and rq. + */ +int +irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) +{ + int ret_code = 0; + u32 sq_ring_size; + + qp->uk_attrs = info->uk_attrs; + if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || + info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) + return EINVAL; + + qp->qp_caps = info->qp_caps; + qp->sq_base = info->sq; + qp->rq_base = info->rq; + qp->qp_type = info->type ? info->type : IRDMA_QP_TYPE_IWARP; + qp->shadow_area = info->shadow_area; + qp->sq_wrtrk_array = info->sq_wrtrk_array; + + qp->rq_wrid_array = info->rq_wrid_array; + qp->wqe_alloc_db = info->wqe_alloc_db; + qp->last_rx_cmpl_idx = 0xffffffff; + qp->rd_fence_rate = info->rd_fence_rate; + qp->qp_id = info->qp_id; + qp->sq_size = info->sq_size; + qp->push_mode = false; + qp->max_sq_frag_cnt = info->max_sq_frag_cnt; + sq_ring_size = qp->sq_size << info->sq_shift; + IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); + IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); + if (info->first_sq_wq) { + irdma_setup_connection_wqes(qp, info); + qp->swqe_polarity = 1; + qp->first_sq_wq = true; + } else { + qp->swqe_polarity = 0; + } + qp->swqe_polarity_deferred = 1; + qp->rwqe_polarity = 0; + qp->rq_size = info->rq_size; + qp->max_rq_frag_cnt = info->max_rq_frag_cnt; + qp->max_inline_data = info->max_inline_data; + qp->rq_wqe_size = info->rq_shift; + IRDMA_RING_INIT(qp->rq_ring, qp->rq_size); + qp->rq_wqe_size_multiplier = 1 << info->rq_shift; + if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) + qp->wqe_ops = iw_wqe_uk_ops_gen_1; + else + qp->wqe_ops = iw_wqe_uk_ops; + qp->start_wqe_idx = info->start_wqe_idx; + + return ret_code; +} + +/** + * irdma_uk_cq_init - initialize shared cq (user and kernel) + * @cq: hw cq + * @info: hw cq initialization info + */ +int +irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info) +{ + cq->cq_base = info->cq_base; + cq->cq_id = info->cq_id; + cq->cq_size = info->cq_size; + cq->cqe_alloc_db = info->cqe_alloc_db; + cq->cq_ack_db = info->cq_ack_db; + cq->shadow_area = info->shadow_area; + cq->avoid_mem_cflct = info->avoid_mem_cflct; + IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); + cq->polarity = 1; + + return 0; +} + +/** + * irdma_uk_clean_cq - clean cq entries + * @q: completion context + * @cq: cq to clean + */ +int +irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq) +{ + __le64 *cqe; + u64 qword3, comp_ctx; + u32 cq_head; + u8 polarity, temp; + + cq_head = cq->cq_ring.head; + temp = cq->polarity; + do { + if (cq->avoid_mem_cflct) + cqe = ((struct irdma_extended_cqe *)(cq->cq_base))[cq_head].buf; + else + cqe = cq->cq_base[cq_head].buf; + get_64bit_val(cqe, IRDMA_BYTE_24, 
&qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + + if (polarity != temp) + break; + + /* Ensure CQE contents are read after valid bit is checked */ + udma_from_device_barrier(); + + get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); + if ((void *)(irdma_uintptr) comp_ctx == q) + set_64bit_val(cqe, IRDMA_BYTE_8, 0); + + cq_head = (cq_head + 1) % cq->cq_ring.size; + if (!cq_head) + temp ^= 1; + } while (true); + return 0; +} + +/** + * irdma_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ + * @frag_cnt: number of fragments + * @quanta: quanta for frag_cnt + */ +int +irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) +{ + switch (frag_cnt) { + case 0: + case 1: + *quanta = IRDMA_QP_WQE_MIN_QUANTA; + break; + case 2: + case 3: + *quanta = 2; + break; + case 4: + case 5: + *quanta = 3; + break; + case 6: + case 7: + *quanta = 4; + break; + case 8: + case 9: + *quanta = 5; + break; + case 10: + case 11: + *quanta = 6; + break; + case 12: + case 13: + *quanta = 7; + break; + case 14: + case 15: /* when immediate data is present */ + *quanta = 8; + break; + default: + return EINVAL; + } + + return 0; +} + +/** + * irdma_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ + * @frag_cnt: number of fragments + * @wqe_size: size in bytes given frag_cnt + */ +int +irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) +{ + switch (frag_cnt) { + case 0: + case 1: + *wqe_size = 32; + break; + case 2: + case 3: + *wqe_size = 64; + break; + case 4: + case 5: + case 6: + case 7: + *wqe_size = 128; + break; + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + *wqe_size = 256; + break; + default: + return EINVAL; + } + + return 0; +} diff --git a/contrib/ofed/libirdma/irdma_umain.c b/contrib/ofed/libirdma/irdma_umain.c new file mode 100644 index 00000000000..e8d27c31a0d --- /dev/null +++ b/contrib/ofed/libirdma/irdma_umain.c @@ -0,0 +1,276 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2021 - 2022 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#include +#include +#include +#include "irdma_umain.h" +#include "irdma-abi.h" +#include "irdma_uquery.h" + +#include "ice_devids.h" +#include "i40e_devids.h" + +#include "abi.h" + +/** + * Driver version + */ +char libirdma_version[] = "1.2.36-k"; + +unsigned int irdma_dbg; + +#define INTEL_HCA(d) \ + { .vendor = PCI_VENDOR_ID_INTEL, \ + .device = d } + +struct hca_info { + unsigned vendor; + unsigned device; +}; + +static const struct hca_info hca_table[] = { + INTEL_HCA(ICE_DEV_ID_E823L_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_E823L_SFP), + INTEL_HCA(ICE_DEV_ID_E823L_10G_BASE_T), + INTEL_HCA(ICE_DEV_ID_E823L_1GBE), + INTEL_HCA(ICE_DEV_ID_E823L_QSFP), + INTEL_HCA(ICE_DEV_ID_E810C_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_E810C_QSFP), + INTEL_HCA(ICE_DEV_ID_E810C_SFP), + INTEL_HCA(ICE_DEV_ID_E810_XXV_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_E810_XXV_QSFP), + INTEL_HCA(ICE_DEV_ID_E810_XXV_SFP), + INTEL_HCA(ICE_DEV_ID_E823C_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_E823C_QSFP), + INTEL_HCA(ICE_DEV_ID_E823C_SFP), + INTEL_HCA(ICE_DEV_ID_E823C_10G_BASE_T), + INTEL_HCA(ICE_DEV_ID_E823C_SGMII), + INTEL_HCA(ICE_DEV_ID_C822N_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_C822N_QSFP), + INTEL_HCA(ICE_DEV_ID_C822N_SFP), + INTEL_HCA(ICE_DEV_ID_E822C_10G_BASE_T), + INTEL_HCA(ICE_DEV_ID_E822C_SGMII), + INTEL_HCA(ICE_DEV_ID_E822L_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_E822L_SFP), + INTEL_HCA(ICE_DEV_ID_E822L_10G_BASE_T), + INTEL_HCA(ICE_DEV_ID_E822L_SGMII), +}; + +static struct ibv_context_ops irdma_ctx_ops = { + .query_device = irdma_uquery_device, + .query_port = irdma_uquery_port, + .alloc_pd = irdma_ualloc_pd, + .dealloc_pd = irdma_ufree_pd, + .reg_mr = irdma_ureg_mr, + .rereg_mr = NULL, + .dereg_mr = irdma_udereg_mr, + .alloc_mw = irdma_ualloc_mw, + .dealloc_mw = irdma_udealloc_mw, + .bind_mw = irdma_ubind_mw, + .create_cq = irdma_ucreate_cq, + .poll_cq = irdma_upoll_cq, + .req_notify_cq = irdma_uarm_cq, + .cq_event = irdma_cq_event, + .resize_cq = irdma_uresize_cq, + .destroy_cq = irdma_udestroy_cq, + .create_qp = irdma_ucreate_qp, + .query_qp = irdma_uquery_qp, + .modify_qp = irdma_umodify_qp, + .destroy_qp = irdma_udestroy_qp, + .post_send = irdma_upost_send, + .post_recv = irdma_upost_recv, + .create_ah = irdma_ucreate_ah, + .destroy_ah = irdma_udestroy_ah, + .attach_mcast = irdma_uattach_mcast, + .detach_mcast = irdma_udetach_mcast, +}; + +/** + * libirdma_query_device - fill libirdma_device structure + * @ctx_in - ibv_context identifying device + * @out - libirdma_device structure to fill quered info + * + * ctx_in is not used at the moment + */ +int +libirdma_query_device(struct ibv_context *ctx_in, struct libirdma_device *out) +{ + if (!out) + return EIO; + if (sizeof(out->lib_ver) < sizeof(libirdma_version)) + return ERANGE; + + out->query_ver = 1; + snprintf(out->lib_ver, min(sizeof(libirdma_version), sizeof(out->lib_ver)), + "%s", libirdma_version); + + return 0; +} + +static int +irdma_init_context(struct verbs_device *vdev, + struct ibv_context *ctx, int cmd_fd) +{ + struct irdma_uvcontext *iwvctx; + struct irdma_get_context cmd = {}; + struct irdma_get_context_resp resp = {}; + struct ibv_pd *ibv_pd; + u64 mmap_key; + + iwvctx = container_of(ctx, struct irdma_uvcontext, ibv_ctx); + iwvctx->ibv_ctx.cmd_fd = cmd_fd; + cmd.userspace_ver = IRDMA_ABI_VER; + if (ibv_cmd_get_context(&iwvctx->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp))) { + /* failed first attempt */ + printf("%s %s get context failure\n", __FILE__, __func__); + return -1; + } + iwvctx->uk_attrs.feature_flags = resp.feature_flags; + 
iwvctx->uk_attrs.hw_rev = resp.hw_rev; + iwvctx->uk_attrs.max_hw_wq_frags = resp.max_hw_wq_frags; + iwvctx->uk_attrs.max_hw_read_sges = resp.max_hw_read_sges; + iwvctx->uk_attrs.max_hw_inline = resp.max_hw_inline; + iwvctx->uk_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta; + iwvctx->uk_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta; + iwvctx->uk_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk; + iwvctx->uk_attrs.max_hw_cq_size = resp.max_hw_cq_size; + iwvctx->uk_attrs.min_hw_cq_size = resp.min_hw_cq_size; + iwvctx->uk_attrs.min_hw_wq_size = IRDMA_QP_SW_MIN_WQSIZE; + iwvctx->abi_ver = IRDMA_ABI_VER; + mmap_key = resp.db_mmap_key; + + iwvctx->db = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, + MAP_SHARED, cmd_fd, mmap_key); + if (iwvctx->db == MAP_FAILED) + goto err_free; + + iwvctx->ibv_ctx.ops = irdma_ctx_ops; + + ibv_pd = irdma_ualloc_pd(&iwvctx->ibv_ctx); + if (!ibv_pd) { + munmap(iwvctx->db, IRDMA_HW_PAGE_SIZE); + goto err_free; + } + + ibv_pd->context = &iwvctx->ibv_ctx; + iwvctx->iwupd = container_of(ibv_pd, struct irdma_upd, ibv_pd); + + return 0; + +err_free: + + printf("%s %s failure\n", __FILE__, __func__); + return -1; +} + +static void +irdma_cleanup_context(struct verbs_device *device, + struct ibv_context *ibctx) +{ + struct irdma_uvcontext *iwvctx; + + iwvctx = container_of(ibctx, struct irdma_uvcontext, ibv_ctx); + irdma_ufree_pd(&iwvctx->iwupd->ibv_pd); + munmap(iwvctx->db, IRDMA_HW_PAGE_SIZE); + +} + +static struct verbs_device_ops irdma_dev_ops = { + .init_context = irdma_init_context, + .uninit_context = irdma_cleanup_context, +}; + +static struct verbs_device * +irdma_driver_init(const char *uverbs_sys_path, + int abi_version) +{ + struct irdma_udevice *dev; + int i = 0; + unsigned int device_found = 0; + unsigned vendor_id, device_id; + unsigned hca_size; + char buf[8]; + + if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", + buf, sizeof(buf)) < 0) + return NULL; + sscanf(buf, "%i", &vendor_id); + if (vendor_id != PCI_VENDOR_ID_INTEL) + return NULL; + + if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", + buf, sizeof(buf)) < 0) + return NULL; + sscanf(buf, "%i", &device_id); + + hca_size = sizeof(hca_table) / sizeof(struct hca_info); + while (i < hca_size && !device_found) { + if (device_id == hca_table[i].device) + device_found = 1; + ++i; + } + + if (!device_found) + return NULL; + + if (abi_version < IRDMA_MIN_ABI_VERSION || + abi_version > IRDMA_MAX_ABI_VERSION) { + printf("Invalid ABI version: %d of %s\n", + abi_version, uverbs_sys_path); + return NULL; + } + + dev = calloc(1, sizeof(struct irdma_udevice)); + if (!dev) { + printf("Device creation for %s failed\n", uverbs_sys_path); + return NULL; + } + + dev->ibv_dev.ops = &irdma_dev_ops; + dev->ibv_dev.sz = sizeof(*dev); + dev->ibv_dev.size_of_context = sizeof(struct irdma_uvcontext) - + sizeof(struct ibv_context); + + return &dev->ibv_dev; +} + +static __attribute__((constructor)) +void +irdma_register_driver(void) +{ + verbs_register_driver("irdma", irdma_driver_init); +} diff --git a/contrib/ofed/libirdma/irdma_umain.h b/contrib/ofed/libirdma/irdma_umain.h new file mode 100644 index 00000000000..269609f8c77 --- /dev/null +++ b/contrib/ofed/libirdma/irdma_umain.h @@ -0,0 +1,200 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (C) 2019 - 2022 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IRDMA_UMAIN_H +#define IRDMA_UMAIN_H + +#include +#include +#include + +#include "osdep.h" +#include "irdma.h" +#include "irdma_defs.h" +#include "i40iw_hw.h" +#include "irdma_user.h" + +#define PFX "libirdma-" + +#define IRDMA_BASE_PUSH_PAGE 1 +#define IRDMA_U_MINCQ_SIZE 4 +#define IRDMA_DB_SHADOW_AREA_SIZE 64 +#define IRDMA_DB_CQ_OFFSET 64 + +LIST_HEAD(list_head, irdma_cq_buf); +LIST_HEAD(list_head_cmpl, irdma_cmpl_gen); + +enum irdma_supported_wc_flags_ex { + IRDMA_STANDARD_WC_FLAGS_EX = IBV_WC_EX_WITH_BYTE_LEN + | IBV_WC_EX_WITH_IMM + | IBV_WC_EX_WITH_QP_NUM + | IBV_WC_EX_WITH_SRC_QP + | IBV_WC_EX_WITH_SL, +}; + +struct irdma_udevice { + struct verbs_device ibv_dev; +}; + +struct irdma_uah { + struct ibv_ah ibv_ah; + uint32_t ah_id; + struct ibv_global_route grh; +}; + +struct irdma_upd { + struct ibv_pd ibv_pd; + void *arm_cq_page; + void *arm_cq; + uint32_t pd_id; +}; + +struct irdma_uvcontext { + struct ibv_context ibv_ctx; + struct irdma_upd *iwupd; + struct irdma_uk_attrs uk_attrs; + void *db; + int abi_ver; + bool legacy_mode:1; + bool use_raw_attrs:1; +}; + +struct irdma_uqp; + +struct irdma_cq_buf { + LIST_ENTRY(irdma_cq_buf) list; + struct irdma_cq_uk cq; + struct verbs_mr vmr; +}; + +extern pthread_mutex_t sigusr1_wait_mutex; + +struct verbs_cq { + union { + struct ibv_cq cq; + struct ibv_cq_ex cq_ex; + }; +}; + +struct irdma_ucq { + struct verbs_cq verbs_cq; + struct verbs_mr vmr; + struct verbs_mr vmr_shadow_area; + pthread_spinlock_t lock; + size_t buf_size; + bool is_armed; + bool skip_arm; + bool arm_sol; + bool skip_sol; + int comp_vector; + struct irdma_uqp *uqp; + struct irdma_cq_uk cq; + struct list_head resize_list; + /* for extended CQ completion fields */ + struct irdma_cq_poll_info cur_cqe; +}; + +struct irdma_uqp { + struct ibv_qp ibv_qp; + struct irdma_ucq *send_cq; + struct irdma_ucq *recv_cq; + struct verbs_mr vmr; + size_t buf_size; + uint32_t irdma_drv_opt; + pthread_spinlock_t lock; + uint16_t sq_sig_all; + uint16_t qperr; + uint16_t rsvd; + uint32_t pending_rcvs; + uint32_t wq_size; + struct ibv_recv_wr *pend_rx_wr; + struct irdma_qp_uk qp; + enum ibv_qp_type qp_type; +}; + +/* irdma_uverbs.c */ +int irdma_uquery_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct 
ibv_device_attr_ex *attr, size_t attr_size); +int irdma_uquery_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr); +struct ibv_pd *irdma_ualloc_pd(struct ibv_context *context); +int irdma_ufree_pd(struct ibv_pd *pd); +int irdma_uquery_device(struct ibv_context *, struct ibv_device_attr *); +struct ibv_mr *irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, + int access); +int irdma_udereg_mr(struct ibv_mr *mr); + +int irdma_urereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr, + size_t length, int access); + +struct ibv_mw *irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type); +int irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, + struct ibv_mw_bind *mw_bind); +int irdma_udealloc_mw(struct ibv_mw *mw); +struct ibv_cq *irdma_ucreate_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector); +struct ibv_cq_ex *irdma_ucreate_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr_ex); +void irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq, + struct ibv_cq_init_attr_ex *attr_ex); +int irdma_uresize_cq(struct ibv_cq *cq, int cqe); +int irdma_udestroy_cq(struct ibv_cq *cq); +int irdma_upoll_cq(struct ibv_cq *cq, int entries, struct ibv_wc *entry); +int irdma_uarm_cq(struct ibv_cq *cq, int solicited); +void irdma_cq_event(struct ibv_cq *cq); +struct ibv_qp *irdma_ucreate_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *attr); +int irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, + struct ibv_qp_init_attr *init_attr); +int irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask); +int irdma_udestroy_qp(struct ibv_qp *qp); +int irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + struct ibv_send_wr **bad_wr); +int irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, + struct ibv_recv_wr **bad_wr); +struct ibv_ah *irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr); +int irdma_udestroy_ah(struct ibv_ah *ibah); +int irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, + uint16_t lid); +int irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, + uint16_t lid); +void irdma_async_event(struct ibv_context *context, + struct ibv_async_event *event); +void irdma_set_hw_attrs(struct irdma_hw_attrs *attrs); +void *irdma_mmap(int fd, off_t offset); +void irdma_munmap(void *map); +#endif /* IRDMA_UMAIN_H */ diff --git a/contrib/ofed/libirdma/irdma_uquery.h b/contrib/ofed/libirdma/irdma_uquery.h new file mode 100644 index 00000000000..4660c05f0a9 --- /dev/null +++ b/contrib/ofed/libirdma/irdma_uquery.h @@ -0,0 +1,49 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (C) 2022 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef IRDMA_UQUERY_H +#define IRDMA_UQUERY_H + +#include +#include "osdep.h" + +struct libirdma_device { + uint32_t query_ver; + char lib_ver[32]; + uint8_t rsvd[128]; +}; + +int libirdma_query_device(struct ibv_context *ctx_in, struct libirdma_device *out); +#endif /* IRDMA_UQUERY_H */ diff --git a/contrib/ofed/libirdma/irdma_user.h b/contrib/ofed/libirdma/irdma_user.h new file mode 100644 index 00000000000..aeb6aa9feeb --- /dev/null +++ b/contrib/ofed/libirdma/irdma_user.h @@ -0,0 +1,470 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2015 - 2023 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef IRDMA_USER_H +#define IRDMA_USER_H + +#include "osdep.h" + +#define irdma_handle void * +#define irdma_adapter_handle irdma_handle +#define irdma_qp_handle irdma_handle +#define irdma_cq_handle irdma_handle +#define irdma_pd_id irdma_handle +#define irdma_stag_handle irdma_handle +#define irdma_stag_index u32 +#define irdma_stag u32 +#define irdma_stag_key u8 +#define irdma_tagged_offset u64 +#define irdma_access_privileges u32 +#define irdma_physical_fragment u64 +#define irdma_address_list u64 * +#define irdma_sgl struct ibv_sge * + +#define IRDMA_MAX_MR_SIZE 0x200000000000ULL + +#define IRDMA_ACCESS_FLAGS_LOCALREAD 0x01 +#define IRDMA_ACCESS_FLAGS_LOCALWRITE 0x02 +#define IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04 +#define IRDMA_ACCESS_FLAGS_REMOTEREAD 0x05 +#define IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08 +#define IRDMA_ACCESS_FLAGS_REMOTEWRITE 0x0a +#define IRDMA_ACCESS_FLAGS_BIND_WINDOW 0x10 +#define IRDMA_ACCESS_FLAGS_ZERO_BASED 0x20 +#define IRDMA_ACCESS_FLAGS_ALL 0x3f + +#define IRDMA_OP_TYPE_RDMA_WRITE 0x00 +#define IRDMA_OP_TYPE_RDMA_READ 0x01 +#define IRDMA_OP_TYPE_SEND 0x03 +#define IRDMA_OP_TYPE_SEND_INV 0x04 +#define IRDMA_OP_TYPE_SEND_SOL 0x05 +#define IRDMA_OP_TYPE_SEND_SOL_INV 0x06 +#define IRDMA_OP_TYPE_RDMA_WRITE_SOL 0x0d +#define IRDMA_OP_TYPE_BIND_MW 0x08 +#define IRDMA_OP_TYPE_FAST_REG_NSMR 0x09 +#define IRDMA_OP_TYPE_INV_STAG 0x0a +#define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b +#define IRDMA_OP_TYPE_NOP 0x0c +#define IRDMA_OP_TYPE_REC 0x3e +#define IRDMA_OP_TYPE_REC_IMM 0x3f + +#define IRDMA_FLUSH_MAJOR_ERR 1 + +enum irdma_device_caps_const { + IRDMA_WQE_SIZE = 4, + IRDMA_CQP_WQE_SIZE = 8, + IRDMA_CQE_SIZE = 4, + IRDMA_EXTENDED_CQE_SIZE = 8, + IRDMA_AEQE_SIZE = 2, + IRDMA_CEQE_SIZE = 1, + IRDMA_CQP_CTX_SIZE = 8, + IRDMA_SHADOW_AREA_SIZE = 8, + IRDMA_GATHER_STATS_BUF_SIZE = 1024, + IRDMA_MIN_IW_QP_ID = 0, + IRDMA_QUERY_FPM_BUF_SIZE = 176, + IRDMA_COMMIT_FPM_BUF_SIZE = 176, + IRDMA_MAX_IW_QP_ID = 262143, + IRDMA_MIN_CEQID = 0, + IRDMA_MAX_CEQID = 1023, + IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1, + IRDMA_MIN_CQID = 0, + IRDMA_MAX_CQID = 524287, + IRDMA_MIN_AEQ_ENTRIES = 1, + IRDMA_MAX_AEQ_ENTRIES = 524287, + IRDMA_MIN_CEQ_ENTRIES = 1, + IRDMA_MAX_CEQ_ENTRIES = 262143, + IRDMA_MIN_CQ_SIZE = 1, + IRDMA_MAX_CQ_SIZE = 1048575, + IRDMA_DB_ID_ZERO = 0, + /* 64K + 1 */ + IRDMA_MAX_OUTBOUND_MSG_SIZE = 65537, + /* 64K +1 */ + IRDMA_MAX_INBOUND_MSG_SIZE = 65537, + IRDMA_MAX_PE_ENA_VF_COUNT = 32, + IRDMA_MAX_VF_FPM_ID = 47, + IRDMA_MAX_SQ_PAYLOAD_SIZE = 2145386496, + IRDMA_MAX_INLINE_DATA_SIZE = 101, + IRDMA_MAX_WQ_ENTRIES = 32768, + IRDMA_Q2_BUF_SIZE = 256, + IRDMA_QP_CTX_SIZE = 256, + IRDMA_MAX_PDS = 262144, +}; + +enum irdma_addressing_type { + IRDMA_ADDR_TYPE_ZERO_BASED = 0, + IRDMA_ADDR_TYPE_VA_BASED = 1, +}; + +enum irdma_flush_opcode { + FLUSH_INVALID = 0, + FLUSH_GENERAL_ERR, + FLUSH_PROT_ERR, + FLUSH_REM_ACCESS_ERR, + FLUSH_LOC_QP_OP_ERR, + FLUSH_REM_OP_ERR, + FLUSH_LOC_LEN_ERR, + FLUSH_FATAL_ERR, + FLUSH_RETRY_EXC_ERR, + FLUSH_MW_BIND_ERR, + FLUSH_REM_INV_REQ_ERR, + FLUSH_RNR_RETRY_EXC_ERR, +}; + +enum irdma_cmpl_status { + IRDMA_COMPL_STATUS_SUCCESS = 0, + IRDMA_COMPL_STATUS_FLUSHED, + IRDMA_COMPL_STATUS_INVALID_WQE, + IRDMA_COMPL_STATUS_QP_CATASTROPHIC, + IRDMA_COMPL_STATUS_REMOTE_TERMINATION, + IRDMA_COMPL_STATUS_INVALID_STAG, + IRDMA_COMPL_STATUS_BASE_BOUND_VIOLATION, + IRDMA_COMPL_STATUS_ACCESS_VIOLATION, + IRDMA_COMPL_STATUS_INVALID_PD_ID, + IRDMA_COMPL_STATUS_WRAP_ERROR, + IRDMA_COMPL_STATUS_STAG_INVALID_PDID, + 
IRDMA_COMPL_STATUS_RDMA_READ_ZERO_ORD, + IRDMA_COMPL_STATUS_QP_NOT_PRIVLEDGED, + IRDMA_COMPL_STATUS_STAG_NOT_INVALID, + IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_SIZE, + IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY, + IRDMA_COMPL_STATUS_INVALID_FBO, + IRDMA_COMPL_STATUS_INVALID_LEN, + IRDMA_COMPL_STATUS_INVALID_ACCESS, + IRDMA_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG, + IRDMA_COMPL_STATUS_INVALID_VIRT_ADDRESS, + IRDMA_COMPL_STATUS_INVALID_REGION, + IRDMA_COMPL_STATUS_INVALID_WINDOW, + IRDMA_COMPL_STATUS_INVALID_TOTAL_LEN, + IRDMA_COMPL_STATUS_UNKNOWN, +}; + +enum irdma_cmpl_notify { + IRDMA_CQ_COMPL_EVENT = 0, + IRDMA_CQ_COMPL_SOLICITED = 1, +}; + +enum irdma_qp_caps { + IRDMA_WRITE_WITH_IMM = 1, + IRDMA_SEND_WITH_IMM = 2, + IRDMA_ROCE = 4, + IRDMA_PUSH_MODE = 8, +}; + +struct irdma_qp_uk; +struct irdma_cq_uk; +struct irdma_qp_uk_init_info; +struct irdma_cq_uk_init_info; + +struct irdma_ring { + volatile u32 head; + volatile u32 tail; /* effective tail */ + u32 size; +}; + +struct irdma_cqe { + __le64 buf[IRDMA_CQE_SIZE]; +}; + +struct irdma_extended_cqe { + __le64 buf[IRDMA_EXTENDED_CQE_SIZE]; +}; + +struct irdma_post_send { + irdma_sgl sg_list; + u32 num_sges; + u32 qkey; + u32 dest_qp; + u32 ah_id; +}; + +struct irdma_post_rq_info { + u64 wr_id; + irdma_sgl sg_list; + u32 num_sges; +}; + +struct irdma_rdma_write { + irdma_sgl lo_sg_list; + u32 num_lo_sges; + struct ibv_sge rem_addr; +}; + +struct irdma_rdma_read { + irdma_sgl lo_sg_list; + u32 num_lo_sges; + struct ibv_sge rem_addr; +}; + +struct irdma_bind_window { + irdma_stag mr_stag; + u64 bind_len; + void *va; + enum irdma_addressing_type addressing_type; + bool ena_reads:1; + bool ena_writes:1; + irdma_stag mw_stag; + bool mem_window_type_1:1; +}; + +struct irdma_inv_local_stag { + irdma_stag target_stag; +}; + +struct irdma_post_sq_info { + u64 wr_id; + u8 op_type; + u8 l4len; + bool signaled:1; + bool read_fence:1; + bool local_fence:1; + bool inline_data:1; + bool imm_data_valid:1; + bool push_wqe:1; + bool report_rtt:1; + bool udp_hdr:1; + bool defer_flag:1; + u32 imm_data; + u32 stag_to_inv; + union { + struct irdma_post_send send; + struct irdma_rdma_write rdma_write; + struct irdma_rdma_read rdma_read; + struct irdma_bind_window bind_window; + struct irdma_inv_local_stag inv_local_stag; + } op; +}; + +struct irdma_cq_poll_info { + u64 wr_id; + irdma_qp_handle qp_handle; + u32 bytes_xfered; + u32 qp_id; + u32 ud_src_qpn; + u32 imm_data; + irdma_stag inv_stag; /* or L_R_Key */ + enum irdma_cmpl_status comp_status; + u16 major_err; + u16 minor_err; + u16 ud_vlan; + u8 ud_smac[6]; + u8 op_type; + u8 q_type; + bool stag_invalid_set:1; /* or L_R_Key set */ + bool push_dropped:1; + bool error:1; + bool solicited_event:1; + bool ipv4:1; + bool ud_vlan_valid:1; + bool ud_smac_valid:1; + bool imm_valid:1; + bool signaled:1; + union { + u32 tcp_sqn; + u32 roce_psn; + u32 rtt; + u32 raw; + } stat; +}; + +int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); +int irdma_uk_inline_send(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); +int irdma_uk_mw_bind(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq); +int irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, + bool post_sq); +int irdma_uk_post_receive(struct irdma_qp_uk *qp, + struct irdma_post_rq_info *info); +void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp); +int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool inv_stag, bool post_sq); +int 
irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq); +int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq); +int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, + bool post_sq); + +struct irdma_wqe_uk_ops { + void (*iw_copy_inline_data)(u8 *dest, struct ibv_sge *sge_list, u32 num_sges, u8 polarity); + u16 (*iw_inline_data_size_to_quanta)(u32 data_size); + void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ibv_sge *sge, + u8 valid); + void (*iw_set_mw_bind_wqe)(__le64 *wqe, + struct irdma_bind_window *op_info); +}; + +int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + struct irdma_cq_poll_info *info); +void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, + enum irdma_cmpl_notify cq_notify); +void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size); +void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt); +int irdma_uk_cq_init(struct irdma_cq_uk *cq, + struct irdma_cq_uk_init_info *info); +int irdma_uk_qp_init(struct irdma_qp_uk *qp, + struct irdma_qp_uk_init_info *info); +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift); +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift); +struct irdma_sq_uk_wr_trk_info { + u64 wrid; + u32 wr_len; + u16 quanta; + u8 signaled; + u8 reserved[1]; +}; + +struct irdma_qp_quanta { + __le64 elem[IRDMA_WQE_SIZE]; +}; + +struct irdma_qp_uk { + struct irdma_qp_quanta *sq_base; + struct irdma_qp_quanta *rq_base; + struct irdma_uk_attrs *uk_attrs; + u32 IOMEM *wqe_alloc_db; + struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; + struct irdma_sig_wr_trk_info *sq_sigwrtrk_array; + u64 *rq_wrid_array; + __le64 *shadow_area; + __le32 *push_db; + __le64 *push_wqe; + struct irdma_ring sq_ring; + struct irdma_ring sq_sig_ring; + struct irdma_ring rq_ring; + struct irdma_ring initial_ring; + u32 qp_id; + u32 qp_caps; + u32 sq_size; + u32 rq_size; + u32 max_sq_frag_cnt; + u32 max_rq_frag_cnt; + u32 max_inline_data; + u32 last_rx_cmpl_idx; + u32 last_tx_cmpl_idx; + struct irdma_wqe_uk_ops wqe_ops; + u16 conn_wqes; + u8 qp_type; + u8 swqe_polarity; + u8 swqe_polarity_deferred; + u8 rwqe_polarity; + u8 rq_wqe_size; + u8 rq_wqe_size_multiplier; + u8 start_wqe_idx; + bool deferred_flag:1; + bool push_mode:1; /* whether the last post wqe was pushed */ + bool push_dropped:1; + bool first_sq_wq:1; + bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */ + bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ + bool destroy_pending:1; /* Indicates the QP is being destroyed */ + void *back_qp; + pthread_spinlock_t *lock; + u8 dbg_rq_flushed; + u16 ord_cnt; + u8 sq_flush_seen; + u8 rq_flush_seen; + u8 rd_fence_rate; +}; + +struct irdma_cq_uk { + struct irdma_cqe *cq_base; + u32 IOMEM *cqe_alloc_db; + u32 IOMEM *cq_ack_db; + __le64 *shadow_area; + u32 cq_id; + u32 cq_size; + struct irdma_ring cq_ring; + u8 polarity; + bool avoid_mem_cflct:1; +}; + +struct irdma_qp_uk_init_info { + struct irdma_qp_quanta *sq; + struct irdma_qp_quanta *rq; + struct irdma_uk_attrs *uk_attrs; + u32 IOMEM *wqe_alloc_db; + __le64 *shadow_area; + struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; + struct irdma_sig_wr_trk_info *sq_sigwrtrk_array; + u64 *rq_wrid_array; + u32 qp_id; + u32 qp_caps; + u32 sq_size; + u32 rq_size; + u32 max_sq_frag_cnt; + u32 max_rq_frag_cnt; + u32 
max_inline_data; + u32 sq_depth; + u32 rq_depth; + u8 first_sq_wq; + u8 start_wqe_idx; + u8 type; + u8 sq_shift; + u8 rq_shift; + u8 rd_fence_rate; + int abi_ver; + bool legacy_mode; +}; + +struct irdma_cq_uk_init_info { + u32 IOMEM *cqe_alloc_db; + u32 IOMEM *cq_ack_db; + struct irdma_cqe *cq_base; + __le64 *shadow_area; + u32 cq_size; + u32 cq_id; + bool avoid_mem_cflct; +}; + +__le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, + u16 *quanta, u32 total_size, + struct irdma_post_sq_info *info); +__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx); +int irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq); +int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); +int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); +int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); +void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, + u32 inline_data, u8 *shift); +int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth); +int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth); +void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, + u32 wqe_idx, bool post_sq); +void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); +#endif /* IRDMA_USER_H */ diff --git a/contrib/ofed/libirdma/irdma_uverbs.c b/contrib/ofed/libirdma/irdma_uverbs.c new file mode 100644 index 00000000000..e52ce1cfa22 --- /dev/null +++ b/contrib/ofed/libirdma/irdma_uverbs.c @@ -0,0 +1,2062 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (C) 2019 - 2023 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
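For reference (not part of the patch): the central producer-side type in irdma_user.h is irdma_post_sq_info, whose op union selects the payload consumed by the irdma_uk_* posting helpers. A minimal sketch, assuming qp points at an already initialized struct irdma_qp_uk; irdma_upost_send() in irdma_uverbs.c below does essentially this for each work request:

#include "irdma_user.h"

/* illustrative only, not part of the patch */
static int post_one_send(struct irdma_qp_uk *qp, struct ibv_sge *sgl,
			 u32 nsges, u64 wr_id)
{
	struct irdma_post_sq_info info = {};
	int err;

	info.wr_id = wr_id;
	info.op_type = IRDMA_OP_TYPE_SEND;
	info.signaled = true;
	info.op.send.sg_list = sgl;
	info.op.send.num_sges = nsges;

	err = irdma_uk_send(qp, &info, false);	/* build the WQE, defer the doorbell */
	if (!err)
		irdma_uk_qp_post_wr(qp);	/* ring the doorbell */
	return err;
}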
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "irdma_umain.h" +#include "abi.h" + +static inline void +print_fw_ver(uint64_t fw_ver, char *str, size_t len) +{ + uint16_t major, minor; + + major = fw_ver >> 32 & 0xffff; + minor = fw_ver & 0xffff; + + snprintf(str, len, "%d.%d", major, minor); +} + +/** + * irdma_uquery_device_ex - query device attributes including extended properties + * @context: user context for the device + * @input: extensible input struct for ibv_query_device_ex verb + * @attr: extended device attribute struct + * @attr_size: size of extended device attribute struct + **/ +int +irdma_uquery_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size) +{ + struct irdma_query_device_ex cmd = {}; + struct irdma_query_device_ex_resp resp = {}; + uint64_t fw_ver; + int ret; + + ret = ibv_cmd_query_device_ex(context, input, attr, attr_size, &fw_ver, + &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), + &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); + if (ret) + return ret; + + print_fw_ver(fw_ver, attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver)); + + return 0; +} + +/** + * irdma_uquery_device - call driver to query device for max resources + * @context: user context for the device + * @attr: where to save all the mx resources from the driver + **/ +int +irdma_uquery_device(struct ibv_context *context, struct ibv_device_attr *attr) +{ + struct ibv_query_device cmd; + uint64_t fw_ver; + int ret; + + ret = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof(cmd)); + if (ret) + return ret; + + print_fw_ver(fw_ver, attr->fw_ver, sizeof(attr->fw_ver)); + + return 0; +} + +/** + * irdma_uquery_port - get port attributes (msg size, lnk, mtu...) 
+ * @context: user context of the device + * @port: port for the attributes + * @attr: to return port attributes + **/ +int +irdma_uquery_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr) +{ + struct ibv_query_port cmd; + + return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); +} + +/** + * irdma_ualloc_pd - allocates protection domain and return pd ptr + * @context: user context of the device + **/ +struct ibv_pd * +irdma_ualloc_pd(struct ibv_context *context) +{ + struct ibv_alloc_pd cmd; + struct irdma_ualloc_pd_resp resp = {}; + struct irdma_upd *iwupd; + int err; + + iwupd = calloc(1, sizeof(*iwupd)); + if (!iwupd) + return NULL; + + err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp)); + if (err) + goto err_free; + + iwupd->pd_id = resp.pd_id; + + return &iwupd->ibv_pd; + +err_free: + free(iwupd); + + errno = err; + return NULL; +} + +/** + * irdma_ufree_pd - free pd resources + * @pd: pd to free resources + */ +int +irdma_ufree_pd(struct ibv_pd *pd) +{ + struct irdma_upd *iwupd; + int ret; + + iwupd = container_of(pd, struct irdma_upd, ibv_pd); + ret = ibv_cmd_dealloc_pd(pd); + if (ret) + return ret; + + free(iwupd); + + return 0; +} + +/** + * irdma_ureg_mr - register user memory region + * @pd: pd for the mr + * @addr: user address of the memory region + * @length: length of the memory + * @hca_va: hca_va + * @access: access allowed on this mr + */ +struct ibv_mr * +irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, + int access) +{ + struct verbs_mr *vmr; + struct irdma_ureg_mr cmd = {}; + struct ibv_reg_mr_resp resp; + int err; + + vmr = malloc(sizeof(*vmr)); + if (!vmr) + return NULL; + + cmd.reg_type = IRDMA_MEMREG_TYPE_MEM; + err = ibv_cmd_reg_mr(pd, addr, length, + (uintptr_t)addr, access, &vmr->ibv_mr, &cmd.ibv_cmd, + sizeof(cmd), &resp, sizeof(resp)); + if (err) { + free(vmr); + errno = err; + return NULL; + } + + return &vmr->ibv_mr; +} + +/* + * irdma_urereg_mr - re-register memory region @vmr: mr that was allocated @flags: bit mask to indicate which of the + * attr's of MR modified @pd: pd of the mr @addr: user address of the memory region @length: length of the memory + * @access: access allowed on this mr + */ +int +irdma_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, + void *addr, size_t length, int access) +{ + struct irdma_urereg_mr cmd = {}; + struct ibv_rereg_mr_resp resp; + + cmd.reg_type = IRDMA_MEMREG_TYPE_MEM; + return ibv_cmd_rereg_mr(&vmr->ibv_mr, flags, addr, length, (uintptr_t)addr, + access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp, + sizeof(resp)); +} + +/** + * irdma_udereg_mr - re-register memory region + * @mr: mr that was allocated + */ +int +irdma_udereg_mr(struct ibv_mr *mr) +{ + struct verbs_mr *vmr; + int ret; + + vmr = container_of(mr, struct verbs_mr, ibv_mr); + + ret = ibv_cmd_dereg_mr(mr); + if (ret) + return ret; + + return 0; +} + +/** + * irdma_ualloc_mw - allocate memory window + * @pd: protection domain + * @type: memory window type + */ +struct ibv_mw * +irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) +{ + struct ibv_mw *mw; + struct ibv_alloc_mw cmd; + struct ibv_alloc_mw_resp resp; + int err; + + mw = calloc(1, sizeof(*mw)); + if (!mw) + return NULL; + + err = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, + sizeof(resp)); + if (err) { + printf("%s: Failed to alloc memory window\n", + __func__); + free(mw); + errno = err; + return NULL; + } + + return mw; +} + +/** + * irdma_ubind_mw - bind a memory window + 
* @qp: qp to post WR + * @mw: memory window to bind + * @mw_bind: bind info + */ +int +irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, + struct ibv_mw_bind *mw_bind) +{ + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; + struct verbs_mr *vmr; + + struct ibv_send_wr wr = {}; + struct ibv_send_wr *bad_wr; + int err; + + if (!bind_info->mr && (bind_info->addr || bind_info->length)) + return EINVAL; + + if (bind_info->mr) { + vmr = verbs_get_mr(bind_info->mr); + if (vmr->mr_type != IBV_MR_TYPE_MR) + return ENOTSUP; + + if (vmr->access & IBV_ACCESS_ZERO_BASED) + return EINVAL; + + if (mw->pd != bind_info->mr->pd) + return EPERM; + } + + wr.opcode = IBV_WR_BIND_MW; + wr.bind_mw.bind_info = mw_bind->bind_info; + wr.bind_mw.mw = mw; + wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey); + + wr.wr_id = mw_bind->wr_id; + wr.send_flags = mw_bind->send_flags; + + err = irdma_upost_send(qp, &wr, &bad_wr); + if (!err) + mw->rkey = wr.bind_mw.rkey; + + return err; +} + +/** + * irdma_udealloc_mw - deallocate memory window + * @mw: memory window to dealloc + */ +int +irdma_udealloc_mw(struct ibv_mw *mw) +{ + int ret; + struct ibv_dealloc_mw cmd; + + ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd)); + if (ret) + return ret; + free(mw); + + return 0; +} + +static void * +irdma_alloc_hw_buf(size_t size) +{ + void *buf; + + buf = memalign(IRDMA_HW_PAGE_SIZE, size); + + if (!buf) + return NULL; + if (ibv_dontfork_range(buf, size)) { + free(buf); + return NULL; + } + + return buf; +} + +static void +irdma_free_hw_buf(void *buf, size_t size) +{ + ibv_dofork_range(buf, size); + free(buf); +} + +/** + * get_cq_size - returns actual cqe needed by HW + * @ncqe: minimum cqes requested by application + * @hw_rev: HW generation + * @cqe_64byte_ena: enable 64byte cqe + */ +static inline int +get_cq_size(int ncqe, u8 hw_rev) +{ + ncqe++; + + /* Completions with immediate require 1 extra entry */ + if (hw_rev > IRDMA_GEN_1) + ncqe *= 2; + + if (ncqe < IRDMA_U_MINCQ_SIZE) + ncqe = IRDMA_U_MINCQ_SIZE; + + return ncqe; +} + +static inline size_t get_cq_total_bytes(u32 cq_size) { + return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE); +} + +/** + * ucreate_cq - irdma util function to create a CQ + * @context: ibv context + * @attr_ex: CQ init attributes + * @ext_cq: flag to create an extendable or normal CQ + */ +static struct ibv_cq_ex * +ucreate_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr_ex, + bool ext_cq) +{ + struct irdma_cq_uk_init_info info = {}; + struct irdma_ureg_mr reg_mr_cmd = {}; + struct irdma_ucreate_cq_ex cmd = {}; + struct irdma_ucreate_cq_ex_resp resp = {}; + struct ibv_reg_mr_resp reg_mr_resp = {}; + struct irdma_ureg_mr reg_mr_shadow_cmd = {}; + struct ibv_reg_mr_resp reg_mr_shadow_resp = {}; + struct irdma_uk_attrs *uk_attrs; + struct irdma_uvcontext *iwvctx; + struct irdma_ucq *iwucq; + size_t total_size; + u32 cq_pages; + int ret, ncqe; + u8 hw_rev; + + iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx); + uk_attrs = &iwvctx->uk_attrs; + hw_rev = uk_attrs->hw_rev; + + if (ext_cq) { + u32 supported_flags = IRDMA_STANDARD_WC_FLAGS_EX; + + if (hw_rev == IRDMA_GEN_1 || attr_ex->wc_flags & ~supported_flags) { + errno = EOPNOTSUPP; + return NULL; + } + } + + if (attr_ex->cqe < uk_attrs->min_hw_cq_size || attr_ex->cqe > uk_attrs->max_hw_cq_size - 1) { + errno = EINVAL; + return NULL; + } + + /* save the cqe requested by application */ + ncqe = attr_ex->cqe; + + iwucq = calloc(1, sizeof(*iwucq)); + if (!iwucq) + return NULL; + + ret = 
pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE); + if (ret) { + free(iwucq); + errno = ret; + return NULL; + } + + info.cq_size = get_cq_size(attr_ex->cqe, hw_rev); + total_size = get_cq_total_bytes(info.cq_size); + iwucq->comp_vector = attr_ex->comp_vector; + LIST_INIT(&iwucq->resize_list); + cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT; + + if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE)) + total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE; + + iwucq->buf_size = total_size; + info.cq_base = irdma_alloc_hw_buf(total_size); + if (!info.cq_base) { + ret = ENOMEM; + goto err_cq_base; + } + + memset(info.cq_base, 0, total_size); + reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ; + reg_mr_cmd.cq_pages = cq_pages; + + ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base, + total_size, (uintptr_t)info.cq_base, + IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr.ibv_mr, + ®_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), + ®_mr_resp, sizeof(reg_mr_resp)); + if (ret) + goto err_dereg_mr; + + iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; + + if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) { + info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE); + if (!info.shadow_area) { + ret = ENOMEM; + goto err_alloc_shadow; + } + + memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE); + reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ; + reg_mr_shadow_cmd.cq_pages = 1; + + ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.shadow_area, + IRDMA_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area, + IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area.ibv_mr, + ®_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd), + ®_mr_shadow_resp, sizeof(reg_mr_shadow_resp)); + if (ret) { + irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); + goto err_alloc_shadow; + } + + iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; + + } else { + info.shadow_area = (__le64 *) ((u8 *)info.cq_base + (cq_pages << IRDMA_HW_PAGE_SHIFT)); + } + + attr_ex->cqe = info.cq_size; + cmd.user_cq_buf = (__u64) ((uintptr_t)info.cq_base); + cmd.user_shadow_area = (__u64) ((uintptr_t)info.shadow_area); + + ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq.cq_ex, + &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp, + sizeof(resp.ibv_resp), sizeof(resp)); + attr_ex->cqe = ncqe; + if (ret) + goto err_create_cq; + + if (ext_cq) + irdma_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex); + info.cq_id = resp.cq_id; + /* Do not report the CQE's reserved for immediate and burned by HW */ + iwucq->verbs_cq.cq.cqe = ncqe; + info.cqe_alloc_db = (u32 *)((u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET); + irdma_uk_cq_init(&iwucq->cq, &info); + return &iwucq->verbs_cq.cq_ex; + +err_create_cq: + if (iwucq->vmr_shadow_area.ibv_mr.handle) { + ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); + irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); + } +err_alloc_shadow: + ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); +err_dereg_mr: + irdma_free_hw_buf(info.cq_base, total_size); +err_cq_base: + printf("%s: failed to initialize CQ\n", __func__); + pthread_spin_destroy(&iwucq->lock); + + free(iwucq); + + errno = ret; + return NULL; +} + +struct ibv_cq * +irdma_ucreate_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector) +{ + struct ibv_cq_init_attr_ex attr_ex = { + .cqe = cqe, + .channel = channel, + .comp_vector = comp_vector, + }; + struct ibv_cq_ex *ibvcq_ex; + + ibvcq_ex = ucreate_cq(context, &attr_ex, false); + + return ibvcq_ex ? 
ibv_cq_ex_to_cq(ibvcq_ex) : NULL; +} + +struct ibv_cq_ex * +irdma_ucreate_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr_ex) +{ + return ucreate_cq(context, attr_ex, true); +} + +/** + * irdma_free_cq_buf - free memory for cq buffer + * @cq_buf: cq buf to free + */ +static void +irdma_free_cq_buf(struct irdma_cq_buf *cq_buf) +{ + ibv_cmd_dereg_mr(&cq_buf->vmr.ibv_mr); + irdma_free_hw_buf(cq_buf->cq.cq_base, get_cq_total_bytes(cq_buf->cq.cq_size)); + free(cq_buf); +} + +/** + * irdma_process_resize_list - process the cq list to remove buffers + * @iwucq: cq which owns the list + * @lcqe_buf: cq buf where the last cqe is found + */ +static int +irdma_process_resize_list(struct irdma_ucq *iwucq, + struct irdma_cq_buf *lcqe_buf) +{ + struct irdma_cq_buf *cq_buf, *next; + int cq_cnt = 0; + + LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { + if (cq_buf == lcqe_buf) + return cq_cnt; + + LIST_REMOVE(cq_buf, list); + irdma_free_cq_buf(cq_buf); + cq_cnt++; + } + + return cq_cnt; +} + +/** + * irdma_udestroy_cq - destroys cq + * @cq: ptr to cq to be destroyed + */ +int +irdma_udestroy_cq(struct ibv_cq *cq) +{ + struct irdma_uk_attrs *uk_attrs; + struct irdma_uvcontext *iwvctx; + struct irdma_ucq *iwucq; + int ret; + + iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); + iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx); + uk_attrs = &iwvctx->uk_attrs; + + ret = pthread_spin_destroy(&iwucq->lock); + if (ret) + goto err; + + irdma_process_resize_list(iwucq, NULL); + ret = ibv_cmd_destroy_cq(cq); + if (ret) + goto err; + + ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); + irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size); + + if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) { + ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); + irdma_free_hw_buf(iwucq->cq.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); + } + free(iwucq); + return 0; + +err: + return ret; +} + +static enum ibv_wc_status +irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode) +{ + switch (opcode) { + case FLUSH_PROT_ERR: + return IBV_WC_LOC_PROT_ERR; + case FLUSH_REM_ACCESS_ERR: + return IBV_WC_REM_ACCESS_ERR; + case FLUSH_LOC_QP_OP_ERR: + return IBV_WC_LOC_QP_OP_ERR; + case FLUSH_REM_OP_ERR: + return IBV_WC_REM_OP_ERR; + case FLUSH_LOC_LEN_ERR: + return IBV_WC_LOC_LEN_ERR; + case FLUSH_GENERAL_ERR: + return IBV_WC_WR_FLUSH_ERR; + case FLUSH_MW_BIND_ERR: + return IBV_WC_MW_BIND_ERR; + case FLUSH_REM_INV_REQ_ERR: + return IBV_WC_REM_INV_REQ_ERR; + case FLUSH_RETRY_EXC_ERR: + return IBV_WC_RETRY_EXC_ERR; + case FLUSH_FATAL_ERR: + default: + return IBV_WC_FATAL_ERR; + } +} + +static inline void +set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry) +{ + switch (cur_cqe->op_type) { + case IRDMA_OP_TYPE_RDMA_WRITE: + case IRDMA_OP_TYPE_RDMA_WRITE_SOL: + entry->opcode = IBV_WC_RDMA_WRITE; + break; + case IRDMA_OP_TYPE_RDMA_READ: + entry->opcode = IBV_WC_RDMA_READ; + break; + case IRDMA_OP_TYPE_SEND_SOL: + case IRDMA_OP_TYPE_SEND_SOL_INV: + case IRDMA_OP_TYPE_SEND_INV: + case IRDMA_OP_TYPE_SEND: + entry->opcode = IBV_WC_SEND; + break; + case IRDMA_OP_TYPE_BIND_MW: + entry->opcode = IBV_WC_BIND_MW; + break; + case IRDMA_OP_TYPE_INV_STAG: + entry->opcode = IBV_WC_LOCAL_INV; + break; + default: + entry->status = IBV_WC_GENERAL_ERR; + printf("%s: Invalid opcode = %d in CQE\n", + __func__, cur_cqe->op_type); + } +} + +static inline void +set_ib_wc_op_rq(struct irdma_cq_poll_info *cur_cqe, + struct ibv_wc *entry, bool send_imm_support) +{ + if (!send_imm_support) { + 
entry->opcode = cur_cqe->imm_valid ? IBV_WC_RECV_RDMA_WITH_IMM : + IBV_WC_RECV; + return; + } + switch (cur_cqe->op_type) { + case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: + case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: + entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM; + break; + default: + entry->opcode = IBV_WC_RECV; + } +} + +/** + * irdma_process_cqe_ext - process current cqe for extended CQ + * @cur_cqe - current cqe info + */ +static void +irdma_process_cqe_ext(struct irdma_cq_poll_info *cur_cqe) +{ + struct irdma_ucq *iwucq = container_of(cur_cqe, struct irdma_ucq, cur_cqe); + struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; + + ibvcq_ex->wr_id = cur_cqe->wr_id; + if (cur_cqe->error) + ibvcq_ex->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? + irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; + else + ibvcq_ex->status = IBV_WC_SUCCESS; +} + +/** + * irdma_process_cqe - process current cqe info + * @entry - ibv_wc object to fill in for non-extended CQ + * @cur_cqe - current cqe info + */ +static void +irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe) +{ + struct irdma_qp_uk *qp; + struct ibv_qp *ib_qp; + + entry->wc_flags = 0; + entry->wr_id = cur_cqe->wr_id; + entry->qp_num = cur_cqe->qp_id; + qp = cur_cqe->qp_handle; + ib_qp = qp->back_qp; + + if (cur_cqe->error) { + entry->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? + irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; + entry->vendor_err = cur_cqe->major_err << 16 | + cur_cqe->minor_err; + } else { + entry->status = IBV_WC_SUCCESS; + } + + if (cur_cqe->imm_valid) { + entry->imm_data = htonl(cur_cqe->imm_data); + entry->wc_flags |= IBV_WC_WITH_IMM; + } + + if (cur_cqe->q_type == IRDMA_CQE_QTYPE_SQ) { + set_ib_wc_op_sq(cur_cqe, entry); + } else { + set_ib_wc_op_rq(cur_cqe, entry, + qp->qp_caps & IRDMA_SEND_WITH_IMM ? + true : false); + if (ib_qp->qp_type != IBV_QPT_UD && + cur_cqe->stag_invalid_set) { + entry->invalidated_rkey = cur_cqe->inv_stag; + entry->wc_flags |= IBV_WC_WITH_INV; + } + } + + if (ib_qp->qp_type == IBV_QPT_UD) { + entry->src_qp = cur_cqe->ud_src_qpn; + entry->wc_flags |= IBV_WC_GRH; + } else { + entry->src_qp = cur_cqe->qp_id; + } + entry->byte_len = cur_cqe->bytes_xfered; +} + +/** + * irdma_poll_one - poll one entry of the CQ + * @ukcq: ukcq to poll + * @cur_cqe: current CQE info to be filled in + * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ + * + * Returns the internal irdma device error code or 0 on success + */ +static int +irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe, + struct ibv_wc *entry) +{ + int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe); + + if (ret) + return ret; + + if (!entry) + irdma_process_cqe_ext(cur_cqe); + else + irdma_process_cqe(entry, cur_cqe); + + return 0; +} + +/** + * __irdma_upoll_cq - irdma util function to poll device CQ + * @iwucq: irdma cq to poll + * @num_entries: max cq entries to poll + * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ + * + * Returns non-negative value equal to the number of completions + * found. 
On failure, EINVAL + */ +static int +__irdma_upoll_cq(struct irdma_ucq *iwucq, int num_entries, + struct ibv_wc *entry) +{ + struct irdma_cq_buf *cq_buf, *next; + struct irdma_cq_buf *last_buf = NULL; + struct irdma_cq_poll_info *cur_cqe = &iwucq->cur_cqe; + bool cq_new_cqe = false; + int resized_bufs = 0; + int npolled = 0; + int ret; + + /* go through the list of previously resized CQ buffers */ + LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { + while (npolled < num_entries) { + ret = irdma_poll_one(&cq_buf->cq, cur_cqe, + entry ? entry + npolled : NULL); + if (!ret) { + ++npolled; + cq_new_cqe = true; + continue; + } + if (ret == ENOENT) + break; + /* QP using the CQ is destroyed. Skip reporting this CQE */ + if (ret == EFAULT) { + cq_new_cqe = true; + continue; + } + goto error; + } + + /* save the resized CQ buffer which received the last cqe */ + if (cq_new_cqe) + last_buf = cq_buf; + cq_new_cqe = false; + } + + /* check the current CQ for new cqes */ + while (npolled < num_entries) { + ret = irdma_poll_one(&iwucq->cq, cur_cqe, + entry ? entry + npolled : NULL); + if (!ret) { + ++npolled; + cq_new_cqe = true; + continue; + } + if (ret == ENOENT) + break; + /* QP using the CQ is destroyed. Skip reporting this CQE */ + if (ret == EFAULT) { + cq_new_cqe = true; + continue; + } + goto error; + } + + if (cq_new_cqe) + /* all previous CQ resizes are complete */ + resized_bufs = irdma_process_resize_list(iwucq, NULL); + else if (last_buf) + /* only CQ resizes up to the last_buf are complete */ + resized_bufs = irdma_process_resize_list(iwucq, last_buf); + if (resized_bufs) + /* report to the HW the number of complete CQ resizes */ + irdma_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs); + + return npolled; + +error: + printf("%s: Error polling CQ, irdma_err: %d\n", __func__, ret); + + return EINVAL; +} + +/** + * irdma_upoll_cq - verb API callback to poll device CQ + * @cq: ibv_cq to poll + * @num_entries: max cq entries to poll + * @entry: pointer to array of ibv_wc objects to be filled in for each completion + * + * Returns non-negative value equal to the number of completions + * found and a negative error code on failure + */ +int +irdma_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry) +{ + struct irdma_ucq *iwucq; + int ret; + + iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + return -ret; + + ret = __irdma_upoll_cq(iwucq, num_entries, entry); + + pthread_spin_unlock(&iwucq->lock); + + return ret; +} + +/** + * irdma_start_poll - verb_ex API callback to poll batch of WC's + * @ibvcq_ex: ibv extended CQ + * @attr: attributes (not used) + * + * Start polling batch of work completions. Return 0 on success, ENONENT when + * no completions are available on CQ. And an error code on errors + */ +static int +irdma_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr) +{ + struct irdma_ucq *iwucq; + int ret; + + iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + return ret; + + ret = __irdma_upoll_cq(iwucq, 1, NULL); + if (ret == 1) + return 0; + + /* No Completions on CQ */ + if (!ret) + ret = ENOENT; + + pthread_spin_unlock(&iwucq->lock); + + return ret; +} + +/** + * irdma_next_poll - verb_ex API callback to get next WC + * @ibvcq_ex: ibv extended CQ + * + * Return 0 on success, ENONENT when no completions are available on CQ. 
+ * And an error code on errors + */ +static int +irdma_next_poll(struct ibv_cq_ex *ibvcq_ex) +{ + struct irdma_ucq *iwucq; + int ret; + + iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); + ret = __irdma_upoll_cq(iwucq, 1, NULL); + if (ret == 1) + return 0; + + /* No Completions on CQ */ + if (!ret) + ret = ENOENT; + + return ret; +} + +/** + * irdma_end_poll - verb_ex API callback to end polling of WC's + * @ibvcq_ex: ibv extended CQ + */ +static void +irdma_end_poll(struct ibv_cq_ex *ibvcq_ex) +{ + struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, + verbs_cq.cq_ex); + + pthread_spin_unlock(&iwucq->lock); +} + +static enum ibv_wc_opcode +irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) +{ + struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, + verbs_cq.cq_ex); + + switch (iwucq->cur_cqe.op_type) { + case IRDMA_OP_TYPE_RDMA_WRITE: + case IRDMA_OP_TYPE_RDMA_WRITE_SOL: + return IBV_WC_RDMA_WRITE; + case IRDMA_OP_TYPE_RDMA_READ: + return IBV_WC_RDMA_READ; + case IRDMA_OP_TYPE_SEND_SOL: + case IRDMA_OP_TYPE_SEND_SOL_INV: + case IRDMA_OP_TYPE_SEND_INV: + case IRDMA_OP_TYPE_SEND: + return IBV_WC_SEND; + case IRDMA_OP_TYPE_BIND_MW: + return IBV_WC_BIND_MW; + case IRDMA_OP_TYPE_REC: + return IBV_WC_RECV; + case IRDMA_OP_TYPE_REC_IMM: + return IBV_WC_RECV_RDMA_WITH_IMM; + case IRDMA_OP_TYPE_INV_STAG: + return IBV_WC_LOCAL_INV; + } + + printf("%s: Invalid opcode = %d in CQE\n", __func__, + iwucq->cur_cqe.op_type); + + return 0; +} + +static uint32_t irdma_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex){ + struct irdma_cq_poll_info *cur_cqe; + struct irdma_ucq *iwucq; + + iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); + cur_cqe = &iwucq->cur_cqe; + + return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err : 0; +} + +static int +irdma_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) +{ + struct irdma_cq_poll_info *cur_cqe; + struct irdma_ucq *iwucq; + struct irdma_qp_uk *qp; + struct ibv_qp *ib_qp; + int wc_flags = 0; + + iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); + cur_cqe = &iwucq->cur_cqe; + qp = cur_cqe->qp_handle; + ib_qp = qp->back_qp; + + if (cur_cqe->imm_valid) + wc_flags |= IBV_WC_WITH_IMM; + + if (ib_qp->qp_type == IBV_QPT_UD) { + wc_flags |= IBV_WC_GRH; + } else { + if (cur_cqe->stag_invalid_set) { + switch (cur_cqe->op_type) { + case IRDMA_OP_TYPE_REC: + wc_flags |= IBV_WC_WITH_INV; + break; + case IRDMA_OP_TYPE_REC_IMM: + wc_flags |= IBV_WC_WITH_INV; + break; + } + } + } + + return wc_flags; +} + +static uint32_t irdma_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex){ + struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, + verbs_cq.cq_ex); + + return iwucq->cur_cqe.bytes_xfered; +} + +static __be32 irdma_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex){ + struct irdma_cq_poll_info *cur_cqe; + struct irdma_ucq *iwucq; + + iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); + cur_cqe = &iwucq->cur_cqe; + + return cur_cqe->imm_valid ? 
htonl(cur_cqe->imm_data) : 0; +} + +static uint32_t irdma_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex){ + struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, + verbs_cq.cq_ex); + + return iwucq->cur_cqe.qp_id; +} + +static uint32_t irdma_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex){ + struct irdma_cq_poll_info *cur_cqe; + struct irdma_ucq *iwucq; + struct irdma_qp_uk *qp; + struct ibv_qp *ib_qp; + + iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); + cur_cqe = &iwucq->cur_cqe; + qp = cur_cqe->qp_handle; + ib_qp = qp->back_qp; + + return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn : cur_cqe->qp_id; +} + +static uint8_t irdma_wc_read_sl(struct ibv_cq_ex *ibvcq_ex){ + return 0; +} + +void +irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq, + struct ibv_cq_init_attr_ex *attr_ex) +{ + struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; + + ibvcq_ex->start_poll = irdma_start_poll; + ibvcq_ex->end_poll = irdma_end_poll; + ibvcq_ex->next_poll = irdma_next_poll; + + ibvcq_ex->read_opcode = irdma_wc_read_opcode; + ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err; + ibvcq_ex->read_wc_flags = irdma_wc_read_wc_flags; + + if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) + ibvcq_ex->read_byte_len = irdma_wc_read_byte_len; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM) + ibvcq_ex->read_imm_data = irdma_wc_read_imm_data; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM) + ibvcq_ex->read_qp_num = irdma_wc_read_qp_num; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP) + ibvcq_ex->read_src_qp = irdma_wc_read_src_qp; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL) + ibvcq_ex->read_sl = irdma_wc_read_sl; +} + +/** + * irdma_arm_cq - arm of cq + * @iwucq: cq to which arm + * @cq_notify: notification params + */ +static void +irdma_arm_cq(struct irdma_ucq *iwucq, + enum irdma_cmpl_notify cq_notify) +{ + iwucq->is_armed = true; + iwucq->arm_sol = true; + iwucq->skip_arm = false; + iwucq->skip_sol = true; + irdma_uk_cq_request_notification(&iwucq->cq, cq_notify); +} + +/** + * irdma_uarm_cq - callback for arm of cq + * @cq: cq to arm + * @solicited: to get notify params + */ +int +irdma_uarm_cq(struct ibv_cq *cq, int solicited) +{ + struct irdma_ucq *iwucq; + enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; + int ret; + + iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); + if (solicited) + cq_notify = IRDMA_CQ_COMPL_SOLICITED; + + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + return ret; + + if (iwucq->is_armed) { + if (iwucq->arm_sol && !solicited) { + irdma_arm_cq(iwucq, cq_notify); + } else { + iwucq->skip_arm = true; + iwucq->skip_sol = solicited ? 
true : false; + } + } else { + irdma_arm_cq(iwucq, cq_notify); + } + + pthread_spin_unlock(&iwucq->lock); + + return 0; +} + +/** + * irdma_cq_event - cq to do completion event + * @cq: cq to arm + */ +void +irdma_cq_event(struct ibv_cq *cq) +{ + struct irdma_ucq *iwucq; + + iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); + if (pthread_spin_lock(&iwucq->lock)) + return; + + if (iwucq->skip_arm) + irdma_arm_cq(iwucq, IRDMA_CQ_COMPL_EVENT); + else + iwucq->is_armed = false; + + pthread_spin_unlock(&iwucq->lock); +} + +void * +irdma_mmap(int fd, off_t offset) +{ + void *map; + + map = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED, + fd, offset); + if (map == MAP_FAILED) + return map; + + if (ibv_dontfork_range(map, IRDMA_HW_PAGE_SIZE)) { + munmap(map, IRDMA_HW_PAGE_SIZE); + return MAP_FAILED; + } + + return map; +} + +void +irdma_munmap(void *map) +{ + ibv_dofork_range(map, IRDMA_HW_PAGE_SIZE); + munmap(map, IRDMA_HW_PAGE_SIZE); +} + +/** + * irdma_destroy_vmapped_qp - destroy resources for qp + * @iwuqp: qp struct for resources + */ +static int +irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp) +{ + int ret; + + ret = ibv_cmd_destroy_qp(&iwuqp->ibv_qp); + if (ret) + return ret; + + if (iwuqp->qp.push_db) + irdma_munmap(iwuqp->qp.push_db); + if (iwuqp->qp.push_wqe) + irdma_munmap(iwuqp->qp.push_wqe); + + ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr); + + return 0; +} + +/** + * irdma_vmapped_qp - create resources for qp + * @iwuqp: qp struct for resources + * @pd: pd for the qp + * @attr: attributes of qp passed + * @resp: response back from create qp + * @info: uk info for initializing user level qp + * @abi_ver: abi version of the create qp command + */ +static int +irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd, + struct ibv_qp_init_attr *attr, + struct irdma_qp_uk_init_info *info, + bool legacy_mode) +{ + struct irdma_ucreate_qp cmd = {}; + size_t sqsize, rqsize, totalqpsize; + struct irdma_ucreate_qp_resp resp = {}; + struct irdma_ureg_mr reg_mr_cmd = {}; + struct ibv_reg_mr_resp reg_mr_resp = {}; + int ret; + + sqsize = roundup(info->sq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE); + rqsize = roundup(info->rq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE); + totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE; + info->sq = irdma_alloc_hw_buf(totalqpsize); + iwuqp->buf_size = totalqpsize; + + if (!info->sq) + return ENOMEM; + + memset(info->sq, 0, totalqpsize); + info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE]; + info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem; + + reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_QP; + reg_mr_cmd.sq_pages = sqsize >> IRDMA_HW_PAGE_SHIFT; + reg_mr_cmd.rq_pages = rqsize >> IRDMA_HW_PAGE_SHIFT; + + ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize, + (uintptr_t)info->sq, IBV_ACCESS_LOCAL_WRITE, + &iwuqp->vmr.ibv_mr, ®_mr_cmd.ibv_cmd, + sizeof(reg_mr_cmd), ®_mr_resp, + sizeof(reg_mr_resp)); + if (ret) + goto err_dereg_mr; + + cmd.user_wqe_bufs = (__u64) ((uintptr_t)info->sq); + cmd.user_compl_ctx = (__u64) (uintptr_t)&iwuqp->qp; + cmd.comp_mask |= IRDMA_CREATE_QP_USE_START_WQE_IDX; + + ret = ibv_cmd_create_qp(pd, &iwuqp->ibv_qp, attr, &cmd.ibv_cmd, + sizeof(cmd), &resp.ibv_resp, + sizeof(struct irdma_ucreate_qp_resp)); + if (ret) + goto err_qp; + + info->sq_size = resp.actual_sq_size; + info->rq_size = resp.actual_rq_size; + info->first_sq_wq = legacy_mode ? 
1 : resp.lsmm; + if (resp.comp_mask & IRDMA_CREATE_QP_USE_START_WQE_IDX) + info->start_wqe_idx = resp.start_wqe_idx; + info->qp_caps = resp.qp_caps; + info->qp_id = resp.qp_id; + iwuqp->irdma_drv_opt = resp.irdma_drv_opt; + iwuqp->ibv_qp.qp_num = resp.qp_id; + + iwuqp->send_cq = container_of(attr->send_cq, struct irdma_ucq, + verbs_cq.cq); + iwuqp->recv_cq = container_of(attr->recv_cq, struct irdma_ucq, + verbs_cq.cq); + iwuqp->send_cq->uqp = iwuqp; + iwuqp->recv_cq->uqp = iwuqp; + + return 0; +err_qp: + ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr); +err_dereg_mr: + printf("%s: failed to create QP, status %d\n", __func__, ret); + irdma_free_hw_buf(info->sq, iwuqp->buf_size); + return ret; +} + +/** + * irdma_ucreate_qp - create qp on user app + * @pd: pd for the qp + * @attr: attributes of the qp to be created (sizes, sge, cq) + */ +struct ibv_qp * +irdma_ucreate_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *attr) +{ + struct irdma_qp_uk_init_info info = {}; + struct irdma_uk_attrs *uk_attrs; + struct irdma_uvcontext *iwvctx; + struct irdma_uqp *iwuqp; + int status; + + if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) { + printf("%s: failed to create QP, unsupported QP type: 0x%x\n", + __func__, attr->qp_type); + errno = EOPNOTSUPP; + return NULL; + } + + iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx); + uk_attrs = &iwvctx->uk_attrs; + + if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || + attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || + attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || + attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta || + attr->cap.max_inline_data > uk_attrs->max_hw_inline) { + errno = EINVAL; + return NULL; + } + + info.uk_attrs = uk_attrs; + info.sq_size = attr->cap.max_send_wr; + info.rq_size = attr->cap.max_recv_wr; + info.max_sq_frag_cnt = attr->cap.max_send_sge; + info.max_rq_frag_cnt = attr->cap.max_recv_sge; + info.max_inline_data = attr->cap.max_inline_data; + info.abi_ver = iwvctx->abi_ver; + + status = irdma_uk_calc_depth_shift_sq(&info, &info.sq_depth, &info.sq_shift); + if (status) { + printf("%s: invalid SQ attributes, max_send_wr=%d max_send_sge=%d max_inline=%d\n", + __func__, attr->cap.max_send_wr, attr->cap.max_send_sge, + attr->cap.max_inline_data); + errno = status; + return NULL; + } + + status = irdma_uk_calc_depth_shift_rq(&info, &info.rq_depth, &info.rq_shift); + if (status) { + printf("%s: invalid RQ attributes, recv_wr=%d recv_sge=%d\n", + __func__, attr->cap.max_recv_wr, attr->cap.max_recv_sge); + errno = status; + return NULL; + } + + iwuqp = memalign(1024, sizeof(*iwuqp)); + if (!iwuqp) + return NULL; + + memset(iwuqp, 0, sizeof(*iwuqp)); + + status = pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE); + if (status) + goto err_free_qp; + + info.sq_size = info.sq_depth >> info.sq_shift; + info.rq_size = info.rq_depth >> info.rq_shift; + /** + * Maintain backward compatibility with older ABI which pass sq + * and rq depth (in quanta) in cap.max_send_wr a cap.max_recv_wr + */ + if (!iwvctx->use_raw_attrs) { + attr->cap.max_send_wr = info.sq_size; + attr->cap.max_recv_wr = info.rq_size; + } + + info.wqe_alloc_db = (u32 *)iwvctx->db; + info.legacy_mode = iwvctx->legacy_mode; + info.sq_wrtrk_array = calloc(info.sq_depth, sizeof(*info.sq_wrtrk_array)); + if (!info.sq_wrtrk_array) { + status = errno; /* preserve errno */ + goto err_destroy_lock; + } + + info.rq_wrid_array = calloc(info.rq_depth, sizeof(*info.rq_wrid_array)); + if (!info.rq_wrid_array) { + status = errno; /* preserve errno */ + 
goto err_free_sq_wrtrk; + } + + iwuqp->sq_sig_all = attr->sq_sig_all; + iwuqp->qp_type = attr->qp_type; + status = irdma_vmapped_qp(iwuqp, pd, attr, &info, iwvctx->legacy_mode); + if (status) + goto err_free_rq_wrid; + + iwuqp->qp.back_qp = iwuqp; + iwuqp->qp.lock = &iwuqp->lock; + + status = irdma_uk_qp_init(&iwuqp->qp, &info); + if (status) + goto err_free_vmap_qp; + + attr->cap.max_send_wr = (info.sq_depth - IRDMA_SQ_RSVD) >> info.sq_shift; + attr->cap.max_recv_wr = (info.rq_depth - IRDMA_RQ_RSVD) >> info.rq_shift; + + return &iwuqp->ibv_qp; + +err_free_vmap_qp: + irdma_destroy_vmapped_qp(iwuqp); + irdma_free_hw_buf(info.sq, iwuqp->buf_size); +err_free_rq_wrid: + free(info.rq_wrid_array); +err_free_sq_wrtrk: + free(info.sq_wrtrk_array); +err_destroy_lock: + pthread_spin_destroy(&iwuqp->lock); +err_free_qp: + printf("%s: failed to create QP\n", __func__); + free(iwuqp); + + errno = status; + return NULL; +} + +/** + * irdma_uquery_qp - query qp for some attribute + * @qp: qp for the attributes query + * @attr: to return the attributes + * @attr_mask: mask of what is query for + * @init_attr: initial attributes during create_qp + */ +int +irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, + struct ibv_qp_init_attr *init_attr) +{ + struct ibv_query_qp cmd; + + return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, + sizeof(cmd)); +} + +/** + * irdma_umodify_qp - send qp modify to driver + * @qp: qp to modify + * @attr: attribute to modify + * @attr_mask: mask of the attribute + */ +int +irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) +{ + struct irdma_umodify_qp_resp resp = {}; + struct ibv_modify_qp cmd = {}; + struct irdma_modify_qp_cmd cmd_ex = {}; + struct irdma_uvcontext *iwvctx; + struct irdma_uqp *iwuqp; + + iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); + iwvctx = container_of(qp->context, struct irdma_uvcontext, ibv_ctx); + + if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE && + iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) { + u64 offset; + void *map; + int ret; + + ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, + sizeof(cmd_ex.ibv_cmd), + sizeof(cmd_ex), &resp.ibv_resp, + sizeof(resp.ibv_resp), + sizeof(resp)); + if (!ret) + iwuqp->qp.rd_fence_rate = resp.rd_fence_rate; + if (ret || !resp.push_valid) + return ret; + + if (iwuqp->qp.push_wqe) + return ret; + + offset = resp.push_wqe_mmap_key; + map = irdma_mmap(qp->context->cmd_fd, offset); + if (map == MAP_FAILED) + return ret; + + iwuqp->qp.push_wqe = map; + + offset = resp.push_db_mmap_key; + map = irdma_mmap(qp->context->cmd_fd, offset); + if (map == MAP_FAILED) { + irdma_munmap(iwuqp->qp.push_wqe); + iwuqp->qp.push_wqe = NULL; + printf("failed to map push page, errno %d\n", errno); + return ret; + } + iwuqp->qp.push_wqe += resp.push_offset; + iwuqp->qp.push_db = map + resp.push_offset; + + return ret; + } else { + return ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); + } +} + +static void +irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush) +{ + struct irdma_umodify_qp_resp resp = {}; + struct irdma_modify_qp_cmd cmd_ex = {}; + struct ibv_qp_attr attr = {}; + + attr.qp_state = IBV_QPS_ERR; + cmd_ex.sq_flush = sq_flush; + cmd_ex.rq_flush = rq_flush; + + ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, + &cmd_ex.ibv_cmd, + sizeof(cmd_ex.ibv_cmd), + sizeof(cmd_ex), &resp.ibv_resp, + sizeof(resp.ibv_resp), + sizeof(resp)); +} + +/** + * irdma_clean_cqes - clean cq entries for qp + * @qp: qp for which 
completions are cleaned + * @iwcq: cq to be cleaned + */ +static void +irdma_clean_cqes(struct irdma_qp_uk *qp, struct irdma_ucq *iwucq) +{ + struct irdma_cq_uk *ukcq = &iwucq->cq; + int ret; + + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + return; + + irdma_uk_clean_cq(qp, ukcq); + pthread_spin_unlock(&iwucq->lock); +} + +/** + * irdma_udestroy_qp - destroy qp + * @qp: qp to destroy + */ +int +irdma_udestroy_qp(struct ibv_qp *qp) +{ + struct irdma_uqp *iwuqp; + int ret; + + iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); + ret = pthread_spin_destroy(&iwuqp->lock); + if (ret) + goto err; + + ret = irdma_destroy_vmapped_qp(iwuqp); + if (ret) + goto err; + + /* Clean any pending completions from the cq(s) */ + if (iwuqp->send_cq) + irdma_clean_cqes(&iwuqp->qp, iwuqp->send_cq); + + if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) + irdma_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); + + if (iwuqp->qp.sq_wrtrk_array) + free(iwuqp->qp.sq_wrtrk_array); + if (iwuqp->qp.rq_wrid_array) + free(iwuqp->qp.rq_wrid_array); + + irdma_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size); + free(iwuqp); + return 0; + +err: + printf("%s: failed to destroy QP, status %d\n", + __func__, ret); + return ret; +} + +/** + * calc_type2_mw_stag - calculate type 2 MW stag + * @rkey: desired rkey of the MW + * @mw_rkey: type2 memory window rkey + * + * compute type2 memory window stag by taking lower 8 bits + * of the desired rkey and leaving 24 bits if mw->rkey unchanged + */ +static inline u32 calc_type2_mw_stag(u32 rkey, u32 mw_rkey) { + const u32 mask = 0xff; + + return (rkey & mask) | (mw_rkey & ~mask); +} + +/** + * irdma_post_send - post send wr for user application + * @ib_qp: qp to post wr + * @ib_wr: work request ptr + * @bad_wr: return of bad wr if err + */ +int +irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + struct ibv_send_wr **bad_wr) +{ + struct irdma_post_sq_info info; + struct irdma_uvcontext *iwvctx; + struct irdma_uk_attrs *uk_attrs; + struct irdma_uqp *iwuqp; + bool reflush = false; + int err = 0; + + iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); + iwvctx = container_of(ib_qp->context, struct irdma_uvcontext, ibv_ctx); + uk_attrs = &iwvctx->uk_attrs; + + err = pthread_spin_lock(&iwuqp->lock); + if (err) + return err; + + if (!IRDMA_RING_MORE_WORK(iwuqp->qp.sq_ring) && + ib_qp->state == IBV_QPS_ERR) + reflush = true; + + while (ib_wr) { + memset(&info, 0, sizeof(info)); + info.wr_id = (u64)(ib_wr->wr_id); + if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || + iwuqp->sq_sig_all) + info.signaled = true; + if (ib_wr->send_flags & IBV_SEND_FENCE) + info.read_fence = true; + + switch (ib_wr->opcode) { + case IBV_WR_SEND_WITH_IMM: + if (iwuqp->qp.qp_caps & IRDMA_SEND_WITH_IMM) { + info.imm_data_valid = true; + info.imm_data = ntohl(ib_wr->imm_data); + } else { + err = EINVAL; + break; + } + /* fallthrough */ + case IBV_WR_SEND: + case IBV_WR_SEND_WITH_INV: + if (ib_wr->opcode == IBV_WR_SEND || + ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { + if (ib_wr->send_flags & IBV_SEND_SOLICITED) + info.op_type = IRDMA_OP_TYPE_SEND_SOL; + else + info.op_type = IRDMA_OP_TYPE_SEND; + } else { + if (ib_wr->send_flags & IBV_SEND_SOLICITED) + info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV; + else + info.op_type = IRDMA_OP_TYPE_SEND_INV; + info.stag_to_inv = ib_wr->imm_data; + } + info.op.send.num_sges = ib_wr->num_sge; + info.op.send.sg_list = (struct ibv_sge *)ib_wr->sg_list; + if (ib_qp->qp_type == IBV_QPT_UD) { + struct irdma_uah *ah = container_of(ib_wr->wr.ud.ah, + struct irdma_uah, ibv_ah); 
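/*
 * (annotation, not part of the patch) For UD QPs the destination comes from
 * the work request's address handle and UD fields: the provider AH id,
 * remote qkey and remote QPN are copied into the send descriptor below.
 */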
+ + info.op.send.ah_id = ah->ah_id; + info.op.send.qkey = ib_wr->wr.ud.remote_qkey; + info.op.send.dest_qp = ib_wr->wr.ud.remote_qpn; + } + + if (ib_wr->send_flags & IBV_SEND_INLINE) + err = irdma_uk_inline_send(&iwuqp->qp, &info, false); + else + err = irdma_uk_send(&iwuqp->qp, &info, false); + break; + case IBV_WR_RDMA_WRITE_WITH_IMM: + if (iwuqp->qp.qp_caps & IRDMA_WRITE_WITH_IMM) { + info.imm_data_valid = true; + info.imm_data = ntohl(ib_wr->imm_data); + } else { + err = EINVAL; + break; + } + /* fallthrough */ + case IBV_WR_RDMA_WRITE: + if (ib_wr->send_flags & IBV_SEND_SOLICITED) + info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL; + else + info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; + + info.op.rdma_write.num_lo_sges = ib_wr->num_sge; + info.op.rdma_write.lo_sg_list = ib_wr->sg_list; + info.op.rdma_write.rem_addr.addr = ib_wr->wr.rdma.remote_addr; + info.op.rdma_write.rem_addr.lkey = ib_wr->wr.rdma.rkey; + if (ib_wr->send_flags & IBV_SEND_INLINE) + err = irdma_uk_inline_rdma_write(&iwuqp->qp, &info, false); + else + err = irdma_uk_rdma_write(&iwuqp->qp, &info, false); + break; + case IBV_WR_RDMA_READ: + if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) { + err = EINVAL; + break; + } + info.op_type = IRDMA_OP_TYPE_RDMA_READ; + info.op.rdma_read.rem_addr.addr = ib_wr->wr.rdma.remote_addr; + info.op.rdma_read.rem_addr.lkey = ib_wr->wr.rdma.rkey; + + info.op.rdma_read.lo_sg_list = ib_wr->sg_list; + info.op.rdma_read.num_lo_sges = ib_wr->num_sge; + err = irdma_uk_rdma_read(&iwuqp->qp, &info, false, false); + break; + case IBV_WR_BIND_MW: + if (ib_qp->qp_type != IBV_QPT_RC) { + err = EINVAL; + break; + } + info.op_type = IRDMA_OP_TYPE_BIND_MW; + info.op.bind_window.mr_stag = ib_wr->bind_mw.bind_info.mr->rkey; + if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { + info.op.bind_window.mem_window_type_1 = true; + info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey; + } else { + struct verbs_mr *vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr); + + if (vmr->access & IBV_ACCESS_ZERO_BASED) { + err = EINVAL; + break; + } + info.op.bind_window.mw_stag = + calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey); + ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag; + + } + + if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) { + info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED; + info.op.bind_window.va = NULL; + } else { + info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_VA_BASED; + info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr; + } + info.op.bind_window.bind_len = ib_wr->bind_mw.bind_info.length; + info.op.bind_window.ena_reads = + (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0; + info.op.bind_window.ena_writes = + (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_WRITE) ? 
1 : 0; + + err = irdma_uk_mw_bind(&iwuqp->qp, &info, false); + break; + case IBV_WR_LOCAL_INV: + info.op_type = IRDMA_OP_TYPE_INV_STAG; + info.op.inv_local_stag.target_stag = ib_wr->imm_data; + err = irdma_uk_stag_local_invalidate(&iwuqp->qp, &info, true); + break; + default: + /* error */ + err = EINVAL; + printf("%s: post work request failed, invalid opcode: 0x%x\n", + __func__, ib_wr->opcode); + break; + } + if (err) + break; + + ib_wr = ib_wr->next; + } + + if (err) + *bad_wr = ib_wr; + + irdma_uk_qp_post_wr(&iwuqp->qp); + if (reflush) + irdma_issue_flush(ib_qp, 1, 0); + + pthread_spin_unlock(&iwuqp->lock); + + return err; +} + +/** + * irdma_post_recv - post receive wr for user application + * @ib_wr: work request for receive + * @bad_wr: bad wr caused an error + */ +int +irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, + struct ibv_recv_wr **bad_wr) +{ + struct irdma_post_rq_info post_recv = {}; + struct irdma_uqp *iwuqp; + bool reflush = false; + int err = 0; + + iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); + err = pthread_spin_lock(&iwuqp->lock); + if (err) + return err; + + if (!IRDMA_RING_MORE_WORK(iwuqp->qp.rq_ring) && + ib_qp->state == IBV_QPS_ERR) + reflush = true; + + while (ib_wr) { + if (ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt) { + *bad_wr = ib_wr; + err = EINVAL; + goto error; + } + post_recv.num_sges = ib_wr->num_sge; + post_recv.wr_id = ib_wr->wr_id; + post_recv.sg_list = ib_wr->sg_list; + err = irdma_uk_post_receive(&iwuqp->qp, &post_recv); + if (err) { + *bad_wr = ib_wr; + goto error; + } + + if (reflush) + irdma_issue_flush(ib_qp, 0, 1); + + ib_wr = ib_wr->next; + } +error: + pthread_spin_unlock(&iwuqp->lock); + + return err; +} + +/** + * irdma_ucreate_ah - create address handle associated with a pd + * @ibpd: pd for the address handle + * @attr: attributes of address handle + */ +struct ibv_ah * +irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) +{ + struct irdma_uah *ah; + union ibv_gid sgid; + struct irdma_ucreate_ah_resp resp = {}; + int err; + + if (ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, + &sgid)) { + fprintf(stderr, "irdma: Error from ibv_query_gid.\n"); + errno = ENOENT; + return NULL; + } + + ah = calloc(1, sizeof(*ah)); + if (!ah) + return NULL; + + err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp, + sizeof(resp)); + if (err) { + free(ah); + errno = err; + return NULL; + } + + ah->ah_id = resp.ah_id; + + return &ah->ibv_ah; +} + +/** + * irdma_udestroy_ah - destroy the address handle + * @ibah: address handle + */ +int +irdma_udestroy_ah(struct ibv_ah *ibah) +{ + struct irdma_uah *ah; + int ret; + + ah = container_of(ibah, struct irdma_uah, ibv_ah); + + ret = ibv_cmd_destroy_ah(ibah); + if (ret) + return ret; + + free(ah); + + return 0; +} + +/** + * irdma_uattach_mcast - Attach qp to multicast group implemented + * @qp: The queue pair + * @gid:The Global ID for multicast group + * @lid: The Local ID + */ +int +irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, + uint16_t lid) +{ + return ibv_cmd_attach_mcast(qp, gid, lid); +} + +/** + * irdma_udetach_mcast - Detach qp from multicast group + * @qp: The queue pair + * @gid:The Global ID for multicast group + * @lid: The Local ID + */ +int +irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, + uint16_t lid) +{ + return ibv_cmd_detach_mcast(qp, gid, lid); +} + +/** + * irdma_uresize_cq - resizes a cq + * @cq: cq to resize + * @cqe: the number of cqes of the new cq + */ +int +irdma_uresize_cq(struct 
ibv_cq *cq, int cqe) +{ + struct irdma_uvcontext *iwvctx; + struct irdma_uk_attrs *uk_attrs; + struct irdma_uresize_cq cmd = {}; + struct ibv_resize_cq_resp resp = {}; + struct irdma_ureg_mr reg_mr_cmd = {}; + struct ibv_reg_mr_resp reg_mr_resp = {}; + struct irdma_cq_buf *cq_buf = NULL; + struct irdma_cqe *cq_base = NULL; + struct verbs_mr new_mr = {}; + struct irdma_ucq *iwucq; + size_t cq_size; + u32 cq_pages; + int cqe_needed; + int ret = 0; + + iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); + iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx); + uk_attrs = &iwvctx->uk_attrs; + + if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE)) + return EOPNOTSUPP; + + if (cqe < uk_attrs->min_hw_cq_size || cqe > uk_attrs->max_hw_cq_size - 1) + return EINVAL; + + cqe_needed = get_cq_size(cqe, uk_attrs->hw_rev); + if (cqe_needed == iwucq->cq.cq_size) + return 0; + + cq_size = get_cq_total_bytes(cqe_needed); + cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT; + cq_base = irdma_alloc_hw_buf(cq_size); + if (!cq_base) + return ENOMEM; + + memset(cq_base, 0, cq_size); + + cq_buf = malloc(sizeof(*cq_buf)); + if (!cq_buf) { + ret = ENOMEM; + goto err_buf; + } + + new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd; + reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ; + reg_mr_cmd.cq_pages = cq_pages; + + ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size, + (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE, + &new_mr.ibv_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), + &reg_mr_resp, sizeof(reg_mr_resp)); + if (ret) + goto err_dereg_mr; + + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + goto err_lock; + + cmd.user_cq_buffer = (__u64) ((uintptr_t)cq_base); + ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd, + sizeof(cmd), &resp, sizeof(resp)); + if (ret) + goto err_resize; + + memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq)); + cq_buf->vmr = iwucq->vmr; + iwucq->vmr = new_mr; + irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed); + iwucq->verbs_cq.cq.cqe = cqe; + LIST_INSERT_HEAD(&iwucq->resize_list, cq_buf, list); + + pthread_spin_unlock(&iwucq->lock); + + return ret; + +err_resize: + pthread_spin_unlock(&iwucq->lock); +err_lock: + ibv_cmd_dereg_mr(&new_mr.ibv_mr); +err_dereg_mr: + free(cq_buf); +err_buf: + fprintf(stderr, "failed to resize CQ cq_id=%d ret=%d\n", iwucq->cq.cq_id, ret); + irdma_free_hw_buf(cq_base, cq_size); + return ret; +} diff --git a/contrib/ofed/libirdma/libirdma.map b/contrib/ofed/libirdma/libirdma.map new file mode 100644 index 00000000000..95be5c38913 --- /dev/null +++ b/contrib/ofed/libirdma/libirdma.map @@ -0,0 +1,8 @@ +/* Export symbols should be added below according to + Documentation/versioning.md document. */ +IRDMA_1.0 { + global: + libirdma_query_device; + local: *; +}; + diff --git a/contrib/ofed/libirdma/osdep.h b/contrib/ofed/libirdma/osdep.h new file mode 100644 index 00000000000..f59cbfdc68b --- /dev/null +++ b/contrib/ofed/libirdma/osdep.h @@ -0,0 +1,201 @@ +/*- + * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB + * + * Copyright (c) 2021 - 2023 Intel Corporation + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenFabrics.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _ICRDMA_OSDEP_H_ +#define _ICRDMA_OSDEP_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IOMEM +#define IRDMA_NTOHL(a) ntohl(a) +#define IRDMA_NTOHS(a) ntohs(a) +#define MAKEMASK(m, s) ((m) << (s)) +#define OS_TIMER timer_list +#define OS_LIST_HEAD list_head +#define OS_LIST_ENTRY list_head +#define DECLARE_HASHTABLE(n, b) struct hlist_head (n)[1 << (b)] +#define HASH_MIN(v, b) (sizeof(v) <= 4 ? hash_32(v, b) : hash_long(v, b)) +#define HASH_FOR_EACH_RCU(n, b, o, m) for ((b) = 0, o = NULL; o == NULL && (b) < ARRAY_SIZE(n);\ + (b)++)\ + hlist_for_each_entry_rcu(o, &n[(b)], m) +#define HASH_FOR_EACH_POSSIBLE_RCU(n, o, m, k) \ + hlist_for_each_entry_rcu(o, &n[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(n)))],\ + m) +#define HASH_FOR_EACH_POSSIBLE(n, o, m, k) \ + hlist_for_each_entry(o, &n[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(n)))],\ + m) +#define HASH_ADD_RCU(h, n, k) \ + hlist_add_head_rcu(n, &h[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(h)))]) +#define HASH_DEL_RCU(tbl, node) hlist_del_rcu(node) +#define HASH_ADD(h, n, k) \ + hlist_add_head(n, &h[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(h)))]) +#define HASH_DEL(tbl, node) hlist_del(node) + +#define WQ_UNBOUND_MAX_ACTIVE max_t(int, 512, num_possible_cpus() * 4) +#define if_addr_rlock(x) +#define if_addr_runlock(x) + +/* constants */ +#define STATS_TIMER_DELAY 60000 + +#define BIT_ULL(a) (1ULL << (a)) +#define min(a, b) ((a) > (b) ? (b) : (a)) +#ifndef likely +#define likely(x) __builtin_expect((x), 1) +#endif +#ifndef unlikely +#define unlikely(x) __builtin_expect((x), 0) +#endif + +#define __aligned_u64 uint64_t __aligned(8) + +#define VLAN_PRIO_SHIFT 13 +#define IB_USER_VERBS_EX_CMD_MODIFY_QP IB_USER_VERBS_CMD_MODIFY_QP + +/* + * debug definition section + */ +#define irdma_print(S, ...) 
printf("%s:%d "S, __FUNCTION__, __LINE__, ##__VA_ARGS__) +#define irdma_debug_buf(dev, mask, desc, buf, size) \ +do { \ + u32 i; \ + if (!((mask) & (dev)->debug_mask)) { \ + break; \ + } \ + irdma_debug(dev, mask, "%s\n", desc); \ + irdma_debug(dev, mask, "starting address virt=%p phy=%lxh\n", buf, irdma_get_virt_to_phy(buf)); \ + for (i = 0; i < size ; i += 8) \ + irdma_debug(dev, mask, "index %03d val: %016lx\n", i, ((unsigned long *)(buf))[i / 8]); \ +} while(0) + +#define irdma_debug(h, m, s, ...) \ +do { \ + if (!(h)) { \ + if ((m) == IRDMA_DEBUG_INIT) \ + printf("irdma INIT " s, ##__VA_ARGS__); \ + } else if (((m) & (h)->debug_mask)) { \ + printf("irdma " s, ##__VA_ARGS__); \ + } \ +} while (0) +extern unsigned int irdma_dbg; +#define libirdma_debug(fmt, args...) \ +do { \ + if (irdma_dbg) \ + printf("libirdma-%s: " fmt, __func__, ##args); \ +} while (0) +#define irdma_dev_err(ibdev, fmt, ...) \ + pr_err("%s:%s:%d ERR "fmt, (ibdev)->name, __func__, __LINE__, ##__VA_ARGS__) +#define irdma_dev_warn(ibdev, fmt, ...) \ + pr_warn("%s:%s:%d WARN "fmt, (ibdev)->name, __func__, __LINE__, ##__VA_ARGS__) +#define irdma_dev_info(a, b, ...) printf(b, ##__VA_ARGS__) +#define irdma_pr_warn printf + +/* + * debug definition end + */ + +typedef __be16 BE16; +typedef __be32 BE32; +typedef uintptr_t irdma_uintptr; + +struct irdma_hw; +struct irdma_pci_f; +struct irdma_sc_dev; +struct irdma_sc_qp; +struct irdma_sc_vsi; + +#define irdma_pr_info(fmt, args ...) printf("%s: WARN "fmt, __func__, ## args) +#define irdma_pr_err(fmt, args ...) printf("%s: ERR "fmt, __func__, ## args) +#define irdma_memcpy(a, b, c) memcpy((a), (b), (c)) +#define irdma_memset(a, b, c) memset((a), (b), (c)) +#define irdma_usec_delay(x) DELAY(x) +#define mdelay(x) DELAY((x) * 1000) + +#define rt_tos2priority(tos) (tos >> 5) +#define ah_attr_to_dmac(attr) ((attr).dmac) +#define kc_typeq_ib_wr const +#define kc_ifp_find ip_ifp_find +#define kc_ifp6_find ip6_ifp_find +#define kc_rdma_gid_attr_network_type(sgid_attr, gid_type, gid) \ + ib_gid_to_network_type(gid_type, gid) +#define irdma_del_timer_compat(tt) del_timer((tt)) +#define IRDMA_TAILQ_FOREACH CK_STAILQ_FOREACH +#define IRDMA_TAILQ_FOREACH_SAFE CK_STAILQ_FOREACH_SAFE +#define between(a, b, c) (bool)(c-a >= b-a) + +static inline void db_wr32(__u32 val, __u32 *wqe_word) +{ + *wqe_word = val; +} + +void *hw_to_dev(struct irdma_hw *hw); + +struct irdma_dma_mem { + void *va; + u64 pa; + bus_dma_tag_t tag; + bus_dmamap_t map; + bus_dma_segment_t seg; + bus_size_t size; + int nseg; + int flags; +}; + +struct irdma_virt_mem { + void *va; + u32 size; +}; + +#ifndef verbs_mr +enum ibv_mr_type { + IBV_MR_TYPE_MR, + IBV_MR_TYPE_NULL_MR, +}; + +struct verbs_mr { + struct ibv_mr ibv_mr; + enum ibv_mr_type mr_type; + int access; +}; +#define verbs_get_mr(mr) container_of((mr), struct verbs_mr, ibv_mr) +#endif +#endif /* _ICRDMA_OSDEP_H_ */ diff --git a/contrib/ofed/libmlx5/bitmap.h b/contrib/ofed/libmlx5/bitmap.h index 4b1869053e8..f7e50375ab3 100644 --- a/contrib/ofed/libmlx5/bitmap.h +++ b/contrib/ofed/libmlx5/bitmap.h @@ -95,17 +95,17 @@ static inline uint32_t mlx5_find_first_zero_bit(const unsigned long *addr, static inline void mlx5_set_bit(unsigned int nr, unsigned long *addr) { - addr[(nr / BITS_PER_LONG)] |= (1 << (nr % BITS_PER_LONG)); + addr[(nr / BITS_PER_LONG)] |= (1UL << (nr % BITS_PER_LONG)); } -static inline void mlx5_clear_bit(unsigned int nr, unsigned long *addr) +static inline void mlx5_clear_bit(unsigned int nr, unsigned long *addr) { - addr[(nr / BITS_PER_LONG)] &= ~(1 
<< (nr % BITS_PER_LONG)); + addr[(nr / BITS_PER_LONG)] &= ~(1UL << (nr % BITS_PER_LONG)); } static inline int mlx5_test_bit(unsigned int nr, const unsigned long *addr) { - return !!(addr[(nr / BITS_PER_LONG)] & (1 << (nr % BITS_PER_LONG))); + return !!(addr[(nr / BITS_PER_LONG)] & (1UL << (nr % BITS_PER_LONG))); } #endif diff --git a/contrib/ofed/libmlx5/mlx5.c b/contrib/ofed/libmlx5/mlx5.c index 1b860ed2ce7..8dbb268f55f 100644 --- a/contrib/ofed/libmlx5/mlx5.c +++ b/contrib/ofed/libmlx5/mlx5.c @@ -363,8 +363,7 @@ static int mlx5_enable_sandy_bridge_fix(struct ibv_device *ibdev) mlx5_local_cpu_set(ibdev, &dev_local_cpus); /* check if my cpu set is in dev cpu */ - CPU_OR(&result_set, &my_cpus); - CPU_OR(&result_set, &dev_local_cpus); + CPU_OR(&result_set, &my_cpus, &dev_local_cpus); stall_enable = CPU_EQUAL(&result_set, &dev_local_cpus) ? 0 : 1; out: diff --git a/contrib/ofed/librdmacm/examples/mckey.c b/contrib/ofed/librdmacm/examples/mckey.c index b39a2e0c937..65c80d4efc7 100644 --- a/contrib/ofed/librdmacm/examples/mckey.c +++ b/contrib/ofed/librdmacm/examples/mckey.c @@ -469,8 +469,7 @@ static int get_dst_addr(char *dst, struct sockaddr *addr) sib = (struct sockaddr_ib *) addr; memset(sib, 0, sizeof *sib); sib->sib_family = AF_IB; - inet_pton(AF_INET6, dst, &sib->sib_addr); - return 0; + return inet_pton(AF_INET6, dst, &sib->sib_addr) != 1; } static int run(void) diff --git a/contrib/ofed/librdmacm/librdmacm.map b/contrib/ofed/librdmacm/librdmacm.map index 1c8d7435026..2d826e03238 100644 --- a/contrib/ofed/librdmacm/librdmacm.map +++ b/contrib/ofed/librdmacm/librdmacm.map @@ -26,8 +26,6 @@ RDMACM_1.0 { rdma_free_devices; rdma_event_str; rdma_set_option; - rdma_get_local_addr; - rdma_get_peer_addr; rdma_migrate_id; rdma_getaddrinfo; rdma_freeaddrinfo; @@ -36,33 +34,6 @@ RDMACM_1.0 { rdma_destroy_ep; rdma_create_srq; rdma_destroy_srq; - rsocket; - rbind; - rlisten; - raccept; - rconnect; - rshutdown; - rclose; - rrecv; - rrecvfrom; - rrecvmsg; - rsend; - rsendto; - rsendmsg; - rread; - rreadv; - rwrite; - rwritev; - rpoll; - rselect; - rgetpeername; - rgetsockname; - rsetsockopt; - rgetsockopt; - rfcntl; - riomap; - riounmap; - riowrite; rdma_create_srq_ex; rdma_create_qp_ex; local: *; diff --git a/contrib/ofed/opensm/complib/libosmcomp.map b/contrib/ofed/opensm/complib/libosmcomp.map index 52410ccccf0..eaa5a1742a5 100644 --- a/contrib/ofed/opensm/complib/libosmcomp.map +++ b/contrib/ofed/opensm/complib/libosmcomp.map @@ -112,7 +112,6 @@ OSMCOMP_2.3 { cl_thread_stall; cl_proc_count; cl_is_current_thread; - cl_thread_pool_construct; cl_thread_pool_init; cl_thread_pool_destroy; cl_thread_pool_signal; @@ -126,7 +125,6 @@ OSMCOMP_2.3 { cl_timer_trim; cl_get_time_stamp; cl_get_time_stamp_sec; - cl_vector_copy_general; cl_vector_copy16; cl_vector_copy32; cl_vector_copy64; @@ -142,7 +140,6 @@ OSMCOMP_2.3 { cl_vector_find_from_start; cl_vector_find_from_end; cl_atomic_spinlock; - cl_atomic_dec; ib_error_str; ib_async_event_str; ib_wc_status_str; diff --git a/contrib/ofed/opensm/libvendor/libosmvendor.map b/contrib/ofed/opensm/libvendor/libosmvendor.map index 17416b35e9f..2656b42cef1 100644 --- a/contrib/ofed/opensm/libvendor/libosmvendor.map +++ b/contrib/ofed/opensm/libvendor/libosmvendor.map @@ -1,6 +1,5 @@ OSMVENDOR_2.0 { global: - umad_receiver; osm_vendor_init; osm_vendor_new; osm_vendor_delete; @@ -15,6 +14,5 @@ OSMVENDOR_2.0 { osm_vendor_set_debug; osmv_bind_sa; osmv_query_sa; - osm_vendor_get_guid_ca_and_port; local: *; }; diff --git a/contrib/ofed/opensm/opensm/libopensm.map 
b/contrib/ofed/opensm/opensm/libopensm.map index 087e69ad464..128f4716db7 100644 --- a/contrib/ofed/opensm/opensm/libopensm.map +++ b/contrib/ofed/opensm/opensm/libopensm.map @@ -71,7 +71,6 @@ OPENSM_1.5 { osm_dump_pkey_block; osm_dump_pkey_block_v2; osm_log_raw; - osm_get_sm_state_str; osm_get_sm_signal_str; osm_get_disp_msg_str; osm_get_port_state_str_fixed_width; @@ -91,7 +90,6 @@ OPENSM_1.5 { osm_dump_mlnx_ext_port_info_v2; osm_log_v2; osm_log_msg_box_v2; - osm_log_is_active_v2; osm_get_log_per_module; osm_set_log_per_module; osm_reset_log_per_module; diff --git a/contrib/ofed/opensm/opensm/osm_perfmgr.c b/contrib/ofed/opensm/opensm/osm_perfmgr.c index 3116cb3a49b..e7cd0ebb5e3 100644 --- a/contrib/ofed/opensm/opensm/osm_perfmgr.c +++ b/contrib/ofed/opensm/opensm/osm_perfmgr.c @@ -1311,6 +1311,14 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm, cl_plock_acquire(&pm->osm->lock); p_node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); + if (!p_node) { + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 5407: Node \"%s\" (guid 0x%" PRIx64 + ") no longer exists so removing from PerfMgr" + " monitoring\n", + mon_node->name, mon_node->guid); + goto Exit; + } lid = get_lid(p_node, port, mon_node); cl_plock_release(&pm->osm->lock); if (lid == 0) { @@ -1402,6 +1410,14 @@ static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm, cl_plock_acquire(&pm->osm->lock); p_node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); + if (!p_node) { + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 5407: Node \"%s\" (guid 0x%" PRIx64 + ") no longer exists so removing from PerfMgr" + " monitoring\n", + mon_node->name, mon_node->guid); + goto Exit; + } lid = get_lid(p_node, port, mon_node); cl_plock_release(&pm->osm->lock); if (lid == 0) { @@ -1935,7 +1951,9 @@ ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm, pm->state = p_opt->perfmgr ? PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE; pm->sweep_state = PERFMGR_SWEEP_SLEEP; - cl_spinlock_init(&pm->lock); + status = cl_spinlock_init(&pm->lock); + if (status != IB_SUCCESS) + goto Exit; pm->sweep_time_s = p_opt->perfmgr_sweep_time_s; pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries; pm->ignore_cas = p_opt->perfmgr_ignore_cas; diff --git a/contrib/ofed/opensm/opensm/osm_port.c b/contrib/ofed/opensm/opensm/osm_port.c index 35010e31bbf..47b9e920d29 100644 --- a/contrib/ofed/opensm/opensm/osm_port.c +++ b/contrib/ofed/opensm/opensm/osm_port.c @@ -161,8 +161,10 @@ osm_port_t *osm_port_new(IN const ib_node_info_t * p_ni, only the singular part that has this GUID is owned. 
*/ p_physp = osm_node_get_physp_ptr(p_parent_node, port_num); - if (!p_physp) + if (!p_physp) { + osm_port_delete(&p_port); return NULL; + } CL_ASSERT(port_guid == osm_physp_get_port_guid(p_physp)); p_port->p_physp = p_physp; diff --git a/contrib/ofed/opensm/opensm/osm_sa_mad_ctrl.c b/contrib/ofed/opensm/opensm/osm_sa_mad_ctrl.c index dbab4a95ecf..2df6f18237f 100644 --- a/contrib/ofed/opensm/opensm/osm_sa_mad_ctrl.c +++ b/contrib/ofed/opensm/opensm/osm_sa_mad_ctrl.c @@ -373,6 +373,7 @@ static void sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, IN void *context, case IB_MAD_METHOD_GETMULTI: #endif is_get_request = TRUE; + /* FALLTHROUGH */ case IB_MAD_METHOD_SET: case IB_MAD_METHOD_DELETE: /* if we are closing down simply do nothing */ diff --git a/contrib/ofed/opensm/opensm/st.c b/contrib/ofed/opensm/opensm/st.c index c2ee01474c2..7dba30644d6 100644 --- a/contrib/ofed/opensm/opensm/st.c +++ b/contrib/ofed/opensm/opensm/st.c @@ -174,7 +174,7 @@ static int new_size(int size) static int collision = 0; static int init_st = 0; -static void stat_col() +static void stat_col(void) { FILE *f = fopen("/var/log/osm_st_col", "w"); fprintf(f, "collision: %d\n", collision); @@ -182,9 +182,7 @@ static void stat_col() } #endif -st_table *st_init_table_with_size(type, size) -struct st_hash_type *type; -size_t size; +st_table *st_init_table_with_size(struct st_hash_type *type, size_t size) { st_table *tbl; @@ -208,8 +206,7 @@ size_t size; return tbl; } -st_table *st_init_table(type) -struct st_hash_type *type; +st_table *st_init_table(struct st_hash_type *type) { return st_init_table_with_size(type, 0); } @@ -219,8 +216,7 @@ st_table *st_init_numtable(void) return st_init_table(&type_numhash); } -st_table *st_init_numtable_with_size(size) -size_t size; +st_table *st_init_numtable_with_size(size_t size) { return st_init_table_with_size(&type_numhash, size); } @@ -230,14 +226,12 @@ st_table *st_init_strtable(void) return st_init_table(&type_strhash); } -st_table *st_init_strtable_with_size(size) -size_t size; +st_table *st_init_strtable_with_size(size_t size) { return st_init_table_with_size(&type_strhash, size); } -void st_free_table(table) -st_table *table; +void st_free_table(st_table *table) { register st_table_entry *ptr, *next; int i; @@ -276,10 +270,7 @@ st_table *table; }\ } while (0) -int st_lookup(table, key, value) -st_table *table; -register st_data_t key; -st_data_t *value; +int st_lookup(st_table *table, st_data_t key, st_data_t *value) { unsigned int hash_val, bin_pos; register st_table_entry *ptr; @@ -315,10 +306,7 @@ do {\ table->num_entries++;\ } while (0); -int st_insert(table, key, value) -register st_table *table; -register st_data_t key; -st_data_t value; +int st_insert(st_table *table, st_data_t key, st_data_t value) { unsigned int hash_val, bin_pos; register st_table_entry *ptr; @@ -335,10 +323,7 @@ st_data_t value; } } -void st_add_direct(table, key, value) -st_table *table; -st_data_t key; -st_data_t value; +void st_add_direct(st_table *table, st_data_t key, st_data_t value) { unsigned int hash_val, bin_pos; @@ -347,8 +332,7 @@ st_data_t value; ADD_DIRECT(table, key, value, hash_val, bin_pos); } -static void rehash(table) -register st_table *table; +static void rehash(st_table *table) { register st_table_entry *ptr, *next, **new_bins; int i, old_num_bins = table->num_bins, new_num_bins; @@ -376,8 +360,7 @@ register st_table *table; table->bins = new_bins; } -st_table *st_copy(old_table) -st_table *old_table; +st_table *st_copy(st_table *old_table) { st_table *new_table; 
st_table_entry *ptr, *entry; @@ -416,10 +399,7 @@ st_table *old_table; return new_table; } -int st_delete(table, key, value) -register st_table *table; -register st_data_t *key; -st_data_t *value; +int st_delete(st_table *table, st_data_t *key, st_data_t *value) { unsigned int hash_val; st_table_entry *tmp; @@ -460,11 +440,8 @@ st_data_t *value; return 0; } -int st_delete_safe(table, key, value, never) -register st_table *table; -register st_data_t *key; -st_data_t *value; -st_data_t never; +int st_delete_safe(st_table *table, st_data_t *key, st_data_t *value, + st_data_t never) { unsigned int hash_val; register st_table_entry *ptr; @@ -499,9 +476,7 @@ static int delete_never(st_data_t key, st_data_t value, st_data_t never) return ST_CONTINUE; } -void st_cleanup_safe(table, never) -st_table *table; -st_data_t never; +void st_cleanup_safe(st_table *table, st_data_t never) { int num_entries = table->num_entries; @@ -509,10 +484,9 @@ st_data_t never; table->num_entries = num_entries; } -void st_foreach(table, func, arg) -st_table *table; -int (*func) (st_data_t key, st_data_t val, st_data_t arg); -st_data_t arg; +void st_foreach(st_table *table, + int (*func)(st_data_t key, st_data_t val, st_data_t arg), + st_data_t arg) { st_table_entry *ptr, *last, *tmp; enum st_retval retval; @@ -544,8 +518,7 @@ st_data_t arg; } } -static int strhash(string) -register const char *string; +static int strhash(const char *string) { register int c; @@ -578,14 +551,12 @@ register const char *string; #endif } -static int numcmp(x, y) -void *x, *y; +static int numcmp(void *x, void *y) { return (st_ptr_t) x != (st_ptr_t) y; } -static st_ptr_t numhash(n) -void *n; +static st_ptr_t numhash(void *n) { return (st_ptr_t) n; }
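
The st.c hunks above convert old K&R-style function definitions to ANSI C prototype form without changing behavior, so callers get argument type checking and the file keeps building as compilers drop support for the old syntax. A minimal sketch of that conversion pattern, using a hypothetical function that is not part of this patch:

/* Old K&R-style definition: parameter types follow the parameter
 * list, so the compiler cannot check argument types at call sites. */
int
sum_kr(a, b)
int a;
int b;
{
	return a + b;
}

/* Equivalent ANSI C prototype-style definition, the form st.c is
 * converted to in the hunks above. */
int
sum_ansi(int a, int b)
{
	return a + b;
}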