This patch includes the following commits from OFED 1.3 libibverbs: Implement eXtended Reliable Connections (a7df4af8eb84738f36db4161a4272fa02fc6741e) Re-define IBV_DEVICE_XRC to conform to its new position (5042a9cab0ae2f7ad61bdf88dfed6fb10b700797) Set "is_srq" flag only when the QP has an SRQ (6f6d29e74ca0c19a8821990aad603e3c575b7f4d) for XRC QPs, return xrc_domain in ibv_query_qp (018c44a44ff0344dfe7cf5f6598f81d81769164e) V2: 1. checkpatch.pl cleanups 2. fixed u64 alignment problems in uverbs.h ABI structs 3. eliminated unnecessary default_symvers 4. modified xrc_ops to more_ops Signed-off-by: Jack Morgenstein include/infiniband/driver.h | 11 +++++ include/infiniband/kern-abi.h | 47 ++++++++++++++++++++++- include/infiniband/verbs.h | 85 ++++++++++++++++++++++++++++++++++++++++- src/cmd.c | 72 ++++++++++++++++++++++++++++++++++- src/libibverbs.map | 6 +++ src/verbs.c | 54 ++++++++++++++++++++++++++ 6 files changed, 271 insertions(+), 4 deletions(-) Index: libibverbs/include/infiniband/driver.h =================================================================== --- libibverbs.orig/include/infiniband/driver.h 2009-11-01 15:18:17.920111000 +0200 +++ libibverbs/include/infiniband/driver.h 2009-11-01 15:18:20.624171000 +0200 @@ -99,6 +99,11 @@ int ibv_cmd_create_srq(struct ibv_pd *pd struct ibv_srq *srq, struct ibv_srq_init_attr *attr, struct ibv_create_srq *cmd, size_t cmd_size, struct ibv_create_srq_resp *resp, size_t resp_size); +int ibv_cmd_create_xrc_srq(struct ibv_pd *pd, + struct ibv_srq *srq, struct ibv_srq_init_attr *attr, + uint32_t xrc_domain, uint32_t xrc_cq, + struct ibv_create_xrc_srq *cmd, size_t cmd_size, + struct ibv_create_srq_resp *resp, size_t resp_size); int ibv_cmd_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, int srq_attr_mask, @@ -134,6 +139,12 @@ int ibv_cmd_detach_mcast(struct ibv_qp * int ibv_dontfork_range(void *base, size_t size); int ibv_dofork_range(void *base, size_t size); +int ibv_cmd_open_xrc_domain(struct 
ibv_context *context, int fd, int oflag, + struct ibv_xrc_domain *d, + struct ibv_open_xrc_domain_resp *resp, + size_t resp_size); +int ibv_cmd_close_xrc_domain(struct ibv_xrc_domain *d); + /* * sysfs helper functions Index: libibverbs/include/infiniband/kern-abi.h =================================================================== --- libibverbs.orig/include/infiniband/kern-abi.h 2009-11-01 15:18:17.921121000 +0200 +++ libibverbs/include/infiniband/kern-abi.h 2009-11-01 15:18:20.629168000 +0200 @@ -85,7 +85,10 @@ enum { IB_USER_VERBS_CMD_MODIFY_SRQ, IB_USER_VERBS_CMD_QUERY_SRQ, IB_USER_VERBS_CMD_DESTROY_SRQ, - IB_USER_VERBS_CMD_POST_SRQ_RECV + IB_USER_VERBS_CMD_POST_SRQ_RECV, + IB_USER_VERBS_CMD_CREATE_XRC_SRQ, + IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN, + IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN }; /* @@ -706,6 +709,21 @@ struct ibv_create_srq { __u64 driver_data[0]; }; +struct ibv_create_xrc_srq { + __u32 command; + __u16 in_words; + __u16 out_words; + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 xrcd_handle; + __u32 xrc_cq; + __u64 driver_data[0]; +}; + struct ibv_create_srq_resp { __u32 srq_handle; __u32 max_wr; @@ -754,6 +772,30 @@ struct ibv_destroy_srq_resp { __u32 events_reported; }; +struct ibv_open_xrc_domain { + __u32 command; + __u16 in_words; + __u16 out_words; + __u64 response; + __u32 fd; + __u32 oflags; + __u64 driver_data[0]; +}; + +struct ibv_open_xrc_domain_resp { + __u32 xrcd_handle; +}; + +struct ibv_close_xrc_domain { + __u32 command; + __u16 in_words; + __u16 out_words; + __u64 response; + __u32 xrcd_handle; + __u32 reserved; + __u64 driver_data[0]; +}; + /* * Compatibility with older ABI versions */ @@ -803,6 +845,9 @@ enum { * trick opcodes in IBV_INIT_CMD() doesn't break. 
*/ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL_V2 = -1, + IB_USER_VERBS_CMD_CREATE_XRC_SRQ_V2 = -1, + IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN_V2 = -1, + IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN_V2 = -1, }; struct ibv_destroy_cq_v1 { Index: libibverbs/include/infiniband/verbs.h =================================================================== --- libibverbs.orig/include/infiniband/verbs.h 2009-11-01 15:18:17.924118000 +0200 +++ libibverbs/include/infiniband/verbs.h 2009-11-01 15:18:20.635171000 +0200 @@ -92,7 +92,8 @@ enum ibv_device_cap_flags { IBV_DEVICE_SYS_IMAGE_GUID = 1 << 11, IBV_DEVICE_RC_RNR_NAK_GEN = 1 << 12, IBV_DEVICE_SRQ_RESIZE = 1 << 13, - IBV_DEVICE_N_NOTIFY_CQ = 1 << 14 + IBV_DEVICE_N_NOTIFY_CQ = 1 << 14, + IBV_DEVICE_XRC = 1 << 20 }; enum ibv_atomic_cap { @@ -371,6 +372,11 @@ struct ibv_ah_attr { uint8_t port_num; }; +struct ibv_xrc_domain { + struct ibv_context *context; + uint32_t handle; +}; + enum ibv_srq_attr_mask { IBV_SRQ_MAX_WR = 1 << 0, IBV_SRQ_LIMIT = 1 << 1 @@ -390,7 +396,8 @@ struct ibv_srq_init_attr { enum ibv_qp_type { IBV_QPT_RC = 2, IBV_QPT_UC, - IBV_QPT_UD + IBV_QPT_UD, + IBV_QPT_XRC }; struct ibv_qp_cap { @@ -409,6 +416,7 @@ struct ibv_qp_init_attr { struct ibv_qp_cap cap; enum ibv_qp_type qp_type; int sq_sig_all; + struct ibv_xrc_domain *xrc_domain; }; enum ibv_qp_attr_mask { @@ -527,6 +535,7 @@ struct ibv_send_wr { uint32_t remote_qkey; } ud; } wr; + uint32_t xrc_remote_srq_num; }; struct ibv_recv_wr { @@ -554,6 +563,10 @@ struct ibv_srq { pthread_mutex_t mutex; pthread_cond_t cond; uint32_t events_completed; + + uint32_t xrc_srq_num; + struct ibv_xrc_domain *xrc_domain; + struct ibv_cq *xrc_cq; }; struct ibv_qp { @@ -571,6 +584,8 @@ struct ibv_qp { pthread_mutex_t mutex; pthread_cond_t cond; uint32_t events_completed; + + struct ibv_xrc_domain *xrc_domain; }; struct ibv_comp_channel { @@ -625,6 +640,16 @@ struct ibv_device { char ibdev_path[IBV_SYSFS_PATH_MAX]; }; +struct ibv_more_ops { + struct ibv_srq * (*create_xrc_srq)(struct ibv_pd *pd, + 
struct ibv_xrc_domain *xrc_domain, + struct ibv_cq *xrc_cq, + struct ibv_srq_init_attr *srq_init_attr); + struct ibv_xrc_domain * (*open_xrc_domain)(struct ibv_context *context, + int fd, int oflag); + int (*close_xrc_domain)(struct ibv_xrc_domain *d); +}; + struct ibv_context_ops { int (*query_device)(struct ibv_context *context, struct ibv_device_attr *device_attr); @@ -691,6 +716,7 @@ struct ibv_context { int num_comp_vectors; pthread_mutex_t mutex; void *abi_compat; + struct ibv_more_ops *more_ops; }; /** @@ -913,6 +939,25 @@ struct ibv_srq *ibv_create_srq(struct ib struct ibv_srq_init_attr *srq_init_attr); /** + * ibv_create_xrc_srq - Creates a SRQ associated with the specified protection + * domain and xrc domain. + * @pd: The protection domain associated with the SRQ. + * @xrc_domain: The XRC domain associated with the SRQ. + * @xrc_cq: CQ to report completions for XRC packets on. + * + * @srq_init_attr: A list of initial attributes required to create the SRQ. + * + * srq_attr->max_wr and srq_attr->max_sge are read to determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ibv_create_xrc_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd, + struct ibv_xrc_domain *xrc_domain, + struct ibv_cq *xrc_cq, + struct ibv_srq_init_attr *srq_init_attr); + +/** * ibv_modify_srq - Modifies the attributes for the specified SRQ. * @srq: The SRQ to modify. * @srq_attr: On input, specifies the SRQ attributes to modify. On output, @@ -1093,6 +1138,42 @@ const char *ibv_port_state_str(enum ibv_ */ const char *ibv_event_type_str(enum ibv_event_type event); +/** + * ibv_open_xrc_domain - open an XRC domain + * Returns a reference to an XRC domain. 
+ * + * @context: Device context + * @fd: descriptor for inode associated with the domain + * If fd == -1, no inode is associated with the domain; in this case, + * the only legal value for oflag is O_CREAT + * + * @oflag: oflag values are constructed by OR-ing flags from the following list + * + * O_CREAT + * If a domain belonging to device named by context is already associated + * with the inode, this flag has no effect, except as noted under O_EXCL + * below. Otherwise, a new XRC domain is created and is associated with + * inode specified by fd. + * + * O_EXCL + * If O_EXCL and O_CREAT are set, open will fail if a domain associated with + * the inode exists. The check for the existence of the domain and creation + * of the domain if it does not exist is atomic with respect to other + * processes executing open with fd naming the same inode. + */ +struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context, + int fd, int oflag); + +/** + * ibv_close_xrc_domain - close an XRC domain + * If this is the last reference, destroys the domain. + * + * @d: reference to XRC domain to close + * + * close is implicitly performed at process exit. 
+ */ +int ibv_close_xrc_domain(struct ibv_xrc_domain *d); + END_C_DECLS # undef __attribute_const Index: libibverbs/src/cmd.c =================================================================== --- libibverbs.orig/src/cmd.c 2009-11-01 15:18:17.927111000 +0200 +++ libibverbs/src/cmd.c 2009-11-01 15:18:20.643167000 +0200 @@ -483,6 +483,34 @@ int ibv_cmd_create_srq(struct ibv_pd *pd return 0; } +int ibv_cmd_create_xrc_srq(struct ibv_pd *pd, + struct ibv_srq *srq, struct ibv_srq_init_attr *attr, + uint32_t xrcd_handle, uint32_t xrc_cq, + struct ibv_create_xrc_srq *cmd, size_t cmd_size, + struct ibv_create_srq_resp *resp, size_t resp_size) +{ + IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_XRC_SRQ, resp, resp_size); + cmd->user_handle = (uintptr_t) srq; + cmd->pd_handle = pd->handle; + cmd->max_wr = attr->attr.max_wr; + cmd->max_sge = attr->attr.max_sge; + cmd->srq_limit = attr->attr.srq_limit; + cmd->xrcd_handle = xrcd_handle; + cmd->xrc_cq = xrc_cq; + + if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size) + return errno; + + VALGRIND_MAKE_MEM_DEFINED(resp, resp_size); + + srq->handle = resp->srq_handle; + srq->context = pd->context; + attr->attr.max_wr = resp->max_wr; + attr->attr.max_sge = resp->max_sge; + + return 0; +} + static int ibv_cmd_modify_srq_v3(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, int srq_attr_mask, @@ -603,7 +631,6 @@ int ibv_cmd_create_qp(struct ibv_pd *pd, cmd->pd_handle = pd->handle; cmd->send_cq_handle = attr->send_cq->handle; cmd->recv_cq_handle = attr->recv_cq->handle; - cmd->srq_handle = attr->srq ? attr->srq->handle : 0; cmd->max_send_wr = attr->cap.max_send_wr; cmd->max_recv_wr = attr->cap.max_recv_wr; cmd->max_send_sge = attr->cap.max_send_sge; @@ -612,6 +639,9 @@ int ibv_cmd_create_qp(struct ibv_pd *pd, cmd->sq_sig_all = attr->sq_sig_all; cmd->qp_type = attr->qp_type; cmd->is_srq = !!attr->srq; + cmd->srq_handle = attr->qp_type == IBV_QPT_XRC ? + (attr->xrc_domain ? attr->xrc_domain->handle : 0) : + (attr->srq ? 
attr->srq->handle : 0); cmd->reserved = 0; if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size) @@ -722,6 +752,8 @@ int ibv_cmd_query_qp(struct ibv_qp *qp, init_attr->recv_cq = qp->recv_cq; init_attr->srq = qp->srq; init_attr->qp_type = qp->qp_type; + if (qp->qp_type == IBV_QPT_XRC) + init_attr->xrc_domain = qp->xrc_domain; init_attr->cap.max_send_wr = resp.max_send_wr; init_attr->cap.max_recv_wr = resp.max_recv_wr; init_attr->cap.max_send_sge = resp.max_send_sge; @@ -1122,3 +1154,41 @@ int ibv_cmd_detach_mcast(struct ibv_qp * return 0; } + +int ibv_cmd_open_xrc_domain(struct ibv_context *context, int fd, int oflag, + struct ibv_xrc_domain *d, + struct ibv_open_xrc_domain_resp *resp, + size_t resp_size) +{ + struct ibv_open_xrc_domain cmd; + + if (abi_ver < 6) + return ENOSYS; + + IBV_INIT_CMD_RESP(&cmd, sizeof cmd, OPEN_XRC_DOMAIN, resp, resp_size); + cmd.fd = fd; + cmd.oflags = oflag; + + if (write(context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) + return errno; + + d->handle = resp->xrcd_handle; + + return 0; +} + +int ibv_cmd_close_xrc_domain(struct ibv_xrc_domain *d) +{ + struct ibv_close_xrc_domain cmd; + + if (abi_ver < 6) + return ENOSYS; + + IBV_INIT_CMD(&cmd, sizeof cmd, CLOSE_XRC_DOMAIN); + cmd.xrcd_handle = d->handle; + + if (write(d->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) + return errno; + return 0; +} + Index: libibverbs/src/libibverbs.map =================================================================== --- libibverbs.orig/src/libibverbs.map 2009-11-01 15:18:17.928115000 +0200 +++ libibverbs/src/libibverbs.map 2009-11-01 15:18:20.646169000 +0200 @@ -91,6 +91,12 @@ IBVERBS_1.1 { ibv_dontfork_range; ibv_dofork_range; ibv_register_driver; + ibv_create_xrc_srq; + ibv_cmd_create_xrc_srq; + ibv_open_xrc_domain; + ibv_cmd_open_xrc_domain; + ibv_close_xrc_domain; + ibv_cmd_close_xrc_domain; ibv_node_type_str; ibv_port_state_str; Index: libibverbs/src/verbs.c =================================================================== --- 
libibverbs.orig/src/verbs.c 2009-11-01 15:18:17.931119000 +0200 +++ libibverbs/src/verbs.c 2009-11-01 15:18:20.650169000 +0200 @@ -366,6 +366,9 @@ struct ibv_srq *__ibv_create_srq(struct srq->context = pd->context; srq->srq_context = srq_init_attr->srq_context; srq->pd = pd; + srq->xrc_domain = NULL; + srq->xrc_cq = NULL; + srq->xrc_srq_num = 0; srq->events_completed = 0; pthread_mutex_init(&srq->mutex, NULL); pthread_cond_init(&srq->cond, NULL); @@ -375,6 +378,32 @@ struct ibv_srq *__ibv_create_srq(struct } default_symver(__ibv_create_srq, ibv_create_srq); +struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd, + struct ibv_xrc_domain *xrc_domain, + struct ibv_cq *xrc_cq, + struct ibv_srq_init_attr *srq_init_attr) +{ + struct ibv_srq *srq; + + if (!pd->context->more_ops) + return NULL; + + srq = pd->context->more_ops->create_xrc_srq(pd, xrc_domain, + xrc_cq, srq_init_attr); + if (srq) { + srq->context = pd->context; + srq->srq_context = srq_init_attr->srq_context; + srq->pd = pd; + srq->xrc_domain = xrc_domain; + srq->xrc_cq = xrc_cq; + srq->events_completed = 0; + pthread_mutex_init(&srq->mutex, NULL); + pthread_cond_init(&srq->cond, NULL); + } + + return srq; +} + int __ibv_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, int srq_attr_mask) @@ -410,6 +439,8 @@ struct ibv_qp *__ibv_create_qp(struct ib qp->qp_type = qp_init_attr->qp_type; qp->state = IBV_QPS_RESET; qp->events_completed = 0; + qp->xrc_domain = qp_init_attr->qp_type == IBV_QPT_XRC ? 
+ qp_init_attr->xrc_domain : NULL; pthread_mutex_init(&qp->mutex, NULL); pthread_cond_init(&qp->cond, NULL); } @@ -543,3 +574,26 @@ int __ibv_detach_mcast(struct ibv_qp *qp return qp->context->ops.detach_mcast(qp, gid, lid); } default_symver(__ibv_detach_mcast, ibv_detach_mcast); + +struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context, + int fd, int oflag) +{ + struct ibv_xrc_domain *d; + + if (!context->more_ops) + return NULL; + + d = context->more_ops->open_xrc_domain(context, fd, oflag); + if (d) + d->context = context; + + return d; +} + +int ibv_close_xrc_domain(struct ibv_xrc_domain *d) +{ + if (!d->context->more_ops) + return 0; + + return d->context->more_ops->close_xrc_domain(d); +}