/* FreeBSD: contrib/ofed/libibverbs/cmd.c */
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
4  * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34
#include <config.h>

#include <alloca.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/param.h>

#include "ibverbs.h"
46
47 int ibv_cmd_get_context(struct ibv_context *context, struct ibv_get_context *cmd,
48                         size_t cmd_size, struct ibv_get_context_resp *resp,
49                         size_t resp_size)
50 {
51         if (abi_ver < IB_USER_VERBS_MIN_ABI_VERSION)
52                 return ENOSYS;
53
54         IBV_INIT_CMD_RESP(cmd, cmd_size, GET_CONTEXT, resp, resp_size);
55
56         if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
57                 return errno;
58
59         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
60
61         context->async_fd         = resp->async_fd;
62         context->num_comp_vectors = resp->num_comp_vectors;
63
64         return 0;
65 }
66
/*
 * Copy the base device-attribute fields out of a QUERY_DEVICE response
 * into the caller's struct ibv_device_attr.  The raw firmware version
 * is returned separately via @raw_fw_ver so callers can format the
 * fw_ver string themselves.  Shared by the plain and extended query
 * paths.
 */
static void copy_query_dev_fields(struct ibv_device_attr *device_attr,
                                  struct ibv_query_device_resp *resp,
                                  uint64_t *raw_fw_ver)
{
        *raw_fw_ver                             = resp->fw_ver;
        device_attr->node_guid                  = resp->node_guid;
        device_attr->sys_image_guid             = resp->sys_image_guid;
        device_attr->max_mr_size                = resp->max_mr_size;
        device_attr->page_size_cap              = resp->page_size_cap;
        device_attr->vendor_id                  = resp->vendor_id;
        device_attr->vendor_part_id             = resp->vendor_part_id;
        device_attr->hw_ver                     = resp->hw_ver;
        device_attr->max_qp                     = resp->max_qp;
        device_attr->max_qp_wr                  = resp->max_qp_wr;
        device_attr->device_cap_flags           = resp->device_cap_flags;
        device_attr->max_sge                    = resp->max_sge;
        device_attr->max_sge_rd                 = resp->max_sge_rd;
        device_attr->max_cq                     = resp->max_cq;
        device_attr->max_cqe                    = resp->max_cqe;
        device_attr->max_mr                     = resp->max_mr;
        device_attr->max_pd                     = resp->max_pd;
        device_attr->max_qp_rd_atom             = resp->max_qp_rd_atom;
        device_attr->max_ee_rd_atom             = resp->max_ee_rd_atom;
        device_attr->max_res_rd_atom            = resp->max_res_rd_atom;
        device_attr->max_qp_init_rd_atom        = resp->max_qp_init_rd_atom;
        device_attr->max_ee_init_rd_atom        = resp->max_ee_init_rd_atom;
        device_attr->atomic_cap                 = resp->atomic_cap;
        device_attr->max_ee                     = resp->max_ee;
        device_attr->max_rdd                    = resp->max_rdd;
        device_attr->max_mw                     = resp->max_mw;
        device_attr->max_raw_ipv6_qp            = resp->max_raw_ipv6_qp;
        device_attr->max_raw_ethy_qp            = resp->max_raw_ethy_qp;
        device_attr->max_mcast_grp              = resp->max_mcast_grp;
        device_attr->max_mcast_qp_attach        = resp->max_mcast_qp_attach;
        device_attr->max_total_mcast_qp_attach  = resp->max_total_mcast_qp_attach;
        device_attr->max_ah                     = resp->max_ah;
        device_attr->max_fmr                    = resp->max_fmr;
        device_attr->max_map_per_fmr            = resp->max_map_per_fmr;
        device_attr->max_srq                    = resp->max_srq;
        device_attr->max_srq_wr                 = resp->max_srq_wr;
        device_attr->max_srq_sge                = resp->max_srq_sge;
        device_attr->max_pkeys                  = resp->max_pkeys;
        device_attr->local_ca_ack_delay         = resp->local_ca_ack_delay;
        device_attr->phys_port_cnt              = resp->phys_port_cnt;
}
112
113 int ibv_cmd_query_device(struct ibv_context *context,
114                          struct ibv_device_attr *device_attr,
115                          uint64_t *raw_fw_ver,
116                          struct ibv_query_device *cmd, size_t cmd_size)
117 {
118         struct ibv_query_device_resp resp;
119
120         IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_DEVICE, &resp, sizeof resp);
121
122         if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
123                 return errno;
124
125         (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
126
127         memset(device_attr->fw_ver, 0, sizeof device_attr->fw_ver);
128         copy_query_dev_fields(device_attr, &resp, raw_fw_ver);
129
130         return 0;
131 }
132
/*
 * Extended device query (QUERY_DEVICE_EX).
 *
 * Uses the extensible-command format: the caller passes both the core
 * and full sizes of the command and response so old kernels and old
 * callers can interoperate.  Each extended capability field is copied
 * only when BOTH the caller's struct is large enough to hold it
 * (attr_size) AND the kernel actually reported it (resp->response_length);
 * everything past the base attributes is zeroed first so untouched
 * fields read as zero.
 *
 * Returns 0 on success, EINVAL for bad input/undersized buffers, or
 * errno from the failed write().
 */
int ibv_cmd_query_device_ex(struct ibv_context *context,
                            const struct ibv_query_device_ex_input *input,
                            struct ibv_device_attr_ex *attr, size_t attr_size,
                            uint64_t *raw_fw_ver,
                            struct ibv_query_device_ex *cmd,
                            size_t cmd_core_size,
                            size_t cmd_size,
                            struct ibv_query_device_resp_ex *resp,
                            size_t resp_core_size,
                            size_t resp_size)
{
        int err;

        /* No input comp_mask bits are defined yet. */
        if (input && input->comp_mask)
                return EINVAL;

        /* The caller's attr must at least reach comp_mask ... */
        if (attr_size < offsetof(struct ibv_device_attr_ex, comp_mask) +
                        sizeof(attr->comp_mask))
                return EINVAL;

        /* ... and the response buffer must at least reach response_length. */
        if (resp_core_size < offsetof(struct ibv_query_device_resp_ex,
                                      response_length) +
                             sizeof(resp->response_length))
                return EINVAL;

        IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
                               QUERY_DEVICE_EX, resp, resp_core_size,
                               resp_size);
        cmd->comp_mask = 0;
        cmd->reserved = 0;
        memset(attr->orig_attr.fw_ver, 0, sizeof(attr->orig_attr.fw_ver));
        /* Zero everything after the base attrs so ungated fields stay 0. */
        memset(&attr->comp_mask, 0, attr_size - sizeof(attr->orig_attr));
        err = write(context->cmd_fd, cmd, cmd_size);
        if (err != cmd_size)
                return errno;

        (void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
        copy_query_dev_fields(&attr->orig_attr, &resp->base, raw_fw_ver);
        /* Report back supported comp_mask bits. For now no comp_mask bit is
         * defined */
        attr->comp_mask = resp->comp_mask & 0;
        /* On-demand paging capabilities. */
        if (attr_size >= offsetof(struct ibv_device_attr_ex, odp_caps) +
                         sizeof(attr->odp_caps)) {
                if (resp->response_length >=
                    offsetof(struct ibv_query_device_resp_ex, odp_caps) +
                    sizeof(resp->odp_caps)) {
                        attr->odp_caps.general_caps = resp->odp_caps.general_caps;
                        attr->odp_caps.per_transport_caps.rc_odp_caps =
                                resp->odp_caps.per_transport_caps.rc_odp_caps;
                        attr->odp_caps.per_transport_caps.uc_odp_caps =
                                resp->odp_caps.per_transport_caps.uc_odp_caps;
                        attr->odp_caps.per_transport_caps.ud_odp_caps =
                                resp->odp_caps.per_transport_caps.ud_odp_caps;
                }
        }

        /* Completion timestamp mask. */
        if (attr_size >= offsetof(struct ibv_device_attr_ex,
                                  completion_timestamp_mask) +
                         sizeof(attr->completion_timestamp_mask)) {
                if (resp->response_length >=
                    offsetof(struct ibv_query_device_resp_ex, timestamp_mask) +
                    sizeof(resp->timestamp_mask))
                        attr->completion_timestamp_mask = resp->timestamp_mask;
        }

        /* HCA core clock frequency. */
        if (attr_size >= offsetof(struct ibv_device_attr_ex, hca_core_clock) +
                         sizeof(attr->hca_core_clock)) {
                if (resp->response_length >=
                    offsetof(struct ibv_query_device_resp_ex, hca_core_clock) +
                    sizeof(resp->hca_core_clock))
                        attr->hca_core_clock = resp->hca_core_clock;
        }

        /* Extended device capability flags. */
        if (attr_size >= offsetof(struct ibv_device_attr_ex, device_cap_flags_ex) +
                         sizeof(attr->device_cap_flags_ex)) {
                if (resp->response_length >=
                    offsetof(struct ibv_query_device_resp_ex, device_cap_flags_ex) +
                    sizeof(resp->device_cap_flags_ex))
                        attr->device_cap_flags_ex = resp->device_cap_flags_ex;
        }

        /* RSS capabilities. */
        if (attr_size >= offsetof(struct ibv_device_attr_ex, rss_caps) +
                         sizeof(attr->rss_caps)) {
                if (resp->response_length >=
                    offsetof(struct ibv_query_device_resp_ex, rss_caps) +
                    sizeof(resp->rss_caps)) {
                        attr->rss_caps.supported_qpts = resp->rss_caps.supported_qpts;
                        attr->rss_caps.max_rwq_indirection_tables = resp->rss_caps.max_rwq_indirection_tables;
                        attr->rss_caps.max_rwq_indirection_table_size = resp->rss_caps.max_rwq_indirection_table_size;
                }
        }

        /* Maximum work-queues of type RQ. */
        if (attr_size >= offsetof(struct ibv_device_attr_ex, max_wq_type_rq) +
                         sizeof(attr->max_wq_type_rq)) {
                if (resp->response_length >=
                    offsetof(struct ibv_query_device_resp_ex, max_wq_type_rq) +
                    sizeof(resp->max_wq_type_rq))
                        attr->max_wq_type_rq = resp->max_wq_type_rq;
        }

        /* Raw packet capabilities. */
        if (attr_size >= offsetof(struct ibv_device_attr_ex, raw_packet_caps) +
                         sizeof(attr->raw_packet_caps)) {
                if (resp->response_length >=
                    offsetof(struct ibv_query_device_resp_ex, raw_packet_caps) +
                    sizeof(resp->raw_packet_caps))
                        attr->raw_packet_caps = resp->raw_packet_caps;
        }

        return 0;
}
243
244 int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
245                        struct ibv_port_attr *port_attr,
246                        struct ibv_query_port *cmd, size_t cmd_size)
247 {
248         struct ibv_query_port_resp resp;
249
250         IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_PORT, &resp, sizeof resp);
251         cmd->port_num = port_num;
252         memset(cmd->reserved, 0, sizeof cmd->reserved);
253
254         if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
255                 return errno;
256
257         (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
258
259         port_attr->state           = resp.state;
260         port_attr->max_mtu         = resp.max_mtu;
261         port_attr->active_mtu      = resp.active_mtu;
262         port_attr->gid_tbl_len     = resp.gid_tbl_len;
263         port_attr->port_cap_flags  = resp.port_cap_flags;
264         port_attr->max_msg_sz      = resp.max_msg_sz;
265         port_attr->bad_pkey_cntr   = resp.bad_pkey_cntr;
266         port_attr->qkey_viol_cntr  = resp.qkey_viol_cntr;
267         port_attr->pkey_tbl_len    = resp.pkey_tbl_len;
268         port_attr->lid             = resp.lid;
269         port_attr->sm_lid          = resp.sm_lid;
270         port_attr->lmc             = resp.lmc;
271         port_attr->max_vl_num      = resp.max_vl_num;
272         port_attr->sm_sl           = resp.sm_sl;
273         port_attr->subnet_timeout  = resp.subnet_timeout;
274         port_attr->init_type_reply = resp.init_type_reply;
275         port_attr->active_width    = resp.active_width;
276         port_attr->active_speed    = resp.active_speed;
277         port_attr->phys_state      = resp.phys_state;
278         port_attr->link_layer      = resp.link_layer;
279
280         return 0;
281 }
282
283 int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
284                      struct ibv_alloc_pd *cmd, size_t cmd_size,
285                      struct ibv_alloc_pd_resp *resp, size_t resp_size)
286 {
287         IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_PD, resp, resp_size);
288
289         if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
290                 return errno;
291
292         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
293
294         pd->handle  = resp->pd_handle;
295         pd->context = context;
296
297         return 0;
298 }
299
300 int ibv_cmd_dealloc_pd(struct ibv_pd *pd)
301 {
302         struct ibv_dealloc_pd cmd;
303
304         IBV_INIT_CMD(&cmd, sizeof cmd, DEALLOC_PD);
305         cmd.pd_handle = pd->handle;
306
307         if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
308                 return errno;
309
310         return 0;
311 }
312
313 int ibv_cmd_open_xrcd(struct ibv_context *context, struct verbs_xrcd *xrcd,
314                       int vxrcd_size,
315                       struct ibv_xrcd_init_attr *attr,
316                       struct ibv_open_xrcd *cmd, size_t cmd_size,
317                       struct ibv_open_xrcd_resp *resp, size_t resp_size)
318 {
319         IBV_INIT_CMD_RESP(cmd, cmd_size, OPEN_XRCD, resp, resp_size);
320
321         if (attr->comp_mask >= IBV_XRCD_INIT_ATTR_RESERVED)
322                 return ENOSYS;
323
324         if (!(attr->comp_mask & IBV_XRCD_INIT_ATTR_FD) ||
325             !(attr->comp_mask & IBV_XRCD_INIT_ATTR_OFLAGS))
326                 return EINVAL;
327
328         cmd->fd = attr->fd;
329         cmd->oflags = attr->oflags;
330         if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
331                 return errno;
332
333         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
334
335         xrcd->xrcd.context = context;
336         xrcd->comp_mask = 0;
337         if (vext_field_avail(struct verbs_xrcd, handle, vxrcd_size)) {
338                 xrcd->comp_mask = VERBS_XRCD_HANDLE;
339                 xrcd->handle  = resp->xrcd_handle;
340         }
341
342         return 0;
343 }
344
345 int ibv_cmd_close_xrcd(struct verbs_xrcd *xrcd)
346 {
347         struct ibv_close_xrcd cmd;
348
349         IBV_INIT_CMD(&cmd, sizeof cmd, CLOSE_XRCD);
350         cmd.xrcd_handle = xrcd->handle;
351
352         if (write(xrcd->xrcd.context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
353                 return errno;
354
355         return 0;
356 }
357
358 int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
359                    uint64_t hca_va, int access,
360                    struct ibv_mr *mr, struct ibv_reg_mr *cmd,
361                    size_t cmd_size,
362                    struct ibv_reg_mr_resp *resp, size_t resp_size)
363 {
364
365         IBV_INIT_CMD_RESP(cmd, cmd_size, REG_MR, resp, resp_size);
366
367         cmd->start        = (uintptr_t) addr;
368         cmd->length       = length;
369         cmd->hca_va       = hca_va;
370         cmd->pd_handle    = pd->handle;
371         cmd->access_flags = access;
372
373         if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
374                 return errno;
375
376         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
377
378         mr->handle  = resp->mr_handle;
379         mr->lkey    = resp->lkey;
380         mr->rkey    = resp->rkey;
381         mr->context = pd->context;
382
383         return 0;
384 }
385
386 int ibv_cmd_rereg_mr(struct ibv_mr *mr, uint32_t flags, void *addr,
387                      size_t length, uint64_t hca_va, int access,
388                      struct ibv_pd *pd, struct ibv_rereg_mr *cmd,
389                      size_t cmd_sz, struct ibv_rereg_mr_resp *resp,
390                      size_t resp_sz)
391 {
392         IBV_INIT_CMD_RESP(cmd, cmd_sz, REREG_MR, resp, resp_sz);
393
394         cmd->mr_handle    = mr->handle;
395         cmd->flags        = flags;
396         cmd->start        = (uintptr_t)addr;
397         cmd->length       = length;
398         cmd->hca_va       = hca_va;
399         cmd->pd_handle    = (flags & IBV_REREG_MR_CHANGE_PD) ? pd->handle : 0;
400         cmd->access_flags = access;
401
402         if (write(mr->context->cmd_fd, cmd, cmd_sz) != cmd_sz)
403                 return errno;
404
405         (void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_sz);
406
407         mr->lkey    = resp->lkey;
408         mr->rkey    = resp->rkey;
409         if (flags & IBV_REREG_MR_CHANGE_PD)
410                 mr->context = pd->context;
411
412         return 0;
413 }
414
415 int ibv_cmd_dereg_mr(struct ibv_mr *mr)
416 {
417         struct ibv_dereg_mr cmd;
418
419         IBV_INIT_CMD(&cmd, sizeof cmd, DEREG_MR);
420         cmd.mr_handle = mr->handle;
421
422         if (write(mr->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
423                 return errno;
424
425         return 0;
426 }
427
428 int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type,
429                      struct ibv_mw *mw, struct ibv_alloc_mw *cmd,
430                      size_t cmd_size,
431                      struct ibv_alloc_mw_resp *resp, size_t resp_size)
432 {
433         IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_MW, resp, resp_size);
434         cmd->pd_handle  = pd->handle;
435         cmd->mw_type    = type;
436         memset(cmd->reserved, 0, sizeof(cmd->reserved));
437
438         if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
439                 return errno;
440
441         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
442
443         mw->context = pd->context;
444         mw->pd      = pd;
445         mw->rkey    = resp->rkey;
446         mw->handle  = resp->mw_handle;
447         mw->type    = type;
448
449         return 0;
450 }
451
452 int ibv_cmd_dealloc_mw(struct ibv_mw *mw,
453                        struct ibv_dealloc_mw *cmd, size_t cmd_size)
454 {
455         IBV_INIT_CMD(cmd, cmd_size, DEALLOC_MW);
456         cmd->mw_handle = mw->handle;
457         cmd->reserved = 0;
458
459         if (write(mw->context->cmd_fd, cmd, cmd_size) != cmd_size)
460                 return errno;
461
462         return 0;
463 }
464
465 int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
466                       struct ibv_comp_channel *channel,
467                       int comp_vector, struct ibv_cq *cq,
468                       struct ibv_create_cq *cmd, size_t cmd_size,
469                       struct ibv_create_cq_resp *resp, size_t resp_size)
470 {
471         IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_CQ, resp, resp_size);
472         cmd->user_handle   = (uintptr_t) cq;
473         cmd->cqe           = cqe;
474         cmd->comp_vector   = comp_vector;
475         cmd->comp_channel  = channel ? channel->fd : -1;
476         cmd->reserved      = 0;
477
478         if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
479                 return errno;
480
481         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
482
483         cq->handle  = resp->cq_handle;
484         cq->cqe     = resp->cqe;
485         cq->context = context;
486
487         return 0;
488 }
489
/*
 * Create an extended completion queue via the extensible CREATE_CQ_EX
 * command.  Validates the caller's comp_mask/flags against the bits
 * this library knows, maps the timestamp wc_flag onto the kernel flag,
 * and fills @cq with the kernel handle and actual CQE count.
 *
 * Returns 0 on success, EINVAL for unknown comp_mask bits, EOPNOTSUPP
 * for unknown creation flags, or errno from the failed write().
 */
int ibv_cmd_create_cq_ex(struct ibv_context *context,
                         struct ibv_cq_init_attr_ex *cq_attr,
                         struct ibv_cq_ex *cq,
                         struct ibv_create_cq_ex *cmd,
                         size_t cmd_core_size,
                         size_t cmd_size,
                         struct ibv_create_cq_resp_ex *resp,
                         size_t resp_core_size,
                         size_t resp_size)
{
        int err;

        /* Zero the core command before the header macro fills it in. */
        memset(cmd, 0, cmd_core_size);
        IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, CREATE_CQ_EX, resp,
                               resp_core_size, resp_size);

        /* Reject comp_mask bits beyond those this library defines. */
        if (cq_attr->comp_mask & ~(IBV_CQ_INIT_ATTR_MASK_RESERVED - 1))
                return EINVAL;

        cmd->user_handle   = (uintptr_t)cq;
        cmd->cqe           = cq_attr->cqe;
        cmd->comp_vector   = cq_attr->comp_vector;
        /* -1 means "no completion channel". */
        cmd->comp_channel  = cq_attr->channel ? cq_attr->channel->fd : -1;
        cmd->comp_mask = 0;

        /* Only touch cmd->flags when the caller's command struct is new
         * enough to contain the field. */
        if (cmd_core_size >= offsetof(struct ibv_create_cq_ex, flags) +
            sizeof(cmd->flags)) {
                if ((cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) &&
                    (cq_attr->flags & ~(IBV_CREATE_CQ_ATTR_RESERVED - 1)))
                        return EOPNOTSUPP;

                if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
                        cmd->flags |= IBV_CREATE_CQ_EX_KERNEL_FLAG_COMPLETION_TIMESTAMP;
        }

        err = write(context->cmd_fd, cmd, cmd_size);
        if (err != cmd_size)
                return errno;

        (void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

        cq->handle  = resp->base.cq_handle;
        cq->cqe     = resp->base.cqe;
        cq->context = context;

        return 0;
}
537
538 int ibv_cmd_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
539 {
540         struct ibv_poll_cq       cmd;
541         struct ibv_poll_cq_resp *resp;
542         int                      i;
543         int                      rsize;
544         int                      ret;
545
546         rsize = sizeof *resp + ne * sizeof(struct ibv_kern_wc);
547         resp  = malloc(rsize);
548         if (!resp)
549                 return -1;
550
551         IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POLL_CQ, resp, rsize);
552         cmd.cq_handle = ibcq->handle;
553         cmd.ne        = ne;
554
555         if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) {
556                 ret = -1;
557                 goto out;
558         }
559
560         (void) VALGRIND_MAKE_MEM_DEFINED(resp, rsize);
561
562         for (i = 0; i < resp->count; i++) {
563                 wc[i].wr_id          = resp->wc[i].wr_id;
564                 wc[i].status         = resp->wc[i].status;
565                 wc[i].opcode         = resp->wc[i].opcode;
566                 wc[i].vendor_err     = resp->wc[i].vendor_err;
567                 wc[i].byte_len       = resp->wc[i].byte_len;
568                 wc[i].imm_data       = resp->wc[i].imm_data;
569                 wc[i].qp_num         = resp->wc[i].qp_num;
570                 wc[i].src_qp         = resp->wc[i].src_qp;
571                 wc[i].wc_flags       = resp->wc[i].wc_flags;
572                 wc[i].pkey_index     = resp->wc[i].pkey_index;
573                 wc[i].slid           = resp->wc[i].slid;
574                 wc[i].sl             = resp->wc[i].sl;
575                 wc[i].dlid_path_bits = resp->wc[i].dlid_path_bits;
576         }
577
578         ret = resp->count;
579
580 out:
581         free(resp);
582         return ret;
583 }
584
585 int ibv_cmd_req_notify_cq(struct ibv_cq *ibcq, int solicited_only)
586 {
587         struct ibv_req_notify_cq cmd;
588
589         IBV_INIT_CMD(&cmd, sizeof cmd, REQ_NOTIFY_CQ);
590         cmd.cq_handle = ibcq->handle;
591         cmd.solicited = !!solicited_only;
592
593         if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
594                 return errno;
595
596         return 0;
597 }
598
599 int ibv_cmd_resize_cq(struct ibv_cq *cq, int cqe,
600                       struct ibv_resize_cq *cmd, size_t cmd_size,
601                       struct ibv_resize_cq_resp *resp, size_t resp_size)
602 {
603         IBV_INIT_CMD_RESP(cmd, cmd_size, RESIZE_CQ, resp, resp_size);
604         cmd->cq_handle = cq->handle;
605         cmd->cqe       = cqe;
606
607         if (write(cq->context->cmd_fd, cmd, cmd_size) != cmd_size)
608                 return errno;
609
610         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
611
612         cq->cqe = resp->cqe;
613
614         return 0;
615 }
616
/*
 * Destroy a completion queue via the DESTROY_CQ command, then block
 * until every completion and async event the kernel reported for this
 * CQ has been acknowledged by the application (the event-handling path
 * signals cq->cond as it updates the *_completed counters).
 *
 * Returns 0 on success or errno from the failed write().
 */
int ibv_cmd_destroy_cq(struct ibv_cq *cq)
{
        struct ibv_destroy_cq      cmd;
        struct ibv_destroy_cq_resp resp;

        IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_CQ, &resp, sizeof resp);
        cmd.cq_handle = cq->handle;
        cmd.reserved  = 0;

        if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
                return errno;

        (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

        /* Wait for outstanding events to be acked before the CQ memory
         * can safely go away. */
        pthread_mutex_lock(&cq->mutex);
        while (cq->comp_events_completed  != resp.comp_events_reported ||
               cq->async_events_completed != resp.async_events_reported)
                pthread_cond_wait(&cq->cond, &cq->mutex);
        pthread_mutex_unlock(&cq->mutex);

        return 0;
}
639
/*
 * Create a shared receive queue via the CREATE_SRQ command and record
 * the kernel handle in @srq.  For ABI versions newer than 5 the actual
 * max_wr/max_sge granted by the kernel are copied back into @attr.
 *
 * Returns 0 on success or errno from the failed write().
 */
int ibv_cmd_create_srq(struct ibv_pd *pd,
                       struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
                       struct ibv_create_srq *cmd, size_t cmd_size,
                       struct ibv_create_srq_resp *resp, size_t resp_size)
{
        IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_SRQ, resp, resp_size);
        cmd->user_handle = (uintptr_t) srq;
        cmd->pd_handle   = pd->handle;
        cmd->max_wr      = attr->attr.max_wr;
        cmd->max_sge     = attr->attr.max_sge;
        cmd->srq_limit   = attr->attr.srq_limit;

        if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
                return errno;

        (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

        srq->handle  = resp->srq_handle;
        srq->context = pd->context;

        if (abi_ver > 5) {
                attr->attr.max_wr = resp->max_wr;
                attr->attr.max_sge = resp->max_sge;
        } else {
                /* ABI <= 5: the kernel response lacks the max_wr/max_sge
                 * fields, so the driver-specific payload sits right after
                 * the smaller v5 header.  Shift it to where callers of the
                 * current layout expect it; attr is left unchanged.
                 * NOTE(review): relies on resp_size >= sizeof *resp —
                 * presumably guaranteed by the driver callers; verify. */
                struct ibv_create_srq_resp_v5 *resp_v5 =
                        (struct ibv_create_srq_resp_v5 *) resp;

                memmove((void *) resp + sizeof *resp,
                        (void *) resp_v5 + sizeof *resp_v5,
                        resp_size - sizeof *resp);
        }

        return 0;
}
674
/*
 * Create an extended (possibly XRC) shared receive queue via the
 * CREATE_XSRQ command.  A PD is mandatory; for XRC SRQs the caller
 * must also supply a CQ alongside the XRCD.  Extended bookkeeping
 * fields (srq_num, xrcd, cq) are only recorded when the caller's
 * struct verbs_srq is large enough to hold them.
 *
 * Returns 0 on success, ENOSYS for unknown comp_mask bits, EINVAL for
 * missing mandatory attributes, or errno from the failed write().
 */
int ibv_cmd_create_srq_ex(struct ibv_context *context,
                          struct verbs_srq *srq, int vsrq_sz,
                          struct ibv_srq_init_attr_ex *attr_ex,
                          struct ibv_create_xsrq *cmd, size_t cmd_size,
                          struct ibv_create_srq_resp *resp, size_t resp_size)
{
        struct verbs_xrcd *vxrcd = NULL;

        IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_XSRQ, resp, resp_size);

        /* Reject comp_mask bits this library does not understand. */
        if (attr_ex->comp_mask >= IBV_SRQ_INIT_ATTR_RESERVED)
                return ENOSYS;

        /* A protection domain is mandatory. */
        if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_PD))
                return EINVAL;

        cmd->user_handle = (uintptr_t) srq;
        cmd->pd_handle   = attr_ex->pd->handle;
        cmd->max_wr      = attr_ex->attr.max_wr;
        cmd->max_sge     = attr_ex->attr.max_sge;
        cmd->srq_limit   = attr_ex->attr.srq_limit;

        cmd->srq_type = (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ?
                        attr_ex->srq_type : IBV_SRQT_BASIC;
        /* XRC SRQs additionally need a CQ to deliver completions. */
        if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_XRCD) {
                if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_CQ))
                        return EINVAL;

                vxrcd = container_of(attr_ex->xrcd, struct verbs_xrcd, xrcd);
                cmd->xrcd_handle = vxrcd->handle;
                cmd->cq_handle   = attr_ex->cq->handle;
        }

        if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
                return errno;

        (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

        srq->srq.handle           = resp->srq_handle;
        srq->srq.context          = context;
        srq->srq.srq_context      = attr_ex->srq_context;
        srq->srq.pd               = attr_ex->pd;
        srq->srq.events_completed = 0;
        pthread_mutex_init(&srq->srq.mutex, NULL);
        pthread_cond_init(&srq->srq.cond, NULL);

        /*
         * check that the last field is available.
         * If it is than all the others exist as well
         */
        if (vext_field_avail(struct verbs_srq, srq_num, vsrq_sz)) {
                srq->comp_mask = IBV_SRQ_INIT_ATTR_TYPE;
                srq->srq_type = (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ?
                                attr_ex->srq_type : IBV_SRQT_BASIC;
                if (srq->srq_type == IBV_SRQT_XRC) {
                        srq->comp_mask |= VERBS_SRQ_NUM;
                        srq->srq_num = resp->srqn;
                }
                if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_XRCD) {
                        srq->comp_mask |= VERBS_SRQ_XRCD;
                        srq->xrcd = vxrcd;
                }
                if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_CQ) {
                        srq->comp_mask |= VERBS_SRQ_CQ;
                        srq->cq = attr_ex->cq;
                }
        }

        /* Report back the sizes the kernel actually granted. */
        attr_ex->attr.max_wr = resp->max_wr;
        attr_ex->attr.max_sge = resp->max_sge;

        return 0;
}
748
749
/*
 * Legacy (ABI version 3) variant of MODIFY_SRQ: the v3 command layout
 * differs from the current one, so the command is rebuilt on the stack
 * and the driver-specific trailer from @new_cmd is carried over.
 *
 * NOTE(review): assumes new_cmd_size >= sizeof *new_cmd (the driver
 * always allocates at least the base command) — verify against callers.
 * The memcpy of driver_data deliberately happens before IBV_INIT_CMD
 * fills in the header of the freshly alloca'd buffer.
 *
 * Returns 0 on success or errno from the failed write().
 */
static int ibv_cmd_modify_srq_v3(struct ibv_srq *srq,
                                 struct ibv_srq_attr *srq_attr,
                                 int srq_attr_mask,
                                 struct ibv_modify_srq *new_cmd,
                                 size_t new_cmd_size)
{
        struct ibv_modify_srq_v3 *cmd;
        size_t cmd_size;

        cmd_size = sizeof *cmd + new_cmd_size - sizeof *new_cmd;
        cmd      = alloca(cmd_size);
        memcpy(cmd->driver_data, new_cmd->driver_data, new_cmd_size - sizeof *new_cmd);

        IBV_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);

        cmd->srq_handle = srq->handle;
        cmd->attr_mask  = srq_attr_mask;
        cmd->max_wr     = srq_attr->max_wr;
        cmd->srq_limit  = srq_attr->srq_limit;
        cmd->max_sge    = 0;
        cmd->reserved   = 0;

        if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
                return errno;

        return 0;
}
777
778 int ibv_cmd_modify_srq(struct ibv_srq *srq,
779                        struct ibv_srq_attr *srq_attr,
780                        int srq_attr_mask,
781                        struct ibv_modify_srq *cmd, size_t cmd_size)
782 {
783         if (abi_ver == 3)
784                 return ibv_cmd_modify_srq_v3(srq, srq_attr, srq_attr_mask,
785                                              cmd, cmd_size);
786
787         IBV_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);
788
789         cmd->srq_handle = srq->handle;
790         cmd->attr_mask  = srq_attr_mask;
791         cmd->max_wr     = srq_attr->max_wr;
792         cmd->srq_limit  = srq_attr->srq_limit;
793
794         if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
795                 return errno;
796
797         return 0;
798 }
799
800 int ibv_cmd_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
801                       struct ibv_query_srq *cmd, size_t cmd_size)
802 {
803         struct ibv_query_srq_resp resp;
804
805         IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_SRQ, &resp, sizeof resp);
806         cmd->srq_handle = srq->handle;
807         cmd->reserved   = 0;
808
809         if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
810                 return errno;
811
812         (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
813
814         srq_attr->max_wr    = resp.max_wr;
815         srq_attr->max_sge   = resp.max_sge;
816         srq_attr->srq_limit = resp.srq_limit;
817
818         return 0;
819 }
820
821 int ibv_cmd_destroy_srq(struct ibv_srq *srq)
822 {
823         struct ibv_destroy_srq      cmd;
824         struct ibv_destroy_srq_resp resp;
825
826         IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_SRQ, &resp, sizeof resp);
827         cmd.srq_handle = srq->handle;
828         cmd.reserved   = 0;
829
830         if (write(srq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
831                 return errno;
832
833         (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
834
835         pthread_mutex_lock(&srq->mutex);
836         while (srq->events_completed != resp.events_reported)
837                 pthread_cond_wait(&srq->cond, &srq->mutex);
838         pthread_mutex_unlock(&srq->mutex);
839
840         return 0;
841 }
842
/*
 * Fill the common (legacy) portion of a create-QP command from the
 * extended init attributes.
 *
 * NOTE(review): vxrcd is received by value, so the container_of()
 * assignment below is never visible to the caller; a caller that
 * wants the verbs_xrcd for its own bookkeeping must resolve it
 * itself before or after this call.
 *
 * Returns 0 on success or EINVAL on an inconsistent attribute set.
 */
static int create_qp_ex_common(struct verbs_qp *qp,
                               struct ibv_qp_init_attr_ex *qp_attr,
                               struct verbs_xrcd *vxrcd,
                               struct ibv_create_qp_common *cmd)
{
        cmd->user_handle = (uintptr_t)qp;

        if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_XRCD) {
                /* XRC target QP: the XRCD handle goes in the pd_handle slot */
                vxrcd = container_of(qp_attr->xrcd, struct verbs_xrcd, xrcd);
                cmd->pd_handle  = vxrcd->handle;
        } else {
                if (!(qp_attr->comp_mask & IBV_QP_INIT_ATTR_PD))
                        return EINVAL;

                cmd->pd_handle  = qp_attr->pd->handle;
                if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_IND_TABLE) {
                        /*
                         * With an indirection table the QP may not carry its
                         * own receive queue, receive CQ or SRQ.
                         */
                        if (cmd->max_recv_wr || cmd->max_recv_sge ||
                            cmd->recv_cq_handle || qp_attr->srq)
                                return EINVAL;

                        /* send_cq is optional */
                        if (qp_attr->cap.max_send_wr)
                                cmd->send_cq_handle = qp_attr->send_cq->handle;
                } else {
                        cmd->send_cq_handle = qp_attr->send_cq->handle;

                        /* XRC senders have no receive side at all */
                        if (qp_attr->qp_type != IBV_QPT_XRC_SEND) {
                                cmd->recv_cq_handle = qp_attr->recv_cq->handle;
                                cmd->srq_handle = qp_attr->srq ? qp_attr->srq->handle :
                                                                 0;
                        }
                }
        }

        cmd->max_send_wr     = qp_attr->cap.max_send_wr;
        cmd->max_recv_wr     = qp_attr->cap.max_recv_wr;
        cmd->max_send_sge    = qp_attr->cap.max_send_sge;
        cmd->max_recv_sge    = qp_attr->cap.max_recv_sge;
        cmd->max_inline_data = qp_attr->cap.max_inline_data;
        cmd->sq_sig_all      = qp_attr->sq_sig_all;
        cmd->qp_type         = qp_attr->qp_type;
        cmd->is_srq          = !!qp_attr->srq;
        cmd->reserved        = 0;

        return 0;
}
889
/*
 * Shared tail of QP creation: copy the capabilities the kernel
 * actually granted back into qp_attr (only reported for ABI > 3)
 * and initialize the ibv_qp / verbs_qp bookkeeping from the
 * response.
 */
static void create_qp_handle_resp_common(struct ibv_context *context,
                                         struct verbs_qp *qp,
                                         struct ibv_qp_init_attr_ex *qp_attr,
                                         struct ibv_create_qp_resp *resp,
                                         struct verbs_xrcd *vxrcd,
                                         int vqp_sz)
{
        /* Granted capabilities are only present in the ABI > 3 response. */
        if (abi_ver > 3) {
                qp_attr->cap.max_recv_sge    = resp->max_recv_sge;
                qp_attr->cap.max_send_sge    = resp->max_send_sge;
                qp_attr->cap.max_recv_wr     = resp->max_recv_wr;
                qp_attr->cap.max_send_wr     = resp->max_send_wr;
                qp_attr->cap.max_inline_data = resp->max_inline_data;
        }

        qp->qp.handle           = resp->qp_handle;
        qp->qp.qp_num           = resp->qpn;
        qp->qp.context          = context;
        qp->qp.qp_context       = qp_attr->qp_context;
        qp->qp.pd               = qp_attr->pd;
        qp->qp.send_cq          = qp_attr->send_cq;
        qp->qp.recv_cq          = qp_attr->recv_cq;
        qp->qp.srq              = qp_attr->srq;
        qp->qp.qp_type          = qp_attr->qp_type;
        qp->qp.state            = IBV_QPS_RESET;
        qp->qp.events_completed = 0;
        pthread_mutex_init(&qp->qp.mutex, NULL);
        pthread_cond_init(&qp->qp.cond, NULL);

        /* Record the XRCD only if the caller's verbs_qp is large enough. */
        qp->comp_mask = 0;
        if (vext_field_avail(struct verbs_qp, xrcd, vqp_sz) &&
            (qp_attr->comp_mask & IBV_QP_INIT_ATTR_XRCD)) {
                qp->comp_mask |= VERBS_QP_XRCD;
                qp->xrcd = vxrcd;
        }
}
926
/* QP create flags that the CREATE_QP_EX2 path accepts from callers. */
enum {
        CREATE_QP_EX2_SUP_CREATE_FLAGS = IBV_QP_CREATE_BLOCK_SELF_MCAST_LB |
                                         IBV_QP_CREATE_SCATTER_FCS |
                                         IBV_QP_CREATE_CVLAN_STRIPPING,
};
932
933 int ibv_cmd_create_qp_ex2(struct ibv_context *context,
934                           struct verbs_qp *qp, int vqp_sz,
935                           struct ibv_qp_init_attr_ex *qp_attr,
936                           struct ibv_create_qp_ex *cmd,
937                           size_t cmd_core_size,
938                           size_t cmd_size,
939                           struct ibv_create_qp_resp_ex *resp,
940                           size_t resp_core_size,
941                           size_t resp_size)
942 {
943         struct verbs_xrcd *vxrcd = NULL;
944         int err;
945
946         if (qp_attr->comp_mask >= IBV_QP_INIT_ATTR_RESERVED)
947                 return EINVAL;
948
949         if (resp_core_size <
950             offsetof(struct ibv_create_qp_resp_ex, response_length) +
951             sizeof(resp->response_length))
952                 return EINVAL;
953
954         memset(cmd, 0, cmd_core_size);
955
956         IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, CREATE_QP_EX, resp,
957                                resp_core_size, resp_size);
958
959         err = create_qp_ex_common(qp, qp_attr, vxrcd, &cmd->base);
960         if (err)
961                 return err;
962
963         if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) {
964                 if (qp_attr->create_flags & ~CREATE_QP_EX2_SUP_CREATE_FLAGS)
965                         return EINVAL;
966                 if (cmd_core_size < offsetof(struct ibv_create_qp_ex, create_flags) +
967                                     sizeof(qp_attr->create_flags))
968                         return EINVAL;
969                 cmd->create_flags = qp_attr->create_flags;
970         }
971
972         if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_IND_TABLE) {
973                 if (cmd_core_size < offsetof(struct ibv_create_qp_ex, ind_tbl_handle) +
974                                     sizeof(cmd->ind_tbl_handle))
975                         return EINVAL;
976                 cmd->ind_tbl_handle = qp_attr->rwq_ind_tbl->ind_tbl_handle;
977                 cmd->comp_mask = IBV_CREATE_QP_EX_KERNEL_MASK_IND_TABLE;
978         }
979
980         err = write(context->cmd_fd, cmd, cmd_size);
981         if (err != cmd_size)
982                 return errno;
983
984         (void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
985
986         create_qp_handle_resp_common(context, qp, qp_attr, &resp->base, vxrcd,
987                                      vqp_sz);
988
989         return 0;
990 }
991
992 int ibv_cmd_create_qp_ex(struct ibv_context *context,
993                          struct verbs_qp *qp, int vqp_sz,
994                          struct ibv_qp_init_attr_ex *attr_ex,
995                          struct ibv_create_qp *cmd, size_t cmd_size,
996                          struct ibv_create_qp_resp *resp, size_t resp_size)
997 {
998         struct verbs_xrcd *vxrcd = NULL;
999         int err;
1000
1001         IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, resp, resp_size);
1002
1003         if (attr_ex->comp_mask > (IBV_QP_INIT_ATTR_XRCD | IBV_QP_INIT_ATTR_PD))
1004                 return ENOSYS;
1005
1006         err = create_qp_ex_common(qp, attr_ex, vxrcd,
1007                                   (struct ibv_create_qp_common *)&cmd->user_handle);
1008         if (err)
1009                 return err;
1010
1011         if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
1012                 return errno;
1013
1014         (void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1015
1016         if (abi_ver == 4) {
1017                 struct ibv_create_qp_resp_v4 *resp_v4 =
1018                         (struct ibv_create_qp_resp_v4 *)resp;
1019
1020                 memmove((void *)resp + sizeof *resp,
1021                         (void *)resp_v4 + sizeof *resp_v4,
1022                         resp_size - sizeof *resp);
1023         } else if (abi_ver <= 3) {
1024                 struct ibv_create_qp_resp_v3 *resp_v3 =
1025                         (struct ibv_create_qp_resp_v3 *)resp;
1026
1027                 memmove((void *)resp + sizeof *resp,
1028                         (void *)resp_v3 + sizeof *resp_v3,
1029                         resp_size - sizeof *resp);
1030         }
1031
1032         create_qp_handle_resp_common(context, qp, attr_ex, resp, vxrcd, vqp_sz);
1033
1034         return 0;
1035 }
1036
/*
 * Create a QP with the plain (non-extended) CREATE_QP command.
 *
 * Returns 0 on success or errno if the kernel write fails.
 */
int ibv_cmd_create_qp(struct ibv_pd *pd,
                      struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
                      struct ibv_create_qp *cmd, size_t cmd_size,
                      struct ibv_create_qp_resp *resp, size_t resp_size)
{
        IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, resp, resp_size);

        cmd->user_handle     = (uintptr_t) qp;
        cmd->pd_handle       = pd->handle;
        cmd->send_cq_handle  = attr->send_cq->handle;
        cmd->recv_cq_handle  = attr->recv_cq->handle;
        cmd->srq_handle      = attr->srq ? attr->srq->handle : 0;
        cmd->max_send_wr     = attr->cap.max_send_wr;
        cmd->max_recv_wr     = attr->cap.max_recv_wr;
        cmd->max_send_sge    = attr->cap.max_send_sge;
        cmd->max_recv_sge    = attr->cap.max_recv_sge;
        cmd->max_inline_data = attr->cap.max_inline_data;
        cmd->sq_sig_all      = attr->sq_sig_all;
        cmd->qp_type         = attr->qp_type;
        cmd->is_srq          = !!attr->srq;
        cmd->reserved        = 0;

        if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
                return errno;

        (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

        qp->handle                = resp->qp_handle;
        qp->qp_num                = resp->qpn;
        qp->context               = pd->context;

        /* Granted capabilities are only present in the ABI > 3 response. */
        if (abi_ver > 3) {
                attr->cap.max_recv_sge    = resp->max_recv_sge;
                attr->cap.max_send_sge    = resp->max_send_sge;
                attr->cap.max_recv_wr     = resp->max_recv_wr;
                attr->cap.max_send_wr     = resp->max_send_wr;
                attr->cap.max_inline_data = resp->max_inline_data;
        }

        /*
         * Older ABIs used a differently sized generic response struct;
         * relocate the provider-specific payload that follows it to the
         * offset implied by the current layout.
         */
        if (abi_ver == 4) {
                struct ibv_create_qp_resp_v4 *resp_v4 =
                        (struct ibv_create_qp_resp_v4 *) resp;

                memmove((void *) resp + sizeof *resp,
                        (void *) resp_v4 + sizeof *resp_v4,
                        resp_size - sizeof *resp);
        } else if (abi_ver <= 3) {
                struct ibv_create_qp_resp_v3 *resp_v3 =
                        (struct ibv_create_qp_resp_v3 *) resp;

                memmove((void *) resp + sizeof *resp,
                        (void *) resp_v3 + sizeof *resp_v3,
                        resp_size - sizeof *resp);
        }

        return 0;
}
1094
1095 int ibv_cmd_open_qp(struct ibv_context *context, struct verbs_qp *qp,
1096                     int vqp_sz,
1097                     struct ibv_qp_open_attr *attr,
1098                     struct ibv_open_qp *cmd, size_t cmd_size,
1099                     struct ibv_create_qp_resp *resp, size_t resp_size)
1100 {
1101         struct verbs_xrcd *xrcd;
1102         IBV_INIT_CMD_RESP(cmd, cmd_size, OPEN_QP, resp, resp_size);
1103
1104         if (attr->comp_mask >= IBV_QP_OPEN_ATTR_RESERVED)
1105                 return ENOSYS;
1106
1107         if (!(attr->comp_mask & IBV_QP_OPEN_ATTR_XRCD) ||
1108             !(attr->comp_mask & IBV_QP_OPEN_ATTR_NUM) ||
1109             !(attr->comp_mask & IBV_QP_OPEN_ATTR_TYPE))
1110                 return EINVAL;
1111
1112         xrcd = container_of(attr->xrcd, struct verbs_xrcd, xrcd);
1113         cmd->user_handle = (uintptr_t) qp;
1114         cmd->pd_handle   = xrcd->handle;
1115         cmd->qpn         = attr->qp_num;
1116         cmd->qp_type     = attr->qp_type;
1117
1118         if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
1119                 return errno;
1120
1121         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1122
1123         qp->qp.handle     = resp->qp_handle;
1124         qp->qp.context    = context;
1125         qp->qp.qp_context = attr->qp_context;
1126         qp->qp.pd         = NULL;
1127         qp->qp.send_cq    = NULL;
1128         qp->qp.recv_cq    = NULL;
1129         qp->qp.srq        = NULL;
1130         qp->qp.qp_num     = attr->qp_num;
1131         qp->qp.qp_type    = attr->qp_type;
1132         qp->qp.state      = IBV_QPS_UNKNOWN;
1133         qp->qp.events_completed = 0;
1134         pthread_mutex_init(&qp->qp.mutex, NULL);
1135         pthread_cond_init(&qp->qp.cond, NULL);
1136         qp->comp_mask = 0;
1137         if (vext_field_avail(struct verbs_qp, xrcd, vqp_sz)) {
1138                 qp->comp_mask = VERBS_QP_XRCD;
1139                 qp->xrcd         = xrcd;
1140         }
1141
1142         return 0;
1143 }
1144
/*
 * Query QP attributes via the QUERY_QP uverbs command and unpack the
 * kernel response into *attr and *init_attr.
 *
 * Returns 0 on success, EOPNOTSUPP for attribute masks this
 * non-extended command cannot express, or errno on a write failure.
 */
int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                     int attr_mask,
                     struct ibv_qp_init_attr *init_attr,
                     struct ibv_query_qp *cmd, size_t cmd_size)
{
        struct ibv_query_qp_resp resp;

        /*
         * Attribute bits above IBV_QP_DEST_QPN are only supported by
         * the extended command.
         */
        if (attr_mask & ~((IBV_QP_DEST_QPN << 1) - 1))
                return EOPNOTSUPP;

        IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_QP, &resp, sizeof resp);
        cmd->qp_handle = qp->handle;
        cmd->attr_mask = attr_mask;

        if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
                return errno;

        (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

        /* Scalar QP attributes. */
        attr->qkey                          = resp.qkey;
        attr->rq_psn                        = resp.rq_psn;
        attr->sq_psn                        = resp.sq_psn;
        attr->dest_qp_num                   = resp.dest_qp_num;
        attr->qp_access_flags               = resp.qp_access_flags;
        attr->pkey_index                    = resp.pkey_index;
        attr->alt_pkey_index                = resp.alt_pkey_index;
        attr->qp_state                      = resp.qp_state;
        attr->cur_qp_state                  = resp.cur_qp_state;
        attr->path_mtu                      = resp.path_mtu;
        attr->path_mig_state                = resp.path_mig_state;
        attr->sq_draining                   = resp.sq_draining;
        attr->max_rd_atomic                 = resp.max_rd_atomic;
        attr->max_dest_rd_atomic            = resp.max_dest_rd_atomic;
        attr->min_rnr_timer                 = resp.min_rnr_timer;
        attr->port_num                      = resp.port_num;
        attr->timeout                       = resp.timeout;
        attr->retry_cnt                     = resp.retry_cnt;
        attr->rnr_retry                     = resp.rnr_retry;
        attr->alt_port_num                  = resp.alt_port_num;
        attr->alt_timeout                   = resp.alt_timeout;
        attr->cap.max_send_wr               = resp.max_send_wr;
        attr->cap.max_recv_wr               = resp.max_recv_wr;
        attr->cap.max_send_sge              = resp.max_send_sge;
        attr->cap.max_recv_sge              = resp.max_recv_sge;
        attr->cap.max_inline_data           = resp.max_inline_data;

        /* Primary path address vector. */
        memcpy(attr->ah_attr.grh.dgid.raw, resp.dest.dgid, 16);
        attr->ah_attr.grh.flow_label        = resp.dest.flow_label;
        attr->ah_attr.dlid                  = resp.dest.dlid;
        attr->ah_attr.grh.sgid_index        = resp.dest.sgid_index;
        attr->ah_attr.grh.hop_limit         = resp.dest.hop_limit;
        attr->ah_attr.grh.traffic_class     = resp.dest.traffic_class;
        attr->ah_attr.sl                    = resp.dest.sl;
        attr->ah_attr.src_path_bits         = resp.dest.src_path_bits;
        attr->ah_attr.static_rate           = resp.dest.static_rate;
        attr->ah_attr.is_global             = resp.dest.is_global;
        attr->ah_attr.port_num              = resp.dest.port_num;

        /* Alternate path address vector. */
        memcpy(attr->alt_ah_attr.grh.dgid.raw, resp.alt_dest.dgid, 16);
        attr->alt_ah_attr.grh.flow_label    = resp.alt_dest.flow_label;
        attr->alt_ah_attr.dlid              = resp.alt_dest.dlid;
        attr->alt_ah_attr.grh.sgid_index    = resp.alt_dest.sgid_index;
        attr->alt_ah_attr.grh.hop_limit     = resp.alt_dest.hop_limit;
        attr->alt_ah_attr.grh.traffic_class = resp.alt_dest.traffic_class;
        attr->alt_ah_attr.sl                = resp.alt_dest.sl;
        attr->alt_ah_attr.src_path_bits     = resp.alt_dest.src_path_bits;
        attr->alt_ah_attr.static_rate       = resp.alt_dest.static_rate;
        attr->alt_ah_attr.is_global         = resp.alt_dest.is_global;
        attr->alt_ah_attr.port_num          = resp.alt_dest.port_num;

        /*
         * init_attr is rebuilt from the cached ibv_qp fields plus the
         * capabilities the kernel just reported.
         */
        init_attr->qp_context               = qp->qp_context;
        init_attr->send_cq                  = qp->send_cq;
        init_attr->recv_cq                  = qp->recv_cq;
        init_attr->srq                      = qp->srq;
        init_attr->qp_type                  = qp->qp_type;
        init_attr->cap.max_send_wr          = resp.max_send_wr;
        init_attr->cap.max_recv_wr          = resp.max_recv_wr;
        init_attr->cap.max_send_sge         = resp.max_send_sge;
        init_attr->cap.max_recv_sge         = resp.max_recv_sge;
        init_attr->cap.max_inline_data      = resp.max_inline_data;
        init_attr->sq_sig_all               = resp.sq_sig_all;

        return 0;
}
1233
/*
 * Copy the attributes selected by attr_mask from *attr into the
 * common portion of a modify-QP command. Command fields whose mask
 * bit is clear are left as-is (the kernel only consumes fields whose
 * mask bit is set).
 */
static void copy_modify_qp_fields(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                                  int attr_mask,
                                  struct ibv_modify_qp_common *cmd)
{
        cmd->qp_handle = qp->handle;
        cmd->attr_mask = attr_mask;

        if (attr_mask & IBV_QP_STATE)
                cmd->qp_state = attr->qp_state;
        if (attr_mask & IBV_QP_CUR_STATE)
                cmd->cur_qp_state = attr->cur_qp_state;
        if (attr_mask & IBV_QP_EN_SQD_ASYNC_NOTIFY)
                cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
        if (attr_mask & IBV_QP_ACCESS_FLAGS)
                cmd->qp_access_flags = attr->qp_access_flags;
        if (attr_mask & IBV_QP_PKEY_INDEX)
                cmd->pkey_index = attr->pkey_index;
        if (attr_mask & IBV_QP_PORT)
                cmd->port_num = attr->port_num;
        if (attr_mask & IBV_QP_QKEY)
                cmd->qkey = attr->qkey;

        /* Primary path address vector. */
        if (attr_mask & IBV_QP_AV) {
                memcpy(cmd->dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
                cmd->dest.flow_label = attr->ah_attr.grh.flow_label;
                cmd->dest.dlid = attr->ah_attr.dlid;
                cmd->dest.reserved = 0;
                cmd->dest.sgid_index = attr->ah_attr.grh.sgid_index;
                cmd->dest.hop_limit = attr->ah_attr.grh.hop_limit;
                cmd->dest.traffic_class = attr->ah_attr.grh.traffic_class;
                cmd->dest.sl = attr->ah_attr.sl;
                cmd->dest.src_path_bits = attr->ah_attr.src_path_bits;
                cmd->dest.static_rate = attr->ah_attr.static_rate;
                cmd->dest.is_global = attr->ah_attr.is_global;
                cmd->dest.port_num = attr->ah_attr.port_num;
        }

        if (attr_mask & IBV_QP_PATH_MTU)
                cmd->path_mtu = attr->path_mtu;
        if (attr_mask & IBV_QP_TIMEOUT)
                cmd->timeout = attr->timeout;
        if (attr_mask & IBV_QP_RETRY_CNT)
                cmd->retry_cnt = attr->retry_cnt;
        if (attr_mask & IBV_QP_RNR_RETRY)
                cmd->rnr_retry = attr->rnr_retry;
        if (attr_mask & IBV_QP_RQ_PSN)
                cmd->rq_psn = attr->rq_psn;
        if (attr_mask & IBV_QP_MAX_QP_RD_ATOMIC)
                cmd->max_rd_atomic = attr->max_rd_atomic;

        /* Alternate path: pkey/port/timeout plus its address vector. */
        if (attr_mask & IBV_QP_ALT_PATH) {
                cmd->alt_pkey_index = attr->alt_pkey_index;
                cmd->alt_port_num = attr->alt_port_num;
                cmd->alt_timeout = attr->alt_timeout;

                memcpy(cmd->alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
                cmd->alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
                cmd->alt_dest.dlid = attr->alt_ah_attr.dlid;
                cmd->alt_dest.reserved = 0;
                cmd->alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
                cmd->alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
                cmd->alt_dest.traffic_class =
                    attr->alt_ah_attr.grh.traffic_class;
                cmd->alt_dest.sl = attr->alt_ah_attr.sl;
                cmd->alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
                cmd->alt_dest.static_rate = attr->alt_ah_attr.static_rate;
                cmd->alt_dest.is_global = attr->alt_ah_attr.is_global;
                cmd->alt_dest.port_num = attr->alt_ah_attr.port_num;
        }

        if (attr_mask & IBV_QP_MIN_RNR_TIMER)
                cmd->min_rnr_timer = attr->min_rnr_timer;
        if (attr_mask & IBV_QP_SQ_PSN)
                cmd->sq_psn = attr->sq_psn;
        if (attr_mask & IBV_QP_MAX_DEST_RD_ATOMIC)
                cmd->max_dest_rd_atomic = attr->max_dest_rd_atomic;
        if (attr_mask & IBV_QP_PATH_MIG_STATE)
                cmd->path_mig_state = attr->path_mig_state;
        if (attr_mask & IBV_QP_DEST_QPN)
                cmd->dest_qp_num = attr->dest_qp_num;

        cmd->reserved[0] = cmd->reserved[1] = 0;
}
1317
1318 int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1319                       int attr_mask,
1320                       struct ibv_modify_qp *cmd, size_t cmd_size)
1321 {
1322         /*
1323          * Masks over IBV_QP_DEST_QPN are only supported by
1324          * ibv_cmd_modify_qp_ex.
1325          */
1326         if (attr_mask & ~((IBV_QP_DEST_QPN << 1) - 1))
1327                 return EOPNOTSUPP;
1328
1329         IBV_INIT_CMD(cmd, cmd_size, MODIFY_QP);
1330
1331         copy_modify_qp_fields(qp, attr, attr_mask, &cmd->base);
1332
1333         if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
1334                 return errno;
1335
1336         return 0;
1337 }
1338
/*
 * Extended modify-QP: like ibv_cmd_modify_qp() but using the
 * MODIFY_QP_EX command, which additionally understands attribute
 * bits above IBV_QP_DEST_QPN (currently IBV_QP_RATE_LIMIT).
 *
 * Returns 0 on success, EINVAL if the caller's command/response
 * buffers are too small for the requested attributes, or errno on a
 * write failure.
 */
int ibv_cmd_modify_qp_ex(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                         int attr_mask, struct ibv_modify_qp_ex *cmd,
                         size_t cmd_core_size, size_t cmd_size,
                         struct ibv_modify_qp_resp_ex *resp,
                         size_t resp_core_size, size_t resp_size)
{
        /* The response must at least be able to carry response_length. */
        if (resp_core_size < offsetof(struct ibv_modify_qp_resp_ex,
                             response_length) + sizeof(resp->response_length))
                return EINVAL;

        IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, MODIFY_QP_EX,
                               resp, resp_core_size, resp_size);

        copy_modify_qp_fields(qp, attr, attr_mask, &cmd->base);

        /* rate_limit lives past the common fields; verify it fits. */
        if (attr_mask & IBV_QP_RATE_LIMIT) {
                if (cmd_size >= offsetof(struct ibv_modify_qp_ex, rate_limit) +
                    sizeof(cmd->rate_limit))
                        cmd->rate_limit = attr->rate_limit;
                else
                        return EINVAL;
        }

        if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
                return errno;

        (void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

        return 0;
}
1369
/*
 * Post a list of send work requests through the POST_SEND uverbs
 * command. The ibv_send_wr list is flattened into an array of
 * kernel-format WRs followed by all scatter/gather entries in a
 * single stack-allocated command buffer.
 *
 * On error, *bad_wr points at the first WR that was not posted.
 * Returns 0 on success or errno if the kernel write fails.
 */
int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                      struct ibv_send_wr **bad_wr)
{
        struct ibv_post_send     *cmd;
        struct ibv_post_send_resp resp;
        struct ibv_send_wr       *i;
        struct ibv_kern_send_wr  *n, *tmp;
        struct ibv_sge           *s;
        unsigned                  wr_count = 0;
        unsigned                  sge_count = 0;
        int                       cmd_size;
        int                       ret = 0;

        /* Size the command: header + one kernel WR per WR + all SGEs. */
        for (i = wr; i; i = i->next) {
                wr_count++;
                sge_count += i->num_sge;
        }

        cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
        cmd  = alloca(cmd_size);

        IBV_INIT_CMD_RESP(cmd, cmd_size, POST_SEND, &resp, sizeof resp);
        cmd->qp_handle = ibqp->handle;
        cmd->wr_count  = wr_count;
        cmd->sge_count = sge_count;
        cmd->wqe_size  = sizeof *n;

        /* Kernel WR array follows the header; SGE array follows the WRs. */
        n = (struct ibv_kern_send_wr *) ((void *) cmd + sizeof *cmd);
        s = (struct ibv_sge *) (n + wr_count);

        tmp = n;
        for (i = wr; i; i = i->next) {
                tmp->wr_id      = i->wr_id;
                tmp->num_sge    = i->num_sge;
                tmp->opcode     = i->opcode;
                tmp->send_flags = i->send_flags;
                tmp->imm_data   = i->imm_data;
                /* The wr union's layout depends on QP type and opcode. */
                if (ibqp->qp_type == IBV_QPT_UD) {
                        tmp->wr.ud.ah          = i->wr.ud.ah->handle;
                        tmp->wr.ud.remote_qpn  = i->wr.ud.remote_qpn;
                        tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
                } else {
                        switch (i->opcode) {
                        case IBV_WR_RDMA_WRITE:
                        case IBV_WR_RDMA_WRITE_WITH_IMM:
                        case IBV_WR_RDMA_READ:
                                tmp->wr.rdma.remote_addr =
                                        i->wr.rdma.remote_addr;
                                tmp->wr.rdma.rkey = i->wr.rdma.rkey;
                                break;
                        case IBV_WR_ATOMIC_CMP_AND_SWP:
                        case IBV_WR_ATOMIC_FETCH_AND_ADD:
                                tmp->wr.atomic.remote_addr =
                                        i->wr.atomic.remote_addr;
                                tmp->wr.atomic.compare_add =
                                        i->wr.atomic.compare_add;
                                tmp->wr.atomic.swap = i->wr.atomic.swap;
                                tmp->wr.atomic.rkey = i->wr.atomic.rkey;
                                break;
                        default:
                                break;
                        }
                }

                if (tmp->num_sge) {
                        memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
                        s += tmp->num_sge;
                }

                tmp++;
        }

        resp.bad_wr = 0;
        if (write(ibqp->context->cmd_fd, cmd, cmd_size) != cmd_size)
                ret = errno;

        (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

        /* resp.bad_wr is the 1-based index of the first failed WR. */
        wr_count = resp.bad_wr;
        if (wr_count) {
                i = wr;
                while (--wr_count)
                        i = i->next;
                *bad_wr = i;
        } else if (ret)
                *bad_wr = wr;

        return ret;
}
1459
/*
 * Post a list of receive work requests through the POST_RECV uverbs
 * command. Like ibv_cmd_post_send(), the WR list is flattened into
 * kernel-format WRs followed by all scatter/gather entries in one
 * stack-allocated command buffer.
 *
 * On error, *bad_wr points at the first WR that was not posted.
 * Returns 0 on success or errno if the kernel write fails.
 */
int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
                      struct ibv_recv_wr **bad_wr)
{
        struct ibv_post_recv     *cmd;
        struct ibv_post_recv_resp resp;
        struct ibv_recv_wr       *i;
        struct ibv_kern_recv_wr  *n, *tmp;
        struct ibv_sge           *s;
        unsigned                  wr_count = 0;
        unsigned                  sge_count = 0;
        int                       cmd_size;
        int                       ret = 0;

        /* Size the command: header + one kernel WR per WR + all SGEs. */
        for (i = wr; i; i = i->next) {
                wr_count++;
                sge_count += i->num_sge;
        }

        cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
        cmd  = alloca(cmd_size);

        IBV_INIT_CMD_RESP(cmd, cmd_size, POST_RECV, &resp, sizeof resp);
        cmd->qp_handle = ibqp->handle;
        cmd->wr_count  = wr_count;
        cmd->sge_count = sge_count;
        cmd->wqe_size  = sizeof *n;

        /* Kernel WR array follows the header; SGE array follows the WRs. */
        n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
        s = (struct ibv_sge *) (n + wr_count);

        tmp = n;
        for (i = wr; i; i = i->next) {
                tmp->wr_id   = i->wr_id;
                tmp->num_sge = i->num_sge;

                if (tmp->num_sge) {
                        memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
                        s += tmp->num_sge;
                }

                tmp++;
        }

        resp.bad_wr = 0;
        if (write(ibqp->context->cmd_fd, cmd, cmd_size) != cmd_size)
                ret = errno;

        (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

        /* resp.bad_wr is the 1-based index of the first failed WR. */
        wr_count = resp.bad_wr;
        if (wr_count) {
                i = wr;
                while (--wr_count)
                        i = i->next;
                *bad_wr = i;
        } else if (ret)
                *bad_wr = wr;

        return ret;
}
1520
1521 int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
1522                       struct ibv_recv_wr **bad_wr)
1523 {
1524         struct ibv_post_srq_recv *cmd;
1525         struct ibv_post_srq_recv_resp resp;
1526         struct ibv_recv_wr       *i;
1527         struct ibv_kern_recv_wr  *n, *tmp;
1528         struct ibv_sge           *s;
1529         unsigned                  wr_count = 0;
1530         unsigned                  sge_count = 0;
1531         int                       cmd_size;
1532         int                       ret = 0;
1533
1534         for (i = wr; i; i = i->next) {
1535                 wr_count++;
1536                 sge_count += i->num_sge;
1537         }
1538
1539         cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
1540         cmd  = alloca(cmd_size);
1541
1542         IBV_INIT_CMD_RESP(cmd, cmd_size, POST_SRQ_RECV, &resp, sizeof resp);
1543         cmd->srq_handle = srq->handle;
1544         cmd->wr_count  = wr_count;
1545         cmd->sge_count = sge_count;
1546         cmd->wqe_size  = sizeof *n;
1547
1548         n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
1549         s = (struct ibv_sge *) (n + wr_count);
1550
1551         tmp = n;
1552         for (i = wr; i; i = i->next) {
1553                 tmp->wr_id = i->wr_id;
1554                 tmp->num_sge = i->num_sge;
1555
1556                 if (tmp->num_sge) {
1557                         memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
1558                         s += tmp->num_sge;
1559                 }
1560
1561                 tmp++;
1562         }
1563
1564         resp.bad_wr = 0;
1565         if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
1566                 ret = errno;
1567
1568         (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1569
1570         wr_count = resp.bad_wr;
1571         if (wr_count) {
1572                 i = wr;
1573                 while (--wr_count)
1574                         i = i->next;
1575                 *bad_wr = i;
1576         } else if (ret)
1577                 *bad_wr = wr;
1578
1579         return ret;
1580 }
1581
1582 int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
1583                       struct ibv_ah_attr *attr,
1584                       struct ibv_create_ah_resp *resp,
1585                       size_t resp_size)
1586 {
1587         struct ibv_create_ah      cmd;
1588
1589         IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_AH, resp, resp_size);
1590         cmd.user_handle            = (uintptr_t) ah;
1591         cmd.pd_handle              = pd->handle;
1592         cmd.attr.dlid              = attr->dlid;
1593         cmd.attr.sl                = attr->sl;
1594         cmd.attr.src_path_bits     = attr->src_path_bits;
1595         cmd.attr.static_rate       = attr->static_rate;
1596         cmd.attr.is_global         = attr->is_global;
1597         cmd.attr.port_num          = attr->port_num;
1598         cmd.attr.grh.flow_label    = attr->grh.flow_label;
1599         cmd.attr.grh.sgid_index    = attr->grh.sgid_index;
1600         cmd.attr.grh.hop_limit     = attr->grh.hop_limit;
1601         cmd.attr.grh.traffic_class = attr->grh.traffic_class;
1602         memcpy(cmd.attr.grh.dgid, attr->grh.dgid.raw, 16);
1603
1604         if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1605                 return errno;
1606
1607         (void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1608
1609         ah->handle  = resp->handle;
1610         ah->context = pd->context;
1611
1612         return 0;
1613 }
1614
1615 int ibv_cmd_destroy_ah(struct ibv_ah *ah)
1616 {
1617         struct ibv_destroy_ah cmd;
1618
1619         IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_AH);
1620         cmd.ah_handle = ah->handle;
1621
1622         if (write(ah->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1623                 return errno;
1624
1625         return 0;
1626 }
1627
/*
 * Destroy a QP via the DESTROY_QP command.
 *
 * After the kernel confirms destruction it reports how many async
 * events it ever generated for this QP (resp.events_reported); this
 * function then blocks until the application has acknowledged that
 * many events (qp->events_completed), so the caller can safely free
 * the QP object afterwards.
 *
 * Returns 0 on success, or the errno from write(2) on failure.
 */
int ibv_cmd_destroy_qp(struct ibv_qp *qp)
{
	struct ibv_destroy_qp      cmd;
	struct ibv_destroy_qp_resp resp;

	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_QP, &resp, sizeof resp);
	cmd.qp_handle = qp->handle;
	cmd.reserved  = 0;

	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);

	/* Wait for all outstanding async events to be acknowledged. */
	pthread_mutex_lock(&qp->mutex);
	while (qp->events_completed != resp.events_reported)
		pthread_cond_wait(&qp->cond, &qp->mutex);
	pthread_mutex_unlock(&qp->mutex);

	return 0;
}
1649
1650 int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
1651 {
1652         struct ibv_attach_mcast cmd;
1653
1654         IBV_INIT_CMD(&cmd, sizeof cmd, ATTACH_MCAST);
1655         memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
1656         cmd.qp_handle = qp->handle;
1657         cmd.mlid      = lid;
1658         cmd.reserved  = 0;
1659
1660         if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1661                 return errno;
1662
1663         return 0;
1664 }
1665
1666 int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
1667 {
1668         struct ibv_detach_mcast cmd;
1669
1670         IBV_INIT_CMD(&cmd, sizeof cmd, DETACH_MCAST);
1671         memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
1672         cmd.qp_handle = qp->handle;
1673         cmd.mlid      = lid;
1674         cmd.reserved  = 0;
1675
1676         if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1677                 return errno;
1678
1679         return 0;
1680 }
1681
/* Return 1 if all 'size' bytes at 'addr' are zero, 0 otherwise. */
static int buffer_is_zero(char *addr, ssize_t size)
{
	ssize_t i;

	for (i = 0; i < size; i++)
		if (addr[i] != 0)
			return 0;
	return 1;
}
1686
/*
 * Compute how many filter bytes to copy from a variable-size user flow
 * spec into the fixed kernel ABI spec.
 *
 * The user spec stores a val block followed by a mask block of equal
 * length; *ib_filter_size is derived from hdr.size as half of the
 * payload.  Each supported type has a minimum kernel filter length
 * (through its last known member).
 *
 * Returns 0 with *ib_filter_size / *kern_filter_size set on success;
 * EINVAL for an unknown type or a user filter shorter than the kernel
 * minimum; EOPNOTSUPP when the user sets mask bits beyond what the
 * kernel layout can carry (truncating them silently would change the
 * flow's matching semantics).
 */
static int get_filters_size(struct ibv_flow_spec *ib_spec,
			    struct ibv_kern_spec *kern_spec,
			    int *ib_filter_size, int *kern_filter_size,
			    enum ibv_flow_spec_type type)
{
	void *ib_spec_filter_mask;
	int curr_kern_filter_size;
	int min_filter_size;

	/* Payload after the header is <val, mask>, each half the size. */
	*ib_filter_size = (ib_spec->hdr.size - sizeof(ib_spec->hdr)) / 2;

	switch (type) {
	case IBV_FLOW_SPEC_IPV4_EXT:
		/* Kernel filter known through the 'flags' member. */
		min_filter_size =
			offsetof(struct ibv_kern_ipv4_ext_filter, flags) +
			sizeof(kern_spec->ipv4_ext.mask.flags);
		curr_kern_filter_size = min_filter_size;
		/* Mask block starts ib_filter_size bytes after val. */
		ib_spec_filter_mask = (void *)&ib_spec->ipv4_ext.val +
			*ib_filter_size;
		break;
	case IBV_FLOW_SPEC_IPV6:
		/* Kernel filter known through the 'hop_limit' member. */
		min_filter_size =
			offsetof(struct ibv_kern_ipv6_filter, hop_limit) +
			sizeof(kern_spec->ipv6.mask.hop_limit);
		curr_kern_filter_size = min_filter_size;
		ib_spec_filter_mask = (void *)&ib_spec->ipv6.val +
			*ib_filter_size;
		break;
	case IBV_FLOW_SPEC_VXLAN_TUNNEL:
		/* Kernel filter known through the 'tunnel_id' member. */
		min_filter_size =
			offsetof(struct ibv_kern_tunnel_filter,
				 tunnel_id) +
			sizeof(kern_spec->tunnel.mask.tunnel_id);
		curr_kern_filter_size = min_filter_size;
		ib_spec_filter_mask = (void *)&ib_spec->tunnel.val +
			*ib_filter_size;
		break;
	default:
		return EINVAL;
	}

	if (*ib_filter_size < min_filter_size)
		return EINVAL;

	/*
	 * A user spec longer than the kernel's layout is only acceptable
	 * when every extra mask byte is zero (i.e. not matched on).
	 */
	if (*ib_filter_size > curr_kern_filter_size &&
	    !buffer_is_zero(ib_spec_filter_mask + curr_kern_filter_size,
			    *ib_filter_size - curr_kern_filter_size))
		return EOPNOTSUPP;

	*kern_filter_size = min_t(int, curr_kern_filter_size, *ib_filter_size);

	return 0;
}
1740
1741 static int ib_spec_to_kern_spec(struct ibv_flow_spec *ib_spec,
1742                                 struct ibv_kern_spec *kern_spec)
1743 {
1744         int kern_filter_size;
1745         int ib_filter_size;
1746         int ret;
1747
1748         kern_spec->hdr.type = ib_spec->hdr.type;
1749
1750         switch (kern_spec->hdr.type) {
1751         case IBV_FLOW_SPEC_ETH:
1752         case IBV_FLOW_SPEC_ETH | IBV_FLOW_SPEC_INNER:
1753                 kern_spec->eth.size = sizeof(struct ibv_kern_spec_eth);
1754                 memcpy(&kern_spec->eth.val, &ib_spec->eth.val,
1755                        sizeof(struct ibv_flow_eth_filter));
1756                 memcpy(&kern_spec->eth.mask, &ib_spec->eth.mask,
1757                        sizeof(struct ibv_flow_eth_filter));
1758                 break;
1759         case IBV_FLOW_SPEC_IPV4:
1760         case IBV_FLOW_SPEC_IPV4 | IBV_FLOW_SPEC_INNER:
1761                 kern_spec->ipv4.size = sizeof(struct ibv_kern_spec_ipv4);
1762                 memcpy(&kern_spec->ipv4.val, &ib_spec->ipv4.val,
1763                        sizeof(struct ibv_flow_ipv4_filter));
1764                 memcpy(&kern_spec->ipv4.mask, &ib_spec->ipv4.mask,
1765                        sizeof(struct ibv_flow_ipv4_filter));
1766                 break;
1767         case IBV_FLOW_SPEC_IPV4_EXT:
1768         case IBV_FLOW_SPEC_IPV4_EXT | IBV_FLOW_SPEC_INNER:
1769                 ret = get_filters_size(ib_spec, kern_spec,
1770                                        &ib_filter_size, &kern_filter_size,
1771                                        IBV_FLOW_SPEC_IPV4_EXT);
1772                 if (ret)
1773                         return ret;
1774
1775                 kern_spec->hdr.type = IBV_FLOW_SPEC_IPV4 |
1776                                      (IBV_FLOW_SPEC_INNER & ib_spec->hdr.type);
1777                 kern_spec->ipv4_ext.size = sizeof(struct
1778                                                   ibv_kern_spec_ipv4_ext);
1779                 memcpy(&kern_spec->ipv4_ext.val, &ib_spec->ipv4_ext.val,
1780                        kern_filter_size);
1781                 memcpy(&kern_spec->ipv4_ext.mask, (void *)&ib_spec->ipv4_ext.val
1782                        + ib_filter_size, kern_filter_size);
1783                 break;
1784         case IBV_FLOW_SPEC_IPV6:
1785         case IBV_FLOW_SPEC_IPV6 | IBV_FLOW_SPEC_INNER:
1786                 ret = get_filters_size(ib_spec, kern_spec,
1787                                        &ib_filter_size, &kern_filter_size,
1788                                        IBV_FLOW_SPEC_IPV6);
1789                 if (ret)
1790                         return ret;
1791
1792                 kern_spec->ipv6.size = sizeof(struct ibv_kern_spec_ipv6);
1793                 memcpy(&kern_spec->ipv6.val, &ib_spec->ipv6.val,
1794                        kern_filter_size);
1795                 memcpy(&kern_spec->ipv6.mask, (void *)&ib_spec->ipv6.val
1796                        + ib_filter_size, kern_filter_size);
1797                 break;
1798         case IBV_FLOW_SPEC_TCP:
1799         case IBV_FLOW_SPEC_UDP:
1800         case IBV_FLOW_SPEC_TCP | IBV_FLOW_SPEC_INNER:
1801         case IBV_FLOW_SPEC_UDP | IBV_FLOW_SPEC_INNER:
1802                 kern_spec->tcp_udp.size = sizeof(struct ibv_kern_spec_tcp_udp);
1803                 memcpy(&kern_spec->tcp_udp.val, &ib_spec->tcp_udp.val,
1804                        sizeof(struct ibv_flow_ipv4_filter));
1805                 memcpy(&kern_spec->tcp_udp.mask, &ib_spec->tcp_udp.mask,
1806                        sizeof(struct ibv_flow_tcp_udp_filter));
1807                 break;
1808         case IBV_FLOW_SPEC_VXLAN_TUNNEL:
1809                 ret = get_filters_size(ib_spec, kern_spec,
1810                                        &ib_filter_size, &kern_filter_size,
1811                                        IBV_FLOW_SPEC_VXLAN_TUNNEL);
1812                 if (ret)
1813                         return ret;
1814
1815                 kern_spec->tunnel.size = sizeof(struct ibv_kern_spec_tunnel);
1816                 memcpy(&kern_spec->tunnel.val, &ib_spec->tunnel.val,
1817                        kern_filter_size);
1818                 memcpy(&kern_spec->tunnel.mask, (void *)&ib_spec->tunnel.val
1819                        + ib_filter_size, kern_filter_size);
1820                 break;
1821         case IBV_FLOW_SPEC_ACTION_TAG:
1822                 kern_spec->flow_tag.size =
1823                         sizeof(struct ibv_kern_spec_action_tag);
1824                 kern_spec->flow_tag.tag_id = ib_spec->flow_tag.tag_id;
1825                 break;
1826         case IBV_FLOW_SPEC_ACTION_DROP:
1827                 kern_spec->drop.size = sizeof(struct ibv_kern_spec_action_drop);
1828                 break;
1829         default:
1830                 return EINVAL;
1831         }
1832         return 0;
1833 }
1834
/*
 * Create a flow steering rule on a QP via the extended CREATE_FLOW
 * command.
 *
 * The command buffer is sized for the worst case (every spec at full
 * kernel size) but only the bytes actually marshaled are written.
 * Specs are variable length: both cursors advance by their own
 * hdr.size as each spec is converted by ib_spec_to_kern_spec().
 *
 * Returns a newly allocated ibv_flow on success (caller frees it via
 * ibv_cmd_destroy_flow()); returns NULL with errno set on failure.
 */
struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp,
				     struct ibv_flow_attr *flow_attr)
{
	struct ibv_create_flow *cmd;
	struct ibv_create_flow_resp resp;
	struct ibv_flow *flow_id;
	size_t cmd_size;
	size_t written_size;
	int i, err;
	void *kern_spec;
	void *ib_spec;

	/* Worst-case size: every spec at full kernel spec size. */
	cmd_size = sizeof(*cmd) + (flow_attr->num_of_specs *
				  sizeof(struct ibv_kern_spec));
	cmd = alloca(cmd_size);
	flow_id = malloc(sizeof(*flow_id));
	if (!flow_id)
		return NULL;
	memset(cmd, 0, cmd_size);

	cmd->qp_handle = qp->handle;

	cmd->flow_attr.type = flow_attr->type;
	cmd->flow_attr.priority = flow_attr->priority;
	cmd->flow_attr.num_of_specs = flow_attr->num_of_specs;
	cmd->flow_attr.port = flow_attr->port;
	cmd->flow_attr.flags = flow_attr->flags;

	/* Specs follow their headers immediately in both layouts. */
	kern_spec = cmd + 1;
	ib_spec = flow_attr + 1;
	for (i = 0; i < flow_attr->num_of_specs; i++) {
		err = ib_spec_to_kern_spec(ib_spec, kern_spec);
		if (err) {
			errno = err;
			goto err;
		}
		/* Advance each cursor by that spec's own (variable) size. */
		cmd->flow_attr.size +=
			((struct ibv_kern_spec *)kern_spec)->hdr.size;
		kern_spec += ((struct ibv_kern_spec *)kern_spec)->hdr.size;
		ib_spec += ((struct ibv_flow_spec *)ib_spec)->hdr.size;
	}

	/* Only send the bytes actually marshaled, not the worst case. */
	written_size = sizeof(*cmd) + cmd->flow_attr.size;
	IBV_INIT_CMD_RESP_EX_VCMD(cmd, written_size, written_size, CREATE_FLOW,
				  &resp, sizeof(resp));
	if (write(qp->context->cmd_fd, cmd, written_size) != written_size)
		goto err;	/* errno set by write(2) */

	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp));

	flow_id->context = qp->context;
	flow_id->handle = resp.flow_handle;
	return flow_id;
err:
	free(flow_id);
	return NULL;
}
1892
1893 int ibv_cmd_destroy_flow(struct ibv_flow *flow_id)
1894 {
1895         struct ibv_destroy_flow cmd;
1896         int ret = 0;
1897
1898         memset(&cmd, 0, sizeof(cmd));
1899         IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_FLOW);
1900         cmd.flow_handle = flow_id->handle;
1901
1902         if (write(flow_id->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
1903                 ret = errno;
1904         free(flow_id);
1905         return ret;
1906 }
1907
/*
 * Create a work queue via the extended CREATE_WQ command.
 *
 * cmd/resp buffers are provided by the driver: cmd_core_size covers
 * the part of the command this library knows about, cmd_size the full
 * driver-specific command (same split for resp).  Optional fields are
 * only marshaled when the core struct is large enough to carry them.
 *
 * On success fills in *wq, updates wq_init_attr->max_wr/max_sge with
 * the values the kernel actually granted, and returns 0.  Returns
 * EINVAL for unknown comp_mask bits or a short kernel response,
 * EOPNOTSUPP for unknown create flags, or the errno from write(2).
 */
int ibv_cmd_create_wq(struct ibv_context *context,
		      struct ibv_wq_init_attr *wq_init_attr,
		      struct ibv_wq *wq,
		      struct ibv_create_wq *cmd,
		      size_t cmd_core_size,
		      size_t cmd_size,
		      struct ibv_create_wq_resp *resp,
		      size_t resp_core_size,
		      size_t resp_size)
{
	int err;

	/* Reject comp_mask bits this library does not understand. */
	if (wq_init_attr->comp_mask >= IBV_WQ_INIT_ATTR_RESERVED)
		return EINVAL;

	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
			       CREATE_WQ, resp,
			       resp_core_size, resp_size);

	cmd->user_handle   = (uintptr_t)wq;
	cmd->pd_handle	   = wq_init_attr->pd->handle;
	cmd->cq_handle	 = wq_init_attr->cq->handle;
	cmd->wq_type = wq_init_attr->wq_type;
	cmd->max_sge = wq_init_attr->max_sge;
	cmd->max_wr = wq_init_attr->max_wr;
	cmd->comp_mask = 0;

	/* Only marshal create_flags if the core command carries them. */
	if (cmd_core_size >= offsetof(struct ibv_create_wq, create_flags) +
	    sizeof(cmd->create_flags)) {
		if (wq_init_attr->comp_mask & IBV_WQ_INIT_ATTR_FLAGS) {
			if (wq_init_attr->create_flags & ~(IBV_WQ_FLAGS_RESERVED - 1))
				return EOPNOTSUPP;
			cmd->create_flags = wq_init_attr->create_flags;
		}
	}

	err = write(context->cmd_fd, cmd, cmd_size);
	if (err != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	/* Kernel must have filled in at least the core response. */
	if (resp->response_length < resp_core_size)
		return EINVAL;

	/* Report back what the kernel actually granted. */
	wq->handle  = resp->wq_handle;
	wq_init_attr->max_wr = resp->max_wr;
	wq_init_attr->max_sge = resp->max_sge;
	wq->wq_num = resp->wqn;
	wq->context = context;
	wq->cq = wq_init_attr->cq;
	wq->pd = wq_init_attr->pd;
	wq->wq_type = wq_init_attr->wq_type;

	return 0;
}
1964
1965 int ibv_cmd_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr,
1966                       struct ibv_modify_wq *cmd, size_t cmd_core_size,
1967                       size_t cmd_size)
1968 {
1969         if (attr->attr_mask >= IBV_WQ_ATTR_RESERVED)
1970                 return EINVAL;
1971
1972         memset(cmd, 0, cmd_core_size);
1973         IBV_INIT_CMD_EX(cmd, cmd_size, MODIFY_WQ);
1974
1975         cmd->curr_wq_state = attr->curr_wq_state;
1976         cmd->wq_state = attr->wq_state;
1977         if (cmd_core_size >= offsetof(struct ibv_modify_wq, flags_mask) +
1978             sizeof(cmd->flags_mask)) {
1979                 if (attr->attr_mask & IBV_WQ_ATTR_FLAGS) {
1980                         if (attr->flags_mask & ~(IBV_WQ_FLAGS_RESERVED - 1))
1981                                 return EOPNOTSUPP;
1982                         cmd->flags = attr->flags;
1983                         cmd->flags_mask = attr->flags_mask;
1984                 }
1985         }
1986         cmd->wq_handle = wq->handle;
1987         cmd->attr_mask = attr->attr_mask;
1988
1989         if (write(wq->context->cmd_fd, cmd, cmd_size) != cmd_size)
1990                 return errno;
1991
1992         if (attr->attr_mask & IBV_WQ_ATTR_STATE)
1993                 wq->state = attr->wq_state;
1994
1995         return 0;
1996 }
1997
1998 int ibv_cmd_destroy_wq(struct ibv_wq *wq)
1999 {
2000         struct ibv_destroy_wq cmd;
2001         struct ibv_destroy_wq_resp resp;
2002         int ret = 0;
2003
2004         memset(&cmd, 0, sizeof(cmd));
2005         memset(&resp, 0, sizeof(resp));
2006
2007         IBV_INIT_CMD_RESP_EX(&cmd, sizeof(cmd), DESTROY_WQ, &resp, sizeof(resp));
2008         cmd.wq_handle = wq->handle;
2009
2010         if (write(wq->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
2011                 return errno;
2012
2013         if (resp.response_length < sizeof(resp))
2014                 return EINVAL;
2015
2016         pthread_mutex_lock(&wq->mutex);
2017         while (wq->events_completed != resp.events_reported)
2018                 pthread_cond_wait(&wq->cond, &wq->mutex);
2019         pthread_mutex_unlock(&wq->mutex);
2020
2021         return ret;
2022 }
2023
/*
 * Create a receive WQ indirection table via the extended
 * CREATE_RWQ_IND_TBL command.
 *
 * The caller's cmd buffer must have room after the fixed header for
 * 2^log_ind_tbl_size WQ handles (u64-aligned); the handle table is
 * written there before the command is issued.
 *
 * Returns 0 on success with rwq_ind_table filled in; EINVAL for
 * unknown comp_mask bits, an undersized cmd buffer, or a short kernel
 * response; or the errno from write(2).
 */
int ibv_cmd_create_rwq_ind_table(struct ibv_context *context,
				 struct ibv_rwq_ind_table_init_attr *init_attr,
				 struct ibv_rwq_ind_table *rwq_ind_table,
				 struct ibv_create_rwq_ind_table *cmd,
				 size_t cmd_core_size,
				 size_t cmd_size,
				 struct ibv_create_rwq_ind_table_resp *resp,
				 size_t resp_core_size,
				 size_t resp_size)
{
	int err, i;
	uint32_t required_tbl_size, alloc_tbl_size;
	uint32_t *tbl_start;
	int num_tbl_entries;

	/* Reject comp_mask bits this library does not understand. */
	if (init_attr->comp_mask >= IBV_CREATE_IND_TABLE_RESERVED)
		return EINVAL;

	/* Space left after the fixed command header for WQ handles. */
	alloc_tbl_size = cmd_core_size - sizeof(*cmd);
	num_tbl_entries = 1 << init_attr->log_ind_tbl_size;

	/* Data must be u64 aligned */
	required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ?
			sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t));

	if (alloc_tbl_size < required_tbl_size)
		return EINVAL;

	/* Copy the WQ handle for each table entry after the header. */
	tbl_start = (uint32_t *)((uint8_t *)cmd + sizeof(*cmd));
	for (i = 0; i < num_tbl_entries; i++)
		tbl_start[i] = init_attr->ind_tbl[i]->handle;

	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
			       CREATE_RWQ_IND_TBL, resp,
			       resp_core_size, resp_size);
	cmd->log_ind_tbl_size = init_attr->log_ind_tbl_size;
	cmd->comp_mask = 0;

	err = write(context->cmd_fd, cmd, cmd_size);
	if (err != cmd_size)
		return errno;

	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);

	/* Kernel must have filled in at least the core response. */
	if (resp->response_length < resp_core_size)
		return EINVAL;

	rwq_ind_table->ind_tbl_handle = resp->ind_tbl_handle;
	rwq_ind_table->ind_tbl_num = resp->ind_tbl_num;
	rwq_ind_table->context = context;
	return 0;
}
2076
2077 int ibv_cmd_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
2078 {
2079         struct ibv_destroy_rwq_ind_table cmd;
2080         int ret = 0;
2081
2082         memset(&cmd, 0, sizeof(cmd));
2083         IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_RWQ_IND_TBL);
2084         cmd.ind_tbl_handle = rwq_ind_table->ind_tbl_handle;
2085
2086         if (write(rwq_ind_table->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
2087                 ret = errno;
2088
2089         return ret;
2090 }