2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 #include <infiniband/endian.h>
44 #include <netinet/in.h>
45 #include <netinet/ip.h>
46 #include <sys/socket.h>
49 #ifndef NRESOLVE_NEIGH
51 #include <net/if_arp.h>
55 /* Hack to avoid GCC's -Wmissing-prototypes and the similar error from sparse
56 with these prototypes. Symbol versioning requires the goofy names, the
57 prototype must match the version in verbs.h.
59 int __ibv_query_device(struct ibv_context *context,
60 struct ibv_device_attr *device_attr);
61 int __ibv_query_port(struct ibv_context *context, uint8_t port_num,
62 struct ibv_port_attr *port_attr);
63 int __ibv_query_gid(struct ibv_context *context, uint8_t port_num, int index,
65 int __ibv_query_pkey(struct ibv_context *context, uint8_t port_num, int index,
67 struct ibv_pd *__ibv_alloc_pd(struct ibv_context *context);
68 int __ibv_dealloc_pd(struct ibv_pd *pd);
69 struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
71 int __ibv_rereg_mr(struct ibv_mr *mr, int flags, struct ibv_pd *pd, void *addr,
72 size_t length, int access);
73 int __ibv_dereg_mr(struct ibv_mr *mr);
74 struct ibv_cq *__ibv_create_cq(struct ibv_context *context, int cqe,
76 struct ibv_comp_channel *channel,
78 int __ibv_resize_cq(struct ibv_cq *cq, int cqe);
79 int __ibv_destroy_cq(struct ibv_cq *cq);
80 int __ibv_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq,
82 void __ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents);
83 struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd,
84 struct ibv_srq_init_attr *srq_init_attr);
85 int __ibv_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
87 int __ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr);
88 int __ibv_destroy_srq(struct ibv_srq *srq);
89 struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd,
90 struct ibv_qp_init_attr *qp_init_attr);
91 int __ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
92 struct ibv_qp_init_attr *init_attr);
93 int __ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
94 int __ibv_destroy_qp(struct ibv_qp *qp);
95 struct ibv_ah *__ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
96 int __ibv_destroy_ah(struct ibv_ah *ah);
97 int __ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
99 int __ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
/*
 * Convert an ibv_rate enumerator into its multiple of the 2.5 Gbps base
 * rate: 2.5 Gbps -> 1, 5 Gbps -> 2, ... 120 Gbps -> 48.  Only the nine
 * rates listed below are given a multiplier in this table.
 */
102 int __attribute__((const)) ibv_rate_to_mult(enum ibv_rate rate)
105 case IBV_RATE_2_5_GBPS: return 1;
106 case IBV_RATE_5_GBPS: return 2;
107 case IBV_RATE_10_GBPS: return 4;
108 case IBV_RATE_20_GBPS: return 8;
109 case IBV_RATE_30_GBPS: return 12;
110 case IBV_RATE_40_GBPS: return 16;
111 case IBV_RATE_60_GBPS: return 24;
112 case IBV_RATE_80_GBPS: return 32;
113 case IBV_RATE_120_GBPS: return 48;
/*
 * Inverse of ibv_rate_to_mult(): map a multiple of 2.5 Gbps back to the
 * matching ibv_rate enumerator.  Only exact matches are recognised; any
 * other multiplier yields IBV_RATE_MAX.
 */
118 enum ibv_rate __attribute__((const)) mult_to_ibv_rate(int mult)
121 case 1: return IBV_RATE_2_5_GBPS;
122 case 2: return IBV_RATE_5_GBPS;
123 case 4: return IBV_RATE_10_GBPS;
124 case 8: return IBV_RATE_20_GBPS;
125 case 12: return IBV_RATE_30_GBPS;
126 case 16: return IBV_RATE_40_GBPS;
127 case 24: return IBV_RATE_60_GBPS;
128 case 32: return IBV_RATE_80_GBPS;
129 case 48: return IBV_RATE_120_GBPS;
130 default: return IBV_RATE_MAX;
/*
 * Convert an ibv_rate enumerator into a rate expressed in Mbps.
 * The table is not simply "name * 1000": for example IBV_RATE_14_GBPS
 * maps to 14062 and IBV_RATE_100_GBPS maps to 103125.
 */
134 int __attribute__((const)) ibv_rate_to_mbps(enum ibv_rate rate)
137 case IBV_RATE_2_5_GBPS: return 2500;
138 case IBV_RATE_5_GBPS: return 5000;
139 case IBV_RATE_10_GBPS: return 10000;
140 case IBV_RATE_20_GBPS: return 20000;
141 case IBV_RATE_30_GBPS: return 30000;
142 case IBV_RATE_40_GBPS: return 40000;
143 case IBV_RATE_60_GBPS: return 60000;
144 case IBV_RATE_80_GBPS: return 80000;
145 case IBV_RATE_120_GBPS: return 120000;
146 case IBV_RATE_14_GBPS: return 14062;
147 case IBV_RATE_56_GBPS: return 56250;
148 case IBV_RATE_112_GBPS: return 112500;
149 case IBV_RATE_168_GBPS: return 168750;
150 case IBV_RATE_25_GBPS: return 25781;
151 case IBV_RATE_100_GBPS: return 103125;
152 case IBV_RATE_200_GBPS: return 206250;
153 case IBV_RATE_300_GBPS: return 309375;
/*
 * Inverse of ibv_rate_to_mbps(): map a rate in Mbps back to its ibv_rate
 * enumerator.  The value must match a table entry exactly (e.g. 14062,
 * not 14000); anything else yields IBV_RATE_MAX.
 */
158 enum ibv_rate __attribute__((const)) mbps_to_ibv_rate(int mbps)
161 case 2500: return IBV_RATE_2_5_GBPS;
162 case 5000: return IBV_RATE_5_GBPS;
163 case 10000: return IBV_RATE_10_GBPS;
164 case 20000: return IBV_RATE_20_GBPS;
165 case 30000: return IBV_RATE_30_GBPS;
166 case 40000: return IBV_RATE_40_GBPS;
167 case 60000: return IBV_RATE_60_GBPS;
168 case 80000: return IBV_RATE_80_GBPS;
169 case 120000: return IBV_RATE_120_GBPS;
170 case 14062: return IBV_RATE_14_GBPS;
171 case 56250: return IBV_RATE_56_GBPS;
172 case 112500: return IBV_RATE_112_GBPS;
173 case 168750: return IBV_RATE_168_GBPS;
174 case 25781: return IBV_RATE_25_GBPS;
175 case 103125: return IBV_RATE_100_GBPS;
176 case 206250: return IBV_RATE_200_GBPS;
177 case 309375: return IBV_RATE_300_GBPS;
178 default: return IBV_RATE_MAX;
/*
 * Query device attributes: a thin wrapper that forwards to the provider's
 * query_device op and returns its result unchanged.  default_symver pairs
 * this implementation with the public name ibv_query_device.
 */
182 int __ibv_query_device(struct ibv_context *context,
183 struct ibv_device_attr *device_attr)
185 return context->ops.query_device(context, device_attr);
187 default_symver(__ibv_query_device, ibv_query_device);
/*
 * Query the attributes of port port_num: forwards to the provider's
 * query_port op and returns its result unchanged.  default_symver pairs
 * this implementation with the public name ibv_query_port.
 */
189 int __ibv_query_port(struct ibv_context *context, uint8_t port_num,
190 struct ibv_port_attr *port_attr)
192 return context->ops.query_port(context, port_num, port_attr);
194 default_symver(__ibv_query_port, ibv_query_port);
/*
 * Read one GID table entry from sysfs.
 *
 * The entry is read from "ports/<port_num>/gids/<index>" below the
 * device's ibdev_path.  The text is parsed as 8 hexadecimal 16-bit groups
 * located at a fixed stride of 5 characters (presumably four hex digits
 * plus a separator -- TODO confirm against the sysfs format).  Each group
 * is stored most-significant byte first into gid->raw[].
 */
196 int __ibv_query_gid(struct ibv_context *context, uint8_t port_num,
197 int index, union ibv_gid *gid)
204 snprintf(name, sizeof name, "ports/%d/gids/%d", port_num, index);
206 if (ibv_read_sysfs_file(context->device->ibdev_path, name,
207 attr, sizeof attr) < 0)
210 for (i = 0; i < 8; ++i) {
211 if (sscanf(attr + i * 5, "%hx", &val) != 1)
213 gid->raw[i * 2 ] = val >> 8;
214 gid->raw[i * 2 + 1] = val & 0xff;
219 default_symver(__ibv_query_gid, ibv_query_gid);
/*
 * Read one P_Key table entry from sysfs.
 *
 * The entry is read from "ports/<port_num>/pkeys/<index>" below the
 * device's ibdev_path and parsed as a hexadecimal 16-bit value.  The
 * result is stored in *pkey in big-endian byte order (htobe16).
 */
221 int __ibv_query_pkey(struct ibv_context *context, uint8_t port_num,
222 int index, __be16 *pkey)
228 snprintf(name, sizeof name, "ports/%d/pkeys/%d", port_num, index);
230 if (ibv_read_sysfs_file(context->device->ibdev_path, name,
231 attr, sizeof attr) < 0)
234 if (sscanf(attr, "%hx", &val) != 1)
237 *pkey = htobe16(val);
240 default_symver(__ibv_query_pkey, ibv_query_pkey);
/*
 * Allocate a protection domain through the provider's alloc_pd op and
 * record the owning context in pd->context, so later calls on the PD can
 * reach the provider ops through it.
 */
242 struct ibv_pd *__ibv_alloc_pd(struct ibv_context *context)
246 pd = context->ops.alloc_pd(context);
248 pd->context = context;
252 default_symver(__ibv_alloc_pd, ibv_alloc_pd);
/*
 * Release a protection domain: forwards to the dealloc_pd op of the
 * context recorded in the PD and returns its result unchanged.
 */
254 int __ibv_dealloc_pd(struct ibv_pd *pd)
256 return pd->context->ops.dealloc_pd(pd);
258 default_symver(__ibv_dealloc_pd, ibv_dealloc_pd);
/*
 * Register the memory range [addr, addr + length) with the given access
 * flags.
 *
 * The range is first passed to ibv_dontfork_range(); registration then
 * goes through the provider's reg_mr op, and the resulting MR has its
 * context set from the PD.  ibv_dofork_range() on the same range reverts
 * the fork marking -- NOTE(review): presumably only when reg_mr fails;
 * confirm against the full function.
 */
260 struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr,
261 size_t length, int access)
265 if (ibv_dontfork_range(addr, length))
268 mr = pd->context->ops.reg_mr(pd, addr, length, access);
270 mr->context = pd->context;
275 ibv_dofork_range(addr, length);
279 default_symver(__ibv_reg_mr, ibv_reg_mr);
/*
 * Modify an existing memory region (PD, translation and/or access).
 *
 * Input validation, each failing with IBV_REREG_MR_ERR_INPUT:
 *   - flags contains bits outside IBV_REREG_MR_FLAGS_SUPPORTED;
 *   - IBV_REREG_MR_CHANGE_TRANSLATION is set but length is 0 or addr is
 *     NULL;
 *   - access is non-zero although IBV_REREG_MR_CHANGE_ACCESS is not set;
 *   - the provider has no rereg_mr op.
 *
 * When the translation changes, the new range is passed to
 * ibv_dontfork_range() before the provider is called
 * (IBV_REREG_MR_ERR_DONT_FORK_NEW is the associated error code), and
 * ibv_dofork_range() is applied to the old range afterwards
 * (IBV_REREG_MR_ERR_DO_FORK_OLD).  The old length is captured before the
 * provider call because the call may update the MR.
 *
 * A provider failure is reported as IBV_REREG_MR_ERR_CMD, or as
 * IBV_REREG_MR_ERR_CMD_AND_DO_FORK_NEW when reverting the fork marking of
 * the new range fails as well.
 */
281 int __ibv_rereg_mr(struct ibv_mr *mr, int flags,
282 struct ibv_pd *pd, void *addr,
283 size_t length, int access)
285 int dofork_onfail = 0;
290 if (flags & ~IBV_REREG_MR_FLAGS_SUPPORTED) {
292 return IBV_REREG_MR_ERR_INPUT;
295 if ((flags & IBV_REREG_MR_CHANGE_TRANSLATION) &&
296 (!length || !addr)) {
298 return IBV_REREG_MR_ERR_INPUT;
301 if (access && !(flags & IBV_REREG_MR_CHANGE_ACCESS)) {
303 return IBV_REREG_MR_ERR_INPUT;
306 if (!mr->context->ops.rereg_mr) {
308 return IBV_REREG_MR_ERR_INPUT;
311 if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) {
312 err = ibv_dontfork_range(addr, length);
314 return IBV_REREG_MR_ERR_DONT_FORK_NEW;
319 old_len = mr->length;
320 err = mr->context->ops.rereg_mr(mr, flags, pd, addr, length, access);
322 if (flags & IBV_REREG_MR_CHANGE_PD)
324 if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) {
327 err = ibv_dofork_range(old_addr, old_len);
329 return IBV_REREG_MR_ERR_DO_FORK_OLD;
332 err = IBV_REREG_MR_ERR_CMD;
334 if (ibv_dofork_range(addr, length))
335 err = IBV_REREG_MR_ERR_CMD_AND_DO_FORK_NEW;
341 default_symver(__ibv_rereg_mr, ibv_rereg_mr);
/*
 * Deregister a memory region.
 *
 * addr and length are copied out of the MR before the provider's dereg_mr
 * op is called, and ibv_dofork_range() is then applied to that saved
 * range (presumably because mr must not be dereferenced once the provider
 * has released it -- TODO confirm).
 */
343 int __ibv_dereg_mr(struct ibv_mr *mr)
346 void *addr = mr->addr;
347 size_t length = mr->length;
349 ret = mr->context->ops.dereg_mr(mr);
351 ibv_dofork_range(addr, length);
355 default_symver(__ibv_dereg_mr, ibv_dereg_mr);
/*
 * Completion-channel creation for the ABI version 2 compatibility path.
 *
 * The context's abi_compat area holds an in_use mutex that is claimed with
 * pthread_mutex_trylock() (which returns 0 on success).  If the claim
 * fails, a warning is printed to stderr explaining that only one
 * completion channel can be created per context with this kernel ABI.
 */
357 static struct ibv_comp_channel *ibv_create_comp_channel_v2(struct ibv_context *context)
359 struct ibv_abi_compat_v2 *t = context->abi_compat;
362 if (!pthread_mutex_trylock(&t->in_use))
366 fprintf(stderr, PFX "Warning: kernel's ABI version %d limits capacity.\n"
367 " Only one completion channel can be created per context.\n",
/*
 * Create a completion event channel for the given context.
 *
 * The ABI v2 compatibility case is delegated to
 * ibv_create_comp_channel_v2().  Otherwise a channel object is allocated,
 * a CREATE_COMP_CHANNEL command is written to the context's cmd_fd, and
 * the channel is filled in with the context and the file descriptor
 * returned in the response.  A write that does not transfer the whole
 * command is treated as a failure.
 */
375 struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context)
377 struct ibv_comp_channel *channel;
378 struct ibv_create_comp_channel cmd;
379 struct ibv_create_comp_channel_resp resp;
382 return ibv_create_comp_channel_v2(context);
384 channel = malloc(sizeof *channel);
388 IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_COMP_CHANNEL, &resp, sizeof resp);
389 if (write(context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) {
394 (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
396 channel->context = context;
397 channel->fd = resp.fd;
/*
 * Counterpart of ibv_create_comp_channel_v2(): releases the in_use mutex.
 * The channel pointer is reinterpreted as the enclosing
 * struct ibv_abi_compat_v2 -- NOTE(review): this relies on the channel
 * being located at the start of that structure; confirm the layout.
 */
403 static int ibv_destroy_comp_channel_v2(struct ibv_comp_channel *channel)
405 struct ibv_abi_compat_v2 *t = (struct ibv_abi_compat_v2 *) channel;
406 pthread_mutex_unlock(&t->in_use);
/*
 * Destroy a completion event channel.
 *
 * The whole operation runs with the owning context's mutex held.  The
 * channel's refcnt is examined first (a non-zero count presumably means
 * CQs are still attached -- TODO confirm how that case is reported), and
 * the ABI v2 compatibility case is delegated to
 * ibv_destroy_comp_channel_v2().
 */
410 int ibv_destroy_comp_channel(struct ibv_comp_channel *channel)
412 struct ibv_context *context;
415 context = channel->context;
416 pthread_mutex_lock(&context->mutex);
418 if (channel->refcnt) {
424 ret = ibv_destroy_comp_channel_v2(channel);
433 pthread_mutex_unlock(&context->mutex);
/*
 * Create a completion queue with at least cqe entries.
 *
 * The provider's create_cq op receives the context, cqe, channel and
 * comp_vector; cq_context is not passed to the provider but is handed to
 * verbs_init_cq() together with the context and channel to initialise the
 * generic part of the CQ.
 */
438 struct ibv_cq *__ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context,
439 struct ibv_comp_channel *channel, int comp_vector)
443 cq = context->ops.create_cq(context, cqe, channel, comp_vector);
446 verbs_init_cq(cq, context, channel, cq_context);
450 default_symver(__ibv_create_cq, ibv_create_cq);
/*
 * Resize a completion queue.  resize_cq is an optional provider op: its
 * presence is checked first, and when available the call is forwarded and
 * its result returned unchanged.
 */
452 int __ibv_resize_cq(struct ibv_cq *cq, int cqe)
454 if (!cq->context->ops.resize_cq)
457 return cq->context->ops.resize_cq(cq, cqe);
459 default_symver(__ibv_resize_cq, ibv_resize_cq);
/*
 * Destroy a completion queue.
 *
 * The CQ's channel pointer is saved before the provider's destroy_cq op
 * runs, since cq must not be relied upon afterwards.  The channel's
 * context mutex is then held while the channel state is updated
 * (presumably dropping the reference this CQ held -- TODO confirm).
 */
461 int __ibv_destroy_cq(struct ibv_cq *cq)
463 struct ibv_comp_channel *channel = cq->channel;
466 ret = cq->context->ops.destroy_cq(cq);
470 pthread_mutex_lock(&channel->context->mutex);
472 pthread_mutex_unlock(&channel->context->mutex);
478 default_symver(__ibv_destroy_cq, ibv_destroy_cq);
/*
 * Fetch the next completion event from a completion channel.
 *
 * One struct ibv_comp_event is read from the channel's fd; a read that
 * does not return exactly that many bytes is treated as a failure.  The
 * event's cq_handle is converted back into the CQ pointer returned in
 * *cq, and *cq_context receives that CQ's cq_context.  If the provider
 * implements the optional cq_event op, it is invoked for the CQ.
 */
480 int __ibv_get_cq_event(struct ibv_comp_channel *channel,
481 struct ibv_cq **cq, void **cq_context)
483 struct ibv_comp_event ev;
485 if (read(channel->fd, &ev, sizeof ev) != sizeof ev)
488 *cq = (struct ibv_cq *) (uintptr_t) ev.cq_handle;
489 *cq_context = (*cq)->cq_context;
491 if ((*cq)->context->ops.cq_event)
492 (*cq)->context->ops.cq_event(*cq);
496 default_symver(__ibv_get_cq_event, ibv_get_cq_event);
/*
 * Acknowledge nevents completion events on a CQ.  Under the CQ's mutex the
 * comp_events_completed counter is advanced by nevents and the CQ's
 * condition variable is signalled to wake a waiter on that counter.
 */
498 void __ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents)
500 pthread_mutex_lock(&cq->mutex);
501 cq->comp_events_completed += nevents;
502 pthread_cond_signal(&cq->cond);
503 pthread_mutex_unlock(&cq->mutex);
505 default_symver(__ibv_ack_cq_events, ibv_ack_cq_events);
/*
 * Create a shared receive queue.
 *
 * create_srq is an optional provider op and its presence is checked
 * first.  The generic part of the returned SRQ is then initialised here:
 * owning context, the caller's srq_context, a zeroed events_completed
 * counter, and the mutex/condition variable pair.
 */
507 struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd,
508 struct ibv_srq_init_attr *srq_init_attr)
512 if (!pd->context->ops.create_srq)
515 srq = pd->context->ops.create_srq(pd, srq_init_attr);
517 srq->context = pd->context;
518 srq->srq_context = srq_init_attr->srq_context;
520 srq->events_completed = 0;
521 pthread_mutex_init(&srq->mutex, NULL);
522 pthread_cond_init(&srq->cond, NULL);
527 default_symver(__ibv_create_srq, ibv_create_srq);
/*
 * Modify SRQ attributes: forwards the SRQ, the attribute structure and the
 * attribute mask to the provider's modify_srq op and returns its result
 * unchanged.
 */
529 int __ibv_modify_srq(struct ibv_srq *srq,
530 struct ibv_srq_attr *srq_attr,
533 return srq->context->ops.modify_srq(srq, srq_attr, srq_attr_mask);
535 default_symver(__ibv_modify_srq, ibv_modify_srq);
/*
 * Query SRQ attributes: forwards to the provider's query_srq op and
 * returns its result unchanged.
 */
537 int __ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr)
539 return srq->context->ops.query_srq(srq, srq_attr);
541 default_symver(__ibv_query_srq, ibv_query_srq);
/*
 * Destroy an SRQ: forwards to the provider's destroy_srq op and returns
 * its result unchanged.
 */
543 int __ibv_destroy_srq(struct ibv_srq *srq)
545 return srq->context->ops.destroy_srq(srq);
547 default_symver(__ibv_destroy_srq, ibv_destroy_srq);
/*
 * Create a queue pair.
 *
 * The provider's create_qp op builds the QP; the generic fields are then
 * filled in here from the PD and the init attributes: owning context,
 * qp_context, send/receive CQs, SRQ and QP type.  The cached state starts
 * as IBV_QPS_RESET, events_completed starts at 0, and the mutex/condition
 * variable pair is initialised.
 */
549 struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd,
550 struct ibv_qp_init_attr *qp_init_attr)
552 struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr);
555 qp->context = pd->context;
556 qp->qp_context = qp_init_attr->qp_context;
558 qp->send_cq = qp_init_attr->send_cq;
559 qp->recv_cq = qp_init_attr->recv_cq;
560 qp->srq = qp_init_attr->srq;
561 qp->qp_type = qp_init_attr->qp_type;
562 qp->state = IBV_QPS_RESET;
563 qp->events_completed = 0;
564 pthread_mutex_init(&qp->mutex, NULL);
565 pthread_cond_init(&qp->cond, NULL);
570 default_symver(__ibv_create_qp, ibv_create_qp);
/*
 * Query QP attributes through the provider's query_qp op.  When the
 * request included IBV_QP_STATE, the state reported in attr is also
 * written to qp->state so the cached state tracks the queried one.
 */
572 int __ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
574 struct ibv_qp_init_attr *init_attr)
578 ret = qp->context->ops.query_qp(qp, attr, attr_mask, init_attr);
582 if (attr_mask & IBV_QP_STATE)
583 qp->state = attr->qp_state;
587 default_symver(__ibv_query_qp, ibv_query_qp);
/*
 * Modify QP attributes through the provider's modify_qp op.  When the
 * change included IBV_QP_STATE, the new state from attr is also written
 * to qp->state so the cached state tracks the requested transition.
 */
589 int __ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
594 ret = qp->context->ops.modify_qp(qp, attr, attr_mask);
598 if (attr_mask & IBV_QP_STATE)
599 qp->state = attr->qp_state;
603 default_symver(__ibv_modify_qp, ibv_modify_qp);
/*
 * Destroy a QP: forwards to the provider's destroy_qp op and returns its
 * result unchanged.
 */
605 int __ibv_destroy_qp(struct ibv_qp *qp)
607 return qp->context->ops.destroy_qp(qp);
609 default_symver(__ibv_destroy_qp, ibv_destroy_qp);
/*
 * Create an address handle through the provider's create_ah op and record
 * the owning context in ah->context.
 */
611 struct ibv_ah *__ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
613 struct ibv_ah *ah = pd->context->ops.create_ah(pd, attr);
616 ah->context = pd->context;
622 default_symver(__ibv_create_ah, ibv_create_ah);
624 /* GID types as appear in sysfs, no change is expected as of ABI
627 #define V1_TYPE "IB/RoCE v1"
628 #define V2_TYPE "RoCE v2"
/*
 * Determine the type of GID table entry index on port port_num.
 *
 * The type string is read from
 * "ports/<port_num>/gid_attrs/types/<index>" below the device's
 * ibdev_path and compared against V1_TYPE / V2_TYPE, yielding
 * IBV_GID_TYPE_IB_ROCE_V1 or IBV_GID_TYPE_ROCE_V2 in *type.
 *
 * When the read returns <= 0 the function falls back to
 * IBV_GID_TYPE_IB_ROCE_V1 in two situations: errno is EINVAL, or the
 * gid_attrs directory cannot be opened (taken to mean an older kernel
 * without per-GID types).
 */
629 int ibv_query_gid_type(struct ibv_context *context, uint8_t port_num,
630 unsigned int index, enum ibv_gid_type *type)
635 snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num,
638 /* Reset errno so that we can rely on its value upon any error flow in
639 * ibv_read_sysfs_file.
642 if (ibv_read_sysfs_file(context->device->ibdev_path, name, buff,
643 sizeof(buff)) <= 0) {
647 if (errno == EINVAL) {
648 /* In IB, this file doesn't exist and the kernel sets
651 *type = IBV_GID_TYPE_IB_ROCE_V1;
654 if (asprintf(&dir_path, "%s/%s/%d/%s/",
655 context->device->ibdev_path, "ports", port_num,
658 dir = opendir(dir_path);
662 /* Assuming that if gid_attrs doesn't exist,
663 * we have an old kernel and all GIDs are
666 *type = IBV_GID_TYPE_IB_ROCE_V1;
675 if (!strcmp(buff, V1_TYPE)) {
676 *type = IBV_GID_TYPE_IB_ROCE_V1;
677 } else if (!strcmp(buff, V2_TYPE)) {
678 *type = IBV_GID_TYPE_ROCE_V2;
/*
 * Search the port's GID table for an entry whose 16 raw bytes equal *gid
 * and whose type equals gid_type.
 *
 * Entries are fetched with ibv_query_gid() and ibv_query_gid_type(); the
 * loop continues while both queries succeed and the entry does not match.
 * A query failure ends the search and its non-zero code is returned.  On
 * a match the function returns i - 1, the index of the matching entry.
 */
688 static int ibv_find_gid_index(struct ibv_context *context, uint8_t port_num,
689 union ibv_gid *gid, enum ibv_gid_type gid_type)
691 enum ibv_gid_type sgid_type = 0;
696 ret = ibv_query_gid(context, port_num, i, &sgid);
698 ret = ibv_query_gid_type(context, port_num, i,
702 } while (!ret && (memcmp(&sgid, gid, sizeof(*gid)) ||
703 (gid_type != sgid_type)));
705 return ret ? ret : i - 1;
/*
 * Build the IPv4-mapped IPv6 address ::ffff:a.b.c.d in *ipv6: the first
 * two 32-bit words are zero, the third is 0x0000FFFF in big-endian order,
 * and the fourth is the IPv4 address, copied as given (already big-endian).
 */
708 static inline void map_ipv4_addr_to_ipv6(__be32 ipv4, struct in6_addr *ipv6)
710 ipv6->s6_addr32[0] = 0;
711 ipv6->s6_addr32[1] = 0;
712 ipv6->s6_addr32[2] = htobe32(0x0000FFFF);
713 ipv6->s6_addr32[3] = ipv4;
/*
 * Compute a 16-bit one's-complement checksum over num_hwords 16-bit words
 * starting at data: the words are accumulated, the carries above bit 15
 * are folded back into the low 16 bits, and the bitwise complement is
 * returned.
 *
 * NOTE(review): only a single carry fold is visible here.  If the folded
 * value can itself exceed 0xffff, the cast to __sum16 drops that carry;
 * confirm whether a second fold is needed for the inputs used.
 */
716 static inline __sum16 ipv4_calc_hdr_csum(uint16_t *data, unsigned int num_hwords)
721 for (i = 0; i < num_hwords; i++)
724 sum = (sum & 0xffff) + (sum >> 16);
726 return (__sum16)~sum;
/*
 * Decide whether the 40-byte GRH area of a received packet holds an IPv6
 * style header or an IPv4 header.
 *
 * The top four bits of version_tclass_flow are read as the IPv6 version
 * field, and the bytes starting at offset 20 are viewed as a candidate
 * IPv4 header.  EPROTONOSUPPORT is the errno used when the version field
 * is not 6 and the data cannot be accepted.  When the version field is 6
 * the data may still be IPv4: the candidate must have a header length of
 * 5 words, and its checksum is verified by recomputing it over a local
 * copy (10 half-words, checksum field zeroed) and comparing with the
 * received ip_sum.
 */
729 static inline int get_grh_header_version(struct ibv_grh *grh)
731 int ip6h_version = (be32toh(grh->version_tclass_flow) >> 28) & 0xf;
732 struct ip *ip4h = (struct ip *)((void *)grh + 20);
733 struct ip ip4h_checked;
735 if (ip6h_version != 6) {
738 errno = EPROTONOSUPPORT;
741 /* version may be 6 or 4 */
742 if (ip4h->ip_hl != 5) /* IPv4 header length must be 5 for RoCE v2. */
746 * We can't write on scattered buffers so we have to copy to temp
749 memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
750 /* Need to set the checksum field (check) to 0 before re-calculating
753 ip4h_checked.ip_sum = 0;
754 ip4h_checked.ip_sum = ipv4_calc_hdr_csum((uint16_t *)&ip4h_checked, 10);
755 /* if IPv4 header checksum is OK, believe it */
756 if (ip4h->ip_sum == ip4h_checked.ip_sum)
/*
 * Fill the address-handle attributes that do not depend on the GRH's IP
 * version.  The roles are reversed because the AH addresses the sender of
 * the received packet: dlid comes from the completion's slid and
 * src_path_bits from its dlid_path_bits.  The flow label is the low 20
 * bits of version_tclass_flow; sl and port_num are copied as given.
 */
761 static inline void set_ah_attr_generic_fields(struct ibv_ah_attr *ah_attr,
768 flow_class = be32toh(grh->version_tclass_flow);
769 ah_attr->grh.flow_label = flow_class & 0xFFFFF;
770 ah_attr->dlid = wc->slid;
771 ah_attr->sl = wc->sl;
772 ah_attr->src_path_bits = wc->dlid_path_bits;
773 ah_attr->port_num = port_num;
/*
 * Fill the GRH part of ah_attr from a received IPv4 header.
 *
 * A class-D (multicast) destination is special-cased, since multicast
 * GIDs are not looked up in the GID table.  Otherwise the packet's
 * destination address is converted to an IPv4-mapped GID and searched for
 * as a RoCE v2 entry; the index found becomes sgid_index.  The packet's
 * source address, IPv4-mapped, becomes the AH's dgid.  hop_limit and
 * traffic_class come from the header's TTL and TOS fields.
 */
776 static inline int set_ah_attr_by_ipv4(struct ibv_context *context,
777 struct ibv_ah_attr *ah_attr,
778 struct ip *ip4h, uint8_t port_num)
783 /* No point searching multicast GIDs in GID table */
784 if (IN_CLASSD(be32toh(ip4h->ip_dst.s_addr))) {
789 map_ipv4_addr_to_ipv6(ip4h->ip_dst.s_addr, (struct in6_addr *)&sgid);
790 ret = ibv_find_gid_index(context, port_num, &sgid,
791 IBV_GID_TYPE_ROCE_V2);
795 map_ipv4_addr_to_ipv6(ip4h->ip_src.s_addr,
796 (struct in6_addr *)&ah_attr->grh.dgid);
797 ah_attr->grh.sgid_index = (uint8_t) ret;
798 ah_attr->grh.hop_limit = ip4h->ip_ttl;
799 ah_attr->grh.traffic_class = ip4h->ip_tos;
/*
 * Fill the GRH part of ah_attr from a received GRH / IPv6 header.
 *
 * A destination GID whose first byte is 0xFF (multicast) is special-cased,
 * since multicast GIDs are not looked up in the GID table.  The packet's
 * sgid becomes the AH's dgid.  The GID type to search for is derived from
 * next_hdr: IPPROTO_UDP selects RoCE v2, IB_NEXT_HDR (0x1b) selects
 * IB/RoCE v1, and any other value sets errno to EPROTONOSUPPORT.  The
 * packet's dgid is then searched in the GID table and the index found
 * becomes sgid_index.  hop_limit is copied from the header and
 * traffic_class is bits 20..27 of version_tclass_flow.
 */
804 #define IB_NEXT_HDR 0x1b
805 static inline int set_ah_attr_by_ipv6(struct ibv_context *context,
806 struct ibv_ah_attr *ah_attr,
807 struct ibv_grh *grh, uint8_t port_num)
813 /* No point searching multicast GIDs in GID table */
814 if (grh->dgid.raw[0] == 0xFF) {
819 ah_attr->grh.dgid = grh->sgid;
820 if (grh->next_hdr == IPPROTO_UDP) {
821 sgid_type = IBV_GID_TYPE_ROCE_V2;
822 } else if (grh->next_hdr == IB_NEXT_HDR) {
823 sgid_type = IBV_GID_TYPE_IB_ROCE_V1;
825 errno = EPROTONOSUPPORT;
829 ret = ibv_find_gid_index(context, port_num, &grh->dgid,
834 ah_attr->grh.sgid_index = (uint8_t) ret;
835 flow_class = be32toh(grh->version_tclass_flow);
836 ah_attr->grh.hop_limit = grh->hop_limit;
837 ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
/*
 * Initialise *ah_attr so that it addresses the sender of the packet
 * described by wc and grh, as received on port port_num.
 *
 * ah_attr is zeroed and the version-independent fields are filled first.
 * If the completion carries IBV_WC_GRH, is_global is set and the header
 * version is detected with get_grh_header_version(); the IPv4 variant is
 * handed the header at offset 20 inside the GRH, while version 6 uses the
 * GRH itself.
 */
842 int ibv_init_ah_from_wc(struct ibv_context *context, uint8_t port_num,
843 struct ibv_wc *wc, struct ibv_grh *grh,
844 struct ibv_ah_attr *ah_attr)
849 memset(ah_attr, 0, sizeof *ah_attr);
850 set_ah_attr_generic_fields(ah_attr, wc, grh, port_num);
852 if (wc->wc_flags & IBV_WC_GRH) {
853 ah_attr->is_global = 1;
854 version = get_grh_header_version(grh);
857 ret = set_ah_attr_by_ipv4(context, ah_attr,
858 (struct ip *)((void *)grh + 20),
860 else if (version == 6)
861 ret = set_ah_attr_by_ipv6(context, ah_attr, grh,
/*
 * Convenience wrapper: derive address-handle attributes from a work
 * completion and its GRH with ibv_init_ah_from_wc(), then create the
 * address handle on pd with ibv_create_ah().
 */
870 struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc,
871 struct ibv_grh *grh, uint8_t port_num)
873 struct ibv_ah_attr ah_attr;
876 ret = ibv_init_ah_from_wc(pd->context, port_num, wc, grh, &ah_attr);
880 return ibv_create_ah(pd, &ah_attr);
/*
 * Destroy an address handle: forwards to the provider's destroy_ah op and
 * returns its result unchanged.
 */
883 int __ibv_destroy_ah(struct ibv_ah *ah)
885 return ah->context->ops.destroy_ah(ah);
887 default_symver(__ibv_destroy_ah, ibv_destroy_ah);
/*
 * Attach a QP to the multicast group identified by gid and lid: forwards
 * to the provider's attach_mcast op and returns its result unchanged.
 */
889 int __ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
891 return qp->context->ops.attach_mcast(qp, gid, lid);
893 default_symver(__ibv_attach_mcast, ibv_attach_mcast);
/*
 * Detach a QP from the multicast group identified by gid and lid:
 * forwards to the provider's detach_mcast op and returns its result
 * unchanged.
 */
895 int __ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
897 return qp->context->ops.detach_mcast(qp, gid, lid);
899 default_symver(__ibv_detach_mcast, ibv_detach_mcast);
/*
 * Return non-zero when the IPv6 address carries an IPv4 address: either a
 * standard IPv4-mapped address (IN6_IS_ADDR_V4MAPPED) or an IPv4-encoded
 * multicast address, recognised by a first 32-bit word of 0xff0e0000 and
 * a third word of 0x0000ffff (both big-endian).
 */
901 static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
903 return IN6_IS_ADDR_V4MAPPED(a) ||
904 /* IPv4 encoded multicast addresses */
905 (a->s6_addr32[0] == htobe32(0xff0e0000) &&
907 (a->s6_addr32[2] ^ htobe32(0x0000ffff))) == 0UL));
/*
 * Location and length of a peer's network address inside a raw GID;
 * create_peer_from_gid() fills in the address and size members.
 */
910 struct peer_address {
/*
 * Describe where the network address lives inside a raw 16-byte GID for
 * the given address family.  One variant points at the last 4 bytes
 * (raw_gid + 12, size 4); the other covers the whole GID (size 16).
 * Presumably these are the AF_INET and AF_INET6 cases respectively --
 * TODO confirm against the family selection.  No data is copied: the
 * result aliases raw_gid.
 */
915 static inline int create_peer_from_gid(int family, void *raw_gid,
916 struct peer_address *peer_address)
920 peer_address->address = raw_gid + 12;
921 peer_address->size = 4;
924 peer_address->address = raw_gid;
925 peer_address->size = 16;
934 #define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000
935 int ibv_resolve_eth_l2_from_gid(struct ibv_context *context,
936 struct ibv_ah_attr *attr,
937 uint8_t eth_mac[ETHERNET_LL_SIZE],
940 #ifndef NRESOLVE_NEIGH
944 struct get_neigh_handler neigh_handler;
947 struct peer_address src;
948 struct peer_address dst;
953 err = ibv_query_gid(context, attr->port_num,
954 attr->grh.sgid_index, &sgid);
959 err = neigh_init_resources(&neigh_handler,
960 NEIGH_GET_DEFAULT_TIMEOUT_MS);
965 dst_family = ipv6_addr_v4mapped((struct in6_addr *)attr->grh.dgid.raw) ?
967 src_family = ipv6_addr_v4mapped((struct in6_addr *)sgid.raw) ?
970 if (create_peer_from_gid(dst_family, attr->grh.dgid.raw, &dst))
973 if (create_peer_from_gid(src_family, &sgid.raw, &src))
976 if (neigh_set_dst(&neigh_handler, dst_family, dst.address,
980 if (neigh_set_src(&neigh_handler, src_family, src.address,
984 oif = neigh_get_oif_from_src(&neigh_handler);
987 neigh_set_oif(&neigh_handler, oif);
994 if (process_get_neigh(&neigh_handler))
997 ret_vid = neigh_get_vlan_id_from_dev(&neigh_handler);
999 if (ret_vid <= 0xfff)
1000 neigh_set_vlan_id(&neigh_handler, ret_vid);
1002 /* We are using only Ethernet here */
1003 ether_len = neigh_get_ll(&neigh_handler,
1005 sizeof(uint8_t) * ETHERNET_LL_SIZE);
1008 goto free_resources;
1015 neigh_free_resources(&neigh_handler);