2 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 #include <stdatomic.h>
41 #include <sys/queue.h>
43 #include <infiniband/driver.h>
44 #include <infiniband/udma_barrier.h>
53 MLX5_IB_MMAP_CMD_SHIFT = 8,
54 MLX5_IB_MMAP_CMD_MASK = 0xff,
58 MLX5_MMAP_GET_REGULAR_PAGES_CMD = 0,
59 MLX5_MMAP_GET_CONTIGUOUS_PAGES_CMD = 1,
60 MLX5_MMAP_GET_CORE_CLOCK_CMD = 5
64 MLX5_CQE_VERSION_V0 = 0,
65 MLX5_CQE_VERSION_V1 = 1,
69 MLX5_ADAPTER_PAGE_SIZE = 4096,
72 #define MLX5_CQ_PREFIX "MLX_CQ"
73 #define MLX5_QP_PREFIX "MLX_QP"
74 #define MLX5_MR_PREFIX "MLX_MR"
75 #define MLX5_RWQ_PREFIX "MLX_RWQ"
76 #define MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE 23
77 #define MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE 12
82 MLX5_DBG_QP_SEND = 1 << 2,
83 MLX5_DBG_QP_SEND_ERR = 1 << 3,
84 MLX5_DBG_CQ_CQE = 1 << 4,
85 MLX5_DBG_CONTIG = 1 << 5,
88 extern uint32_t mlx5_debug_mask;
89 extern int mlx5_freeze_on_error_cqe;
92 #define mlx5_dbg(fp, mask, format, arg...) \
94 if (mask & mlx5_debug_mask) \
95 fprintf(fp, "%s:%d: " format, __func__, __LINE__, ##arg); \
99 static inline void mlx5_dbg(FILE *fp, uint32_t mask, const char *fmt, ...)
100 __attribute__((format(printf, 3, 4)));
101 static inline void mlx5_dbg(FILE *fp, uint32_t mask, const char *fmt, ...)
107 MLX5_STAT_RATE_OFFSET = 5
111 MLX5_QP_TABLE_SHIFT = 12,
112 MLX5_QP_TABLE_MASK = (1 << MLX5_QP_TABLE_SHIFT) - 1,
113 MLX5_QP_TABLE_SIZE = 1 << (24 - MLX5_QP_TABLE_SHIFT),
117 MLX5_UIDX_TABLE_SHIFT = 12,
118 MLX5_UIDX_TABLE_MASK = (1 << MLX5_UIDX_TABLE_SHIFT) - 1,
119 MLX5_UIDX_TABLE_SIZE = 1 << (24 - MLX5_UIDX_TABLE_SHIFT),
123 MLX5_SRQ_TABLE_SHIFT = 12,
124 MLX5_SRQ_TABLE_MASK = (1 << MLX5_SRQ_TABLE_SHIFT) - 1,
125 MLX5_SRQ_TABLE_SIZE = 1 << (24 - MLX5_SRQ_TABLE_SHIFT),
129 MLX5_BF_OFFSET = 0x800
133 MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
134 MLX5_RECV_OPCODE_SEND = 0x01,
135 MLX5_RECV_OPCODE_SEND_IMM = 0x02,
136 MLX5_RECV_OPCODE_SEND_INVAL = 0x03,
138 MLX5_CQE_OPCODE_ERROR = 0x1e,
139 MLX5_CQE_OPCODE_RESIZE = 0x16,
143 MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
147 MLX5_MAX_PORTS_NUM = 2,
151 MLX5_CSUM_SUPPORT_RAW_OVER_ETH = (1 << 0),
153 * Only report rx checksum when the validation
156 MLX5_RX_CSUM_VALID = (1 << 16),
159 enum mlx5_alloc_type {
160 MLX5_ALLOC_TYPE_ANON,
161 MLX5_ALLOC_TYPE_HUGE,
162 MLX5_ALLOC_TYPE_CONTIG,
163 MLX5_ALLOC_TYPE_PREFER_HUGE,
164 MLX5_ALLOC_TYPE_PREFER_CONTIG,
177 MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
178 MLX5_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1,
181 enum mlx5_vendor_cap_flags {
182 MLX5_VENDOR_CAP_FLAGS_MPW = 1 << 0,
186 MLX5_FLOW_TAG_MASK = 0x000fffff,
189 struct mlx5_resource {
190 enum mlx5_rsc_type type;
195 struct verbs_device verbs_dev;
202 struct mlx5_spinlock {
203 pthread_spinlock_t lock;
207 struct mlx5_hugetlb_mem;
209 struct mlx5_context {
210 struct ibv_context ibv_ctx;
215 int num_uars_per_page;
216 int bf_regs_per_page;
221 struct mlx5_qp **table;
223 } qp_table[MLX5_QP_TABLE_SIZE];
224 pthread_mutex_t qp_table_mutex;
227 struct mlx5_srq **table;
229 } srq_table[MLX5_SRQ_TABLE_SIZE];
230 pthread_mutex_t srq_table_mutex;
233 struct mlx5_resource **table;
235 } uidx_table[MLX5_UIDX_TABLE_SIZE];
236 pthread_mutex_t uidx_table_mutex;
238 void *uar[MLX5_MAX_UARS];
239 struct mlx5_spinlock lock32;
240 struct mlx5_db_page *db_list;
241 pthread_mutex_t db_list_mutex;
247 unsigned max_srq_recv_wr;
250 int stall_adaptive_enable;
255 struct mlx5_spinlock hugetlb_lock;
256 TAILQ_HEAD(,mlx5_hugetlb_mem) hugetlb_list;
258 uint8_t cached_link_layer[MLX5_MAX_PORTS_NUM];
259 int cached_device_cap_flags;
260 enum ibv_atomic_cap atomic_cap;
265 void *hca_core_clock;
266 struct ibv_tso_caps cached_tso_caps;
269 uint64_t vendor_cap_flags; /* Use enum mlx5_vendor_cap_flags */
270 struct mlx5dv_cqe_comp_caps cqe_comp_caps;
279 unsigned long *table;
282 struct mlx5_hugetlb_mem {
285 struct mlx5_bitmap bitmap;
286 TAILQ_ENTRY(mlx5_hugetlb_mem) entry;
293 struct mlx5_hugetlb_mem *hmem;
294 enum mlx5_alloc_type type;
298 struct ibv_pd ibv_pd;
308 MLX5_CQ_FLAGS_RX_CSUM_VALID = 1 << 0,
309 MLX5_CQ_FLAGS_EMPTY_DURING_POLL = 1 << 1,
310 MLX5_CQ_FLAGS_FOUND_CQES = 1 << 2,
311 MLX5_CQ_FLAGS_EXTENDED = 1 << 3,
312 MLX5_CQ_FLAGS_SINGLE_THREADED = 1 << 4,
313 MLX5_CQ_FLAGS_DV_OWNED = 1 << 5,
317 /* ibv_cq should always be subset of ibv_cq_ex */
318 struct ibv_cq_ex ibv_cq;
319 struct mlx5_buf buf_a;
320 struct mlx5_buf buf_b;
321 struct mlx5_buf *active_buf;
322 struct mlx5_buf *resize_buf;
325 struct mlx5_spinlock lock;
334 uint64_t stall_last_count;
335 int stall_adaptive_enable;
337 struct mlx5_resource *cur_rsc;
338 struct mlx5_srq *cur_srq;
339 struct mlx5_cqe64 *cqe64;
345 struct mlx5_resource rsc; /* This struct must be first */
346 struct verbs_srq vsrq;
348 struct mlx5_spinlock lock;
369 struct mlx5_spinlock lock;
385 struct mlx5_spinlock lock;
392 struct ibv_mr ibv_mr;
394 uint32_t alloc_flags;
398 struct mlx5_resource rsc; /* This struct must be first */
399 struct verbs_qp verbs_qp;
400 struct ibv_qp *ibv_qp;
405 /* For Raw Packet QP, use different buffers for the SQ and RQ */
406 struct mlx5_buf sq_buf;
411 uint8_t sq_signal_bits;
417 uint32_t qp_cap_cache;
420 uint16_t max_tso_header;
425 struct ibv_ah ibv_ah;
426 struct mlx5_wqe_av av;
431 struct mlx5_resource rsc;
442 static inline int mlx5_ilog2(int n)
456 extern int mlx5_stall_num_loop;
457 extern int mlx5_stall_cq_poll_min;
458 extern int mlx5_stall_cq_poll_max;
459 extern int mlx5_stall_cq_inc_step;
460 extern int mlx5_stall_cq_dec_step;
461 extern int mlx5_single_threaded;
/*
 * Divide n by d and round the quotient up to the next integer.
 *
 * The result is built from the truncated quotient plus a remainder test
 * instead of (n + d - 1) / d, so it stays correct when n + d - 1 would
 * wrap around UINT_MAX.
 *
 * d must be non-zero; division by zero is undefined.
 */
static inline unsigned DIV_ROUND_UP(unsigned n, unsigned d)
{
	return n / d + (n % d != 0);
}
/*
 * Round val up to a multiple of align.
 *
 * The mask arithmetic only produces a multiple of align when align is a
 * power of two; no check is performed here.
 */
static inline unsigned long align(unsigned long val, unsigned long align)
{
	const unsigned long low_bits = align - 1;

	return (val + low_bits) & ~low_bits;
}
/*
 * Recover the enclosing struct mlx5_<type> from a pointer to its embedded
 * ibv_<xxx> member.
 *
 * The macro does not take the pointer as an argument: it expects a
 * variable named ib<xxx> to be in scope at the point of use.
 *
 * The member offset is subtracted through a char pointer, because
 * arithmetic on void * is a GNU extension rather than ISO C. The
 * intermediate void * cast keeps the final conversion free of
 * cast-alignment warnings.
 */
#define to_mxxx(xxx, type)						\
	((struct mlx5_##type *)(void *)					\
	 ((char *)(ib##xxx) - offsetof(struct mlx5_##type, ibv_##xxx)))
477 static inline struct mlx5_device *to_mdev(struct ibv_device *ibdev)
479 struct mlx5_device *ret;
481 ret = (void *)ibdev - offsetof(struct mlx5_device, verbs_dev);
485 static inline struct mlx5_context *to_mctx(struct ibv_context *ibctx)
487 return to_mxxx(ctx, context);
490 static inline struct mlx5_pd *to_mpd(struct ibv_pd *ibpd)
492 return to_mxxx(pd, pd);
495 static inline struct mlx5_cq *to_mcq(struct ibv_cq *ibcq)
497 return to_mxxx(cq, cq);
500 static inline struct mlx5_srq *to_msrq(struct ibv_srq *ibsrq)
502 struct verbs_srq *vsrq = (struct verbs_srq *)ibsrq;
504 return container_of(vsrq, struct mlx5_srq, vsrq);
507 static inline struct mlx5_qp *to_mqp(struct ibv_qp *ibqp)
509 struct verbs_qp *vqp = (struct verbs_qp *)ibqp;
511 return container_of(vqp, struct mlx5_qp, verbs_qp);
514 static inline struct mlx5_rwq *to_mrwq(struct ibv_wq *ibwq)
516 return container_of(ibwq, struct mlx5_rwq, wq);
519 static inline struct mlx5_mr *to_mmr(struct ibv_mr *ibmr)
521 return to_mxxx(mr, mr);
524 static inline struct mlx5_ah *to_mah(struct ibv_ah *ibah)
526 return to_mxxx(ah, ah);
/* Return the larger of two ints; when they are equal, b is returned. */
static inline int max_int(int a, int b)
{
	if (a > b)
		return a;

	return b;
}
/*
 * Reinterpret a generic resource pointer as the QP that contains it.
 *
 * This is a plain cast, so it is only valid because struct mlx5_qp keeps
 * its rsc member first. The caller is responsible for knowing that the
 * resource really belongs to a QP.
 */
static inline struct mlx5_qp *rsc_to_mqp(struct mlx5_resource *rsc)
{
	return (struct mlx5_qp *)rsc;
}
/*
 * Reinterpret a generic resource pointer as the SRQ that contains it.
 *
 * This is a plain cast, so it is only valid because struct mlx5_srq keeps
 * its rsc member first. The caller is responsible for knowing that the
 * resource really belongs to an SRQ.
 */
static inline struct mlx5_srq *rsc_to_msrq(struct mlx5_resource *rsc)
{
	return (struct mlx5_srq *)rsc;
}
/*
 * Reinterpret a generic resource pointer as the receive work queue that
 * contains it.
 *
 * NOTE(review): this is a plain cast, so it presumes struct mlx5_rwq keeps
 * its mlx5_resource member at offset 0 -- confirm against the struct
 * definition.
 */
static inline struct mlx5_rwq *rsc_to_mrwq(struct mlx5_resource *rsc)
{
	return (struct mlx5_rwq *)rsc;
}
549 int mlx5_alloc_buf(struct mlx5_buf *buf, size_t size, int page_size);
550 void mlx5_free_buf(struct mlx5_buf *buf);
551 int mlx5_alloc_buf_contig(struct mlx5_context *mctx, struct mlx5_buf *buf,
552 size_t size, int page_size, const char *component);
553 void mlx5_free_buf_contig(struct mlx5_context *mctx, struct mlx5_buf *buf);
554 int mlx5_alloc_prefered_buf(struct mlx5_context *mctx,
555 struct mlx5_buf *buf,
556 size_t size, int page_size,
557 enum mlx5_alloc_type alloc_type,
558 const char *component);
559 int mlx5_free_actual_buf(struct mlx5_context *ctx, struct mlx5_buf *buf);
560 void mlx5_get_alloc_type(const char *component,
561 enum mlx5_alloc_type *alloc_type,
562 enum mlx5_alloc_type default_alloc_type);
563 int mlx5_use_huge(const char *key);
565 uint32_t *mlx5_alloc_dbrec(struct mlx5_context *context);
566 void mlx5_free_db(struct mlx5_context *context, uint32_t *db);
568 int mlx5_query_device(struct ibv_context *context,
569 struct ibv_device_attr *attr);
570 int mlx5_query_device_ex(struct ibv_context *context,
571 const struct ibv_query_device_ex_input *input,
572 struct ibv_device_attr_ex *attr,
574 int mlx5_query_rt_values(struct ibv_context *context,
575 struct ibv_values_ex *values);
576 struct ibv_qp *mlx5_create_qp_ex(struct ibv_context *context,
577 struct ibv_qp_init_attr_ex *attr);
578 int mlx5_query_port(struct ibv_context *context, uint8_t port,
579 struct ibv_port_attr *attr);
581 struct ibv_pd *mlx5_alloc_pd(struct ibv_context *context);
582 int mlx5_free_pd(struct ibv_pd *pd);
584 struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr,
585 size_t length, int access);
586 int mlx5_rereg_mr(struct ibv_mr *mr, int flags, struct ibv_pd *pd, void *addr,
587 size_t length, int access);
588 int mlx5_dereg_mr(struct ibv_mr *mr);
589 struct ibv_mw *mlx5_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type);
590 int mlx5_dealloc_mw(struct ibv_mw *mw);
591 int mlx5_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
592 struct ibv_mw_bind *mw_bind);
594 struct ibv_cq *mlx5_create_cq(struct ibv_context *context, int cqe,
595 struct ibv_comp_channel *channel,
597 struct ibv_cq_ex *mlx5_create_cq_ex(struct ibv_context *context,
598 struct ibv_cq_init_attr_ex *cq_attr);
599 void mlx5_cq_fill_pfns(struct mlx5_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr);
600 int mlx5_alloc_cq_buf(struct mlx5_context *mctx, struct mlx5_cq *cq,
601 struct mlx5_buf *buf, int nent, int cqe_sz);
602 int mlx5_free_cq_buf(struct mlx5_context *ctx, struct mlx5_buf *buf);
603 int mlx5_resize_cq(struct ibv_cq *cq, int cqe);
604 int mlx5_destroy_cq(struct ibv_cq *cq);
605 int mlx5_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
606 int mlx5_poll_cq_v1(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
607 int mlx5_arm_cq(struct ibv_cq *cq, int solicited);
608 void mlx5_cq_event(struct ibv_cq *cq);
609 void __mlx5_cq_clean(struct mlx5_cq *cq, uint32_t qpn, struct mlx5_srq *srq);
610 void mlx5_cq_clean(struct mlx5_cq *cq, uint32_t qpn, struct mlx5_srq *srq);
611 void mlx5_cq_resize_copy_cqes(struct mlx5_cq *cq);
613 struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
614 struct ibv_srq_init_attr *attr);
615 int mlx5_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr,
617 int mlx5_query_srq(struct ibv_srq *srq,
618 struct ibv_srq_attr *attr);
619 int mlx5_destroy_srq(struct ibv_srq *srq);
620 int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq);
621 void mlx5_free_srq_wqe(struct mlx5_srq *srq, int ind);
622 int mlx5_post_srq_recv(struct ibv_srq *ibsrq,
623 struct ibv_recv_wr *wr,
624 struct ibv_recv_wr **bad_wr);
626 struct ibv_qp *mlx5_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
627 int mlx5_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
629 struct ibv_qp_init_attr *init_attr);
630 int mlx5_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
632 int mlx5_destroy_qp(struct ibv_qp *qp);
633 void mlx5_init_qp_indices(struct mlx5_qp *qp);
634 void mlx5_init_rwq_indices(struct mlx5_rwq *rwq);
635 int mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
636 struct ibv_send_wr **bad_wr);
637 int mlx5_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
638 struct ibv_recv_wr **bad_wr);
639 int mlx5_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr,
640 struct ibv_recv_wr **bad_wr);
641 void mlx5_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
643 void mlx5_set_sq_sizes(struct mlx5_qp *qp, struct ibv_qp_cap *cap,
644 enum ibv_qp_type type);
645 struct mlx5_qp *mlx5_find_qp(struct mlx5_context *ctx, uint32_t qpn);
646 int mlx5_store_qp(struct mlx5_context *ctx, uint32_t qpn, struct mlx5_qp *qp);
647 void mlx5_clear_qp(struct mlx5_context *ctx, uint32_t qpn);
648 int32_t mlx5_store_uidx(struct mlx5_context *ctx, void *rsc);
649 void mlx5_clear_uidx(struct mlx5_context *ctx, uint32_t uidx);
650 struct mlx5_srq *mlx5_find_srq(struct mlx5_context *ctx, uint32_t srqn);
651 int mlx5_store_srq(struct mlx5_context *ctx, uint32_t srqn,
652 struct mlx5_srq *srq);
653 void mlx5_clear_srq(struct mlx5_context *ctx, uint32_t srqn);
654 struct ibv_ah *mlx5_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
655 int mlx5_destroy_ah(struct ibv_ah *ah);
656 int mlx5_alloc_av(struct mlx5_pd *pd, struct ibv_ah_attr *attr,
658 void mlx5_free_av(struct mlx5_ah *ah);
659 int mlx5_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
660 int mlx5_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
661 int mlx5_round_up_power_of_two(long long sz);
662 void *mlx5_get_atomic_laddr(struct mlx5_qp *qp, uint16_t idx, int *byte_count);
663 void *mlx5_get_send_wqe(struct mlx5_qp *qp, int n);
664 int mlx5_copy_to_recv_wqe(struct mlx5_qp *qp, int idx, void *buf, int size);
665 int mlx5_copy_to_send_wqe(struct mlx5_qp *qp, int idx, void *buf, int size);
666 int mlx5_copy_to_recv_srq(struct mlx5_srq *srq, int idx, void *buf, int size);
667 struct ibv_xrcd *mlx5_open_xrcd(struct ibv_context *context,
668 struct ibv_xrcd_init_attr *xrcd_init_attr);
669 int mlx5_get_srq_num(struct ibv_srq *srq, uint32_t *srq_num);
670 int mlx5_close_xrcd(struct ibv_xrcd *ib_xrcd);
671 struct ibv_wq *mlx5_create_wq(struct ibv_context *context,
672 struct ibv_wq_init_attr *attr);
673 int mlx5_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr);
674 int mlx5_destroy_wq(struct ibv_wq *wq);
675 struct ibv_rwq_ind_table *mlx5_create_rwq_ind_table(struct ibv_context *context,
676 struct ibv_rwq_ind_table_init_attr *init_attr);
677 int mlx5_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table);
678 struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
679 struct ibv_srq_init_attr_ex *attr);
681 static inline void *mlx5_find_uidx(struct mlx5_context *ctx, uint32_t uidx)
683 int tind = uidx >> MLX5_UIDX_TABLE_SHIFT;
685 if (likely(ctx->uidx_table[tind].refcnt))
686 return ctx->uidx_table[tind].table[uidx & MLX5_UIDX_TABLE_MASK];
691 static inline int mlx5_spin_lock(struct mlx5_spinlock *lock)
693 if (!mlx5_single_threaded)
694 return pthread_spin_lock(&lock->lock);
696 if (unlikely(lock->in_use)) {
697 fprintf(stderr, "*** ERROR: multithreading vilation ***\n"
698 "You are running a multithreaded application but\n"
699 "you set MLX5_SINGLE_THREADED=1. Please unset it.\n");
704 * This fence is not at all correct, but it increases the
705 * chance that in_use is detected by another thread without
706 * much runtime cost. */
707 atomic_thread_fence(memory_order_acq_rel);
713 static inline int mlx5_spin_unlock(struct mlx5_spinlock *lock)
715 if (!mlx5_single_threaded)
716 return pthread_spin_unlock(&lock->lock);
723 static inline int mlx5_spinlock_init(struct mlx5_spinlock *lock)
726 return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_PRIVATE);
729 static inline int mlx5_spinlock_destroy(struct mlx5_spinlock *lock)
731 return pthread_spin_destroy(&lock->lock);
734 static inline void set_command(int command, off_t *offset)
736 *offset |= (command << MLX5_IB_MMAP_CMD_SHIFT);
739 static inline void set_arg(int arg, off_t *offset)
/* Store an order value in *offset; a thin wrapper around set_arg(). */
static inline void set_order(int order, off_t *offset)
{
	set_arg(order, offset);
}
/* Store an index value in *offset; a thin wrapper around set_arg(). */
static inline void set_index(int index, off_t *offset)
{
	set_arg(index, offset);
}
754 static inline uint8_t calc_sig(void *wqe, int size)
760 for (i = 0; i < size; ++i)