]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/ofed/libcxgb4/dev.c
Add two missing eventhandler.h headers
[FreeBSD/FreeBSD.git] / contrib / ofed / libcxgb4 / dev.c
1 /*
2  * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <config.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <unistd.h>
37 #include <errno.h>
38 #include <sys/mman.h>
39 #include <pthread.h>
40 #include <string.h>
41 #include <signal.h>
42 #include <stdbool.h>
43
44 #include "libcxgb4.h"
45 #include "cxgb4-abi.h"
46
#define PCI_VENDOR_ID_CHELSIO   0x1425

/*
 * Building blocks for the PCI device ID table.
 *
 * The BEGIN macro opens the definition of a file-local array hca_table[]
 * of { vendor, device } pairs, every ENTRY macro expands to one initializer
 * carrying the Chelsio vendor ID plus the given device ID, and the END macro
 * closes the initializer list.
 *
 * NOTE(review): presumably expanded by t4_pci_id_tbl.h, which is included
 * right after these definitions -- confirm against that header.
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
        static struct { \
                unsigned vendor; \
                unsigned device; \
        } hca_table[] = {

/* NOTE(review): looks like the PCI function selector for the table -- confirm. */
#define CH_PCI_DEVICE_ID_FUNCTION       0x4

#define CH_PCI_ID_TABLE_ENTRY(devid) \
                { \
                        .vendor = PCI_VENDOR_ID_CHELSIO, \
                        .device = (devid), \
                }

#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
        }
69
70 #include "t4_chip_type.h"
71 #include "t4_pci_id_tbl.h"
72
/* Page geometry; assigned by cxgb4_register_driver() from sysconf(). */
unsigned long c4iw_page_size;
unsigned long c4iw_page_shift;
unsigned long c4iw_page_mask;

/* Set to 1 only when the CXGB4_MA_WR environment variable parses to 1. */
int ma_wr;

/*
 * Write-combining switch, on by default.  Overridden by the T5_ENABLE_WC
 * environment variable and cleared by c4iw_alloc_context() when the status
 * page reports that WC is not supported.
 */
int t5_en_wc = 1;

/* Every device claimed by cxgb4_driver_init() is appended to this list. */
static TAILQ_HEAD(, c4iw_dev) devices = TAILQ_HEAD_INITIALIZER(devices);
80
81 static struct ibv_context_ops c4iw_ctx_ops = {
82         .query_device = c4iw_query_device,
83         .query_port = c4iw_query_port,
84         .alloc_pd = c4iw_alloc_pd,
85         .dealloc_pd = c4iw_free_pd,
86         .reg_mr = c4iw_reg_mr,
87         .dereg_mr = c4iw_dereg_mr,
88         .create_cq = c4iw_create_cq,
89         .resize_cq = c4iw_resize_cq,
90         .destroy_cq = c4iw_destroy_cq,
91         .create_srq = c4iw_create_srq,
92         .modify_srq = c4iw_modify_srq,
93         .destroy_srq = c4iw_destroy_srq,
94         .create_qp = c4iw_create_qp,
95         .modify_qp = c4iw_modify_qp,
96         .destroy_qp = c4iw_destroy_qp,
97         .query_qp = c4iw_query_qp,
98         .create_ah = c4iw_create_ah,
99         .destroy_ah = c4iw_destroy_ah,
100         .attach_mcast = c4iw_attach_mcast,
101         .detach_mcast = c4iw_detach_mcast,
102         .post_srq_recv = c4iw_post_srq_recv,
103         .req_notify_cq = c4iw_arm_cq,
104 };
105
106 static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev,
107                                               int cmd_fd)
108 {
109         struct c4iw_context *context;
110         struct ibv_get_context cmd;
111         struct c4iw_alloc_ucontext_resp resp;
112         struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
113         struct ibv_query_device qcmd;
114         uint64_t raw_fw_ver;
115         struct ibv_device_attr attr;
116
117         context = malloc(sizeof *context);
118         if (!context)
119                 return NULL;
120
121         memset(context, 0, sizeof *context);
122         context->ibv_ctx.cmd_fd = cmd_fd;
123
124         resp.status_page_size = 0;
125         resp.reserved = 0;
126         if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
127                                 &resp.ibv_resp, sizeof resp))
128                 goto err_free;
129
130         if (resp.reserved)
131                 PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n",
132                      __FUNCTION__);
133
134         context->status_page_size = resp.status_page_size;
135         if (resp.status_page_size) {
136                 context->status_page = mmap(NULL, resp.status_page_size,
137                                             PROT_READ, MAP_SHARED, cmd_fd,
138                                             resp.status_page_key);
139                 if (context->status_page == MAP_FAILED)
140                         goto err_free;
141         } 
142
143         context->ibv_ctx.device = ibdev;
144         context->ibv_ctx.ops = c4iw_ctx_ops;
145
146         switch (rhp->chip_version) {
147         case CHELSIO_T6:
148                 PDBG("%s T6/T5/T4 device\n", __FUNCTION__);
149         case CHELSIO_T5:
150                 PDBG("%s T5/T4 device\n", __FUNCTION__);
151         case CHELSIO_T4:
152                 PDBG("%s T4 device\n", __FUNCTION__);
153                 context->ibv_ctx.ops.async_event = c4iw_async_event;
154                 context->ibv_ctx.ops.post_send = c4iw_post_send;
155                 context->ibv_ctx.ops.post_recv = c4iw_post_receive;
156                 context->ibv_ctx.ops.poll_cq = c4iw_poll_cq;
157                 context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq;
158                 break;
159         default:
160                 PDBG("%s unknown hca type %d\n", __FUNCTION__,
161                      rhp->chip_version);
162                 goto err_unmap;
163                 break;
164         }
165
166         if (!rhp->mmid2ptr) {
167                 int ret;
168
169                 ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd,
170                                            sizeof qcmd);
171                 if (ret)
172                         goto err_unmap;
173                 rhp->max_mr = attr.max_mr;
174                 rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *));
175                 if (!rhp->mmid2ptr) {
176                         goto err_unmap;
177                 }
178                 if (rhp->abi_version < 3) {
179                         fprintf(stderr, "Warning: iw_cxgb4 driver is of older version"
180                                         " than libcxgb4:: %d\n", rhp->abi_version);
181                         rhp->max_qp = T4_QID_BASE + attr.max_qp;
182                 } else {
183                         rhp->max_qp = context->status_page->qp_start +
184                                         context->status_page->qp_size;
185                 }
186                 rhp->qpid2ptr = calloc(rhp->max_qp, sizeof(void *));
187                 if (!rhp->qpid2ptr) {
188                         goto err_unmap;
189                 }
190                 if (rhp->abi_version < 3)
191                         rhp->max_cq = T4_QID_BASE + attr.max_cq;
192                 else
193                         rhp->max_cq = context->status_page->cq_start +
194                                         context->status_page->cq_size;
195                 rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *));
196                 if (!rhp->cqid2ptr)
197                         goto err_unmap;
198
199                 /* Disable userspace WC if architecture/adapter does not
200                  * support WC.
201                  * Note: To forcefully disable WC in kernel driver use the
202                  * loader tunable "hw.cxl.write_combine=0"
203                  */
204                 if (t5_en_wc && !context->status_page->wc_supported) {
205                         t5_en_wc = 0;
206                 }
207         }
208
209         return &context->ibv_ctx;
210
211 err_unmap:
212         munmap(context->status_page, context->status_page_size);
213 err_free:
214         if (rhp->cqid2ptr)
215                 free(rhp->cqid2ptr);
216         if (rhp->qpid2ptr)
217                 free(rhp->cqid2ptr);
218         if (rhp->mmid2ptr)
219                 free(rhp->cqid2ptr);
220         free(context);
221         return NULL;
222 }
223
224 static void c4iw_free_context(struct ibv_context *ibctx)
225 {
226         struct c4iw_context *context = to_c4iw_context(ibctx);
227
228         if (context->status_page_size)
229                 munmap(context->status_page, context->status_page_size);
230         free(context);
231 }
232
233 static struct verbs_device_ops c4iw_dev_ops = {
234         .alloc_context = c4iw_alloc_context,
235         .free_context = c4iw_free_context
236 };
237
238 #ifdef STALL_DETECTION
239
/* Stall timeout; loaded from CXGB4_STALL_TIMEOUT in cxgb4_driver_init(). */
int stall_to;
241
242 static void dump_cq(struct c4iw_cq *chp)
243 {
244         int i;
245
246         fprintf(stderr,
247                 "CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d "
248                 "cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp,
249                 chp->cq.cqid, chp->cq.queue, chp->cq.cidx,
250                 chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use,
251                 chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64toh(chp->cq.bits_type_ts),
252                 t4_cq_notempty(&chp->cq));
253
254         for (i=0; i < chp->cq.size; i++) {
255                 u64 *p = (u64 *)(chp->cq.queue + i);
256
257                 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64toh(p[0]), be64toh(p[1]));
258                 if (i == chp->cq.cidx)
259                         fprintf(stderr, " <-- cidx\n");
260                 else
261                         fprintf(stderr, "\n");
262                 p+= 2;
263                 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
264                 p+= 2;
265                 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
266                 p+= 2;
267                 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
268                 p+= 2;
269         }
270 }
271
272 static void dump_qp(struct c4iw_qp *qhp)
273 {
274         int i;
275         int j;
276         struct t4_swsqe *swsqe;
277         struct t4_swrqe *swrqe;
278         u16 cidx, pidx;
279         u64 *p;
280
281         fprintf(stderr,
282                 "QP: %p id %u error %d flushed %d qid_mask 0x%x\n"
283                 "    SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n"
284                 "    RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n",
285                 qhp,
286                 qhp->wq.sq.qid,
287                 qhp->wq.error,
288                 qhp->wq.flushed,
289                 qhp->wq.qid_mask,
290                 qhp->wq.sq.qid,
291                 qhp->wq.sq.queue,
292                 qhp->wq.sq.sw_sq,
293                 qhp->wq.sq.cidx,
294                 qhp->wq.sq.pidx,
295                 qhp->wq.sq.in_use,
296                 qhp->wq.sq.wq_pidx,
297                 qhp->wq.sq.size,
298                 qhp->wq.sq.flags,
299                 qhp->wq.sq.flush_cidx,
300                 qhp->wq.rq.qid,
301                 qhp->wq.rq.queue,
302                 qhp->wq.rq.sw_rq,
303                 qhp->wq.rq.cidx,
304                 qhp->wq.rq.pidx,
305                 qhp->wq.rq.in_use,
306                 qhp->wq.rq.size);
307         cidx = qhp->wq.sq.cidx;
308         pidx = qhp->wq.sq.pidx;
309         if (cidx != pidx)
310                 fprintf(stderr, "SQ: \n");
311         while (cidx != pidx) {
312                 swsqe = &qhp->wq.sq.sw_sq[cidx];
313                 fprintf(stderr, "%04u: wr_id %016" PRIx64
314                         " sq_wptr %08x read_len %u opcode 0x%x "
315                         "complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
316                         cidx,
317                         swsqe->wr_id,
318                         swsqe->idx,
319                         swsqe->read_len,
320                         swsqe->opcode,
321                         swsqe->complete,
322                         swsqe->signaled,
323                         htobe64(((uint64_t *)&swsqe->cqe)[0]),
324                         htobe64(((uint64_t *)&swsqe->cqe)[1]),
325                         htobe64(((uint64_t *)&swsqe->cqe)[2]),
326                         htobe64(((uint64_t *)&swsqe->cqe)[3]));
327                 if (++cidx == qhp->wq.sq.size)
328                         cidx = 0;
329         }
330
331         fprintf(stderr, "SQ WQ: \n");
332         p = (u64 *)qhp->wq.sq.queue;
333         for (i=0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) {
334                 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
335                         fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
336                                 i, be64toh(p[0]), be64toh(p[1]));
337                         if (j == 0 && i == qhp->wq.sq.wq_pidx)
338                                 fprintf(stderr, " <-- pidx");
339                         fprintf(stderr, "\n");
340                         p += 2;
341                 }
342         }
343         cidx = qhp->wq.rq.cidx;
344         pidx = qhp->wq.rq.pidx;
345         if (cidx != pidx)
346                 fprintf(stderr, "RQ: \n");
347         while (cidx != pidx) {
348                 swrqe = &qhp->wq.rq.sw_rq[cidx];
349                 fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n",
350                         cidx,
351                         swrqe->wr_id );
352                 if (++cidx == qhp->wq.rq.size)
353                         cidx = 0;
354         }
355
356         fprintf(stderr, "RQ WQ: \n");
357         p = (u64 *)qhp->wq.rq.queue;
358         for (i=0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) {
359                 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
360                         fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
361                                 i, be64toh(p[0]), be64toh(p[1]));
362                         if (j == 0 && i == qhp->wq.rq.pidx)
363                                 fprintf(stderr, " <-- pidx");
364                         if (j == 0 && i == qhp->wq.rq.cidx)
365                                 fprintf(stderr, " <-- cidx");
366                         fprintf(stderr, "\n");
367                         p+=2;
368                 }
369         }
370 }
371
372 void dump_state(void)
373 {
374         struct c4iw_dev *dev;
375         int i;
376
377         fprintf(stderr, "STALL DETECTED:\n");
378         TAILQ_FOREACH(dev, &devices, list) {
379                 //pthread_spin_lock(&dev->lock);
380                 fprintf(stderr, "Device %s\n", dev->ibv_dev.name);
381                 for (i=0; i < dev->max_cq; i++) {
382                         if (dev->cqid2ptr[i]) {
383                                 struct c4iw_cq *chp = dev->cqid2ptr[i];
384                                 //pthread_spin_lock(&chp->lock);
385                                 dump_cq(chp);
386                                 //pthread_spin_unlock(&chp->lock);
387                         }
388                 }
389                 for (i=0; i < dev->max_qp; i++) {
390                         if (dev->qpid2ptr[i]) {
391                                 struct c4iw_qp *qhp = dev->qpid2ptr[i];
392                                 //pthread_spin_lock(&qhp->lock);
393                                 dump_qp(qhp);
394                                 //pthread_spin_unlock(&qhp->lock);
395                         }
396                 }
397                 //pthread_spin_unlock(&dev->lock);
398         }
399         fprintf(stderr, "DUMP COMPLETE:\n");
400         fflush(stderr);
401 }
402 #endif /* end of STALL_DETECTION */
403
/*
 * ABI version of the iw_cxgb4 kernel driver.  It defaults to 1 and is
 * overwritten by cxgb4_driver_init() once a device is matched, so the
 * user-mode library can tell whether the driver supports kernel-mode
 * doorbell ringing.
 */
int c4iw_abi_version = 1;
409
410 static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path,
411                                               int abi_version)
412 {
413         char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp;
414         char dev_str[IBV_SYSFS_PATH_MAX];
415         struct c4iw_dev *dev;
416         unsigned vendor, device, fw_maj, fw_min;
417         int i;
418         char devnum;
419         char ib_param[16];
420
421 #ifndef __linux__
422         if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
423                                 ibdev, sizeof ibdev) < 0)
424                 return NULL;
425
426         devnum = atoi(&ibdev[5]);
427
428         if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' &&
429             strstr(&ibdev[2], "nex") && devnum >= 0) {
430                 snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1],
431                     devnum);
432         } else
433                 return NULL;
434
435         if (ibv_read_sysfs_file(dev_str, "\%pnpinfo", value, sizeof value) < 0)
436                 return NULL;
437         else {
438                 if (strstr(value, "vendor=")) {
439                         strncpy(ib_param, strstr(value, "vendor=") +
440                                         strlen("vendor="), 6);
441                         sscanf(ib_param, "%i", &vendor);
442                 }
443
444                 if (strstr(value, "device=")) {
445                         strncpy(ib_param, strstr(value, "device=") +
446                                         strlen("device="), 6);
447                         sscanf(ib_param, "%i", &device);
448                 }
449         }
450 #else
451         if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
452                                 value, sizeof value) < 0)
453                 return NULL;
454         sscanf(value, "%i", &vendor);
455
456         if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
457                                 value, sizeof value) < 0)
458                 return NULL;
459         sscanf(value, "%i", &device);
460 #endif
461
462         for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
463                 if (vendor == hca_table[i].vendor &&
464                     device == hca_table[i].device)
465                         goto found;
466
467         return NULL;
468
469 found:
470         c4iw_abi_version = abi_version; 
471
472 #ifndef __linux__
473         if (ibv_read_sysfs_file(dev_str, "firmware_version",
474                                 value, sizeof value) < 0)
475                 return NULL;
476 #else
477         /*
478          * Verify that the firmware major number matches.  Major number
479          * mismatches are fatal.  Minor number mismatches are tolerated.
480          */
481         if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
482                                 ibdev, sizeof ibdev) < 0)
483                 return NULL;
484
485         memset(devstr, 0, sizeof devstr);
486         snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s",
487                  ibv_get_sysfs_path(), ibdev);
488         if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
489                 return NULL;
490 #endif
491
492         cp = strtok(value+1, ".");
493         sscanf(cp, "%i", &fw_maj);
494         cp = strtok(NULL, ".");
495         sscanf(cp, "%i", &fw_min);
496
497         if ((signed int)fw_maj < FW_MAJ) {
498                 fprintf(stderr, "libcxgb4: Fatal firmware version mismatch.  "
499                         "Firmware major number is %u and libcxgb4 needs %u.\n",
500                         fw_maj, FW_MAJ);
501                 fflush(stderr);
502                 return NULL;
503         }
504
505         DBGLOG("libcxgb4");
506
507         if ((signed int)fw_min < FW_MIN) {
508                 PDBG("libcxgb4: non-fatal firmware version mismatch.  "
509                         "Firmware minor number is %u and libcxgb4 needs %u.\n",
510                         fw_min, FW_MIN);
511                 fflush(stderr);
512         }
513
514         PDBG("%s found vendor %d device %d type %d\n",
515              __FUNCTION__, vendor, device, CHELSIO_CHIP_VERSION(hca_table[i].device >> 8));
516
517         dev = calloc(1, sizeof *dev);
518         if (!dev) {
519                 return NULL;
520         }
521
522         pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE);
523         dev->ibv_dev.ops = &c4iw_dev_ops;
524         dev->chip_version = CHELSIO_CHIP_VERSION(hca_table[i].device >> 8);
525         dev->abi_version = abi_version;
526
527         PDBG("%s device claimed\n", __FUNCTION__);
528         TAILQ_INSERT_TAIL(&devices, dev, list);
529 #ifdef STALL_DETECTION
530 {
531         char *c = getenv("CXGB4_STALL_TIMEOUT");
532         if (c) {
533                 stall_to = strtol(c, NULL, 0);
534                 if (errno || stall_to < 0)
535                         stall_to = 0;
536         }
537 }
538 #endif
539 {
540         char *c = getenv("CXGB4_MA_WR");
541         if (c) {
542                 ma_wr = strtol(c, NULL, 0);
543                 if (ma_wr != 1)
544                         ma_wr = 0;
545         }
546 }
547 {
548         char *c = getenv("T5_ENABLE_WC");
549         if (c) {
550                 t5_en_wc = strtol(c, NULL, 0);
551                 if (t5_en_wc != 1)
552                         t5_en_wc = 0;
553         }
554 }
555
556         return &dev->ibv_dev;
557 }
558
559 static __attribute__((constructor)) void cxgb4_register_driver(void)
560 {
561         c4iw_page_size = sysconf(_SC_PAGESIZE);
562         c4iw_page_shift = long_log2(c4iw_page_size);
563         c4iw_page_mask = ~(c4iw_page_size - 1);
564         verbs_register_driver("cxgb4", cxgb4_driver_init);
565 }
566
#ifdef STATS
/* Library destructor (STATS builds only): log the c4iw_stats counters. */
void __attribute__ ((destructor)) cs_fini(void);

void __attribute__ ((destructor)) cs_fini(void)
{
        syslog(LOG_NOTICE,
               "cxgb4 stats - sends %lu recv %lu read %lu "
               "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n",
               c4iw_stats.send,
               c4iw_stats.recv,
               c4iw_stats.read,
               c4iw_stats.write,
               c4iw_stats.arm,
               c4iw_stats.cqe,
               c4iw_stats.mr,
               c4iw_stats.qp,
               c4iw_stats.cq);
}
#endif