/* FreeBSD sys/dev/cxgbe/cxgbei/cxgbei.c — "cxgbei: Parse all PDUs received prior to enabling offload mode." */
1 /*-
2  * Copyright (c) 2012 Chelsio Communications, Inc.
3  * All rights reserved.
4  *
5  * Chelsio T5xx iSCSI driver
6  *
7  * Written by: Sreenivasa Honnur <shonnur@chelsio.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/ktr.h>
41 #include <sys/module.h>
42 #include <sys/systm.h>
43
44 #ifdef TCP_OFFLOAD
45 #include <sys/errno.h>
46 #include <sys/gsb_crc32.h>
47 #include <sys/kthread.h>
48 #include <sys/smp.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/mbuf.h>
52 #include <sys/lock.h>
53 #include <sys/mutex.h>
54 #include <sys/condvar.h>
55
56 #include <netinet/in.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/toecore.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_fsm.h>
61
62 #include <cam/scsi/scsi_all.h>
63 #include <cam/scsi/scsi_da.h>
64 #include <cam/ctl/ctl_io.h>
65 #include <cam/ctl/ctl.h>
66 #include <cam/ctl/ctl_backend.h>
67 #include <cam/ctl/ctl_error.h>
68 #include <cam/ctl/ctl_frontend.h>
69 #include <cam/ctl/ctl_debug.h>
70 #include <cam/ctl/ctl_ha.h>
71 #include <cam/ctl/ctl_ioctl.h>
72
73 #include <dev/iscsi/icl.h>
74 #include <dev/iscsi/iscsi_proto.h>
75 #include <dev/iscsi/iscsi_ioctl.h>
76 #include <dev/iscsi/iscsi.h>
77 #include <cam/ctl/ctl_frontend_iscsi.h>
78
79 #include <cam/cam.h>
80 #include <cam/cam_ccb.h>
81 #include <cam/cam_xpt.h>
82 #include <cam/cam_debug.h>
83 #include <cam/cam_sim.h>
84 #include <cam/cam_xpt_sim.h>
85 #include <cam/cam_xpt_periph.h>
86 #include <cam/cam_periph.h>
87 #include <cam/cam_compat.h>
88 #include <cam/scsi/scsi_message.h>
89
90 #include "common/common.h"
91 #include "common/t4_msg.h"
92 #include "common/t4_regs.h"     /* for PCIE_MEM_ACCESS */
93 #include "tom/t4_tom.h"
94 #include "cxgbei.h"
95
/*
 * Worker threads that hand received PDUs up to ICL.  Each connection is
 * assigned a thread (icc->cwt indexes cwt_softc[]); the rx CPL handlers
 * queue the connection on its thread's rx_head and wake it.
 */
static int worker_thread_count;
static struct cxgbei_worker_thread_softc *cwt_softc;
static struct proc *cxgbei_proc;	/* kernel process hosting the worker threads */
99
100 static void
101 read_pdu_limits(struct adapter *sc, uint32_t *max_tx_data_len,
102     uint32_t *max_rx_data_len, struct ppod_region *pr)
103 {
104         uint32_t tx_len, rx_len, r, v;
105
106         rx_len = t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE);
107         tx_len = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
108
109         r = t4_read_reg(sc, A_TP_PARA_REG2);
110         rx_len = min(rx_len, G_MAXRXDATA(r));
111         tx_len = min(tx_len, G_MAXRXDATA(r));
112
113         r = t4_read_reg(sc, A_TP_PARA_REG7);
114         v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r));
115         rx_len = min(rx_len, v);
116         tx_len = min(tx_len, v);
117
118         /*
119          * AHS is not supported by the kernel so we'll not account for
120          * it either in our PDU len -> data segment len conversions.
121          */
122         rx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
123             ISCSI_DATA_DIGEST_SIZE;
124         tx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
125             ISCSI_DATA_DIGEST_SIZE;
126
127         /*
128          * DDP can place only 4 pages for a single PDU.  A single
129          * request might use larger pages than the smallest page size,
130          * but that cannot be guaranteed.  Assume the smallest DDP
131          * page size for this limit.
132          */
133         rx_len = min(rx_len, 4 * (1U << pr->pr_page_shift[0]));
134
135         if (chip_id(sc) == CHELSIO_T5) {
136                 tx_len = min(tx_len, 15360);
137
138                 rx_len = rounddown2(rx_len, 512);
139                 tx_len = rounddown2(tx_len, 512);
140         }
141
142         *max_tx_data_len = tx_len;
143         *max_rx_data_len = rx_len;
144 }
145
/*
 * Initialize the software state of the iSCSI ULP driver: set up the
 * page pod region for DDP, verify/program the iSCSI tag mask, compute
 * the PDU data-segment limits, and publish the dev.tNnex.X.iscsi
 * sysctl subtree.
 *
 * ENXIO means firmware didn't set up something that it was supposed to.
 */
static int
cxgbei_init(struct adapter *sc, struct cxgbei_data *ci)
{
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children;
	struct ppod_region *pr;
	uint32_t r;
	int rc;

	MPASS(sc->vres.iscsi.size > 0);
	MPASS(ci != NULL);

	/* Carve the iSCSI memory region into page pods for DDP. */
	pr = &ci->pr;
	r = t4_read_reg(sc, A_ULP_RX_ISCSI_PSZ);
	rc = t4_init_ppod_region(pr, &sc->vres.iscsi, r, "iSCSI page pods");
	if (rc != 0) {
		device_printf(sc->dev,
		    "%s: failed to initialize the iSCSI page pod region: %u.\n",
		    __func__, rc);
		return (rc);
	}

	r = t4_read_reg(sc, A_ULP_RX_ISCSI_TAGMASK);
	r &= V_ISCSITAGMASK(M_ISCSITAGMASK);
	if (r != pr->pr_tag_mask) {
		/*
		 * Recent firmwares are supposed to set up the iSCSI tagmask
		 * but we'll do it ourselves if the computed value doesn't
		 * match what's in the register.
		 */
		device_printf(sc->dev,
		    "tagmask 0x%08x does not match computed mask 0x%08x.\n", r,
		    pr->pr_tag_mask);
		t4_set_reg_field(sc, A_ULP_RX_ISCSI_TAGMASK,
		    V_ISCSITAGMASK(M_ISCSITAGMASK), pr->pr_tag_mask);
	}

	/* Hardware limits on tx/rx data segment lengths. */
	read_pdu_limits(sc, &ci->max_tx_data_len, &ci->max_rx_data_len, pr);

	sysctl_ctx_init(&ci->ctx);
	oid = device_get_sysctl_tree(sc->dev);	/* dev.t5nex.X */
	children = SYSCTL_CHILDREN(oid);

	oid = SYSCTL_ADD_NODE(&ci->ctx, children, OID_AUTO, "iscsi",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "iSCSI ULP settings");
	children = SYSCTL_CHILDREN(oid);

	/* Payloads at or above this size are candidates for rx DDP. */
	ci->ddp_threshold = 2048;
	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "ddp_threshold",
	    CTLFLAG_RW, &ci->ddp_threshold, 0, "Rx zero copy threshold");

	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_rx_data_len",
	    CTLFLAG_RD, &ci->max_rx_data_len, 0,
	    "Maximum receive data segment length");
	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_tx_data_len",
	    CTLFLAG_RD, &ci->max_tx_data_len, 0,
	    "Maximum transmit data segment length");

	return (0);
}
211
/*
 * Handler for CPL_ISCSI_HDR: the BHS of a new PDU arriving on an
 * offloaded connection (T5 style, where header, data, and status come
 * in separate CPLs).  Allocates an icl_pdu, copies the BHS into it, and
 * parks it in toep->ulpcb2 for the subsequent data/status handlers.
 */
static int
do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_pdu *ip;
	struct icl_cxgbei_pdu *icp;
	uint16_t len_ddp = be16toh(cpl->pdu_len_ddp);
	uint16_t len = be16toh(cpl->len);

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == len + sizeof(*cpl));

	ip = icl_cxgbei_new_pdu(M_NOWAIT);
	if (ip == NULL)
		CXGBE_UNIMPLEMENTED("PDU allocation failure");
	/* The BHS immediately follows the CPL header in the mbuf. */
	m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
	/* Data segment length = total PDU length minus the header bytes. */
	ip->ip_data_len = G_ISCSI_PDU_LEN(len_ddp) - len;
	icp = ip_to_icp(ip);
	icp->icp_seq = ntohl(cpl->seq);
	icp->icp_flags = ICPF_RX_HDR;

	/* This is the start of a new PDU.  There should be no old state. */
	MPASS(toep->ulpcb2 == NULL);
	toep->ulpcb2 = icp;

#if 0
	CTR5(KTR_CXGBE, "%s: tid %u, cpl->len %u, pdu_len_ddp 0x%04x, icp %p",
	    __func__, tid, len, len_ddp, icp);
#endif

	m_freem(m);
	return (0);
}
248
/*
 * Handler for CPL_ISCSI_DATA: freelist data for the PDU currently being
 * assembled.  On T5 the header already arrived via CPL_ISCSI_HDR and
 * toep->ulpcb2 holds the partial PDU; on T6 (completion mode) this CPL
 * may be the first for the PDU and the header arrives later in the
 * completion CPL.  The mbuf (minus the CPL header) becomes the PDU's
 * data buffer.
 */
static int
do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct cpl_iscsi_data *cpl =  mtod(m, struct cpl_iscsi_data *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == be16toh(cpl->len) + sizeof(*cpl));

	if (icp == NULL) {
		/*
		 * T6 completion enabled, start of a new pdu. Header
		 * will come in completion CPL.
		 */
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	} else {
		/* T5 mode, header is already received. */
		MPASS(icp->icp_flags == ICPF_RX_HDR);
		MPASS(icp->ip.ip_data_mbuf == NULL);
		MPASS(icp->ip.ip_data_len == m->m_pkthdr.len - sizeof(*cpl));
	}

	/* Trim the cpl header from mbuf. */
	m_adj(m, sizeof(*cpl));

	icp->icp_flags |= ICPF_RX_FLBUF;
	icp->ip.ip_data_mbuf = m;
	toep->ofld_rxq->rx_iscsi_fl_pdus++;
	toep->ofld_rxq->rx_iscsi_fl_octets += m->m_pkthdr.len;

	/*
	 * For T6, save the icp for further processing in the
	 * completion handler.  (Flags equal to exactly ICPF_RX_FLBUF
	 * means no header was seen, i.e. the T6 path above.)
	 */
	if (icp->icp_flags == ICPF_RX_FLBUF) {
		MPASS(toep->ulpcb2 == NULL);
		toep->ulpcb2 = icp;
	}

#if 0
	CTR4(KTR_CXGBE, "%s: tid %u, cpl->len %u, icp %p", __func__, tid,
	    be16toh(cpl->len), icp);
#endif

	return (0);
}
302
303 static int
304 mbuf_crc32c_helper(void *arg, void *data, u_int len)
305 {
306         uint32_t *digestp = arg;
307
308         *digestp = calculate_crc32c(*digestp, data, len);
309         return (0);
310 }
311
312 static bool
313 parse_pdus(struct toepcb *toep, struct icl_cxgbei_conn *icc, struct sockbuf *sb)
314 {
315         struct iscsi_bhs bhs;
316         struct mbuf *m;
317         struct icl_pdu *ip;
318         u_int ahs_len, data_len, header_len, pdu_len, total_len;
319         uint32_t calc_digest, wire_digest;
320
321         total_len = sbused(sb);
322         CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, toep->tid,
323             total_len);
324
325         m = sbcut_locked(sb, total_len);
326         KASSERT(m_length(m, NULL) == total_len,
327             ("sbcut returned less data (%u vs %u)", total_len,
328             m_length(m, NULL)));
329
330         header_len = sizeof(struct iscsi_bhs);
331         if (icc->ic.ic_header_crc32c)
332                 header_len += ISCSI_HEADER_DIGEST_SIZE;
333         for (;;) {
334                 if (total_len < sizeof(struct iscsi_bhs)) {
335                         ICL_WARN("truncated pre-offload PDU with len %u",
336                             total_len);
337                         m_freem(m);
338                         return (false);
339                 }
340                 m_copydata(m, 0, sizeof(struct iscsi_bhs), (caddr_t)&bhs);
341
342                 ahs_len = bhs.bhs_total_ahs_len * 4;
343                 data_len = bhs.bhs_data_segment_len[0] << 16 |
344                     bhs.bhs_data_segment_len[1] << 8 |
345                     bhs.bhs_data_segment_len[0];
346                 pdu_len = header_len + ahs_len + roundup2(data_len, 4);
347                 if (icc->ic.ic_data_crc32c && data_len != 0)
348                         pdu_len += ISCSI_DATA_DIGEST_SIZE;
349
350                 if (total_len < pdu_len) {
351                         ICL_WARN("truncated pre-offload PDU len %u vs %u",
352                             total_len, pdu_len);
353                         m_freem(m);
354                         return (false);
355                 }
356
357                 if (ahs_len != 0) {
358                         ICL_WARN("received pre-offload PDU with AHS");
359                         m_freem(m);
360                         return (false);
361                 }
362
363                 if (icc->ic.ic_header_crc32c) {
364                         m_copydata(m, sizeof(struct iscsi_bhs),
365                             sizeof(wire_digest), (caddr_t)&wire_digest);
366
367                         calc_digest = calculate_crc32c(0xffffffff,
368                             (caddr_t)&bhs, sizeof(bhs));
369                         calc_digest ^= 0xffffffff;
370                         if (calc_digest != wire_digest) {
371                                 ICL_WARN("received pre-offload PDU 0x%02x "
372                                     "with invalid header digest (0x%x vs 0x%x)",
373                                     bhs.bhs_opcode, wire_digest, calc_digest);
374                                 toep->ofld_rxq->rx_iscsi_header_digest_errors++;
375                                 m_free(m);
376                                 return (false);
377                         }
378                 }
379
380                 m_adj(m, header_len);
381
382                 if (icc->ic.ic_data_crc32c && data_len != 0) {
383                         m_copydata(m, data_len, sizeof(wire_digest),
384                             (caddr_t)&wire_digest);
385
386                         calc_digest = 0xffffffff;
387                         m_apply(m, 0, roundup2(data_len, 4), mbuf_crc32c_helper,
388                             &calc_digest);
389                         calc_digest ^= 0xffffffff;
390                         if (calc_digest != wire_digest) {
391                                 ICL_WARN("received pre-offload PDU 0x%02x "
392                                     "with invalid data digest (0x%x vs 0x%x)",
393                                     bhs.bhs_opcode, wire_digest, calc_digest);
394                                 toep->ofld_rxq->rx_iscsi_data_digest_errors++;
395                                 m_free(m);
396                                 return (false);
397                         }
398                 }
399
400                 ip = icl_cxgbei_new_pdu(M_NOWAIT);
401                 if (ip == NULL)
402                         CXGBE_UNIMPLEMENTED("PDU allocation failure");
403                 icl_cxgbei_new_pdu_set_conn(ip, &icc->ic);
404                 *ip->ip_bhs = bhs;
405                 ip->ip_data_len = data_len;
406                 if (data_len != 0)
407                         ip->ip_data_mbuf = m;
408
409                 STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
410
411                 total_len -= pdu_len;
412                 if (total_len == 0) {
413                         if (data_len == 0)
414                                 m_freem(m);
415                         return (true);
416                 }
417
418                 if (data_len != 0) {
419                         m = m_split(m, roundup2(data_len, 4), M_NOWAIT);
420                         if (m == NULL) {
421                                 ICL_WARN("failed to split mbuf chain for "
422                                     "pre-offload PDU");
423
424                                 /* Don't free the mbuf chain as 'ip' owns it. */
425                                 return (false);
426                         }
427                         if (icc->ic.ic_data_crc32c)
428                                 m_adj(m, ISCSI_DATA_DIGEST_SIZE);
429                 }
430         }
431 }
432
/*
 * Handler for CPL_RX_DATA_DDP: the status/completion CPL for the PDU
 * being assembled in toep->ulpcb2 (T5 style).  Validates the hardware's
 * padding/digest verdicts, accounts for DDP placement, advances rcv_nxt,
 * flushes any pre-offload data still in the socket buffer via
 * parse_pdus(), and hands the finished PDU to the connection's worker
 * thread.
 */
static int
do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	u_int pdu_len, val;
	struct epoch_tracker et;

	MPASS(m == NULL);

	/* Must already be assembling a PDU. */
	MPASS(icp != NULL);
	MPASS(icp->icp_flags & ICPF_RX_HDR);	/* Data is optional. */
	MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);

	pdu_len = be16toh(cpl->len);	/* includes everything. */
	val = be32toh(cpl->ddpvld);

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp_flags 0x%08x",
	    __func__, tid, pdu_len, val, icp->icp_flags);
#endif

	icp->icp_flags |= ICPF_RX_STATUS;
	ip = &icp->ip;
	/* Hardware-reported padding/digest errors; counted, acted on below. */
	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_padding_errors++;
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_header_digest_errors++;
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_data_digest_errors++;
	}
	/* Data placed directly by DDP: no freelist mbuf was attached. */
	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		toep->ofld_rxq->rx_iscsi_ddp_pdus++;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
		/* Connection is gone; drop the PDU. */
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}

	/*
	 * T6+ does not report data PDUs received via DDP without F
	 * set.  This can result in gaps in the TCP sequence space.
	 */
	tp = intotcpcb(inp);
	MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt);
	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	icc = toep->ulpcb;
	if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		/*
		 * Receiver can't take more data: reset the connection.
		 * tcp_drop() needs the net epoch and the inp lock; it
		 * unlocks the inp itself when it fully drops the pcb.
		 */
		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}
	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	/* Any hardware-detected error kills the connection via ICL. */
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		ic->ic_error(ic);
		return (0);
	}

	if (__predict_false(sbused(sb)) != 0) {
		/*
		 * PDUs were received before the tid transitioned to ULP mode.
		 * Convert them to icl_cxgbei_pdus and send them to ICL before
		 * the PDU in icp/ip.
		 */
		if (!parse_pdus(toep, icc, sb)) {
			SOCKBUF_UNLOCK(sb);
			INP_WUNLOCK(inp);

			icl_cxgbei_conn_pdu_free(NULL, ip);
			toep->ulpcb2 = NULL;
			ic->ic_error(ic);
			return (0);
		}
	}
	icl_cxgbei_new_pdu_set_conn(ip, ic);

	/* Queue the PDU and wake the connection's worker thread if idle. */
	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if ((icc->rx_flags & RXF_ACTIVE) == 0) {
		struct cxgbei_worker_thread_softc *cwt = &cwt_softc[icc->cwt];

		mtx_lock(&cwt->cwt_lock);
		icc->rx_flags |= RXF_ACTIVE;
		TAILQ_INSERT_TAIL(&cwt->rx_head, icc, rx_link);
		if (cwt->cwt_state == CWT_SLEEPING) {
			cwt->cwt_state = CWT_RUNNING;
			cv_signal(&cwt->cwt_cv);
		}
		mtx_unlock(&cwt->cwt_lock);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;

	return (0);
}
592
593 static int
594 do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
595 {
596         struct epoch_tracker et;
597         struct adapter *sc = iq->adapter;
598         struct cpl_rx_iscsi_cmp *cpl = mtod(m, struct cpl_rx_iscsi_cmp *);
599         u_int tid = GET_TID(cpl);
600         struct toepcb *toep = lookup_tid(sc, tid);
601         struct icl_cxgbei_pdu *icp = toep->ulpcb2;
602         struct icl_pdu *ip;
603         struct cxgbei_cmp *cmp;
604         struct inpcb *inp = toep->inp;
605 #ifdef INVARIANTS
606         uint16_t len = be16toh(cpl->len);
607         u_int data_digest_len;
608 #endif
609         struct socket *so;
610         struct sockbuf *sb;
611         struct tcpcb *tp;
612         struct icl_cxgbei_conn *icc;
613         struct icl_conn *ic;
614         struct iscsi_bhs_data_out *bhsdo;
615         u_int val = be32toh(cpl->ddpvld);
616         u_int npdus, pdu_len;
617         uint32_t prev_seg_len;
618
619         M_ASSERTPKTHDR(m);
620         MPASS(m->m_pkthdr.len == len + sizeof(*cpl));
621
622         if ((val & F_DDP_PDU) == 0) {
623                 MPASS(icp != NULL);
624                 MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);
625                 ip = &icp->ip;
626         }
627
628         if (icp == NULL) {
629                 /* T6 completion enabled, start of a new PDU. */
630                 ip = icl_cxgbei_new_pdu(M_NOWAIT);
631                 if (ip == NULL)
632                         CXGBE_UNIMPLEMENTED("PDU allocation failure");
633                 icp = ip_to_icp(ip);
634         }
635         pdu_len = G_ISCSI_PDU_LEN(be16toh(cpl->pdu_len_ddp));
636
637 #if 0
638         CTR5(KTR_CXGBE,
639             "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp %p",
640             __func__, tid, pdu_len, val, icp);
641 #endif
642
643         /* Copy header */
644         m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
645         bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
646         ip->ip_data_len = bhsdo->bhsdo_data_segment_len[0] << 16 |
647             bhsdo->bhsdo_data_segment_len[1] << 8 |
648             bhsdo->bhsdo_data_segment_len[2];
649         icp->icp_seq = ntohl(cpl->seq);
650         icp->icp_flags |= ICPF_RX_HDR;
651         icp->icp_flags |= ICPF_RX_STATUS;
652
653         if (val & F_DDP_PADDING_ERR) {
654                 ICL_WARN("received PDU 0x%02x with invalid padding",
655                     ip->ip_bhs->bhs_opcode);
656                 toep->ofld_rxq->rx_iscsi_padding_errors++;
657         }
658         if (val & F_DDP_HDRCRC_ERR) {
659                 ICL_WARN("received PDU 0x%02x with invalid header digest",
660                     ip->ip_bhs->bhs_opcode);
661                 toep->ofld_rxq->rx_iscsi_header_digest_errors++;
662         }
663         if (val & F_DDP_DATACRC_ERR) {
664                 ICL_WARN("received PDU 0x%02x with invalid data digest",
665                     ip->ip_bhs->bhs_opcode);
666                 toep->ofld_rxq->rx_iscsi_data_digest_errors++;
667         }
668
669         INP_WLOCK(inp);
670         if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
671                 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
672                     __func__, tid, pdu_len, inp->inp_flags);
673                 INP_WUNLOCK(inp);
674                 icl_cxgbei_conn_pdu_free(NULL, ip);
675                 toep->ulpcb2 = NULL;
676                 m_freem(m);
677                 return (0);
678         }
679
680         tp = intotcpcb(inp);
681
682         /*
683          * If icc is NULL, the connection is being closed in
684          * icl_cxgbei_conn_close(), just drop this data.
685          */
686         icc = toep->ulpcb;
687         if (__predict_false(icc == NULL)) {
688                 CTR4(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p",
689                     __func__, tid, pdu_len, icc);
690
691                 /*
692                  * Update rcv_nxt so the sequence number of the FIN
693                  * doesn't appear wrong.
694                  */
695                 tp->rcv_nxt = icp->icp_seq + pdu_len;
696                 tp->t_rcvtime = ticks;
697                 INP_WUNLOCK(inp);
698
699                 icl_cxgbei_conn_pdu_free(NULL, ip);
700                 toep->ulpcb2 = NULL;
701                 m_freem(m);
702                 return (0);
703         }
704
705         MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
706         ic = &icc->ic;
707         if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
708             F_DDP_DATACRC_ERR)) != 0) {
709                 INP_WUNLOCK(inp);
710
711                 icl_cxgbei_conn_pdu_free(NULL, ip);
712                 toep->ulpcb2 = NULL;
713                 m_freem(m);
714                 ic->ic_error(ic);
715                 return (0);
716         }
717
718 #ifdef INVARIANTS
719         data_digest_len = (icc->ulp_submode & ULP_CRC_DATA) ?
720             ISCSI_DATA_DIGEST_SIZE : 0;
721         MPASS(roundup2(ip->ip_data_len, 4) == pdu_len - len - data_digest_len);
722 #endif
723
724         if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
725                 MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
726                 MPASS(ip->ip_data_len > 0);
727                 icp->icp_flags |= ICPF_RX_DDP;
728                 bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
729
730                 switch (ip->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) {
731                 case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
732                         cmp = cxgbei_find_cmp(icc,
733                             be32toh(bhsdo->bhsdo_initiator_task_tag));
734                         break;
735                 case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
736                         cmp = cxgbei_find_cmp(icc,
737                             be32toh(bhsdo->bhsdo_target_transfer_tag));
738                         break;
739                 default:
740                         __assert_unreachable();
741                 }
742                 MPASS(cmp != NULL);
743
744                 /*
745                  * The difference between the end of the last burst
746                  * and the offset of the last PDU in this burst is
747                  * the additional data received via DDP.
748                  */
749                 prev_seg_len = be32toh(bhsdo->bhsdo_buffer_offset) -
750                     cmp->next_buffer_offset;
751
752                 if (prev_seg_len != 0) {
753                         uint32_t orig_datasn;
754
755                         /*
756                          * Return a "large" PDU representing the burst
757                          * of PDUs.  Adjust the offset and length of
758                          * this PDU to represent the entire burst.
759                          */
760                         ip->ip_data_len += prev_seg_len;
761                         bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
762                         bhsdo->bhsdo_data_segment_len[1] = ip->ip_data_len >> 8;
763                         bhsdo->bhsdo_data_segment_len[0] = ip->ip_data_len >> 16;
764                         bhsdo->bhsdo_buffer_offset =
765                             htobe32(cmp->next_buffer_offset);
766
767                         orig_datasn = htobe32(bhsdo->bhsdo_datasn);
768                         npdus = orig_datasn - cmp->last_datasn;
769                         bhsdo->bhsdo_datasn = htobe32(cmp->last_datasn + 1);
770                         cmp->last_datasn = orig_datasn;
771                         ip->ip_additional_pdus = npdus - 1;
772                 } else {
773                         MPASS(htobe32(bhsdo->bhsdo_datasn) ==
774                             cmp->last_datasn + 1);
775                         npdus = 1;
776                         cmp->last_datasn = htobe32(bhsdo->bhsdo_datasn);
777                 }
778
779                 cmp->next_buffer_offset += ip->ip_data_len;
780                 toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
781                 toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
782         } else {
783                 MPASS(icp->icp_flags & (ICPF_RX_FLBUF));
784                 MPASS(ip->ip_data_len == ip->ip_data_mbuf->m_pkthdr.len);
785         }
786
787         tp->rcv_nxt = icp->icp_seq + pdu_len;
788         tp->t_rcvtime = ticks;
789
790         /*
791          * Don't update the window size or return credits since RX
792          * flow control is disabled.
793          */
794
795         so = inp->inp_socket;
796         sb = &so->so_rcv;
797         SOCKBUF_LOCK(sb);
798         if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
799                 CTR5(KTR_CXGBE,
800                     "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
801                     __func__, tid, pdu_len, icc, sb->sb_state);
802                 SOCKBUF_UNLOCK(sb);
803                 INP_WUNLOCK(inp);
804
805                 CURVNET_SET(so->so_vnet);
806                 NET_EPOCH_ENTER(et);
807                 INP_WLOCK(inp);
808                 tp = tcp_drop(tp, ECONNRESET);
809                 if (tp != NULL)
810                         INP_WUNLOCK(inp);
811                 NET_EPOCH_EXIT(et);
812                 CURVNET_RESTORE();
813
814                 icl_cxgbei_conn_pdu_free(NULL, ip);
815                 toep->ulpcb2 = NULL;
816                 m_freem(m);
817                 return (0);
818         }
819
820         if (__predict_false(sbused(sb)) != 0) {
821                 /*
822                  * PDUs were received before the tid transitioned to ULP mode.
823                  * Convert them to icl_cxgbei_pdus and send them to ICL before
824                  * the PDU in icp/ip.
825                  */
826                 if (!parse_pdus(toep, icc, sb)) {
827                         SOCKBUF_UNLOCK(sb);
828                         INP_WUNLOCK(inp);
829
830                         icl_cxgbei_conn_pdu_free(NULL, ip);
831                         toep->ulpcb2 = NULL;
832                         ic->ic_error(ic);
833                         return (0);
834                 }
835         }
836         icl_cxgbei_new_pdu_set_conn(ip, ic);
837
838         /* Enqueue the PDU to the received pdus queue. */
839         STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
840         if ((icc->rx_flags & RXF_ACTIVE) == 0) {
841                 struct cxgbei_worker_thread_softc *cwt = &cwt_softc[icc->cwt];
842
843                 mtx_lock(&cwt->cwt_lock);
844                 icc->rx_flags |= RXF_ACTIVE;
845                 TAILQ_INSERT_TAIL(&cwt->rx_head, icc, rx_link);
846                 if (cwt->cwt_state == CWT_SLEEPING) {
847                         cwt->cwt_state = CWT_RUNNING;
848                         cv_signal(&cwt->cwt_cv);
849                 }
850                 mtx_unlock(&cwt->cwt_lock);
851         }
852         SOCKBUF_UNLOCK(sb);
853         INP_WUNLOCK(inp);
854
855         toep->ulpcb2 = NULL;
856         m_freem(m);
857
858         return (0);
859 }
860
861 static int
862 cxgbei_activate(struct adapter *sc)
863 {
864         struct cxgbei_data *ci;
865         int rc;
866
867         ASSERT_SYNCHRONIZED_OP(sc);
868
869         if (uld_active(sc, ULD_ISCSI)) {
870                 KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p",
871                     __func__, sc));
872                 return (0);
873         }
874
875         if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) {
876                 device_printf(sc->dev,
877                     "not iSCSI offload capable, or capability disabled.\n");
878                 return (ENOSYS);
879         }
880
881         /* per-adapter softc for iSCSI */
882         ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_WAITOK);
883         if (ci == NULL)
884                 return (ENOMEM);
885
886         rc = cxgbei_init(sc, ci);
887         if (rc != 0) {
888                 free(ci, M_CXGBE);
889                 return (rc);
890         }
891
892         sc->iscsi_ulp_softc = ci;
893
894         return (0);
895 }
896
897 static int
898 cxgbei_deactivate(struct adapter *sc)
899 {
900         struct cxgbei_data *ci = sc->iscsi_ulp_softc;
901
902         ASSERT_SYNCHRONIZED_OP(sc);
903
904         if (ci != NULL) {
905                 sysctl_ctx_free(&ci->ctx);
906                 t4_free_ppod_region(&ci->pr);
907                 free(ci, M_CXGBE);
908                 sc->iscsi_ulp_softc = NULL;
909         }
910
911         return (0);
912 }
913
914 static void
915 cxgbei_activate_all(struct adapter *sc, void *arg __unused)
916 {
917
918         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0)
919                 return;
920
921         /* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */
922         if (sc->offload_map && !uld_active(sc, ULD_ISCSI))
923                 (void) t4_activate_uld(sc, ULD_ISCSI);
924
925         end_synchronized_op(sc, 0);
926 }
927
928 static void
929 cxgbei_deactivate_all(struct adapter *sc, void *arg __unused)
930 {
931
932         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0)
933                 return;
934
935         if (uld_active(sc, ULD_ISCSI))
936             (void) t4_deactivate_uld(sc, ULD_ISCSI);
937
938         end_synchronized_op(sc, 0);
939 }
940
/* Hooks through which t4_activate_uld()/t4_deactivate_uld() drive this ULD. */
static struct uld_info cxgbei_uld_info = {
	.uld_id = ULD_ISCSI,
	.activate = cxgbei_activate,
	.deactivate = cxgbei_deactivate,
};
946
/*
 * Worker thread main loop: drain connections queued on this thread's
 * rx_head and hand each connection's received PDUs to ICL via
 * ic_receive().  Runs until stop_worker_threads() sets CWT_STOP.
 */
static void
cwt_main(void *arg)
{
	struct cxgbei_worker_thread_softc *cwt = arg;
	struct icl_cxgbei_conn *icc = NULL;
	struct icl_conn *ic;
	struct icl_pdu *ip;
	struct sockbuf *sb;
	STAILQ_HEAD(, icl_pdu) rx_pdus = STAILQ_HEAD_INITIALIZER(rx_pdus);

	MPASS(cwt != NULL);

	mtx_lock(&cwt->cwt_lock);
	MPASS(cwt->cwt_state == 0);
	/* Handshake with start_worker_threads(): report that we're up. */
	cwt->cwt_state = CWT_RUNNING;
	cv_signal(&cwt->cwt_cv);

	while (__predict_true(cwt->cwt_state != CWT_STOP)) {
		cwt->cwt_state = CWT_RUNNING;
		/* cwt_lock is held at the top of each iteration. */
		while ((icc = TAILQ_FIRST(&cwt->rx_head)) != NULL) {
			TAILQ_REMOVE(&cwt->rx_head, icc, rx_link);
			mtx_unlock(&cwt->cwt_lock);

			ic = &icc->ic;
			sb = &ic->ic_socket->so_rcv;

			SOCKBUF_LOCK(sb);
			MPASS(icc->rx_flags & RXF_ACTIVE);
			if (__predict_true(!(sb->sb_state & SBS_CANTRCVMORE))) {
				/*
				 * Steal the whole batch of pending PDUs so
				 * the sockbuf lock can be dropped while ICL
				 * processes them.
				 */
				MPASS(STAILQ_EMPTY(&rx_pdus));
				STAILQ_SWAP(&icc->rcvd_pdus, &rx_pdus, icl_pdu);
				SOCKBUF_UNLOCK(sb);

				/* Hand over PDUs to ICL. */
				while ((ip = STAILQ_FIRST(&rx_pdus)) != NULL) {
					STAILQ_REMOVE_HEAD(&rx_pdus, ip_next);
					ic->ic_receive(ip);
				}

				SOCKBUF_LOCK(sb);
				MPASS(STAILQ_EMPTY(&rx_pdus));
			}
			MPASS(icc->rx_flags & RXF_ACTIVE);
			if (STAILQ_EMPTY(&icc->rcvd_pdus) ||
			    __predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
				icc->rx_flags &= ~RXF_ACTIVE;
			} else {
				/*
				 * More PDUs were received while we were busy
				 * handing over the previous batch to ICL.
				 * Re-add this connection to the end of the
				 * queue.
				 */
				mtx_lock(&cwt->cwt_lock);
				TAILQ_INSERT_TAIL(&cwt->rx_head, icc,
				    rx_link);
				mtx_unlock(&cwt->cwt_lock);
			}
			SOCKBUF_UNLOCK(sb);

			mtx_lock(&cwt->cwt_lock);
		}

		/* Inner loop doesn't check for CWT_STOP, do that first. */
		if (__predict_false(cwt->cwt_state == CWT_STOP))
			break;
		cwt->cwt_state = CWT_SLEEPING;
		cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
	}

	MPASS(TAILQ_FIRST(&cwt->rx_head) == NULL);
	mtx_assert(&cwt->cwt_lock, MA_OWNED);
	/* Handshake with stop_worker_threads(): acknowledge CWT_STOP. */
	cwt->cwt_state = CWT_STOPPED;
	cv_signal(&cwt->cwt_cv);
	mtx_unlock(&cwt->cwt_lock);
	kthread_exit();
}
1024
/*
 * Create the pool of cxgbei worker kthreads (one per CPU, capped at 32)
 * that hand received PDUs to ICL.  Returns 0 if at least one thread was
 * started; otherwise the error from kproc_kthread_add() (fatal only if
 * the very first thread fails).
 */
static int
start_worker_threads(void)
{
	int i, rc;
	struct cxgbei_worker_thread_softc *cwt;

	worker_thread_count = min(mp_ncpus, 32);
	cwt_softc = malloc(worker_thread_count * sizeof(*cwt), M_CXGBE,
	    M_WAITOK | M_ZERO);

	MPASS(cxgbei_proc == NULL);
	for (i = 0, cwt = &cwt_softc[0]; i < worker_thread_count; i++, cwt++) {
		mtx_init(&cwt->cwt_lock, "cwt lock", NULL, MTX_DEF);
		cv_init(&cwt->cwt_cv, "cwt cv");
		TAILQ_INIT(&cwt->rx_head);
		rc = kproc_kthread_add(cwt_main, cwt, &cxgbei_proc, NULL, 0, 0,
		    "cxgbei", "%d", i);
		if (rc != 0) {
			printf("cxgbei: failed to start thread #%d/%d (%d)\n",
			    i + 1, worker_thread_count, rc);
			/* Undo this slot's per-thread initialization. */
			mtx_destroy(&cwt->cwt_lock);
			cv_destroy(&cwt->cwt_cv);
			bzero(cwt, sizeof(*cwt));
			if (i == 0) {
				/* No threads at all: give up entirely. */
				free(cwt_softc, M_CXGBE);
				worker_thread_count = 0;

				return (rc);
			}

			/* Not fatal, carry on with fewer threads. */
			worker_thread_count = i;
			rc = 0;
			break;
		}

		/* Wait for thread to start before moving on to the next one. */
		mtx_lock(&cwt->cwt_lock);
		while (cwt->cwt_state == 0)
			cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
		mtx_unlock(&cwt->cwt_lock);
	}

	MPASS(cwt_softc != NULL);
	MPASS(worker_thread_count > 0);
	return (0);
}
1072
1073 static void
1074 stop_worker_threads(void)
1075 {
1076         int i;
1077         struct cxgbei_worker_thread_softc *cwt = &cwt_softc[0];
1078
1079         MPASS(worker_thread_count >= 0);
1080
1081         for (i = 0, cwt = &cwt_softc[0]; i < worker_thread_count; i++, cwt++) {
1082                 mtx_lock(&cwt->cwt_lock);
1083                 MPASS(cwt->cwt_state == CWT_RUNNING ||
1084                     cwt->cwt_state == CWT_SLEEPING);
1085                 cwt->cwt_state = CWT_STOP;
1086                 cv_signal(&cwt->cwt_cv);
1087                 do {
1088                         cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
1089                 } while (cwt->cwt_state != CWT_STOPPED);
1090                 mtx_unlock(&cwt->cwt_lock);
1091                 mtx_destroy(&cwt->cwt_lock);
1092                 cv_destroy(&cwt->cwt_cv);
1093         }
1094         free(cwt_softc, M_CXGBE);
1095 }
1096
1097 /* Select a worker thread for a connection. */
1098 u_int
1099 cxgbei_select_worker_thread(struct icl_cxgbei_conn *icc)
1100 {
1101         struct adapter *sc = icc->sc;
1102         struct toepcb *toep = icc->toep;
1103         u_int i, n;
1104
1105         n = worker_thread_count / sc->sge.nofldrxq;
1106         if (n > 0)
1107                 i = toep->vi->pi->port_id * n + arc4random() % n;
1108         else
1109                 i = arc4random() % worker_thread_count;
1110
1111         CTR3(KTR_CXGBE, "%s: tid %u, cwt %u", __func__, toep->tid, i);
1112
1113         return (i);
1114 }
1115
1116 static int
1117 cxgbei_mod_load(void)
1118 {
1119         int rc;
1120
1121         t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
1122         t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
1123         t4_register_cpl_handler(CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
1124         t4_register_cpl_handler(CPL_RX_ISCSI_CMP, do_rx_iscsi_cmp);
1125
1126         rc = start_worker_threads();
1127         if (rc != 0)
1128                 return (rc);
1129
1130         rc = t4_register_uld(&cxgbei_uld_info);
1131         if (rc != 0) {
1132                 stop_worker_threads();
1133                 return (rc);
1134         }
1135
1136         t4_iterate(cxgbei_activate_all, NULL);
1137
1138         return (rc);
1139 }
1140
1141 static int
1142 cxgbei_mod_unload(void)
1143 {
1144
1145         t4_iterate(cxgbei_deactivate_all, NULL);
1146
1147         if (t4_unregister_uld(&cxgbei_uld_info) == EBUSY)
1148                 return (EBUSY);
1149
1150         stop_worker_threads();
1151
1152         t4_register_cpl_handler(CPL_ISCSI_HDR, NULL);
1153         t4_register_cpl_handler(CPL_ISCSI_DATA, NULL);
1154         t4_register_cpl_handler(CPL_RX_ISCSI_DDP, NULL);
1155         t4_register_cpl_handler(CPL_RX_ISCSI_CMP, NULL);
1156
1157         return (0);
1158 }
1159 #endif
1160
1161 static int
1162 cxgbei_modevent(module_t mod, int cmd, void *arg)
1163 {
1164         int rc = 0;
1165
1166 #ifdef TCP_OFFLOAD
1167         switch (cmd) {
1168         case MOD_LOAD:
1169                 rc = cxgbei_mod_load();
1170                 if (rc == 0)
1171                         rc = icl_cxgbei_mod_load();
1172                 break;
1173
1174         case MOD_UNLOAD:
1175                 rc = icl_cxgbei_mod_unload();
1176                 if (rc == 0)
1177                         rc = cxgbei_mod_unload();
1178                 break;
1179
1180         default:
1181                 rc = EINVAL;
1182         }
1183 #else
1184         printf("cxgbei: compiled without TCP_OFFLOAD support.\n");
1185         rc = EOPNOTSUPP;
1186 #endif
1187
1188         return (rc);
1189 }
1190
1191 static moduledata_t cxgbei_mod = {
1192         "cxgbei",
1193         cxgbei_modevent,
1194         NULL,
1195 };
1196
1197 MODULE_VERSION(cxgbei, 1);
1198 DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1199 MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1);
1200 MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1);
1201 MODULE_DEPEND(cxgbei, icl, 1, 1, 1);