2 * Copyright (c) 2012 Chelsio Communications, Inc.
5 * Chelsio T5xx iSCSI driver
7 * Written by: Sreenivasa Honnur <shonnur@chelsio.com>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
35 #include "opt_inet6.h"
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/module.h>
41 #include <sys/systm.h>
44 #include <sys/errno.h>
45 #include <sys/kthread.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
51 #include <sys/mutex.h>
52 #include <sys/condvar.h>
54 #include <netinet/in.h>
55 #include <netinet/in_pcb.h>
56 #include <netinet/toecore.h>
57 #include <netinet/tcp_var.h>
58 #include <netinet/tcp_fsm.h>
60 #include <cam/scsi/scsi_all.h>
61 #include <cam/scsi/scsi_da.h>
62 #include <cam/ctl/ctl_io.h>
63 #include <cam/ctl/ctl.h>
64 #include <cam/ctl/ctl_backend.h>
65 #include <cam/ctl/ctl_error.h>
66 #include <cam/ctl/ctl_frontend.h>
67 #include <cam/ctl/ctl_debug.h>
68 #include <cam/ctl/ctl_ha.h>
69 #include <cam/ctl/ctl_ioctl.h>
71 #include <dev/iscsi/icl.h>
72 #include <dev/iscsi/iscsi_proto.h>
73 #include <dev/iscsi/iscsi_ioctl.h>
74 #include <dev/iscsi/iscsi.h>
75 #include <cam/ctl/ctl_frontend_iscsi.h>
78 #include <cam/cam_ccb.h>
79 #include <cam/cam_xpt.h>
80 #include <cam/cam_debug.h>
81 #include <cam/cam_sim.h>
82 #include <cam/cam_xpt_sim.h>
83 #include <cam/cam_xpt_periph.h>
84 #include <cam/cam_periph.h>
85 #include <cam/cam_compat.h>
86 #include <cam/scsi/scsi_message.h>
88 #include "common/common.h"
89 #include "common/t4_msg.h"
90 #include "common/t4_regs.h" /* for PCIE_MEM_ACCESS */
91 #include "tom/t4_tom.h"
93 #include "cxgbei_ulp2_ddp.h"
/* Number of worker-thread slots in cwt_softc; set in start_worker_threads(). */
95 static int worker_thread_count;
/* Array of per-worker-thread state, allocated in start_worker_threads(). */
96 static struct cxgbei_worker_thread_softc *cwt_softc;
/* Process handle handed to kproc_kthread_add() when the workers are created. */
97 static struct proc *cxgbei_proc;
99 /* XXXNP some header instead. */
/* Prototypes for icl_cxgbei routines that this file calls. */
100 struct icl_pdu *icl_cxgbei_new_pdu(int);
101 void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *, struct icl_conn *);
102 void icl_cxgbei_conn_pdu_free(struct icl_conn *, struct icl_pdu *);
105 * Direct Data Placement -
106 * Directly place the iSCSI Data-In or Data-Out PDU's payload into pre-posted
107 * final destination host-memory buffers based on the Initiator Task Tag (ITT)
108 * in Data-In or Target Task Tag (TTT) in Data-Out PDUs.
109 * The host memory address is programmed into h/w in the format of pagepod
111 * The location of the pagepod entry is encoded into ddp tag which is used as
112 * the base for ITT/TTT.
116 * functions to program the pagepod in h/w
/*
 * Fill one pagepod: copy the header into it, then store PPOD_PAGES + 1
 * big-endian physical addresses taken from the gather list, starting at
 * page index pidx.  Slots whose page index is not below gl->nelem are
 * written as 0.
 */
119 ppod_set(struct pagepod *ppod,
120 struct cxgbei_ulp2_pagepod_hdr *hdr,
121 struct cxgbei_ulp2_gather_list *gl,
126 memcpy(ppod, hdr, sizeof(*hdr));
128 for (i = 0; i < (PPOD_PAGES + 1); i++, pidx++) {
129 ppod->addr[i] = pidx < gl->nelem ?
130 cpu_to_be64(gl->dma_sg[pidx].phys_addr) : 0ULL;
/* Zero every byte of the given pagepod. */
135 ppod_clear(struct pagepod *ppod)
137 memset(ppod, 0, sizeof(*ppod));
/*
 * Initialise a ULP_TX_MEM_WRITE work request header and the immediate-data
 * sub-command that directly follows it in memory.
 *
 * sc      - adapter; is_t4()/is_t5() select the ORDER and T5 IMM bits.
 * tid     - stored as the flow id in len16.
 * req     - work request to fill in.
 * wr_len  - total work request length in bytes.
 * dlen    - payload length in bytes; stored divided by 32 in req->dlen and
 *           unshifted in the immediate-data header.
 * pm_addr - destination address; stored divided by 32 in req->lock_addr.
 */
141 ulp_mem_io_set_hdr(struct adapter *sc, int tid, struct ulp_mem_io *req,
142 unsigned int wr_len, unsigned int dlen,
143 unsigned int pm_addr)
145 struct ulptx_idata *idata = (struct ulptx_idata *)(req + 1);
147 INIT_ULPTX_WR(req, wr_len, 0, 0);
148 req->cmd = cpu_to_be32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) |
149 V_ULP_MEMIO_ORDER(is_t4(sc)) |
150 V_T5_ULP_MEMIO_IMM(is_t5(sc)));
151 req->dlen = htonl(V_ULP_MEMIO_DATA_LEN(dlen >> 5));
152 req->len16 = htonl(DIV_ROUND_UP(wr_len - sizeof(req->wr), 16)
153 | V_FW_WR_FLOWID(tid));
154 req->lock_addr = htonl(V_ULP_MEMIO_ADDR(pm_addr >> 5));
156 idata->cmd_more = htonl(V_ULPTX_CMD(ULP_TX_SC_IMM));
157 idata->len = htonl(dlen);
/* Size in bytes of one pagepod entry. */
160 #define PPOD_SIZE sizeof(struct pagepod)
/* Upper bound on pagepods sent per ppod_write_idata() call by the map/clear loops. */
161 #define ULPMEM_IDATA_MAX_NPPODS 1 /* 256/PPOD_SIZE */
/* NOTE(review): not referenced in the visible lines of this file. */
162 #define PCIE_MEMWIN_MAX_NPPODS 16 /* 1024/PPOD_SIZE */
/*
 * Build one ULP_TX memory-write work request that carries npods pagepods as
 * immediate data.  The target address is idx * PPOD_SIZE + ci->llimit.
 *
 * hdr == NULL selects the "clear" path for every pagepod; otherwise each
 * pagepod is filled by ppod_set() from gl, advancing gl_pidx by PPOD_PAGES
 * per pagepod.
 *
 * The work request is allocated from toep->ctrlq; an allocation failure is
 * routed to CXGBE_UNIMPLEMENTED.
 * NOTE(review): the body of the clear branch and the statement that hands
 * the work request to the hardware are not visible here - confirm.
 */
165 ppod_write_idata(struct cxgbei_data *ci,
166 struct cxgbei_ulp2_pagepod_hdr *hdr,
167 unsigned int idx, unsigned int npods,
168 struct cxgbei_ulp2_gather_list *gl,
169 unsigned int gl_pidx, struct toepcb *toep)
171 u_int dlen = PPOD_SIZE * npods;
172 u_int pm_addr = idx * PPOD_SIZE + ci->llimit;
173 u_int wr_len = roundup(sizeof(struct ulp_mem_io) +
174 sizeof(struct ulptx_idata) + dlen, 16);
175 struct ulp_mem_io *req;
176 struct ulptx_idata *idata;
177 struct pagepod *ppod;
180 struct adapter *sc = toep->vi->pi->adapter;
182 wr = alloc_wrqe(wr_len, toep->ctrlq);
184 CXGBE_UNIMPLEMENTED("ppod_write_idata: alloc_wrqe failure");
189 memset(req, 0, wr_len);
190 ulp_mem_io_set_hdr(sc, toep->tid, req, wr_len, dlen, pm_addr);
191 idata = (struct ulptx_idata *)(req + 1);
/* Pagepod payload starts right after the immediate-data header. */
193 ppod = (struct pagepod *)(idata + 1);
194 for (i = 0; i < npods; i++, ppod++, gl_pidx += PPOD_PAGES) {
195 if (!hdr) /* clear the pagepod */
197 else /* set the pagepod */
198 ppod_set(ppod, hdr, gl, gl_pidx);
/*
 * Program npods pagepods, starting at pagepod index idx, for gather list gl.
 * iccp is treated as a struct icl_cxgbei_conn pointer; its toepcb supplies
 * the port id and ifp recorded in gl and the queue used by
 * ppod_write_idata().  The pagepods are written in batches of at most
 * ULPMEM_IDATA_MAX_NPPODS.  A failing batch is reported with printf.
 * NOTE(review): 'reply' is not used in the visible lines - confirm.
 */
206 t4_ddp_set_map(struct cxgbei_data *ci, void *iccp,
207 struct cxgbei_ulp2_pagepod_hdr *hdr, u_int idx, u_int npods,
208 struct cxgbei_ulp2_gather_list *gl, int reply)
210 struct icl_cxgbei_conn *icc = (struct icl_cxgbei_conn *)iccp;
211 struct toepcb *toep = icc->toep;
213 unsigned int pidx = 0, w_npods = 0, cnt;
216 * on T4, if we use a mix of IMMD and DSGL with ULP_MEM_WRITE,
217 * the order would not be guaranteed, so we will stick with IMMD
220 gl->port_id = toep->vi->pi->port_id;
221 gl->egress_dev = (void *)toep->vi->ifp;
223 /* send via immediate data */
224 for (; w_npods < npods; idx += cnt, w_npods += cnt,
225 pidx += PPOD_PAGES) {
226 cnt = npods - w_npods;
227 if (cnt > ULPMEM_IDATA_MAX_NPPODS)
228 cnt = ULPMEM_IDATA_MAX_NPPODS;
229 err = ppod_write_idata(ci, hdr, idx, cnt, gl, pidx, toep);
231 printf("%s: ppod_write_idata failed\n", __func__);
/*
 * Clear npods pagepods starting at pagepod index idx by calling
 * ppod_write_idata() with a NULL header, at most ULPMEM_IDATA_MAX_NPPODS
 * per call.  The gather-list page index passed down is always 0; pidx is
 * advanced by the loop but not passed to ppod_write_idata().
 * NOTE(review): 'tag' is not used in the visible lines - confirm.
 */
239 t4_ddp_clear_map(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl,
240 u_int tag, u_int idx, u_int npods, struct icl_cxgbei_conn *icc)
242 struct toepcb *toep = icc->toep;
248 for (; w_npods < npods; idx += cnt, w_npods += cnt,
249 pidx += PPOD_PAGES) {
250 cnt = npods - w_npods;
251 if (cnt > ULPMEM_IDATA_MAX_NPPODS)
252 cnt = ULPMEM_IDATA_MAX_NPPODS;
253 err = ppod_write_idata(ci, NULL, idx, cnt, gl, 0, toep);
/*
 * Describe the data buffer of a CAM SCSI I/O CCB as a list of segments in
 * sgl.  The first segment starts at csio->data_ptr with sg_offset set to
 * (data_ptr & PAGE_MASK); when the transfer does not fit in the rest of
 * that page the first segment is PAGE_SIZE - sgoffset bytes long.  Later
 * segments are at most PAGE_SIZE bytes each.
 *
 * nsge is computed as the number of pages spanned by sgoffset + dxfer_len.
 * cxgbei_task_reserve_itt() stores this function's result as the segment
 * count and treats 0 as failure.
 * NOTE(review): the short-transfer branch body, the pointer/length updates
 * inside the loop and the return statement are not visible here.
 */
260 cxgbei_map_sg(struct cxgbei_sgl *sgl, struct ccb_scsiio *csio)
262 unsigned int data_len = csio->dxfer_len;
263 unsigned int sgoffset = (uint64_t)csio->data_ptr & PAGE_MASK;
265 unsigned char *sgaddr = csio->data_ptr;
266 unsigned int len = 0;
268 nsge = (csio->dxfer_len + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT;
269 sgl->sg_addr = sgaddr;
270 sgl->sg_offset = sgoffset;
271 if (data_len < (PAGE_SIZE - sgoffset))
274 len = PAGE_SIZE - sgoffset;
276 sgl->sg_length = len;
282 while (data_len > 0) {
283 sgl->sg_addr = sgaddr;
284 len = (data_len < PAGE_SIZE)? data_len: PAGE_SIZE;
285 sgl->sg_length = len;
/*
 * Target-side counterpart of cxgbei_map_sg(): describe the kernel data
 * buffer(s) of a CTL I/O as segments in sgl.
 *
 * When io->scsiio.kern_sg_entries > 0, kern_data_ptr is interpreted as an
 * array of ctl_sg_entry; otherwise a single on-stack entry is built from
 * kern_data_ptr / kern_data_len.  The first segment is handled like in
 * cxgbei_map_sg(); the remaining data is emitted in chunks of at most
 * PAGE_SIZE, moving to the next ctl_sg_entry as each one is consumed.
 *
 * nsge is derived from the sum of all entry lengths plus the first entry's
 * page offset.
 * NOTE(review): the single-entry count assignment, the action taken when
 * index reaches the last entry, and the return statement are not visible.
 */
295 cxgbei_map_sg_tgt(struct cxgbei_sgl *sgl, union ctl_io *io)
297 unsigned int data_len, sgoffset, nsge;
298 unsigned char *sgaddr;
299 unsigned int len = 0, index = 0, ctl_sg_count, i;
300 struct ctl_sg_entry ctl_sg_entry, *ctl_sglist;
302 if (io->scsiio.kern_sg_entries > 0) {
303 ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr;
304 ctl_sg_count = io->scsiio.kern_sg_entries;
306 ctl_sglist = &ctl_sg_entry;
307 ctl_sglist->addr = io->scsiio.kern_data_ptr;
308 ctl_sglist->len = io->scsiio.kern_data_len;
312 sgaddr = sgl->sg_addr = ctl_sglist[index].addr;
313 sgoffset = sgl->sg_offset = (uint64_t)sgl->sg_addr & PAGE_MASK;
314 data_len = ctl_sglist[index].len;
316 if (data_len < (PAGE_SIZE - sgoffset))
319 len = PAGE_SIZE - sgoffset;
321 sgl->sg_length = len;
328 for (i = 0; i< ctl_sg_count; i++)
329 len += ctl_sglist[i].len;
330 nsge = (len + sgoffset + PAGE_SIZE -1) >> PAGE_SHIFT;
331 while (data_len > 0) {
332 sgl->sg_addr = sgaddr;
333 len = (data_len < PAGE_SIZE)? data_len: PAGE_SIZE;
334 sgl->sg_length = len;
339 if (index == ctl_sg_count - 1)
342 sgaddr = ctl_sglist[index].addr;
343 data_len = ctl_sglist[index].len;
/*
 * Build a gather list from the sgcnt segments in sgl covering xferlen bytes
 * and ask cxgbei_ulp2_ddp_tag_reserve() for a DDP tag bound to it; the tag
 * is passed in and out through *ddp_tag.
 * NOTE(review): the conditions guarding the gather-list release and the
 * return value are on lines not visible here - presumably the list is
 * released only when the reservation fails; confirm.
 */
351 t4_sk_ddp_tag_reserve(struct cxgbei_data *ci, struct icl_cxgbei_conn *icc,
352 u_int xferlen, struct cxgbei_sgl *sgl, u_int sgcnt, u_int *ddp_tag)
354 struct cxgbei_ulp2_gather_list *gl;
356 struct toepcb *toep = icc->toep;
358 gl = cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(xferlen, sgl, sgcnt, ci, 0);
360 err = cxgbei_ulp2_ddp_tag_reserve(ci, icc, toep->tid,
361 &ci->tag_format, ddp_tag, gl, 0, 0);
363 cxgbei_ulp2_ddp_release_gl(ci, gl);
/*
 * Initiator side: try to turn the task tag *itt into a DDP tag for the
 * SCSI command scmd and return the tag recorded in the per-task data
 * (*prv, a struct cxgbei_task_data).
 *
 * Zero-length transfers, a missing task data pointer and transfers shorter
 * than DDP_THRESHOLD are tested first.  For CAM_DIR_IN commands the data
 * buffer is mapped with cxgbei_map_sg() and, if the software tag is usable
 * under ci->tag_format, a DDP tag is reserved.
 * NOTE(review): the branch targets of the early tests and the assignment of
 * the cxgbei_ulp2_set_non_ddp_tag() result are not visible here.
 * NOTE(review): 'sge' is NULL at declaration and no assignment is visible
 * before it is passed to t4_sk_ddp_tag_reserve(); the assignment is
 * presumably on a line not shown - confirm.
 */
371 cxgbei_task_reserve_itt(struct icl_conn *ic, void **prv,
372 struct ccb_scsiio *scmd, unsigned int *itt)
374 struct icl_cxgbei_conn *icc = ic_to_icc(ic);
375 int xferlen = scmd->dxfer_len;
376 struct cxgbei_task_data *tdata = NULL;
377 struct cxgbei_sgl *sge = NULL;
378 struct toepcb *toep = icc->toep;
379 struct adapter *sc = td_adapter(toep->td);
380 struct cxgbei_data *ci = sc->iscsi_ulp_softc;
383 MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
385 tdata = (struct cxgbei_task_data *)*prv;
386 if (xferlen == 0 || tdata == NULL)
388 if (xferlen < DDP_THRESHOLD)
391 if ((scmd->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
392 tdata->nsge = cxgbei_map_sg(tdata->sgl, scmd);
393 if (tdata->nsge == 0) {
394 CTR1(KTR_CXGBE, "%s: map_sg failed", __func__);
399 tdata->sc_ddp_tag = *itt;
401 CTR3(KTR_CXGBE, "%s: *itt:0x%x sc_ddp_tag:0x%x",
402 __func__, *itt, tdata->sc_ddp_tag);
403 if (cxgbei_ulp2_sw_tag_usable(&ci->tag_format,
404 tdata->sc_ddp_tag)) {
405 err = t4_sk_ddp_tag_reserve(ci, icc, scmd->dxfer_len,
406 sge, tdata->nsge, &tdata->sc_ddp_tag);
409 "%s: itt:0x%x sc_ddp_tag:0x%x not usable",
410 __func__, *itt, tdata->sc_ddp_tag);
416 cxgbei_ulp2_set_non_ddp_tag(&ci->tag_format, *itt);
418 return tdata->sc_ddp_tag;
/*
 * Target side: try to turn the transfer tag *ttt into a DDP tag for the CTL
 * I/O 'io' and return the tag recorded in the per-task data (*prv).
 *
 * The transfer length is kern_data_len minus ext_data_filled.  Zero-length
 * transfers, a missing task data pointer and transfers shorter than
 * DDP_THRESHOLD are tested first.  Otherwise the buffers are mapped with
 * cxgbei_map_sg_tgt() and, if the software tag is usable under
 * ci->tag_format, a DDP tag is reserved.
 * NOTE(review): the branch targets of the early tests and the assignment of
 * the cxgbei_ulp2_set_non_ddp_tag() result are not visible here.
 * NOTE(review): 'sge' is NULL at declaration and no assignment is visible
 * before it is passed to t4_sk_ddp_tag_reserve() - confirm.
 */
422 cxgbei_task_reserve_ttt(struct icl_conn *ic, void **prv, union ctl_io *io,
425 struct icl_cxgbei_conn *icc = ic_to_icc(ic);
426 struct toepcb *toep = icc->toep;
427 struct adapter *sc = td_adapter(toep->td);
428 struct cxgbei_data *ci = sc->iscsi_ulp_softc;
429 struct cxgbei_task_data *tdata = NULL;
430 int xferlen, err = -1;
431 struct cxgbei_sgl *sge = NULL;
433 MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
435 xferlen = (io->scsiio.kern_data_len - io->scsiio.ext_data_filled);
436 tdata = (struct cxgbei_task_data *)*prv;
437 if ((xferlen == 0) || (tdata == NULL))
439 if (xferlen < DDP_THRESHOLD)
441 tdata->nsge = cxgbei_map_sg_tgt(tdata->sgl, io);
442 if (tdata->nsge == 0) {
443 CTR1(KTR_CXGBE, "%s: map_sg failed", __func__);
448 tdata->sc_ddp_tag = *ttt;
449 if (cxgbei_ulp2_sw_tag_usable(&ci->tag_format, tdata->sc_ddp_tag)) {
450 err = t4_sk_ddp_tag_reserve(ci, icc, xferlen, sge,
451 tdata->nsge, &tdata->sc_ddp_tag);
453 CTR2(KTR_CXGBE, "%s: sc_ddp_tag:0x%x not usable",
454 __func__, tdata->sc_ddp_tag);
459 cxgbei_ulp2_set_non_ddp_tag(&ci->tag_format, *ttt);
460 return tdata->sc_ddp_tag;
/*
 * Release ddp_tag for connection icc.  The per-adapter iSCSI state is found
 * through the connection's toepcb (toep->td -> adapter -> iscsi_ulp_softc)
 * and the work is delegated to cxgbei_ulp2_ddp_tag_release().
 */
464 t4_sk_ddp_tag_release(struct icl_cxgbei_conn *icc, unsigned int ddp_tag)
466 struct toepcb *toep = icc->toep;
467 struct adapter *sc = td_adapter(toep->td);
468 struct cxgbei_data *ci = sc->iscsi_ulp_softc;
470 cxgbei_ulp2_ddp_tag_release(ci, ddp_tag, icc);
/*
 * Set up the per-adapter DDP state in ci:
 *  - llimit/ulimit bound the adapter's iSCSI memory region (sc->vres.iscsi);
 *  - nppods starts as region size >> IPPOD_SIZE_SHIFT and is then reduced to
 *    (1 << (bits - 1)) - 1, with bits capped at IPPOD_IDX_MAX_SIZE;
 *  - a bus_dma tag is created, and the colors and gl_map arrays (nppods
 *    entries each) are allocated with M_NOWAIT;
 *  - max_txsz/max_rxsz are the smaller of the MAXRXDATA field read from
 *    A_TP_PARA_REG2 and ULP2_MAX_PKT_SIZE;
 *  - the tag format and index masks are derived from bits, and the hardware
 *    is told about them via t4_iscsi_init().
 * If either array allocation fails, the DMA tag and both arrays are
 * released.
 * NOTE(review): how 'bits' is first computed and the values returned on
 * each path are on lines not visible here.
 */
476 cxgbei_ddp_init(struct adapter *sc, struct cxgbei_data *ci)
478 int nppods, bits, max_sz, rc;
479 static const u_int pgsz_order[] = {0, 1, 2, 3};
481 MPASS(sc->vres.iscsi.size > 0);
483 ci->llimit = sc->vres.iscsi.start;
484 ci->ulimit = sc->vres.iscsi.start + sc->vres.iscsi.size - 1;
485 max_sz = G_MAXRXDATA(t4_read_reg(sc, A_TP_PARA_REG2));
487 nppods = sc->vres.iscsi.size >> IPPOD_SIZE_SHIFT;
492 if (bits > IPPOD_IDX_MAX_SIZE)
493 bits = IPPOD_IDX_MAX_SIZE;
494 nppods = (1 << (bits - 1)) - 1;
496 rc = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR,
497 BUS_SPACE_MAXADDR, NULL, NULL, UINT32_MAX , 8, BUS_SPACE_MAXSIZE,
498 BUS_DMA_ALLOCNOW, NULL, NULL, &ci->ulp_ddp_tag);
500 device_printf(sc->dev, "%s: failed to create DMA tag: %u.\n",
505 ci->colors = malloc(nppods * sizeof(char), M_CXGBE, M_NOWAIT | M_ZERO);
506 ci->gl_map = malloc(nppods * sizeof(struct cxgbei_ulp2_gather_list *),
507 M_CXGBE, M_NOWAIT | M_ZERO);
508 if (ci->colors == NULL || ci->gl_map == NULL) {
509 bus_dma_tag_destroy(ci->ulp_ddp_tag);
510 free(ci->colors, M_CXGBE);
511 free(ci->gl_map, M_CXGBE);
515 mtx_init(&ci->map_lock, "ddp lock", NULL, MTX_DEF | MTX_DUPOK);
516 ci->max_txsz = ci->max_rxsz = min(max_sz, ULP2_MAX_PKT_SIZE);
518 ci->idx_last = nppods;
520 ci->idx_mask = (1 << bits) - 1;
521 ci->rsvd_tag_mask = (1 << (bits + IPPOD_IDX_SHIFT)) - 1;
523 ci->tag_format.sw_bits = bits;
524 ci->tag_format.rsvd_bits = bits;
525 ci->tag_format.rsvd_shift = IPPOD_IDX_SHIFT;
526 ci->tag_format.rsvd_mask = ci->idx_mask;
528 t4_iscsi_init(sc, ci->idx_mask << IPPOD_IDX_SHIFT, pgsz_order);
/*
 * Handler registered for CPL_ISCSI_HDR.  Starts a new PDU for the tid named
 * in the CPL: a PDU is allocated with M_NOWAIT (failure goes to
 * CXGBE_UNIMPLEMENTED), the bytes that follow the CPL header in the mbuf are
 * copied into the PDU's BHS, and the TCP sequence number and the
 * SBUF_ULP_FLAG_HDR_RCVD flag are recorded.  toep->ulpcb2 is asserted to be
 * NULL at this point.
 * NOTE(review): the line that stores the new PDU in toep->ulpcb2 (where
 * do_rx_iscsi_data()/do_rx_iscsi_ddp() read it) is not visible here.
 */
534 do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
536 struct adapter *sc = iq->adapter;
537 struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *);
538 u_int tid = GET_TID(cpl);
539 struct toepcb *toep = lookup_tid(sc, tid);
541 struct icl_cxgbei_pdu *icp;
545 ip = icl_cxgbei_new_pdu(M_NOWAIT);
547 CXGBE_UNIMPLEMENTED("PDU allocation failure");
549 bcopy(mtod(m, caddr_t) + sizeof(*cpl), icp->ip.ip_bhs, sizeof(struct
551 icp->pdu_seq = ntohl(cpl->seq);
552 icp->pdu_flags = SBUF_ULP_FLAG_HDR_RCVD;
554 /* This is the start of a new PDU. There should be no old state. */
555 MPASS(toep->ulpcb2 == NULL);
559 CTR4(KTR_CXGBE, "%s: tid %u, cpl->len hlen %u, m->m_len hlen %u",
560 __func__, tid, ntohs(cpl->len), m->m_len);
/*
 * Handler registered for CPL_ISCSI_DATA.  Attaches the payload mbuf to the
 * PDU currently being assembled for this tid (toep->ulpcb2): the CPL header
 * is trimmed off the front of the mbuf with m_adj(), the mbuf becomes the
 * PDU's data mbuf, ip_data_len is taken from m_pkthdr.len and
 * SBUF_ULP_FLAG_DATA_RCVD is set.  The assertions require that only the
 * header has been received so far and that no data is attached yet.
 */
568 do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
570 struct adapter *sc = iq->adapter;
571 struct cpl_iscsi_data *cpl = mtod(m, struct cpl_iscsi_data *);
572 u_int tid = GET_TID(cpl);
573 struct toepcb *toep = lookup_tid(sc, tid);
574 struct icl_cxgbei_pdu *icp = toep->ulpcb2;
578 /* Must already have received the header (but not the data). */
580 MPASS(icp->pdu_flags == SBUF_ULP_FLAG_HDR_RCVD);
581 MPASS(icp->ip.ip_data_mbuf == NULL);
582 MPASS(icp->ip.ip_data_len == 0);
584 m_adj(m, sizeof(*cpl));
586 icp->pdu_flags |= SBUF_ULP_FLAG_DATA_RCVD;
587 icp->ip.ip_data_mbuf = m;
588 icp->ip.ip_data_len = m->m_pkthdr.len;
591 CTR4(KTR_CXGBE, "%s: tid %u, cpl->len dlen %u, m->m_len dlen %u",
592 __func__, tid, ntohs(cpl->len), m->m_len);
/*
 * Handler registered for CPL_RX_ISCSI_DDP.  Completes the PDU being
 * assembled for this tid (toep->ulpcb2) and hands it off for processing.
 *
 * Visible steps:
 *  1. Set SBUF_ULP_FLAG_STATUS_RCVD and translate the padding / header-CRC /
 *     data-CRC error bits of cpl->ddpvld into PDU flags.  A PDU with no
 *     data mbuf is marked SBUF_ULP_FLAG_DATA_DDPED.
 *  2. If the inpcb is dropped or in timewait, free the PDU.
 *  3. Advance tp->rcv_nxt and shrink tp->rcv_wnd by the PDU length, add the
 *     length to toep->rx_credits and call t4_rcvd().
 *  4. If there is no connection or the socket buffer cannot receive more,
 *     drop the TCP connection with ECONNRESET and free the PDU.
 *  5. Any bytes already sitting in the socket buffer are cut out and, for
 *     the lengths tested below, converted into a PDU queued ahead of this
 *     one.
 *  6. Queue the PDU on icc->rcvd_pdus; if the connection is not marked
 *     RXF_ACTIVE, mark it, put it on its worker thread's rx queue and wake
 *     the worker if it is sleeping.
 * NOTE(review): lock acquisition/release, the assignments of ip/tp/sb/icc/ic
 * and the return statements are on lines not visible here.
 */
599 do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
601 struct adapter *sc = iq->adapter;
602 const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
603 u_int tid = GET_TID(cpl);
604 struct toepcb *toep = lookup_tid(sc, tid);
605 struct inpcb *inp = toep->inp;
609 struct icl_cxgbei_conn *icc;
611 struct icl_cxgbei_pdu *icp = toep->ulpcb2;
617 /* Must already be assembling a PDU. */
619 MPASS(icp->pdu_flags & SBUF_ULP_FLAG_HDR_RCVD); /* Data is optional. */
621 icp->pdu_flags |= SBUF_ULP_FLAG_STATUS_RCVD;
622 val = ntohl(cpl->ddpvld);
623 if (val & F_DDP_PADDING_ERR)
624 icp->pdu_flags |= SBUF_ULP_FLAG_PAD_ERROR;
625 if (val & F_DDP_HDRCRC_ERR)
626 icp->pdu_flags |= SBUF_ULP_FLAG_HCRC_ERROR;
627 if (val & F_DDP_DATACRC_ERR)
628 icp->pdu_flags |= SBUF_ULP_FLAG_DCRC_ERROR;
629 if (ip->ip_data_mbuf == NULL) {
630 /* XXXNP: what should ip->ip_data_len be, and why? */
631 icp->pdu_flags |= SBUF_ULP_FLAG_DATA_DDPED;
633 pdu_len = ntohs(cpl->len); /* includes everything. */
636 if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
637 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
638 __func__, tid, pdu_len, inp->inp_flags);
640 icl_cxgbei_conn_pdu_free(NULL, ip);
648 MPASS(icp->pdu_seq == tp->rcv_nxt);
649 MPASS(tp->rcv_wnd >= pdu_len);
650 tp->rcv_nxt += pdu_len;
651 tp->rcv_wnd -= pdu_len;
652 tp->t_rcvtime = ticks;
654 /* update rx credits */
655 toep->rx_credits += pdu_len;
656 t4_rcvd(&toep->td->tod, tp); /* XXX: sc->tom_softc.tod */
658 so = inp->inp_socket;
663 if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
665 "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
666 __func__, tid, pdu_len, icc, sb->sb_state);
670 INP_INFO_RLOCK(&V_tcbinfo);
672 tp = tcp_drop(tp, ECONNRESET);
675 INP_INFO_RUNLOCK(&V_tcbinfo);
677 icl_cxgbei_conn_pdu_free(NULL, ip);
683 MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
685 icl_cxgbei_new_pdu_set_conn(ip, ic);
687 MPASS(m == NULL); /* was unused, we'll use it now. */
688 m = sbcut_locked(sb, sbused(sb)); /* XXXNP: toep->sb_cc accounting? */
689 if (__predict_false(m != NULL)) {
690 int len = m_length(m, NULL);
693 * PDUs were received before the tid transitioned to ULP mode.
694 * Convert them to icl_cxgbei_pdus and send them to ICL before
697 CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, tid,
700 /* XXXNP: needs to be rewritten. */
701 if (len == sizeof(struct iscsi_bhs) || len == 4 + sizeof(struct
703 struct icl_cxgbei_pdu *icp0;
/*
 * NOTE(review): ip0 is handed to icl_cxgbei_new_pdu_set_conn() before the
 * allocation-failure handling below; if the M_NOWAIT allocation can return
 * NULL this ordering looks unsafe - confirm.
 */
706 ip0 = icl_cxgbei_new_pdu(M_NOWAIT);
707 icl_cxgbei_new_pdu_set_conn(ip0, ic);
709 CXGBE_UNIMPLEMENTED("PDU allocation failure");
710 icp0 = ip_to_icp(ip0);
711 icp0->pdu_seq = 0; /* XXX */
712 icp0->pdu_flags = SBUF_ULP_FLAG_HDR_RCVD |
713 SBUF_ULP_FLAG_STATUS_RCVD;
714 m_copydata(m, 0, sizeof(struct iscsi_bhs), (void *)ip0->ip_bhs);
715 STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip0, ip_next);
721 CTR4(KTR_CXGBE, "%s: tid %u, pdu_len %u, pdu_flags 0x%x",
722 __func__, tid, pdu_len, icp->pdu_flags);
725 STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
/* Schedule the connection on its worker thread unless already scheduled. */
726 if ((icc->rx_flags & RXF_ACTIVE) == 0) {
727 struct cxgbei_worker_thread_softc *cwt = &cwt_softc[icc->cwt];
729 mtx_lock(&cwt->cwt_lock);
730 icc->rx_flags |= RXF_ACTIVE;
731 TAILQ_INSERT_TAIL(&cwt->rx_head, icc, rx_link);
732 if (cwt->cwt_state == CWT_SLEEPING) {
733 cwt->cwt_state = CWT_RUNNING;
734 cv_signal(&cwt->cwt_cv);
736 mtx_unlock(&cwt->cwt_lock);
/*
 * Install this file's receive handlers for the three iSCSI CPL opcodes
 * (header, data and DDP status) on adapter sc.
 */
749 t4_register_cpl_handler_with_tom(struct adapter *sc)
752 t4_register_cpl_handler(sc, CPL_ISCSI_HDR, do_rx_iscsi_hdr);
753 t4_register_cpl_handler(sc, CPL_ISCSI_DATA, do_rx_iscsi_data);
754 t4_register_cpl_handler(sc, CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
/*
 * Undo t4_register_cpl_handler_with_tom(): register a NULL handler for each
 * of the three iSCSI CPL opcodes on adapter sc.
 */
758 t4_unregister_cpl_handler_with_tom(struct adapter *sc)
761 t4_register_cpl_handler(sc, CPL_ISCSI_HDR, NULL);
762 t4_register_cpl_handler(sc, CPL_ISCSI_DATA, NULL);
763 t4_register_cpl_handler(sc, CPL_RX_ISCSI_DDP, NULL);
/*
 * Untyped wrapper around cxgbei_task_reserve_itt(): conn and scmd are passed
 * through as the icl_conn and ccb_scsiio arguments.
 * NOTE(review): what is done with 'tag' afterwards is not visible here.
 */
768 cxgbei_conn_task_reserve_itt(void *conn, void **prv,
769 void *scmd, unsigned int *itt)
772 tag = cxgbei_task_reserve_itt(conn, prv, scmd, itt);
/*
 * Untyped wrapper around cxgbei_task_reserve_ttt(): conn and scmd are passed
 * through as the icl_conn and ctl_io arguments.
 * NOTE(review): what is done with 'tag' afterwards is not visible here.
 */
780 cxgbei_conn_transfer_reserve_ttt(void *conn, void **prv,
781 void *scmd, unsigned int *ttt)
784 tag = cxgbei_task_reserve_ttt(conn, prv, scmd, ttt);
/*
 * Tear down the per-task offload data for a finished task.  If the tag
 * stored in the task data is a DDP tag under the adapter's tag format it is
 * released; the task data is then zeroed.  ofld_priv must not be NULL
 * (asserted).
 */
791 cxgbei_cleanup_task(void *conn, void *ofld_priv)
793 struct icl_conn *ic = (struct icl_conn *)conn;
794 struct icl_cxgbei_conn *icc = ic_to_icc(ic);
795 struct cxgbei_task_data *tdata = ofld_priv;
796 struct adapter *sc = icc->sc;
797 struct cxgbei_data *ci = sc->iscsi_ulp_softc;
799 MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
800 MPASS(tdata != NULL);
802 if (cxgbei_ulp2_is_ddp_tag(&ci->tag_format, tdata->sc_ddp_tag))
803 t4_sk_ddp_tag_release(icc, tdata->sc_ddp_tag);
804 memset(tdata, 0, sizeof(*tdata));
/*
 * ULD activate hook (see cxgbei_uld_info).  Must be called inside a
 * synchronized operation on sc.  Refuses adapters that already have the
 * iSCSI ULD active or that lack iSCSI capability / iSCSI memory, then
 * allocates the per-adapter softc (M_NOWAIT, zeroed), initialises DDP state
 * with cxgbei_ddp_init(), registers the CPL handlers and publishes the
 * softc in sc->iscsi_ulp_softc.
 * NOTE(review): error returns and cleanup after a failed allocation or
 * cxgbei_ddp_init() are on lines not visible here.
 */
808 cxgbei_activate(struct adapter *sc)
810 struct cxgbei_data *ci;
813 ASSERT_SYNCHRONIZED_OP(sc);
815 if (uld_active(sc, ULD_ISCSI)) {
816 KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p",
821 if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) {
822 device_printf(sc->dev,
823 "not iSCSI offload capable, or capability disabled.\n");
827 /* per-adapter softc for iSCSI */
828 ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_NOWAIT);
832 rc = cxgbei_ddp_init(sc, ci);
838 t4_register_cpl_handler_with_tom(sc);
839 sc->iscsi_ulp_softc = ci;
/*
 * ULD deactivate hook (see cxgbei_uld_info).  Must be called inside a
 * synchronized operation on sc.  If a per-adapter iSCSI softc exists, its
 * DDP state is cleaned up, the CPL handlers are unregistered, and the softc
 * is freed and the pointer cleared.
 */
845 cxgbei_deactivate(struct adapter *sc)
848 ASSERT_SYNCHRONIZED_OP(sc);
850 if (sc->iscsi_ulp_softc != NULL) {
851 cxgbei_ddp_cleanup(sc->iscsi_ulp_softc);
852 t4_unregister_cpl_handler_with_tom(sc);
853 free(sc->iscsi_ulp_softc, M_CXGBE);
854 sc->iscsi_ulp_softc = NULL;
/*
 * t4_iterate() callback used at module load.  Inside a synchronized
 * operation on sc, activates the iSCSI ULD when the adapter has a non-zero
 * offload_map and the ULD is not already active.  The result of
 * t4_activate_uld() is deliberately ignored.
 */
861 cxgbei_activate_all(struct adapter *sc, void *arg __unused)
864 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0)
867 /* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */
868 if (sc->offload_map && !uld_active(sc, ULD_ISCSI))
869 (void) t4_activate_uld(sc, ULD_ISCSI);
871 end_synchronized_op(sc, 0);
/*
 * t4_iterate() callback used at module unload.  Inside a synchronized
 * operation on sc, deactivates the iSCSI ULD if it is active.  The result of
 * t4_deactivate_uld() is deliberately ignored.
 */
875 cxgbei_deactivate_all(struct adapter *sc, void *arg __unused)
878 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0)
881 if (uld_active(sc, ULD_ISCSI))
882 (void) t4_deactivate_uld(sc, ULD_ISCSI);
884 end_synchronized_op(sc, 0);
/*
 * ULD descriptor passed to t4_register_uld()/t4_unregister_uld(); it wires
 * the activate and deactivate hooks defined in this file.
 */
887 static struct uld_info cxgbei_uld_info = {
889 .activate = cxgbei_activate,
890 .deactivate = cxgbei_deactivate,
/*
 * Body of the worker thread (the function header is not visible here;
 * start_worker_threads() creates the threads with cwt_main and a softc
 * pointer as 'arg').
 *
 * Startup: under cwt_lock the state moves from 0 to CWT_RUNNING and the cv
 * is signalled, which is what start_worker_threads() waits for.
 *
 * Main loop, until the state becomes CWT_STOP:
 *  - each connection queued on rx_head is dequeued and processed with
 *    cwt_lock dropped;
 *  - unless the socket buffer is marked SBS_CANTRCVMORE, the connection's
 *    rcvd_pdus list is swapped into the local rx_pdus list and the PDUs are
 *    removed from it one by one for hand-over to ICL;
 *  - if no further PDUs arrived meanwhile (or the socket can no longer
 *    receive) RXF_ACTIVE is cleared, otherwise the connection is re-queued
 *    at the tail of rx_head;
 *  - with rx_head empty the thread marks itself CWT_SLEEPING and waits on
 *    the cv.
 *
 * Shutdown: the state is set to CWT_STOPPED and the cv is signalled so that
 * stop_worker_threads() can proceed.
 * NOTE(review): the assignments of ic/sb, the socket-buffer locking and the
 * actual hand-over call are on lines not visible here.
 */
896 struct cxgbei_worker_thread_softc *cwt = arg;
897 struct icl_cxgbei_conn *icc = NULL;
901 STAILQ_HEAD(, icl_pdu) rx_pdus = STAILQ_HEAD_INITIALIZER(rx_pdus);
905 mtx_lock(&cwt->cwt_lock);
906 MPASS(cwt->cwt_state == 0);
907 cwt->cwt_state = CWT_RUNNING;
908 cv_signal(&cwt->cwt_cv);
910 while (__predict_true(cwt->cwt_state != CWT_STOP)) {
911 cwt->cwt_state = CWT_RUNNING;
912 while ((icc = TAILQ_FIRST(&cwt->rx_head)) != NULL) {
913 TAILQ_REMOVE(&cwt->rx_head, icc, rx_link);
914 mtx_unlock(&cwt->cwt_lock);
917 sb = &ic->ic_socket->so_rcv;
920 MPASS(icc->rx_flags & RXF_ACTIVE);
921 if (__predict_true(!(sb->sb_state & SBS_CANTRCVMORE))) {
922 MPASS(STAILQ_EMPTY(&rx_pdus));
923 STAILQ_SWAP(&icc->rcvd_pdus, &rx_pdus, icl_pdu);
926 /* Hand over PDUs to ICL. */
927 while ((ip = STAILQ_FIRST(&rx_pdus)) != NULL) {
928 STAILQ_REMOVE_HEAD(&rx_pdus, ip_next);
933 MPASS(STAILQ_EMPTY(&rx_pdus));
935 MPASS(icc->rx_flags & RXF_ACTIVE);
936 if (STAILQ_EMPTY(&icc->rcvd_pdus) ||
937 __predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
938 icc->rx_flags &= ~RXF_ACTIVE;
941 * More PDUs were received while we were busy
942 * handing over the previous batch to ICL.
943 * Re-add this connection to the end of the
946 mtx_lock(&cwt->cwt_lock);
947 TAILQ_INSERT_TAIL(&cwt->rx_head, icc,
949 mtx_unlock(&cwt->cwt_lock);
953 mtx_lock(&cwt->cwt_lock);
956 /* Inner loop doesn't check for CWT_STOP, do that first. */
957 if (__predict_false(cwt->cwt_state == CWT_STOP))
959 cwt->cwt_state = CWT_SLEEPING;
960 cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
963 MPASS(TAILQ_FIRST(&cwt->rx_head) == NULL);
964 mtx_assert(&cwt->cwt_lock, MA_OWNED);
965 cwt->cwt_state = CWT_STOPPED;
966 cv_signal(&cwt->cwt_cv);
967 mtx_unlock(&cwt->cwt_lock);
/*
 * Create the rx worker threads.
 *
 * The thread count is min(mp_ncpus, 32).  One softc per thread is allocated
 * in cwt_softc; for each slot the lock, condition variable and rx queue are
 * initialised and a kernel thread running cwt_main is created in
 * cxgbei_proc.  After each successful creation the caller waits on the
 * slot's cv until the thread has moved its state away from 0.
 *
 * If a thread cannot be created, the slot's lock and cv are destroyed and
 * the slot is zeroed.  The visible lines then either free the whole array
 * and set the count to 0, or keep going with the i threads already started.
 * NOTE(review): the condition choosing between those two outcomes and the
 * return statements are on lines not visible here.
 */
972 start_worker_threads(void)
975 struct cxgbei_worker_thread_softc *cwt;
977 worker_thread_count = min(mp_ncpus, 32);
978 cwt_softc = malloc(worker_thread_count * sizeof(*cwt), M_CXGBE,
981 MPASS(cxgbei_proc == NULL);
982 for (i = 0, cwt = &cwt_softc[0]; i < worker_thread_count; i++, cwt++) {
983 mtx_init(&cwt->cwt_lock, "cwt lock", NULL, MTX_DEF);
984 cv_init(&cwt->cwt_cv, "cwt cv");
985 TAILQ_INIT(&cwt->rx_head);
986 rc = kproc_kthread_add(cwt_main, cwt, &cxgbei_proc, NULL, 0, 0,
989 printf("cxgbei: failed to start thread #%d/%d (%d)\n",
990 i + 1, worker_thread_count, rc);
991 mtx_destroy(&cwt->cwt_lock);
992 cv_destroy(&cwt->cwt_cv);
/*
 * Zero the failed slot itself.  Passing &cwt here would hand bzero the
 * address of the local pointer variable together with the size of the whole
 * softc, overwriting the stack instead of clearing the slot.
 */
993 bzero(cwt, sizeof(*cwt));
995 free(cwt_softc, M_CXGBE);
996 worker_thread_count = 0;
1001 /* Not fatal, carry on with fewer threads. */
1002 worker_thread_count = i;
1007 /* Wait for thread to start before moving on to the next one. */
1008 mtx_lock(&cwt->cwt_lock);
1009 while (cwt->cwt_state == 0)
1010 cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
1011 mtx_unlock(&cwt->cwt_lock);
1014 MPASS(cwt_softc != NULL);
1015 MPASS(worker_thread_count > 0);
/*
 * Stop every worker thread and free the softc array.  For each slot, under
 * its lock: the state (asserted to be RUNNING or SLEEPING) is set to
 * CWT_STOP, the cv is signalled, and the caller waits on the cv until the
 * thread reports CWT_STOPPED.
 * NOTE(review): no mtx_destroy()/cv_destroy() for the slots is visible
 * before the array is freed - confirm whether that is on a missing line.
 */
1020 stop_worker_threads(void)
1023 struct cxgbei_worker_thread_softc *cwt = &cwt_softc[0];
1025 MPASS(worker_thread_count >= 0);
1027 for (i = 0, cwt = &cwt_softc[0]; i < worker_thread_count; i++, cwt++) {
1028 mtx_lock(&cwt->cwt_lock);
1029 MPASS(cwt->cwt_state == CWT_RUNNING ||
1030 cwt->cwt_state == CWT_SLEEPING);
1031 cwt->cwt_state = CWT_STOP;
1032 cv_signal(&cwt->cwt_cv);
1034 cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
1035 } while (cwt->cwt_state != CWT_STOPPED);
1036 mtx_unlock(&cwt->cwt_lock);
1038 free(cwt_softc, M_CXGBE);
1041 /* Select a worker thread for a connection. */
/*
 * n is the number of worker threads per offload rx queue
 * (worker_thread_count / sc->sge.nofldrxq).  Two choices are visible: a
 * random thread inside the group of n belonging to the connection's port
 * (port_id * n + random % n), or a random thread among all workers.
 * NOTE(review): the condition selecting between the two (presumably n > 0)
 * and the place where i is stored or returned are on lines not visible.
 */
1043 cxgbei_select_worker_thread(struct icl_cxgbei_conn *icc)
1045 struct adapter *sc = icc->sc;
1046 struct toepcb *toep = icc->toep;
1049 n = worker_thread_count / sc->sge.nofldrxq;
1051 i = toep->vi->pi->port_id * n + arc4random() % n;
1053 i = arc4random() % worker_thread_count;
1055 CTR3(KTR_CXGBE, "%s: tid %u, cwt %u", __func__, toep->tid, i);
/*
 * Module load: start the worker threads, register the iSCSI ULD with the
 * base driver, then visit every adapter with cxgbei_activate_all().  The
 * worker threads are stopped again on the ULD-registration failure path.
 * NOTE(review): the rc checks and return statements are not visible here.
 */
1061 cxgbei_mod_load(void)
1065 rc = start_worker_threads();
1069 rc = t4_register_uld(&cxgbei_uld_info);
1071 stop_worker_threads();
1075 t4_iterate(cxgbei_activate_all, NULL);
/*
 * Module unload: deactivate the iSCSI ULD on every adapter, unregister the
 * ULD (an EBUSY result is checked for), and stop the worker threads.
 * NOTE(review): the action taken on EBUSY and the return value are on lines
 * not visible here.
 */
1081 cxgbei_mod_unload(void)
1084 t4_iterate(cxgbei_deactivate_all, NULL);
1086 if (t4_unregister_uld(&cxgbei_uld_info) == EBUSY)
1089 stop_worker_threads();
/*
 * Module event handler named in the module declarations below.  The visible
 * lines call cxgbei_mod_load() and cxgbei_mod_unload(), and print a notice
 * when the driver was built without TCP_OFFLOAD support.
 * NOTE(review): the switch on 'cmd', the preprocessor conditionals and the
 * return statement are on lines not visible here.
 */
1096 cxgbei_modevent(module_t mod, int cmd, void *arg)
1103 rc = cxgbei_mod_load();
1107 rc = cxgbei_mod_unload();
1114 printf("cxgbei: compiled without TCP_OFFLOAD support.\n");
/*
 * Kernel module glue: module data (initialiser fields are on lines not
 * visible here), module version 1, declaration at SI_SUB_EXEC /
 * SI_ORDER_ANY, and dependencies on t4_tom, cxgbe and icl (each version 1).
 */
1121 static moduledata_t cxgbei_mod = {
1127 MODULE_VERSION(cxgbei, 1);
1128 DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1129 MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1);
1130 MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1);
1131 MODULE_DEPEND(cxgbei, icl, 1, 1, 1);