/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Chelsio T5xx iSCSI driver
 *
 * Written by: Sreenivasa Honnur <shonnur@chelsio.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kthread.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/toecore.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>

#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_debug.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_ioctl.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <dev/iscsi/iscsi_ioctl.h>
#include <dev/iscsi/iscsi.h>
#include <cam/ctl/ctl_frontend_iscsi.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_xpt.h>
#include <cam/cam_debug.h>
#include <cam/cam_sim.h>
#include <cam/cam_xpt_sim.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_periph.h>
#include <cam/cam_compat.h>
#include <cam/scsi/scsi_message.h>
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"	/* for PCIE_MEM_ACCESS */
#include "tom/t4_tom.h"

#include "cxgbei.h"
#include "cxgbei_ulp2_ddp.h"
static int worker_thread_count;
static struct cxgbei_worker_thread_softc *cwt_softc;
static struct proc *cxgbei_proc;

/* XXXNP some header instead. */
struct icl_pdu *icl_cxgbei_new_pdu(int);
void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *, struct icl_conn *);
void icl_cxgbei_conn_pdu_free(struct icl_conn *, struct icl_pdu *);
/*
 * Direct Data Placement -
 * Directly place the iSCSI Data-In or Data-Out PDU's payload into
 * pre-posted final destination host-memory buffers based on the
 * Initiator Task Tag (ITT) in Data-In PDUs or the Target Task Tag (TTT)
 * in Data-Out PDUs.  The host memory address is programmed into the h/w
 * in the form of pagepod entries.  The location of the pagepod entry is
 * encoded into the DDP tag, which is used as the base for the ITT/TTT.
 */
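
/*
 * Illustrative sketch only, not code the driver executes: assuming the
 * color/index split configured by cxgbei_ddp_init() below (rsvd_shift ==
 * IPPOD_IDX_SHIFT), a reserved DDP tag is composed roughly as
 *
 *	ddp_tag = (ppod_idx << IPPOD_IDX_SHIFT) | color;
 *
 * so the h/w can recover the pagepod entry (and hence the pre-posted
 * buffer) directly from the ITT/TTT carried in an incoming PDU.
 */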
/*
 * Functions to program the pagepods in h/w.
 */
static inline void
ppod_set(struct pagepod *ppod,
    struct cxgbei_ulp2_pagepod_hdr *hdr,
    struct cxgbei_ulp2_gather_list *gl,
    unsigned int pidx)
{
        int i;

        memcpy(ppod, hdr, sizeof(*hdr));
        for (i = 0; i < (PPOD_PAGES + 1); i++, pidx++) {
                ppod->addr[i] = pidx < gl->nelem ?
                    cpu_to_be64(gl->dma_sg[pidx].phys_addr) : 0ULL;
        }
}

static inline void
ppod_clear(struct pagepod *ppod)
{

        memset(ppod, 0, sizeof(*ppod));
}
static inline void
ulp_mem_io_set_hdr(struct adapter *sc, int tid, struct ulp_mem_io *req,
    unsigned int wr_len, unsigned int dlen, unsigned int pm_addr)
{
        struct ulptx_idata *idata = (struct ulptx_idata *)(req + 1);

        INIT_ULPTX_WR(req, wr_len, 0, 0);
        req->cmd = cpu_to_be32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) |
            V_ULP_MEMIO_ORDER(is_t4(sc)) |
            V_T5_ULP_MEMIO_IMM(is_t5(sc)));
        req->dlen = htonl(V_ULP_MEMIO_DATA_LEN(dlen >> 5));
        req->len16 = htonl(DIV_ROUND_UP(wr_len - sizeof(req->wr), 16) |
            V_FW_WR_FLOWID(tid));
        req->lock_addr = htonl(V_ULP_MEMIO_ADDR(pm_addr >> 5));

        idata->cmd_more = htonl(V_ULPTX_CMD(ULP_TX_SC_IMM));
        idata->len = htonl(dlen);
}
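
/*
 * Sketch of the work request assembled above, as assumed from the
 * structures involved (dlen and pm_addr are expressed to the h/w in
 * 32-byte units, hence the ">> 5" shifts):
 *
 *	struct ulp_mem_io    - ULP_TX_MEM_WRITE command, target address
 *	struct ulptx_idata   - ULP_TX_SC_IMM sub-command, immediate length
 *	pagepod data         - dlen bytes of immediate data (the pagepods)
 */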
#define	ULPMEM_IDATA_MAX_NPPODS	1	/* 256/PPOD_SIZE */
#define	PCIE_MEMWIN_MAX_NPPODS	16	/* 1024/PPOD_SIZE */
static int
ppod_write_idata(struct cxgbei_data *ci, struct cxgbei_ulp2_pagepod_hdr *hdr,
    unsigned int idx, unsigned int npods, struct cxgbei_ulp2_gather_list *gl,
    unsigned int gl_pidx, struct toepcb *toep)
{
        u_int dlen = PPOD_SIZE * npods;
        u_int pm_addr = idx * PPOD_SIZE + ci->llimit;
        u_int wr_len = roundup(sizeof(struct ulp_mem_io) +
            sizeof(struct ulptx_idata) + dlen, 16);
        struct ulp_mem_io *req;
        struct ulptx_idata *idata;
        struct pagepod *ppod;
        u_int i;
        struct wrqe *wr;
        struct adapter *sc = toep->vi->pi->adapter;

        wr = alloc_wrqe(wr_len, toep->ctrlq);
        if (wr == NULL) {
                CXGBE_UNIMPLEMENTED("ppod_write_idata: alloc_wrqe failure");
                return (ENOMEM);
        }

        req = wrtod(wr);
        memset(req, 0, wr_len);
        ulp_mem_io_set_hdr(sc, toep->tid, req, wr_len, dlen, pm_addr);
        idata = (struct ulptx_idata *)(req + 1);

        ppod = (struct pagepod *)(idata + 1);
        for (i = 0; i < npods; i++, ppod++, gl_pidx += PPOD_PAGES) {
                if (hdr == NULL)	/* clear the pagepod */
                        ppod_clear(ppod);
                else			/* set the pagepod */
                        ppod_set(ppod, hdr, gl, gl_pidx);
        }

        t4_wrq_tx(sc, wr);
        return (0);
}
static int
t4_ddp_set_map(struct cxgbei_data *ci, void *iccp,
    struct cxgbei_ulp2_pagepod_hdr *hdr, u_int idx, u_int npods,
    struct cxgbei_ulp2_gather_list *gl, int reply)
{
        struct icl_cxgbei_conn *icc = (struct icl_cxgbei_conn *)iccp;
        struct toepcb *toep = icc->toep;
        int err = 0;
        unsigned int pidx = 0, w_npods = 0, cnt;

        /*
         * On T4, a mix of IMMD and DSGL with ULP_MEM_WRITE does not
         * guarantee ordering, so stick with IMMD.
         */
        gl->port_id = toep->vi->pi->port_id;
        gl->egress_dev = (void *)toep->vi->ifp;

        /* send via immediate data */
        for (; w_npods < npods; idx += cnt, w_npods += cnt,
            pidx += PPOD_PAGES) {
                cnt = npods - w_npods;
                if (cnt > ULPMEM_IDATA_MAX_NPPODS)
                        cnt = ULPMEM_IDATA_MAX_NPPODS;
                err = ppod_write_idata(ci, hdr, idx, cnt, gl, pidx, toep);
                if (err) {
                        printf("%s: ppod_write_idata failed\n", __func__);
                        break;
                }
        }

        return (err);
}
static void
t4_ddp_clear_map(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl,
    u_int tag, u_int idx, u_int npods, struct icl_cxgbei_conn *icc)
{
        struct toepcb *toep = icc->toep;
        unsigned int pidx = 0, w_npods = 0, cnt;
        int err;

        for (; w_npods < npods; idx += cnt, w_npods += cnt,
            pidx += PPOD_PAGES) {
                cnt = npods - w_npods;
                if (cnt > ULPMEM_IDATA_MAX_NPPODS)
                        cnt = ULPMEM_IDATA_MAX_NPPODS;
                err = ppod_write_idata(ci, NULL, idx, cnt, gl, 0, toep);
                if (err)
                        break;
        }
}
static int
cxgbei_map_sg(struct cxgbei_sgl *sgl, struct ccb_scsiio *csio)
{
        unsigned int data_len = csio->dxfer_len;
        unsigned int sgoffset = (uint64_t)csio->data_ptr & PAGE_MASK;
        unsigned int nsge;
        unsigned char *sgaddr = csio->data_ptr;
        unsigned int len = 0;

        nsge = (csio->dxfer_len + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT;
        sgl->sg_addr = sgaddr;
        sgl->sg_offset = sgoffset;
        if (data_len < (PAGE_SIZE - sgoffset))
                len = data_len;
        else
                len = PAGE_SIZE - sgoffset;
        sgl->sg_length = len;

        data_len -= len;
        sgaddr += len;
        sgl = sgl + 1;

        while (data_len > 0) {
                sgl->sg_addr = sgaddr;
                len = (data_len < PAGE_SIZE) ? data_len : PAGE_SIZE;
                sgl->sg_length = len;
                data_len -= len;
                sgaddr += len;
                sgl = sgl + 1;
        }

        return (nsge);
}
static int
cxgbei_map_sg_tgt(struct cxgbei_sgl *sgl, union ctl_io *io)
{
        unsigned int data_len, sgoffset, nsge;
        unsigned char *sgaddr;
        unsigned int len = 0, index = 0, ctl_sg_count, i;
        struct ctl_sg_entry ctl_sg_entry, *ctl_sglist;

        if (io->scsiio.kern_sg_entries > 0) {
                ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr;
                ctl_sg_count = io->scsiio.kern_sg_entries;
        } else {
                ctl_sglist = &ctl_sg_entry;
                ctl_sglist->addr = io->scsiio.kern_data_ptr;
                ctl_sglist->len = io->scsiio.kern_data_len;
                ctl_sg_count = 1;
        }

        sgaddr = sgl->sg_addr = ctl_sglist[index].addr;
        sgoffset = sgl->sg_offset = (uint64_t)sgl->sg_addr & PAGE_MASK;
        data_len = ctl_sglist[index].len;

        if (data_len < (PAGE_SIZE - sgoffset))
                len = data_len;
        else
                len = PAGE_SIZE - sgoffset;
        sgl->sg_length = len;

        data_len -= len;
        sgaddr += len;
        sgl = sgl + 1;

        len = 0;
        for (i = 0; i < ctl_sg_count; i++)
                len += ctl_sglist[i].len;
        nsge = (len + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT;
        while (data_len > 0) {
                sgl->sg_addr = sgaddr;
                len = (data_len < PAGE_SIZE) ? data_len : PAGE_SIZE;
                sgl->sg_length = len;
                data_len -= len;
                sgaddr += len;
                sgl = sgl + 1;
                if (data_len == 0) {
                        if (index == ctl_sg_count - 1)
                                break;
                        index++;
                        sgaddr = ctl_sglist[index].addr;
                        data_len = ctl_sglist[index].len;
                }
        }

        return (nsge);
}
static int
t4_sk_ddp_tag_reserve(struct cxgbei_data *ci, struct icl_cxgbei_conn *icc,
    u_int xferlen, struct cxgbei_sgl *sgl, u_int sgcnt, u_int *ddp_tag)
{
        struct cxgbei_ulp2_gather_list *gl;
        int err = -EINVAL;
        struct toepcb *toep = icc->toep;

        gl = cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(xferlen, sgl, sgcnt,
            ci, 0);
        if (gl != NULL) {
                err = cxgbei_ulp2_ddp_tag_reserve(ci, icc, toep->tid,
                    &ci->tag_format, ddp_tag, gl, 0, 0);
                if (err != 0)
                        cxgbei_ulp2_ddp_release_gl(ci, gl);
        }

        return (err);
}
static int
cxgbei_task_reserve_itt(struct icl_conn *ic, void **prv,
    struct ccb_scsiio *scmd, unsigned int *itt)
{
        struct icl_cxgbei_conn *icc = ic_to_icc(ic);
        int xferlen = scmd->dxfer_len;
        struct cxgbei_task_data *tdata = NULL;
        struct cxgbei_sgl *sge = NULL;
        struct toepcb *toep = icc->toep;
        struct adapter *sc = td_adapter(toep->td);
        struct cxgbei_data *ci = sc->iscsi_ulp_softc;
        int err = -1;

        MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);

        tdata = (struct cxgbei_task_data *)*prv;
        if (xferlen == 0 || tdata == NULL)
                goto out;
        if (xferlen < DDP_THRESHOLD)
                goto out;

        if ((scmd->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
                tdata->nsge = cxgbei_map_sg(tdata->sgl, scmd);
                if (tdata->nsge == 0) {
                        CTR1(KTR_CXGBE, "%s: map_sg failed", __func__);
                        return (0);
                }
                sge = tdata->sgl;

                tdata->sc_ddp_tag = *itt;

                CTR3(KTR_CXGBE, "%s: *itt:0x%x sc_ddp_tag:0x%x",
                    __func__, *itt, tdata->sc_ddp_tag);
                if (cxgbei_ulp2_sw_tag_usable(&ci->tag_format,
                    tdata->sc_ddp_tag)) {
                        err = t4_sk_ddp_tag_reserve(ci, icc, scmd->dxfer_len,
                            sge, tdata->nsge, &tdata->sc_ddp_tag);
                } else {
                        CTR3(KTR_CXGBE,
                            "%s: itt:0x%x sc_ddp_tag:0x%x not usable",
                            __func__, *itt, tdata->sc_ddp_tag);
                }
        }
out:
        if (err < 0)
                tdata->sc_ddp_tag =
                    cxgbei_ulp2_set_non_ddp_tag(&ci->tag_format, *itt);

        return (tdata->sc_ddp_tag);
}
static int
cxgbei_task_reserve_ttt(struct icl_conn *ic, void **prv, union ctl_io *io,
    unsigned int *ttt)
{
        struct icl_cxgbei_conn *icc = ic_to_icc(ic);
        struct toepcb *toep = icc->toep;
        struct adapter *sc = td_adapter(toep->td);
        struct cxgbei_data *ci = sc->iscsi_ulp_softc;
        struct cxgbei_task_data *tdata = NULL;
        int xferlen, err = -1;
        struct cxgbei_sgl *sge = NULL;

        MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);

        xferlen = (io->scsiio.kern_data_len - io->scsiio.ext_data_filled);
        tdata = (struct cxgbei_task_data *)*prv;
        if (xferlen == 0 || tdata == NULL)
                goto out;
        if (xferlen < DDP_THRESHOLD)
                goto out;
        tdata->nsge = cxgbei_map_sg_tgt(tdata->sgl, io);
        if (tdata->nsge == 0) {
                CTR1(KTR_CXGBE, "%s: map_sg failed", __func__);
                return (0);
        }
        sge = tdata->sgl;

        tdata->sc_ddp_tag = *ttt;
        if (cxgbei_ulp2_sw_tag_usable(&ci->tag_format, tdata->sc_ddp_tag)) {
                err = t4_sk_ddp_tag_reserve(ci, icc, xferlen, sge,
                    tdata->nsge, &tdata->sc_ddp_tag);
        } else {
                CTR2(KTR_CXGBE, "%s: sc_ddp_tag:0x%x not usable",
                    __func__, tdata->sc_ddp_tag);
        }
out:
        if (err < 0)
                tdata->sc_ddp_tag =
                    cxgbei_ulp2_set_non_ddp_tag(&ci->tag_format, *ttt);

        return (tdata->sc_ddp_tag);
}
static void
t4_sk_ddp_tag_release(struct icl_cxgbei_conn *icc, unsigned int ddp_tag)
{
        struct toepcb *toep = icc->toep;
        struct adapter *sc = td_adapter(toep->td);
        struct cxgbei_data *ci = sc->iscsi_ulp_softc;

        cxgbei_ulp2_ddp_tag_release(ci, ddp_tag, icc);
}
static int
cxgbei_ddp_init(struct adapter *sc, struct cxgbei_data *ci)
{
        int nppods, bits, max_sz, rc;
        static const u_int pgsz_order[] = {0, 1, 2, 3};

        MPASS(sc->vres.iscsi.size > 0);

        ci->llimit = sc->vres.iscsi.start;
        ci->ulimit = sc->vres.iscsi.start + sc->vres.iscsi.size - 1;
        max_sz = G_MAXRXDATA(t4_read_reg(sc, A_TP_PARA_REG2));

        nppods = sc->vres.iscsi.size >> IPPOD_SIZE_SHIFT;
        if (nppods <= 1024)
                return (ENXIO);

        bits = fls(nppods);
        if (bits > IPPOD_IDX_MAX_SIZE)
                bits = IPPOD_IDX_MAX_SIZE;
        nppods = (1 << (bits - 1)) - 1;

        rc = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR,
            BUS_SPACE_MAXADDR, NULL, NULL, UINT32_MAX, 8, BUS_SPACE_MAXSIZE,
            BUS_DMA_ALLOCNOW, NULL, NULL, &ci->ulp_ddp_tag);
        if (rc != 0) {
                device_printf(sc->dev, "%s: failed to create DMA tag: %u.\n",
                    __func__, rc);
                return (rc);
        }

        ci->colors = malloc(nppods * sizeof(char), M_CXGBE, M_NOWAIT | M_ZERO);
        ci->gl_map = malloc(nppods * sizeof(struct cxgbei_ulp2_gather_list *),
            M_CXGBE, M_NOWAIT | M_ZERO);
        if (ci->colors == NULL || ci->gl_map == NULL) {
                bus_dma_tag_destroy(ci->ulp_ddp_tag);
                free(ci->colors, M_CXGBE);
                free(ci->gl_map, M_CXGBE);
                return (ENOMEM);
        }

        mtx_init(&ci->map_lock, "ddp lock", NULL, MTX_DEF | MTX_DUPOK);
        ci->max_txsz = ci->max_rxsz = min(max_sz, ULP2_MAX_PKT_SIZE);
        ci->nppods = nppods;
        ci->idx_last = nppods;
        ci->idx_bits = bits;
        ci->idx_mask = (1 << bits) - 1;
        ci->rsvd_tag_mask = (1 << (bits + IPPOD_IDX_SHIFT)) - 1;

        ci->tag_format.sw_bits = bits;
        ci->tag_format.rsvd_bits = bits;
        ci->tag_format.rsvd_shift = IPPOD_IDX_SHIFT;
        ci->tag_format.rsvd_mask = ci->idx_mask;

        t4_iscsi_init(sc, ci->idx_mask << IPPOD_IDX_SHIFT, pgsz_order);

        return (rc);
}
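
/*
 * Worked example with hypothetical numbers: a 4MB iSCSI region gives
 * nppods = 4MB >> IPPOD_SIZE_SHIFT = 65536 pagepods; fls(65536) = 17,
 * which is within IPPOD_IDX_MAX_SIZE, so bits = 17, the usable pagepod
 * count becomes (1 << 16) - 1 = 65535, and idx_mask = (1 << 17) - 1 =
 * 0x1ffff.
 */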
static int
do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
        struct adapter *sc = iq->adapter;
        struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *);
        u_int tid = GET_TID(cpl);
        struct toepcb *toep = lookup_tid(sc, tid);
        struct icl_pdu *ip;
        struct icl_cxgbei_pdu *icp;

        M_ASSERTPKTHDR(m);

        ip = icl_cxgbei_new_pdu(M_NOWAIT);
        if (ip == NULL)
                CXGBE_UNIMPLEMENTED("PDU allocation failure");
        icp = ip_to_icp(ip);
        bcopy(mtod(m, caddr_t) + sizeof(*cpl), icp->ip.ip_bhs,
            sizeof(struct iscsi_bhs));
        icp->pdu_seq = ntohl(cpl->seq);
        icp->pdu_flags = SBUF_ULP_FLAG_HDR_RCVD;

        /* This is the start of a new PDU.  There should be no old state. */
        MPASS(toep->ulpcb2 == NULL);
        toep->ulpcb2 = icp;

        CTR4(KTR_CXGBE, "%s: tid %u, cpl->len hlen %u, m->m_len hlen %u",
            __func__, tid, ntohs(cpl->len), m->m_len);

        m_freem(m);
        return (0);
}
static int
do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
        struct adapter *sc = iq->adapter;
        struct cpl_iscsi_data *cpl = mtod(m, struct cpl_iscsi_data *);
        u_int tid = GET_TID(cpl);
        struct toepcb *toep = lookup_tid(sc, tid);
        struct icl_cxgbei_pdu *icp = toep->ulpcb2;

        M_ASSERTPKTHDR(m);

        /* Must already have received the header (but not the data). */
        MPASS(icp != NULL);
        MPASS(icp->pdu_flags == SBUF_ULP_FLAG_HDR_RCVD);
        MPASS(icp->ip.ip_data_mbuf == NULL);
        MPASS(icp->ip.ip_data_len == 0);

        m_adj(m, sizeof(*cpl));

        icp->pdu_flags |= SBUF_ULP_FLAG_DATA_RCVD;
        icp->ip.ip_data_mbuf = m;
        icp->ip.ip_data_len = m->m_pkthdr.len;

        CTR4(KTR_CXGBE, "%s: tid %u, cpl->len dlen %u, m->m_len dlen %u",
            __func__, tid, ntohs(cpl->len), m->m_len);

        return (0);
}
static int
do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
        struct adapter *sc = iq->adapter;
        const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
        u_int tid = GET_TID(cpl);
        struct toepcb *toep = lookup_tid(sc, tid);
        struct inpcb *inp = toep->inp;
        struct socket *so;
        struct sockbuf *sb;
        struct tcpcb *tp;
        struct icl_cxgbei_conn *icc;
        struct icl_conn *ic;
        struct icl_cxgbei_pdu *icp = toep->ulpcb2;
        struct icl_pdu *ip;
        u_int pdu_len, val;

        MPASS(m == NULL);

        /* Must already be assembling a PDU. */
        MPASS(icp != NULL);
        MPASS(icp->pdu_flags & SBUF_ULP_FLAG_HDR_RCVD); /* Data is optional. */
        ip = &icp->ip;
        icp->pdu_flags |= SBUF_ULP_FLAG_STATUS_RCVD;
        val = ntohl(cpl->ddpvld);
        if (val & F_DDP_PADDING_ERR)
                icp->pdu_flags |= SBUF_ULP_FLAG_PAD_ERROR;
        if (val & F_DDP_HDRCRC_ERR)
                icp->pdu_flags |= SBUF_ULP_FLAG_HCRC_ERROR;
        if (val & F_DDP_DATACRC_ERR)
                icp->pdu_flags |= SBUF_ULP_FLAG_DCRC_ERROR;
        if (ip->ip_data_mbuf == NULL) {
                /* XXXNP: what should ip->ip_data_len be, and why? */
                icp->pdu_flags |= SBUF_ULP_FLAG_DATA_DDPED;
        }
        pdu_len = ntohs(cpl->len);	/* includes everything. */

        INP_WLOCK(inp);
        if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
                CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
                    __func__, tid, pdu_len, inp->inp_flags);
                INP_WUNLOCK(inp);
                icl_cxgbei_conn_pdu_free(NULL, ip);
                toep->ulpcb2 = NULL;
                return (0);
        }

        tp = intotcpcb(inp);
        MPASS(icp->pdu_seq == tp->rcv_nxt);
        MPASS(tp->rcv_wnd >= pdu_len);
        tp->rcv_nxt += pdu_len;
        tp->rcv_wnd -= pdu_len;
        tp->t_rcvtime = ticks;

        /* update rx credits */
        t4_rcvd(&toep->td->tod, tp);	/* XXX: sc->tom_softc.tod */

        so = inp->inp_socket;
        sb = &so->so_rcv;
        SOCKBUF_LOCK(sb);

        icc = toep->ulpcb;
        if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
                CTR5(KTR_CXGBE,
                    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
                    __func__, tid, pdu_len, icc, sb->sb_state);
                SOCKBUF_UNLOCK(sb);
                INP_WUNLOCK(inp);

                INP_INFO_RLOCK(&V_tcbinfo);
                INP_WLOCK(inp);
                tp = tcp_drop(tp, ECONNRESET);
                if (tp != NULL)
                        INP_WUNLOCK(inp);
                INP_INFO_RUNLOCK(&V_tcbinfo);

                icl_cxgbei_conn_pdu_free(NULL, ip);
                toep->ulpcb2 = NULL;
                return (0);
        }

        MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
        ic = &icc->ic;
        icl_cxgbei_new_pdu_set_conn(ip, ic);

        MPASS(m == NULL); /* was unused, we'll use it now. */
        m = sbcut_locked(sb, sbused(sb)); /* XXXNP: toep->sb_cc accounting? */
        if (__predict_false(m != NULL)) {
                int len = m_length(m, NULL);

                /*
                 * PDUs were received before the tid transitioned to ULP
                 * mode.  Convert them to icl_cxgbei_pdus and send them to
                 * ICL before the PDU in icp/ip.
                 */
                CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__,
                    tid, len);

                /* XXXNP: needs to be rewritten. */
                if (len == sizeof(struct iscsi_bhs) ||
                    len == 4 + sizeof(struct iscsi_bhs)) {
                        struct icl_cxgbei_pdu *icp0;
                        struct icl_pdu *ip0;

                        ip0 = icl_cxgbei_new_pdu(M_NOWAIT);
                        if (ip0 == NULL)
                                CXGBE_UNIMPLEMENTED("PDU allocation failure");
                        icl_cxgbei_new_pdu_set_conn(ip0, ic);
                        icp0 = ip_to_icp(ip0);
                        icp0->pdu_seq = 0; /* XXX */
                        icp0->pdu_flags = SBUF_ULP_FLAG_HDR_RCVD |
                            SBUF_ULP_FLAG_STATUS_RCVD;
                        m_copydata(m, 0, sizeof(struct iscsi_bhs),
                            (void *)ip0->ip_bhs);
                        STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip0, ip_next);
                }
                m_freem(m);
        }

        CTR4(KTR_CXGBE, "%s: tid %u, pdu_len %u, pdu_flags 0x%x",
            __func__, tid, pdu_len, icp->pdu_flags);

        STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
        if ((icc->rx_flags & RXF_ACTIVE) == 0) {
                struct cxgbei_worker_thread_softc *cwt = &cwt_softc[icc->cwt];

                mtx_lock(&cwt->cwt_lock);
                icc->rx_flags |= RXF_ACTIVE;
                TAILQ_INSERT_TAIL(&cwt->rx_head, icc, rx_link);
                if (cwt->cwt_state == CWT_SLEEPING) {
                        cwt->cwt_state = CWT_RUNNING;
                        cv_signal(&cwt->cwt_cv);
                }
                mtx_unlock(&cwt->cwt_lock);
        }
        SOCKBUF_UNLOCK(sb);
        INP_WUNLOCK(inp);

        /* The PDU is now owned by the connection's receive queue. */
        toep->ulpcb2 = NULL;
        return (0);
}
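
/*
 * Summary of the receive path sketched by the three handlers above, as
 * understood from the code: for each offloaded iSCSI PDU the hardware
 * delivers up to three CPL messages, in order:
 *
 *	CPL_ISCSI_HDR    -> do_rx_iscsi_hdr():  allocate an icl_pdu and
 *	                    save the BHS.
 *	CPL_ISCSI_DATA   -> do_rx_iscsi_data(): attach the payload mbuf
 *	                    (absent when the payload was placed by DDP).
 *	CPL_RX_ISCSI_DDP -> do_rx_iscsi_ddp():  final status and CRC
 *	                    results; queue the completed PDU for a worker
 *	                    thread to hand to ICL.
 */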
unsigned int
cxgbei_conn_task_reserve_itt(void *conn, void **prv,
    void *scmd, unsigned int *itt)
{
        unsigned int tag;

        tag = cxgbei_task_reserve_itt(conn, prv, scmd, itt);
        if (tag)
                *itt = htonl(tag);
        return (tag);
}

unsigned int
cxgbei_conn_transfer_reserve_ttt(void *conn, void **prv,
    void *scmd, unsigned int *ttt)
{
        unsigned int tag;

        tag = cxgbei_task_reserve_ttt(conn, prv, scmd, ttt);
        if (tag)
                *ttt = htonl(tag);
        return (tag);
}
void
cxgbei_cleanup_task(void *conn, void *ofld_priv)
{
        struct icl_conn *ic = (struct icl_conn *)conn;
        struct icl_cxgbei_conn *icc = ic_to_icc(ic);
        struct cxgbei_task_data *tdata = ofld_priv;
        struct adapter *sc = icc->sc;
        struct cxgbei_data *ci = sc->iscsi_ulp_softc;

        MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
        MPASS(tdata != NULL);

        if (cxgbei_ulp2_is_ddp_tag(&ci->tag_format, tdata->sc_ddp_tag))
                t4_sk_ddp_tag_release(icc, tdata->sc_ddp_tag);
        memset(tdata, 0, sizeof(*tdata));
}
static int
cxgbei_activate(struct adapter *sc)
{
        struct cxgbei_data *ci;
        int rc;

        ASSERT_SYNCHRONIZED_OP(sc);

        if (uld_active(sc, ULD_ISCSI)) {
                KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p",
                    __func__, sc));
                return (0);
        }

        if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) {
                device_printf(sc->dev,
                    "not iSCSI offload capable, or capability disabled.\n");
                return (ENOSYS);
        }

        /* per-adapter softc for iSCSI */
        ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_NOWAIT);
        if (ci == NULL)
                return (ENOMEM);

        rc = cxgbei_ddp_init(sc, ci);
        if (rc != 0) {
                free(ci, M_CXGBE);
                return (rc);
        }

        sc->iscsi_ulp_softc = ci;

        return (0);
}
static int
cxgbei_deactivate(struct adapter *sc)
{

        ASSERT_SYNCHRONIZED_OP(sc);

        if (sc->iscsi_ulp_softc != NULL) {
                cxgbei_ddp_cleanup(sc->iscsi_ulp_softc);
                free(sc->iscsi_ulp_softc, M_CXGBE);
                sc->iscsi_ulp_softc = NULL;
        }

        return (0);
}
static void
cxgbei_activate_all(struct adapter *sc, void *arg __unused)
{

        if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0)
                return;

        /* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */
        if (sc->offload_map && !uld_active(sc, ULD_ISCSI))
                (void) t4_activate_uld(sc, ULD_ISCSI);

        end_synchronized_op(sc, 0);
}
static void
cxgbei_deactivate_all(struct adapter *sc, void *arg __unused)
{

        if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0)
                return;

        if (uld_active(sc, ULD_ISCSI))
                (void) t4_deactivate_uld(sc, ULD_ISCSI);

        end_synchronized_op(sc, 0);
}
static struct uld_info cxgbei_uld_info = {
        .uld_id = ULD_ISCSI,
        .activate = cxgbei_activate,
        .deactivate = cxgbei_deactivate,
};
static void
cwt_main(void *arg)
{
        struct cxgbei_worker_thread_softc *cwt = arg;
        struct icl_cxgbei_conn *icc = NULL;
        struct icl_conn *ic;
        struct icl_pdu *ip;
        struct sockbuf *sb;
        STAILQ_HEAD(, icl_pdu) rx_pdus = STAILQ_HEAD_INITIALIZER(rx_pdus);

        MPASS(cwt != NULL);

        mtx_lock(&cwt->cwt_lock);
        MPASS(cwt->cwt_state == 0);
        cwt->cwt_state = CWT_RUNNING;
        cv_signal(&cwt->cwt_cv);

        while (__predict_true(cwt->cwt_state != CWT_STOP)) {
                cwt->cwt_state = CWT_RUNNING;
                while ((icc = TAILQ_FIRST(&cwt->rx_head)) != NULL) {
                        TAILQ_REMOVE(&cwt->rx_head, icc, rx_link);
                        mtx_unlock(&cwt->cwt_lock);

                        ic = &icc->ic;
                        sb = &ic->ic_socket->so_rcv;

                        SOCKBUF_LOCK(sb);
                        MPASS(icc->rx_flags & RXF_ACTIVE);
                        if (__predict_true(!(sb->sb_state & SBS_CANTRCVMORE))) {
                                MPASS(STAILQ_EMPTY(&rx_pdus));
                                STAILQ_SWAP(&icc->rcvd_pdus, &rx_pdus, icl_pdu);
                                SOCKBUF_UNLOCK(sb);

                                /* Hand over PDUs to ICL. */
                                while ((ip = STAILQ_FIRST(&rx_pdus)) != NULL) {
                                        STAILQ_REMOVE_HEAD(&rx_pdus, ip_next);
                                        ic->ic_receive(ip);
                                }

                                SOCKBUF_LOCK(sb);
                                MPASS(STAILQ_EMPTY(&rx_pdus));
                        }
                        MPASS(icc->rx_flags & RXF_ACTIVE);
                        if (STAILQ_EMPTY(&icc->rcvd_pdus) ||
                            __predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
                                icc->rx_flags &= ~RXF_ACTIVE;
                        } else {
                                /*
                                 * More PDUs were received while we were busy
                                 * handing over the previous batch to ICL.
                                 * Re-add this connection to the end of the
                                 * queue.
                                 */
                                mtx_lock(&cwt->cwt_lock);
                                TAILQ_INSERT_TAIL(&cwt->rx_head, icc,
                                    rx_link);
                                mtx_unlock(&cwt->cwt_lock);
                        }
                        SOCKBUF_UNLOCK(sb);

                        mtx_lock(&cwt->cwt_lock);
                }

                /* Inner loop doesn't check for CWT_STOP, do that first. */
                if (__predict_false(cwt->cwt_state == CWT_STOP))
                        break;
                cwt->cwt_state = CWT_SLEEPING;
                cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
        }

        MPASS(TAILQ_FIRST(&cwt->rx_head) == NULL);
        mtx_assert(&cwt->cwt_lock, MA_OWNED);
        cwt->cwt_state = CWT_STOPPED;
        cv_signal(&cwt->cwt_cv);
        mtx_unlock(&cwt->cwt_lock);
        kthread_exit();
}
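
/*
 * Worker thread states as used above (a reading aid, not new mechanism):
 *
 *	0 (uninitialized) -> CWT_RUNNING: set once at startup and
 *	    signalled back to start_worker_threads().
 *	CWT_RUNNING <-> CWT_SLEEPING: the thread sleeps on cwt_cv
 *	    whenever rx_head is empty.
 *	CWT_STOP -> CWT_STOPPED: requested by stop_worker_threads() and
 *	    acknowledged just before kthread_exit().
 */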
static int
start_worker_threads(void)
{
        int i, rc;
        struct cxgbei_worker_thread_softc *cwt;

        worker_thread_count = min(mp_ncpus, 32);
        cwt_softc = malloc(worker_thread_count * sizeof(*cwt), M_CXGBE,
            M_WAITOK | M_ZERO);

        MPASS(cxgbei_proc == NULL);
        for (i = 0, cwt = &cwt_softc[0]; i < worker_thread_count; i++, cwt++) {
                mtx_init(&cwt->cwt_lock, "cwt lock", NULL, MTX_DEF);
                cv_init(&cwt->cwt_cv, "cwt cv");
                TAILQ_INIT(&cwt->rx_head);
                rc = kproc_kthread_add(cwt_main, cwt, &cxgbei_proc, NULL, 0, 0,
                    "cxgbei", "%d", i);
                if (rc != 0) {
                        printf("cxgbei: failed to start thread #%d/%d (%d)\n",
                            i + 1, worker_thread_count, rc);
                        mtx_destroy(&cwt->cwt_lock);
                        cv_destroy(&cwt->cwt_cv);
                        bzero(cwt, sizeof(*cwt));
                        if (i == 0) {
                                free(cwt_softc, M_CXGBE);
                                worker_thread_count = 0;

                                return (rc);
                        }

                        /* Not fatal, carry on with fewer threads. */
                        worker_thread_count = i;
                        rc = 0;
                        break;
                }

                /* Wait for thread to start before moving on to the next one. */
                mtx_lock(&cwt->cwt_lock);
                while (cwt->cwt_state == 0)
                        cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
                mtx_unlock(&cwt->cwt_lock);
        }

        MPASS(cwt_softc != NULL);
        MPASS(worker_thread_count > 0);
        return (0);
}
static void
stop_worker_threads(void)
{
        int i;
        struct cxgbei_worker_thread_softc *cwt = &cwt_softc[0];

        MPASS(worker_thread_count >= 0);

        for (i = 0, cwt = &cwt_softc[0]; i < worker_thread_count; i++, cwt++) {
                mtx_lock(&cwt->cwt_lock);
                MPASS(cwt->cwt_state == CWT_RUNNING ||
                    cwt->cwt_state == CWT_SLEEPING);
                cwt->cwt_state = CWT_STOP;
                cv_signal(&cwt->cwt_cv);
                do {
                        cv_wait(&cwt->cwt_cv, &cwt->cwt_lock);
                } while (cwt->cwt_state != CWT_STOPPED);
                mtx_unlock(&cwt->cwt_lock);
        }
        free(cwt_softc, M_CXGBE);
}
/* Select a worker thread for a connection. */
u_int
cxgbei_select_worker_thread(struct icl_cxgbei_conn *icc)
{
        struct adapter *sc = icc->sc;
        struct toepcb *toep = icc->toep;
        u_int i, n;

        n = worker_thread_count / sc->sge.nofldrxq;
        if (n > 0)
                i = toep->vi->pi->port_id * n + arc4random() % n;
        else
                i = arc4random() % worker_thread_count;

        CTR3(KTR_CXGBE, "%s: tid %u, cwt %u", __func__, toep->tid, i);

        return (i);
}
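
/*
 * Example with hypothetical values: worker_thread_count = 8 and
 * sc->sge.nofldrxq = 2 give n = 4, so a connection on port_id 1 picks a
 * worker thread in [4, 8); if there are more offload rx queues than
 * worker threads (n == 0), any thread may be chosen.
 */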
static int
cxgbei_mod_load(void)
{
        int rc;

        t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
        t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
        t4_register_cpl_handler(CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);

        rc = start_worker_threads();
        if (rc != 0)
                return (rc);

        rc = t4_register_uld(&cxgbei_uld_info);
        if (rc != 0) {
                stop_worker_threads();
                return (rc);
        }

        t4_iterate(cxgbei_activate_all, NULL);

        return (rc);
}
static int
cxgbei_mod_unload(void)
{

        t4_iterate(cxgbei_deactivate_all, NULL);

        if (t4_unregister_uld(&cxgbei_uld_info) == EBUSY)
                return (EBUSY);

        stop_worker_threads();

        t4_register_cpl_handler(CPL_ISCSI_HDR, NULL);
        t4_register_cpl_handler(CPL_ISCSI_DATA, NULL);
        t4_register_cpl_handler(CPL_RX_ISCSI_DDP, NULL);

        return (0);
}
static int
cxgbei_modevent(module_t mod, int cmd, void *arg)
{
#ifdef TCP_OFFLOAD
        int rc = 0;

        switch (cmd) {
        case MOD_LOAD:
                rc = cxgbei_mod_load();
                break;

        case MOD_UNLOAD:
                rc = cxgbei_mod_unload();
                break;

        default:
                rc = EINVAL;
        }

        return (rc);
#else
        printf("cxgbei: compiled without TCP_OFFLOAD support.\n");
        return (EOPNOTSUPP);
#endif
}
static moduledata_t cxgbei_mod = {
        "cxgbei",
        cxgbei_modevent,
        NULL,
};

MODULE_VERSION(cxgbei, 1);
DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1);
MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1);
MODULE_DEPEND(cxgbei, icl, 1, 1, 1);