1 /**************************************************************************
3 Copyright (c) 2007, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/fcntl.h>
38 #include <sys/limits.h>
40 #include <sys/eventhandler.h>
42 #include <sys/module.h>
43 #include <sys/condvar.h>
44 #include <sys/mutex.h>
45 #include <sys/socket.h>
46 #include <sys/sockopt.h>
47 #include <sys/sockstate.h>
48 #include <sys/sockbuf.h>
49 #include <sys/syslog.h>
50 #include <sys/taskqueue.h>
53 #include <net/route.h>
55 #include <netinet/in.h>
56 #include <netinet/in_pcb.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_var.h>
60 #include <cxgb_osdep.h>
61 #include <sys/mbufq.h>
63 #include <netinet/in_pcb.h>
65 #include <ulp/tom/cxgb_tcp_offload.h>
66 #include <netinet/tcp.h>
67 #include <netinet/tcp_var.h>
68 #include <netinet/tcp_offload.h>
69 #include <netinet/tcp_fsm.h>
71 #include <cxgb_include.h>
73 #include <net/if_vlan_var.h>
74 #include <net/route.h>
77 #include <common/cxgb_firmware_exports.h>
78 #include <common/cxgb_tcb.h>
79 #include <cxgb_include.h>
80 #include <common/cxgb_ctl_defs.h>
81 #include <common/cxgb_t3_cpl.h>
82 #include <cxgb_offload.h>
83 #include <ulp/toecore/cxgb_toedev.h>
84 #include <ulp/tom/cxgb_l2t.h>
85 #include <ulp/tom/cxgb_tom.h>
86 #include <ulp/tom/cxgb_defs.h>
87 #include <ulp/tom/cxgb_t3_ddp.h>
88 #include <ulp/tom/cxgb_toepcb.h>
89 #include <ulp/tom/cxgb_tcp.h>
/*
 * Module-global state for the T3 TCP offload module (TOM).
 * NOTE(review): this excerpt appears sub-sampled (embedded line numbers,
 * gaps); code left byte-identical, comments only.
 */
92 TAILQ_HEAD(, adapter) adapter_list;
/* Protects adapter_list (readers in hot path use rw_rlock). */
93 static struct rwlock adapter_list_lock;
/* Per-adapter tom_data instances, and the mutex guarding the list. */
95 static TAILQ_HEAD(, tom_data) cxgb_list;
96 static struct mtx cxgb_list_lock;
/* Cap on active-open TIDs; ATID_BASE keeps atids disjoint from hw tids. */
97 static const unsigned int MAX_ATIDS = 64 * 1024;
98 static const unsigned int ATID_BASE = 0x100000;
/* Forward declarations. */
100 static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry);
101 static void cxgb_register_listeners(void);
102 static void t3c_tom_add(struct t3cdev *cdev);
105 * Handlers for each CPL opcode
107 static cxgb_cpl_handler_func tom_cpl_handlers[256];
/* Tag from EVENTHANDLER_REGISTER for the TCP listen hooks. */
110 static eventhandler_tag listen_tag;
/* ASIC revisions this TOM claims when the TOE core probes devices. */
112 static struct offload_id t3_toe_id_tab[] = {
113 { TOE_ID_CHELSIO_T3, 0 },
114 { TOE_ID_CHELSIO_T3B, 0 },
115 { TOE_ID_CHELSIO_T3C, 0 },
/* Registration record handed to the generic TOE layer. */
119 static struct tom_info t3_tom_info = {
120 .ti_attach = t3_toe_attach,
121 .ti_id_table = t3_toe_id_tab,
122 .ti_name = "Chelsio-T3"
/* cxgb client: received CPLs are dispatched through tom_cpl_handlers. */
125 struct cxgb_client t3c_tom_client = {
129 .handlers = tom_cpl_handlers,
/*
 * Dump the hardware TCB for `tid` to the kernel trace buffer (KTR).
 * Reads TCB_SIZE bytes from the CM MC7 memory via the backdoor interface.
 * NOTE(review): lines are missing from this excerpt (buffer declaration,
 * loop pointer increments); code intentionally left untouched.
 */
134 cxgb_log_tcb(struct adapter *sc, unsigned int tid)
138 uint64_t *tcb = (uint64_t *)buf;
140 struct mc7 *mem = &sc->cm;
/* tid*TCB_SIZE is the byte offset; MC7 reads are in 64-bit words. */
142 error = t3_mc7_bd_read(mem, tid*TCB_SIZE/8, TCB_SIZE/8, tcb);
144 printf("cxgb_tcb_log failed\n");
147 CTR1(KTR_CXGB, "TCB tid=%u", tid);
148 for (i = 0; i < TCB_SIZE / 32; i++) {
150 CTR5(KTR_CXGB, "%1d: %08x %08x %08x %08x",
151 i, (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
152 (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
155 CTR4(KTR_CXGB, " %08x %08x %08x %08x",
156 (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
157 (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
163 * Add an skb to the deferred skb queue for processing from process context.
166 t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler)
168 struct tom_data *td = TOM_DATA(dev);
170 m_set_handler(m, handler);
171 mtx_lock(&td->deferq.lock);
173 mbufq_tail(&td->deferq, m);
174 if (mbufq_len(&td->deferq) == 1)
175 taskqueue_enqueue(td->tq, &td->deferq_task);
176 mtx_lock(&td->deferq.lock);
184 toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT|M_ZERO);
194 toepcb_init(struct toepcb *toep)
196 toep->tp_refcount = 1;
197 cv_init(&toep->tp_cv, "toep cv");
201 toepcb_hold(struct toepcb *toep)
203 atomic_add_acq_int(&toep->tp_refcount, 1);
207 toepcb_release(struct toepcb *toep)
209 if (toep->tp_refcount == 1) {
213 atomic_add_acq_int(&toep->tp_refcount, -1);
218 * Add a T3 offload device to the list of devices we are managing.
221 t3cdev_add(struct tom_data *t)
223 mtx_lock(&cxgb_list_lock);
224 TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
225 mtx_unlock(&cxgb_list_lock);
229 cdev2type(struct t3cdev *cdev)
233 switch (cdev->type) {
235 type = TOE_ID_CHELSIO_T3;
238 type = TOE_ID_CHELSIO_T3B;
241 type = TOE_ID_CHELSIO_T3C;
/*
 * Allocate and initialize the TID tables.  Returns 0 on success.
 * The tid, stid and atid tables share a single cxgb_alloc_mem() allocation;
 * stid_tab/atid_tab are carved out of its tail.
 * NOTE(review): error return, zeroing, and the free-list setup loop bodies
 * are missing from this excerpt; code intentionally left untouched.
 */
251 init_tid_tabs(struct tid_info *t, unsigned int ntids,
252 unsigned int natids, unsigned int nstids,
253 unsigned int atid_base, unsigned int stid_base)
255 unsigned long size = ntids * sizeof(*t->tid_tab) +
256 natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab);
258 t->tid_tab = cxgb_alloc_mem(size);
/* stid_tab and atid_tab point into the tail of the one allocation. */
262 t->stid_tab = (union listen_entry *)&t->tid_tab[ntids];
263 t->atid_tab = (union active_open_entry *)&t->stid_tab[nstids];
266 t->stid_base = stid_base;
269 t->atid_base = atid_base;
271 t->stids_in_use = t->atids_in_use = 0;
272 atomic_set_int(&t->tids_in_use, 0);
/* MTX_DUPOK: each adapter instance has its own stid/atid locks. */
273 mtx_init(&t->stid_lock, "stid", NULL, MTX_DUPOK|MTX_DEF);
274 mtx_init(&t->atid_lock, "atid", NULL, MTX_DUPOK|MTX_DEF);
277 * Setup the free lists for stid_tab and atid_tab.
281 t->stid_tab[nstids - 1].next = &t->stid_tab[nstids];
282 t->sfree = t->stid_tab;
286 t->atid_tab[natids - 1].next = &t->atid_tab[natids];
287 t->afree = t->atid_tab;
293 free_tid_maps(struct tid_info *t)
295 mtx_destroy(&t->stid_lock);
296 mtx_destroy(&t->atid_lock);
297 cxgb_free_mem(t->tid_tab);
301 add_adapter(adapter_t *adap)
303 rw_wlock(&adapter_list_lock);
304 TAILQ_INSERT_TAIL(&adapter_list, adap, adapter_entry);
305 rw_wunlock(&adapter_list_lock);
309 remove_adapter(adapter_t *adap)
311 rw_wlock(&adapter_list_lock);
312 TAILQ_REMOVE(&adapter_list, adap, adapter_entry);
313 rw_wunlock(&adapter_list_lock);
317 * Populate a TID_RELEASE WR. The mbuf must be already propely sized.
320 mk_tid_release(struct mbuf *m, unsigned int tid)
322 struct cpl_tid_release *req;
324 m_set_priority(m, CPL_PRIORITY_SETUP);
325 req = mtod(m, struct cpl_tid_release *);
326 m->m_pkthdr.len = m->m_len = sizeof(*req);
327 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
328 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
332 t3_process_tid_release_list(void *data, int pending)
335 struct t3cdev *tdev = data;
336 struct t3c_data *td = T3C_DATA (tdev);
338 mtx_lock(&td->tid_release_lock);
339 while (td->tid_release_list) {
340 struct toe_tid_entry *p = td->tid_release_list;
342 td->tid_release_list = (struct toe_tid_entry *)p->ctx;
343 mtx_unlock(&td->tid_release_lock);
344 m = m_get(M_WAIT, MT_DATA);
345 mk_tid_release(m, p - td->tid_maps.tid_tab);
346 cxgb_ofld_send(tdev, m);
348 mtx_lock(&td->tid_release_lock);
350 mtx_unlock(&td->tid_release_lock);
/*
 * Bring up offload support for an adapter: query driver limits via
 * dev->ctl(), build the L2 table and TID tables, install the receive/ARP
 * hooks, and add the adapter to the global list.
 * NOTE(review): error-path labels and cleanup lines are missing from this
 * excerpt; code intentionally left untouched.
 */
354 cxgb_offload_activate(struct adapter *adapter)
356 struct t3cdev *dev = &adapter->tdev;
359 struct tid_range stid_range, tid_range;
360 struct mtutab mtutab;
361 unsigned int l2t_capacity;
363 t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
366 dev->adapter = adapter;
/* All parameter queries must succeed or offload stays disabled. */
369 if (dev->ctl(dev, GET_TX_MAX_CHUNK, &t->tx_max_chunk) < 0 ||
370 dev->ctl(dev, GET_MAX_OUTSTANDING_WR, &t->max_wrs) < 0 ||
371 dev->ctl(dev, GET_L2T_CAPACITY, &l2t_capacity) < 0 ||
372 dev->ctl(dev, GET_MTUS, &mtutab) < 0 ||
373 dev->ctl(dev, GET_TID_RANGE, &tid_range) < 0 ||
374 dev->ctl(dev, GET_STID_RANGE, &stid_range) < 0) {
375 device_printf(adapter->dev, "%s: dev->ctl check failed\n", __FUNCTION__);
380 L2DATA(dev) = t3_init_l2t(l2t_capacity);
382 device_printf(adapter->dev, "%s: t3_init_l2t failed\n", __FUNCTION__);
/* Use at most half of the TID space (capped at MAX_ATIDS) for active opens. */
385 natids = min(tid_range.num / 2, MAX_ATIDS);
386 err = init_tid_tabs(&t->tid_maps, tid_range.num, natids,
387 stid_range.num, ATID_BASE, stid_range.base);
389 device_printf(adapter->dev, "%s: init_tid_tabs failed\n", __FUNCTION__);
393 t->mtus = mtutab.mtus;
394 t->nmtus = mtutab.size;
396 TASK_INIT(&t->tid_release_task, 0 /* XXX? */, t3_process_tid_release_list, dev);
397 mtx_init(&t->tid_release_lock, "tid release", NULL, MTX_DUPOK|MTX_DEF);
401 dev->recv = process_rx;
402 dev->arp_update = t3_l2t_update;
403 /* Register netevent handler once */
404 if (TAILQ_EMPTY(&adapter_list)) {
405 #if defined(CONFIG_CHELSIO_T3_MODULE)
406 if (prepare_arp_with_t3core())
407 log(LOG_ERR, "Unable to set offload capabilities\n");
410 CTR1(KTR_CXGB, "adding adapter %p", adapter);
411 add_adapter(adapter);
412 device_printf(adapter->dev, "offload started\n");
413 adapter->flags |= CXGB_OFLD_INIT;
417 t3_free_l2t(L2DATA(dev));
/*
 * Tear down offload state for an adapter: remove it from the global list,
 * free the TID maps and L2 table, and destroy the release-list lock.
 * NOTE(review): lines (e.g. freeing of the t3c_data itself) are missing
 * from this excerpt; code intentionally left untouched.
 */
425 cxgb_offload_deactivate(struct adapter *adapter)
427 struct t3cdev *tdev = &adapter->tdev;
428 struct t3c_data *t = T3C_DATA(tdev);
430 printf("removing adapter %p\n", adapter);
431 remove_adapter(adapter);
/* Undo the one-time ARP hookup when the last adapter goes away. */
432 if (TAILQ_EMPTY(&adapter_list)) {
433 #if defined(CONFIG_CHELSIO_T3_MODULE)
434 restore_arp_sans_t3core();
437 free_tid_maps(&t->tid_maps);
438 T3C_DATA(tdev) = NULL;
439 t3_free_l2t(L2DATA(tdev));
441 mtx_destroy(&t->tid_release_lock);
446 * Sends an sk_buff to a T3C driver after dealing with any active network taps.
449 cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m)
453 r = dev->send(dev, m);
/*
 * Return the interface on `adapter` whose MAC address matches `mac`,
 * descending into a VLAN sub-interface when `vlan` carries a valid tag.
 * NOTE(review): the return statements and the vlan group lookup are missing
 * from this excerpt; code intentionally left untouched.
 */
457 static struct ifnet *
458 get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan)
462 for_each_port(adapter, i) {
464 const struct vlan_group *grp;
466 const struct port_info *p = &adapter->port[i];
467 struct ifnet *ifp = p->ifp;
469 if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) {
/* EVL_VLID_MASK means "no/any vlan"; only a real tag selects a vlan dev. */
472 if (vlan && vlan != EVL_VLID_MASK) {
474 dev = grp ? grp->vlan_devices[vlan] : NULL;
/*
 * Rev-0 silicon workaround applied after a port failover: poke the XGMAC
 * TX/RX control registers depending on whether the failover was caused by
 * ifdown or by link loss.  Reads after writes flush the posted writes.
 * NOTE(review): register values/else-branch lines are missing from this
 * excerpt; code intentionally left untouched.
 */
486 failover_fixup(adapter_t *adapter, int port)
488 if (adapter->params.rev == 0) {
489 struct ifnet *ifp = adapter->port[port].ifp;
490 struct cmac *mac = &adapter->port[port].mac;
491 if (!(ifp->if_flags & IFF_UP)) {
492 /* Failover triggered by the interface ifdown */
493 t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset,
495 t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset);
497 /* Failover triggered by the interface link down */
498 t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0);
499 t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset);
500 t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset,
/*
 * iSCSI ULP control: report (GET) or program (SET) the ULP_RX iSCSI
 * region limits and tag mask.
 * NOTE(review): switch scaffolding and return lines are missing from this
 * excerpt; code intentionally left untouched.
 */
507 cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data)
510 struct ulp_iscsi_info *uiip = data;
513 case ULP_ISCSI_GET_PARAMS:
514 uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
515 uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
516 uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
518 * On tx, the iscsi pdu has to be <= tx page size and has to
519 * fit into the Tx PM FIFO.
521 uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
522 t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
523 /* on rx, the iscsi pdu has to be < rx page size and the
524 whole pdu + cpl headers has to fit into one sge buffer */
525 /* also check the max rx data length programmed in TP */
526 uiip->max_rxsz = min(uiip->max_rxsz,
527 ((t3_read_reg(adapter, A_TP_PARA_REG2))
528 >> S_MAXRXDATA) & M_MAXRXDATA);
530 case ULP_ISCSI_SET_PARAMS:
531 t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
539 /* Response queue used for RDMA events. */
540 #define ASYNC_NOTIF_RSPQ 0
/*
 * RDMA ULP control: service RDMA_* requests from the iWARP driver —
 * parameter queries, CQ context ops, MC7 memory reads and control-QP
 * setup.  SGE context operations take the spin lock because they may be
 * invoked from any context.
 * NOTE(review): switch scaffolding, some argument lines and returns are
 * missing from this excerpt; code intentionally left untouched.
 */
543 cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data)
548 case RDMA_GET_PARAMS: {
549 struct rdma_info *req = data;
/* Report user doorbell BAR, TPT/PBL/RQT region limits and kdb address. */
551 req->udbell_physbase = rman_get_start(adapter->udbs_res);
552 req->udbell_len = rman_get_size(adapter->udbs_res);
553 req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT);
554 req->tpt_top = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT);
555 req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT);
556 req->pbl_top = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT);
557 req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT);
558 req->rqt_top = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT);
559 req->kdb_addr = (void *)((unsigned long)rman_get_virtual(adapter->regs_res) + A_SG_KDOORBELL); break;
562 struct rdma_cq_op *req = data;
564 /* may be called in any context */
565 mtx_lock_spin(&adapter->sge.reg_lock);
566 ret = t3_sge_cqcntxt_op(adapter, req->id, req->op,
568 mtx_unlock_spin(&adapter->sge.reg_lock);
/* MC7 backdoor read: address/length must be 8-byte aligned. */
572 struct ch_mem_range *t = data;
575 if ((t->addr & 7) || (t->len & 7))
577 if (t->mem_id == MEM_CM)
579 else if (t->mem_id == MEM_PMRX)
580 mem = &adapter->pmrx;
581 else if (t->mem_id == MEM_PMTX)
582 mem = &adapter->pmtx;
586 ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf);
591 case RDMA_CQ_SETUP: {
592 struct rdma_cq_setup *req = data;
594 mtx_lock_spin(&adapter->sge.reg_lock);
595 ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr,
596 req->size, ASYNC_NOTIF_RSPQ,
597 req->ovfl_mode, req->credits,
599 mtx_unlock_spin(&adapter->sge.reg_lock);
602 case RDMA_CQ_DISABLE:
603 mtx_lock_spin(&adapter->sge.reg_lock);
604 ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data);
605 mtx_unlock_spin(&adapter->sge.reg_lock);
607 case RDMA_CTRL_QP_SETUP: {
608 struct rdma_ctrlqp_setup *req = data;
610 mtx_lock_spin(&adapter->sge.reg_lock);
611 ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0,
612 SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ,
613 req->base_addr, req->size,
614 FW_RI_TID_START, 1, 0);
615 mtx_unlock_spin(&adapter->sge.reg_lock);
/*
 * Main t3cdev control entry point: answer parameter queries (WR limits,
 * TID/STID ranges, MTU table, DDP parameters, ports), drive failover, and
 * forward iSCSI/RDMA requests to their sub-handlers once offload is
 * running.
 * NOTE(review): case labels, breaks and local casts are missing from this
 * excerpt; code intentionally left untouched.
 */
625 cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data)
627 struct adapter *adapter = tdev2adap(tdev);
628 struct tid_range *tid;
630 struct iff_mac *iffmacp;
631 struct ddp_params *ddpp;
632 struct adap_ports *ports;
633 struct ofld_page_info *rx_page_info;
634 struct tp_params *tp = &adapter->params.tp;
638 case GET_MAX_OUTSTANDING_WR:
639 *(unsigned int *)data = FW_WR_NUM;
642 *(unsigned int *)data = WR_FLITS;
644 case GET_TX_MAX_CHUNK:
645 *(unsigned int *)data = 1 << 20; /* 1MB */
/* TID range: total MC5 size minus routes, filters and server entries. */
649 tid->num = t3_mc5_size(&adapter->mc5) -
650 adapter->params.mc5.nroutes -
651 adapter->params.mc5.nfilters -
652 adapter->params.mc5.nservers;
657 tid->num = adapter->params.mc5.nservers;
658 tid->base = t3_mc5_size(&adapter->mc5) - tid->num -
659 adapter->params.mc5.nfilters -
660 adapter->params.mc5.nroutes;
662 case GET_L2T_CAPACITY:
663 *(unsigned int *)data = 2048;
668 mtup->mtus = adapter->params.mtus;
670 case GET_IFF_FROM_MAC:
672 iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr,
673 iffmacp->vlan_tag & EVL_VLID_MASK);
677 ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT);
678 ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT);
679 ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK);
683 ports->nports = adapter->params.nports;
684 for_each_port(adapter, port)
685 ports->lldevs[port] = adapter->port[port].ifp;
689 t3_port_failover(adapter, port);
690 failover_fixup(adapter, port);
694 t3_failover_done(adapter, port);
697 t3_failover_clear(adapter);
699 case GET_RX_PAGE_INFO:
701 rx_page_info->page_size = tp->rx_pg_size;
702 rx_page_info->num = tp->rx_num_pgs;
/* ULP requests are only valid once the adapter is running offload. */
704 case ULP_ISCSI_GET_PARAMS:
705 case ULP_ISCSI_SET_PARAMS:
706 if (!offload_running(adapter))
708 return cxgb_ulp_iscsi_ctl(adapter, req, data);
709 case RDMA_GET_PARAMS:
712 case RDMA_CQ_DISABLE:
713 case RDMA_CTRL_QP_SETUP:
715 if (!offload_running(adapter))
717 return cxgb_rdma_ctl(adapter, req, data);
/*
725 * Allocate a TOM data structure,
726 * initialize its cpl_handlers
727 * and register it as a T3C client
 * NOTE(review): error-handling gotos, register_toedev cleanup and several
 * declarations are missing from this excerpt; code intentionally left
 * untouched.
 */
730 t3c_tom_add(struct t3cdev *cdev)
736 struct adap_ports *port_info;
738 t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
/* Install the send/ctl methods before issuing our first ctl query. */
742 cdev->send = t3_offload_tx;
743 cdev->ctl = cxgb_offload_ctl;
745 if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
748 port_info = malloc(sizeof(*port_info), M_CXGB, M_NOWAIT|M_ZERO);
752 if (cdev->ctl(cdev, GET_PORTS, port_info) < 0)
755 t3_init_wr_tab(wr_len);
757 t->client = &t3c_tom_client;
759 /* Register TCP offload device */
761 tdev->tod_ttid = cdev2type(cdev);
762 tdev->tod_lldev = cdev->lldev;
764 if (register_toedev(tdev, "toe%d")) {
765 printf("unable to register offload device");
/* Advertise and enable TOE on every port of this adapter. */
770 for (i = 0; i < port_info->nports; i++) {
771 struct ifnet *ifp = port_info->lldevs[i];
774 CTR1(KTR_TOM, "enabling toe on %p", ifp);
775 ifp->if_capabilities |= IFCAP_TOE4;
776 ifp->if_capenable |= IFCAP_TOE4;
778 t->ports = port_info;
780 /* Add device to the list of offload devices */
783 /* Activate TCP offload device */
784 cxgb_offload_activate(TOM_DATA(tdev)->cdev->adapter);
786 activate_offload(tdev);
787 cxgb_register_listeners();
791 printf("out_free_all fail\n");
792 free(port_info, M_CXGB);
794 printf("out_free_tom fail\n");
802 do_act_open_rpl(struct t3cdev *dev, struct mbuf *m)
804 struct cpl_act_open_rpl *rpl = cplhdr(m);
805 unsigned int atid = G_TID(ntohl(rpl->atid));
806 struct toe_tid_entry *toe_tid;
808 toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
809 if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers &&
810 toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) {
811 return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m,
814 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
815 dev->name, CPL_ACT_OPEN_RPL);
816 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
821 do_stid_rpl(struct t3cdev *dev, struct mbuf *m)
823 union opcode_tid *p = cplhdr(m);
824 unsigned int stid = G_TID(ntohl(p->opcode_tid));
825 struct toe_tid_entry *toe_tid;
827 toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
828 if (toe_tid->ctx && toe_tid->client->handlers &&
829 toe_tid->client->handlers[p->opcode]) {
830 return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx);
832 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
833 dev->name, p->opcode);
834 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
839 do_hwtid_rpl(struct t3cdev *dev, struct mbuf *m)
841 union opcode_tid *p = cplhdr(m);
843 struct toe_tid_entry *toe_tid;
845 DPRINTF("do_hwtid_rpl opcode=0x%x\n", p->opcode);
846 hwtid = G_TID(ntohl(p->opcode_tid));
848 toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
849 if (toe_tid->ctx && toe_tid->client->handlers &&
850 toe_tid->client->handlers[p->opcode]) {
851 return toe_tid->client->handlers[p->opcode]
852 (dev, m, toe_tid->ctx);
854 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
855 dev->name, p->opcode);
856 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
861 do_cr(struct t3cdev *dev, struct mbuf *m)
863 struct cpl_pass_accept_req *req = cplhdr(m);
864 unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
865 struct toe_tid_entry *toe_tid;
867 toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
868 if (toe_tid->ctx && toe_tid->client->handlers &&
869 toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) {
870 return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]
871 (dev, m, toe_tid->ctx);
873 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
874 dev->name, CPL_PASS_ACCEPT_REQ);
875 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
880 do_abort_req_rss(struct t3cdev *dev, struct mbuf *m)
882 union opcode_tid *p = cplhdr(m);
883 unsigned int hwtid = G_TID(ntohl(p->opcode_tid));
884 struct toe_tid_entry *toe_tid;
886 toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
887 if (toe_tid->ctx && toe_tid->client->handlers &&
888 toe_tid->client->handlers[p->opcode]) {
889 return toe_tid->client->handlers[p->opcode]
890 (dev, m, toe_tid->ctx);
892 struct cpl_abort_req_rss *req = cplhdr(m);
893 struct cpl_abort_rpl *rpl;
895 struct mbuf *m = m_get(M_NOWAIT, MT_DATA);
897 log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n");
901 m_set_priority(m, CPL_PRIORITY_DATA);
904 htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
905 rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req)));
907 htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req)));
908 rpl->cmd = req->status;
909 cxgb_ofld_send(dev, m);
911 return (CPL_RET_BUF_DONE);
916 do_act_establish(struct t3cdev *dev, struct mbuf *m)
918 struct cpl_act_establish *req;
920 struct toe_tid_entry *toe_tid;
923 atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
924 toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
925 if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
926 toe_tid->client->handlers[CPL_ACT_ESTABLISH]) {
928 return toe_tid->client->handlers[CPL_ACT_ESTABLISH]
929 (dev, m, toe_tid->ctx);
932 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
933 dev->name, CPL_ACT_ESTABLISH);
934 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
940 do_term(struct t3cdev *dev, struct mbuf *m)
942 unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff;
943 unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data));
944 struct toe_tid_entry *toe_tid;
946 toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
947 if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
948 toe_tid->client->handlers[opcode]) {
949 return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx);
951 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
953 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
959 * Process a received packet with an unknown/unexpected CPL opcode.
962 do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
964 log(LOG_ERR, "%s: received bad CPL command %u\n", cdev->name,
965 0xFF & *mtod(m, unsigned int *));
966 return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
970 * Add a new handler to the CPL dispatch table. A NULL handler may be supplied
971 * to unregister an existing handler.
974 t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h)
976 if (opcode < UCHAR_MAX)
977 tom_cpl_handlers[opcode] = h ? h : do_bad_cpl;
979 log(LOG_ERR, "Chelsio T3 TOM: handler registration for "
980 "opcode %u failed\n", opcode);
984 * Make a preliminary determination if a connection can be offloaded. It's OK
985 * to fail the offload later if we say we can offload here. For now this
986 * always accepts the offload request unless there are IP options.
989 can_offload(struct toedev *dev, struct socket *so)
991 struct tom_data *tomd = TOM_DATA(dev);
992 struct t3cdev *cdev = T3CDEV(dev->tod_lldev);
993 struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
995 return so_sotoinpcb(so)->inp_depend4.inp4_options == NULL &&
996 tomd->conf.activated &&
997 (tomd->conf.max_conn < 0 ||
998 atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
1002 tom_ctl(struct toedev *dev, unsigned int req, void *data)
1004 struct tom_data *t = TOM_DATA(dev);
1005 struct t3cdev *cdev = t->cdev;
1008 return cdev->ctl(cdev, req, data);
1010 return (EOPNOTSUPP);
1014 * Free an active-open TID.
1017 cxgb_free_atid(struct t3cdev *tdev, int atid)
1019 struct tid_info *t = &(T3C_DATA(tdev))->tid_maps;
1020 union active_open_entry *p = atid2entry(t, atid);
1021 void *ctx = p->toe_tid.ctx;
1023 mtx_lock(&t->atid_lock);
1027 mtx_unlock(&t->atid_lock);
1033 * Free a server TID and return it to the free pool.
1036 cxgb_free_stid(struct t3cdev *tdev, int stid)
1038 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1039 union listen_entry *p = stid2entry(t, stid);
1041 mtx_lock(&t->stid_lock);
1045 mtx_unlock(&t->stid_lock);
1049 * Free a server TID and return it to the free pool.
1052 cxgb_get_lctx(struct t3cdev *tdev, int stid)
1054 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1055 union listen_entry *p = stid2entry(t, stid);
1057 return (p->toe_tid.ctx);
1061 cxgb_insert_tid(struct t3cdev *tdev, struct cxgb_client *client,
1062 void *ctx, unsigned int tid)
1064 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1066 t->tid_tab[tid].client = client;
1067 t->tid_tab[tid].ctx = ctx;
1068 atomic_add_int(&t->tids_in_use, 1);
1071 /* use ctx as a next pointer in the tid release list */
1073 cxgb_queue_tid_release(struct t3cdev *tdev, unsigned int tid)
1075 struct t3c_data *td = T3C_DATA (tdev);
1076 struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid];
1078 CTR0(KTR_TOM, "queuing tid release\n");
1080 mtx_lock(&td->tid_release_lock);
1081 p->ctx = td->tid_release_list;
1082 td->tid_release_list = p;
1085 taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task);
1087 mtx_unlock(&td->tid_release_lock);
1091 * Remove a tid from the TID table. A client may defer processing its last
1092 * CPL message if it is locked at the time it arrives, and while the message
1093 * sits in the client's backlog the TID may be reused for another connection.
1094 * To handle this we atomically switch the TID association if it still points
1095 * to the original client context.
1098 cxgb_remove_tid(struct t3cdev *tdev, void *ctx, unsigned int tid)
1100 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1102 if (tid >= t->ntids)
1103 panic("tid=%d >= t->ntids=%d", tid, t->ntids);
1105 if (tdev->type == T3A)
1106 atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx, (long)NULL, (long)ctx);
1110 m = m_get(M_NOWAIT, MT_DATA);
1111 if (__predict_true(m != NULL)) {
1112 mk_tid_release(m, tid);
1113 CTR1(KTR_CXGB, "releasing tid=%u", tid);
1115 cxgb_ofld_send(tdev, m);
1116 t->tid_tab[tid].ctx = NULL;
1118 cxgb_queue_tid_release(tdev, tid);
1120 atomic_add_int(&t->tids_in_use, -1);
1124 cxgb_alloc_atid(struct t3cdev *tdev, struct cxgb_client *client,
1128 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1130 mtx_lock(&t->atid_lock);
1132 union active_open_entry *p = t->afree;
1134 atid = (p - t->atid_tab) + t->atid_base;
1136 p->toe_tid.ctx = ctx;
1137 p->toe_tid.client = client;
1140 mtx_unlock(&t->atid_lock);
1145 cxgb_alloc_stid(struct t3cdev *tdev, struct cxgb_client *client,
1149 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1151 mtx_lock(&t->stid_lock);
1153 union listen_entry *p = t->sfree;
1155 stid = (p - t->stid_tab) + t->stid_base;
1157 p->toe_tid.ctx = ctx;
1158 p->toe_tid.client = client;
1161 mtx_unlock(&t->stid_lock);
1167 is_offloading(struct ifnet *ifp)
1169 struct adapter *adapter;
1172 rw_rlock(&adapter_list_lock);
1173 TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) {
1174 for_each_port(adapter, port) {
1175 if (ifp == adapter->port[port].ifp) {
1176 rw_runlock(&adapter_list_lock);
1181 rw_runlock(&adapter_list_lock);
/*
 * Eventhandler for route/ARP updates: refresh the L2T entry for offloaded
 * interfaces.  NOTE(review): the route lock/refcount dance around the
 * cxgb_neigh_update() call is missing from this excerpt; code left
 * untouched.
 */
1187 cxgb_arp_update_event(void *unused, struct rtentry *rt0,
1188 uint8_t *enaddr, struct sockaddr *sa)
1191 if (!is_offloading(rt0->rt_ifp))
1196 cxgb_neigh_update(rt0, enaddr, sa);
/*
 * Eventhandler for route redirects: migrate offloaded connections from
 * rt0 to rt1 when both interfaces are offload-capable.
 */
1202 cxgb_redirect_event(void *unused, int event, struct rtentry *rt0,
1203 struct rtentry *rt1, struct sockaddr *sa)
1206 * ignore events on non-offloaded interfaces
1208 if (!is_offloading(rt0->rt_ifp))
1212 * Cannot redirect to non-offload device.
1214 if (!is_offloading(rt1->rt_ifp)) {
1215 log(LOG_WARNING, "%s: Redirect to non-offload"
1216 "device ignored.\n", __FUNCTION__);
1221 * avoid LORs by dropping the route lock but keeping a reference
1229 cxgb_redirect(rt0, rt1, sa);
1230 cxgb_neigh_update(rt1, NULL, sa);
1239 cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa)
1242 if (rt->rt_ifp && is_offloading(rt->rt_ifp) && (rt->rt_ifp->if_flags & IFCAP_TOE)) {
1243 struct t3cdev *tdev = T3CDEV(rt->rt_ifp);
1246 t3_l2t_update(tdev, rt, enaddr, sa);
/*
 * Build and send a CPL_SET_TCB_FIELD that repoints connection `tid` at the
 * L2T entry `e` (used when a route redirect moves a connection).
 * NOTE(review): the mbuf NULL check scaffolding and some request fields
 * are missing from this excerpt; code intentionally left untouched.
 */
1251 set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
1254 struct cpl_set_tcb_field *req;
1256 m = m_gethdr(M_NOWAIT, MT_DATA);
1258 log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__);
1262 m_set_priority(m, CPL_PRIORITY_CONTROL);
1263 req = mtod(m, struct cpl_set_tcb_field *);
1264 m->m_pkthdr.len = m->m_len = sizeof(*req);
1266 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1267 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
/* Update only the L2T_IX field of the TCB. */
1270 req->word = htons(W_TCB_L2T_IX);
1271 req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX));
1272 req->val = htobe64(V_TCB_L2T_IX(e->idx));
1273 tdev->send(tdev, m);
/*
 * Handle a route redirect from `old` to `new`: allocate an L2T entry for
 * the new route, then walk the TID table asking each owning client whether
 * its connection should follow; matching connections get their TCB
 * repointed via set_l2t_ix().
 * NOTE(review): returns, the l2t NULL check and parts of the per-tid loop
 * are missing from this excerpt; code intentionally left untouched.
 */
1277 cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa)
1279 struct ifnet *olddev, *newdev;
1280 struct tid_info *ti;
1281 struct t3cdev *tdev;
1284 struct l2t_entry *e;
1285 struct toe_tid_entry *te;
1287 olddev = old->rt_ifp;
1288 newdev = new->rt_ifp;
1289 if (!is_offloading(olddev))
1291 if (!is_offloading(newdev)) {
1292 log(LOG_WARNING, "%s: Redirect to non-offload"
1293 "device ignored.\n", __FUNCTION__);
1296 tdev = T3CDEV(olddev);
/* Both routes must resolve to the same offload device. */
1298 if (tdev != T3CDEV(newdev)) {
1299 log(LOG_WARNING, "%s: Redirect to different "
1300 "offload device ignored.\n", __FUNCTION__);
1304 /* Add new L2T entry */
1305 e = t3_l2t_get(tdev, new, new->rt_ifp, sa);
1307 log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n",
1312 /* Walk tid table and notify clients of dst change. */
1313 ti = &(T3C_DATA (tdev))->tid_maps;
1314 for (tid=0; tid < ti->ntids; tid++) {
1315 te = lookup_tid(ti, tid);
1317 if (te->ctx && te->client && te->client->redirect) {
1318 update_tcb = te->client->redirect(te->ctx, old, new,
/* Each repointed connection takes its own reference on the entry. */
1321 l2t_hold(L2DATA(tdev), e);
1322 set_l2t_ix(tdev, tid, e);
/* Drop the allocation reference taken by t3_l2t_get(). */
1326 l2t_release(L2DATA(tdev), e);
1330 * Initialize the CPL dispatch table.
1333 init_cpl_handlers(void)
1337 for (i = 0; i < 256; ++i)
1338 tom_cpl_handlers[i] = do_bad_cpl;
1340 t3_init_listen_cpl_handlers();
/*
 * TOE-core attach callback: wire up the toedev method table, query DDP and
 * RX page parameters from the t3cdev, size the page-pod map, and register
 * the sysctl tree.
 * NOTE(review): error checks after the ctl() calls and several assignments
 * are missing from this excerpt; code intentionally left untouched.
 */
1344 t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
1346 struct tom_data *t = TOM_DATA(dev);
1347 struct t3cdev *cdev = t->cdev;
1348 struct ddp_params ddp;
1349 struct ofld_page_info rx_page_info;
1352 t3_init_tunables(t);
1353 mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF);
1354 CTR2(KTR_TOM, "t3_toe_attach dev=%p entry=%p", dev, entry);
/* Install the toedev method table. */
1356 dev->tod_can_offload = can_offload;
1357 dev->tod_connect = t3_connect;
1358 dev->tod_ctl = tom_ctl;
1360 dev->tod_failover = t3_failover;
1362 err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp);
1366 err = cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info);
1370 t->ddp_llimit = ddp.llimit;
1371 t->ddp_ulimit = ddp.ulimit;
1373 t->rx_page_size = rx_page_info.page_size;
1374 /* OK if this fails, we just can't do DDP */
1375 t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
1376 t->ppod_map = malloc(t->nppods, M_DEVBUF, M_NOWAIT|M_ZERO);
1378 mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF);
1381 t3_sysctl_register(cdev->adapter, &t->conf);
1386 cxgb_toe_listen_start(void *unused, struct tcpcb *tp)
1388 struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
1391 mtx_lock(&cxgb_list_lock);
1392 TAILQ_FOREACH(p, &cxgb_list, entry) {
1393 t3_listen_start(&p->tdev, so, p->cdev);
1395 mtx_unlock(&cxgb_list_lock);
1399 cxgb_toe_listen_stop(void *unused, struct tcpcb *tp)
1401 struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
1404 mtx_lock(&cxgb_list_lock);
1405 TAILQ_FOREACH(p, &cxgb_list, entry) {
1406 if (tp->t_state == TCPS_LISTEN)
1407 t3_listen_stop(&p->tdev, so, p->cdev);
1409 mtx_unlock(&cxgb_list_lock);
1413 cxgb_toe_listen_start_handler(struct inpcb *inp, void *arg)
1415 struct tcpcb *tp = intotcpcb(inp);
1417 if (tp->t_state == TCPS_LISTEN)
1418 cxgb_toe_listen_start(NULL, tp);
1422 cxgb_register_listeners(void)
1425 inp_apply_all(cxgb_toe_listen_start_handler, NULL);
/*
 * Module initialization tail: set up CPL I/O, register with the TOE core,
 * install routing/listen eventhandlers, register the default CPL
 * dispatchers, and finally register as a cxgb client.
 * NOTE(review): the function headers (t3_tom_init) and surrounding braces
 * are missing from this excerpt; code intentionally left untouched.
 */
1431 init_cpl_handlers();
1432 if (t3_init_cpl_io() < 0) {
1434 "Unable to initialize cpl io ops\n");
1437 t3_init_socket_ops();
1439 /* Register with the TOE device layer. */
1441 if (register_tom(&t3_tom_info) != 0) {
1443 "Unable to register Chelsio T3 TCP offload module.\n");
1447 rw_init(&adapter_list_lock, "ofld adap list");
1448 TAILQ_INIT(&adapter_list);
1449 EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event,
1450 NULL, EVENTHANDLER_PRI_ANY);
1451 EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event,
1452 NULL, EVENTHANDLER_PRI_ANY);
1454 mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
/*
 * NOTE(review): listen_tag is overwritten by the second registration, so
 * the listen_start tag can never be deregistered — confirm intent.
 */
1455 listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
1456 cxgb_toe_listen_start, NULL, EVENTHANDLER_PRI_ANY);
1457 listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
1458 cxgb_toe_listen_stop, NULL, EVENTHANDLER_PRI_ANY);
1459 TAILQ_INIT(&cxgb_list);
/* Default dispatchers for the CPLs the TOM cares about. */
1463 t3_register_cpl_handler(CPL_PASS_OPEN_RPL, do_stid_rpl);
1464 t3_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_stid_rpl);
1465 t3_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_cr);
1466 t3_register_cpl_handler(CPL_PASS_ESTABLISH, do_hwtid_rpl);
1467 t3_register_cpl_handler(CPL_ABORT_RPL_RSS, do_hwtid_rpl);
1468 t3_register_cpl_handler(CPL_ABORT_RPL, do_hwtid_rpl);
1469 t3_register_cpl_handler(CPL_RX_URG_NOTIFY, do_hwtid_rpl);
1470 t3_register_cpl_handler(CPL_RX_DATA, do_hwtid_rpl);
1471 t3_register_cpl_handler(CPL_TX_DATA_ACK, do_hwtid_rpl);
1472 t3_register_cpl_handler(CPL_TX_DMA_ACK, do_hwtid_rpl);
1473 t3_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
1474 t3_register_cpl_handler(CPL_PEER_CLOSE, do_hwtid_rpl);
1475 t3_register_cpl_handler(CPL_CLOSE_CON_RPL, do_hwtid_rpl);
1476 t3_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req_rss);
1477 t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
1478 t3_register_cpl_handler(CPL_RDMA_TERMINATE, do_term);
1479 t3_register_cpl_handler(CPL_RDMA_EC_STATUS, do_hwtid_rpl);
1480 t3_register_cpl_handler(CPL_RX_DATA_DDP, do_hwtid_rpl);
1481 t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl);
1482 t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl);
1483 t3_register_cpl_handler(CPL_GET_TCB_RPL, do_hwtid_rpl);
1484 t3_register_cpl_handler(CPL_SET_TCB_RPL, do_hwtid_rpl);
1486 /* Register to offloading devices */
1487 cxgb_register_client(&t3c_tom_client);
/* Module event handler: only MOD_LOAD is supported; unload is refused. */
1493 t3_tom_load(module_t mod, int cmd, void *arg)
1504 printf("uhm, ... unloading isn't really supported for toe\n");
1516 static moduledata_t mod_data= {
1521 MODULE_VERSION(t3_tom, 1);
1522 MODULE_DEPEND(t3_tom, toecore, 1, 1, 1);
1523 MODULE_DEPEND(t3_tom, if_cxgb, 1, 1, 1);
1524 DECLARE_MODULE(t3_tom, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);