1 /**************************************************************************
3 Copyright (c) 2007, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/fcntl.h>
38 #include <sys/limits.h>
40 #include <sys/eventhandler.h>
42 #include <sys/module.h>
43 #include <sys/condvar.h>
44 #include <sys/mutex.h>
45 #include <sys/socket.h>
46 #include <sys/sockopt.h>
47 #include <sys/sockstate.h>
48 #include <sys/sockbuf.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 #include <sys/taskqueue.h>
54 #include <net/route.h>
56 #include <netinet/in.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_systm.h>
59 #include <netinet/in_var.h>
61 #include <cxgb_osdep.h>
62 #include <sys/mbufq.h>
64 #include <netinet/in_pcb.h>
66 #include <ulp/tom/cxgb_tcp_offload.h>
67 #include <netinet/tcp.h>
68 #include <netinet/tcp_var.h>
69 #include <netinet/tcp_offload.h>
70 #include <netinet/tcp_fsm.h>
72 #include <cxgb_include.h>
74 #include <net/if_vlan_var.h>
75 #include <net/route.h>
78 #include <common/cxgb_firmware_exports.h>
79 #include <common/cxgb_tcb.h>
80 #include <cxgb_include.h>
81 #include <common/cxgb_ctl_defs.h>
82 #include <common/cxgb_t3_cpl.h>
83 #include <cxgb_offload.h>
84 #include <ulp/toecore/cxgb_toedev.h>
85 #include <ulp/tom/cxgb_l2t.h>
86 #include <ulp/tom/cxgb_tom.h>
87 #include <ulp/tom/cxgb_defs.h>
88 #include <ulp/tom/cxgb_t3_ddp.h>
89 #include <ulp/tom/cxgb_toepcb.h>
90 #include <ulp/tom/cxgb_tcp.h>
/* Tunable/sysctl: enable TOE at module init (hw.t3toe.activated). */
static int activated = 1;
TUNABLE_INT("hw.t3toe.activated", &activated);
SYSCTL_NODE(_hw, OID_AUTO, t3toe, CTLFLAG_RD, 0, "T3 toe driver parameters");
SYSCTL_UINT(_hw_t3toe, OID_AUTO, activated, CTLFLAG_RDTUN, &activated, 0,
    "enable TOE at init time");
/* All offload-capable adapters; protected by adapter_list_lock. */
TAILQ_HEAD(, adapter) adapter_list;
static struct rwlock adapter_list_lock;

/* All attached TOM instances; protected by cxgb_list_lock. */
static TAILQ_HEAD(, tom_data) cxgb_list;
static struct mtx cxgb_list_lock;
/* Active-open TID pool geometry. */
static const unsigned int MAX_ATIDS = 64 * 1024;
static const unsigned int ATID_BASE = 0x100000;

static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry);
static void cxgb_register_listeners(void);
static void t3c_tom_add(struct t3cdev *cdev);

/*
 * Handlers for each CPL opcode
 */
static cxgb_cpl_handler_func tom_cpl_handlers[256];

/* Tag for the TCP listen start/stop event handlers registered at init. */
static eventhandler_tag listen_tag;
/* Device IDs this TOM claims (all T3 silicon revisions). */
static struct offload_id t3_toe_id_tab[] = {
	{ TOE_ID_CHELSIO_T3, 0 },
	{ TOE_ID_CHELSIO_T3B, 0 },
	{ TOE_ID_CHELSIO_T3C, 0 },

/* Registration info handed to the TOE framework. */
static struct tom_info t3_tom_info = {
	.ti_attach = t3_toe_attach,
	.ti_id_table = t3_toe_id_tab,
	.ti_name = "Chelsio-T3"

/* cxgb base-driver client: receives CPLs via tom_cpl_handlers. */
struct cxgb_client t3c_tom_client = {
	.handlers = tom_cpl_handlers,
/*
 * Diagnostic: read a connection's hardware TCB out of CM (MC7) memory and
 * dump it via KTR.  Errors are reported and otherwise ignored.
 */
cxgb_log_tcb(struct adapter *sc, unsigned int tid)
	uint64_t *tcb = (uint64_t *)buf;
	struct mc7 *mem = &sc->cm;

	/* MC7 backdoor reads operate on 8-byte words; TCBs are TCB_SIZE bytes. */
	error = t3_mc7_bd_read(mem, tid*TCB_SIZE/8, TCB_SIZE/8, tcb);
		printf("cxgb_tcb_log failed\n");

	CTR1(KTR_CXGB, "TCB tid=%u", tid);
	/* Dump the TCB 32 bytes (four 64-bit words) per iteration. */
	for (i = 0; i < TCB_SIZE / 32; i++) {
		CTR5(KTR_CXGB, "%1d: %08x %08x %08x %08x",
		    i, (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
		    (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
		CTR4(KTR_CXGB, " %08x %08x %08x %08x",
		    (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
		    (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
171 * Add an skb to the deferred skb queue for processing from process context.
174 t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler)
176 struct tom_data *td = TOM_DATA(dev);
178 m_set_handler(m, handler);
179 mtx_lock(&td->deferq.lock);
181 mbufq_tail(&td->deferq, m);
182 if (mbufq_len(&td->deferq) == 1)
183 taskqueue_enqueue(td->tq, &td->deferq_task);
184 mtx_lock(&td->deferq.lock);
192 toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT|M_ZERO);
/* Initialize a fresh toepcb: one reference and its wait condvar. */
toepcb_init(struct toepcb *toep)
	toep->tp_refcount = 1;
	cv_init(&toep->tp_cv, "toep cv");
/* Take an additional reference on a toepcb. */
toepcb_hold(struct toepcb *toep)
	atomic_add_acq_int(&toep->tp_refcount, 1);
/*
 * Drop a reference on a toepcb; the last reference frees it.
 * NOTE(review): the refcount is read non-atomically before the atomic
 * decrement, so concurrent final releases could race — confirm callers
 * serialize the last release.
 */
toepcb_release(struct toepcb *toep)
	if (toep->tp_refcount == 1) {

	atomic_add_acq_int(&toep->tp_refcount, -1);
/*
 * Add a T3 offload device to the list of devices we are managing.
 */
t3cdev_add(struct tom_data *t)
	mtx_lock(&cxgb_list_lock);
	TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
	mtx_unlock(&cxgb_list_lock);
/* Map a t3cdev's hardware type to its TOE id for offload_id matching. */
cdev2type(struct t3cdev *cdev)
	switch (cdev->type) {
		type = TOE_ID_CHELSIO_T3;
		type = TOE_ID_CHELSIO_T3B;
		type = TOE_ID_CHELSIO_T3C;
/*
 * Allocate and initialize the TID tables.  Returns 0 on success.
 * A single contiguous allocation backs the tid, stid, and atid tables.
 */
init_tid_tabs(struct tid_info *t, unsigned int ntids,
    unsigned int natids, unsigned int nstids,
    unsigned int atid_base, unsigned int stid_base)
	unsigned long size = ntids * sizeof(*t->tid_tab) +
	    natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab);

	t->tid_tab = cxgb_alloc_mem(size);

	/* stid and atid tables live immediately after the tid table. */
	t->stid_tab = (union listen_entry *)&t->tid_tab[ntids];
	t->atid_tab = (union active_open_entry *)&t->stid_tab[nstids];
	t->stid_base = stid_base;
	t->atid_base = atid_base;
	t->stids_in_use = t->atids_in_use = 0;
	atomic_set_int(&t->tids_in_use, 0);
	mtx_init(&t->stid_lock, "stid", NULL, MTX_DUPOK|MTX_DEF);
	mtx_init(&t->atid_lock, "atid", NULL, MTX_DUPOK|MTX_DEF);

	/*
	 * Setup the free lists for stid_tab and atid_tab.
	 */
	t->stid_tab[nstids - 1].next = &t->stid_tab[nstids];
	t->sfree = t->stid_tab;
	t->atid_tab[natids - 1].next = &t->atid_tab[natids];
	t->afree = t->atid_tab;
/* Tear down the TID table locks and release the backing memory. */
free_tid_maps(struct tid_info *t)
	mtx_destroy(&t->stid_lock);
	mtx_destroy(&t->atid_lock);
	cxgb_free_mem(t->tid_tab);
/* Insert an adapter into the global offload adapter list. */
add_adapter(adapter_t *adap)
	rw_wlock(&adapter_list_lock);
	TAILQ_INSERT_TAIL(&adapter_list, adap, adapter_entry);
	rw_wunlock(&adapter_list_lock);
/* Remove an adapter from the global offload adapter list. */
remove_adapter(adapter_t *adap)
	rw_wlock(&adapter_list_lock);
	TAILQ_REMOVE(&adapter_list, adap, adapter_entry);
	rw_wunlock(&adapter_list_lock);
/*
 * Populate a TID_RELEASE WR.  The mbuf must be already properly sized.
 */
mk_tid_release(struct mbuf *m, unsigned int tid)
	struct cpl_tid_release *req;

	m_set_priority(m, CPL_PRIORITY_SETUP);
	req = mtod(m, struct cpl_tid_release *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
/*
 * Task handler: drain the deferred TID-release list, sending a
 * CPL_TID_RELEASE for each entry.  Entries are linked through their ctx
 * pointers (see cxgb_queue_tid_release()).
 */
t3_process_tid_release_list(void *data, int pending)
	struct t3cdev *tdev = data;
	struct t3c_data *td = T3C_DATA (tdev);

	mtx_lock(&td->tid_release_lock);
	while (td->tid_release_list) {
		struct toe_tid_entry *p = td->tid_release_list;

		td->tid_release_list = (struct toe_tid_entry *)p->ctx;
		/* Drop the lock across the (possibly sleeping) allocation/send. */
		mtx_unlock(&td->tid_release_lock);
		m = m_get(M_WAIT, MT_DATA);
		/* The entry's index within tid_tab is the hardware TID. */
		mk_tid_release(m, p - td->tid_maps.tid_tab);
		cxgb_ofld_send(tdev, m);
		mtx_lock(&td->tid_release_lock);
	mtx_unlock(&td->tid_release_lock);
/*
 * Bring up offload support on an adapter: query device parameters, set up
 * the L2 table and TID maps, install receive/ARP callbacks, and add the
 * adapter to the global list.  ARP glue is hooked when the first adapter
 * appears.
 */
cxgb_offload_activate(struct adapter *adapter)
	struct t3cdev *dev = &adapter->tdev;
	struct tid_range stid_range, tid_range;
	struct mtutab mtutab;
	unsigned int l2t_capacity;

	t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);

	dev->adapter = adapter;

	/* Query all required parameters up front; abort if any query fails. */
	if (dev->ctl(dev, GET_TX_MAX_CHUNK, &t->tx_max_chunk) < 0 ||
	    dev->ctl(dev, GET_MAX_OUTSTANDING_WR, &t->max_wrs) < 0 ||
	    dev->ctl(dev, GET_L2T_CAPACITY, &l2t_capacity) < 0 ||
	    dev->ctl(dev, GET_MTUS, &mtutab) < 0 ||
	    dev->ctl(dev, GET_TID_RANGE, &tid_range) < 0 ||
	    dev->ctl(dev, GET_STID_RANGE, &stid_range) < 0) {
		device_printf(adapter->dev, "%s: dev->ctl check failed\n", __FUNCTION__);

	L2DATA(dev) = t3_init_l2t(l2t_capacity);
		device_printf(adapter->dev, "%s: t3_init_l2t failed\n", __FUNCTION__);

	/* Use half the TID space (capped at MAX_ATIDS) for active opens. */
	natids = min(tid_range.num / 2, MAX_ATIDS);
	err = init_tid_tabs(&t->tid_maps, tid_range.num, natids,
	    stid_range.num, ATID_BASE, stid_range.base);
		device_printf(adapter->dev, "%s: init_tid_tabs failed\n", __FUNCTION__);

	t->mtus = mtutab.mtus;
	t->nmtus = mtutab.size;

	TASK_INIT(&t->tid_release_task, 0 /* XXX? */, t3_process_tid_release_list, dev);
	mtx_init(&t->tid_release_lock, "tid release", NULL, MTX_DUPOK|MTX_DEF);

	dev->recv = process_rx;
	dev->arp_update = t3_l2t_update;
	/* Register netevent handler once */
	if (TAILQ_EMPTY(&adapter_list)) {
#if defined(CONFIG_CHELSIO_T3_MODULE)
		if (prepare_arp_with_t3core())
			log(LOG_ERR, "Unable to set offload capabilities\n");

	CTR1(KTR_CXGB, "adding adapter %p", adapter);
	add_adapter(adapter);
	device_printf(adapter->dev, "offload started\n");
	adapter->flags |= CXGB_OFLD_INIT;

	/* Error path: undo the L2 table allocation. */
	t3_free_l2t(L2DATA(dev));
/*
 * Undo cxgb_offload_activate(): unlink the adapter, free the TID maps and
 * L2 table, and unhook ARP glue when the last adapter goes away.
 */
cxgb_offload_deactivate(struct adapter *adapter)
	struct t3cdev *tdev = &adapter->tdev;
	struct t3c_data *t = T3C_DATA(tdev);

	printf("removing adapter %p\n", adapter);
	remove_adapter(adapter);
	if (TAILQ_EMPTY(&adapter_list)) {
#if defined(CONFIG_CHELSIO_T3_MODULE)
		restore_arp_sans_t3core();

	free_tid_maps(&t->tid_maps);
	T3C_DATA(tdev) = NULL;
	t3_free_l2t(L2DATA(tdev));

	mtx_destroy(&t->tid_release_lock);
/*
 * Sends an mbuf to a T3C driver after dealing with any active network taps.
 */
cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m)
	r = dev->send(dev, m);
/*
 * Find the port ifnet whose MAC matches, optionally descending into a VLAN
 * interface when a VLAN id is supplied.
 */
static struct ifnet *
get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan)
	for_each_port(adapter, i) {
		const struct vlan_group *grp;
		const struct port_info *p = &adapter->port[i];
		struct ifnet *ifp = p->ifp;

		if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) {
			/* vlan == EVL_VLID_MASK appears to mean "no tag" — verify. */
			if (vlan && vlan != EVL_VLID_MASK) {
				dev = grp ? grp->vlan_devices[vlan] : NULL;
/*
 * Fix up MAC TX/RX control registers after a port failover; only needed on
 * rev-0 (T3A) silicon.
 */
failover_fixup(adapter_t *adapter, int port)
	if (adapter->params.rev == 0) {
		struct ifnet *ifp = adapter->port[port].ifp;
		struct cmac *mac = &adapter->port[port].mac;

		if (!(ifp->if_flags & IFF_UP)) {
			/* Failover triggered by the interface ifdown */
			t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset,
			/* read-back presumably flushes the posted write */
			t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset);
			/* Failover triggered by the interface link down */
			t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0);
			t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset);
			t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset,
/*
 * Handle iSCSI ULP control requests: report or program the ULP RX iSCSI
 * region limits, tag mask, and maximum PDU sizes.
 */
cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data)
	struct ulp_iscsi_info *uiip = data;

	case ULP_ISCSI_GET_PARAMS:
		uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
		uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
		uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
		/*
		 * On tx, the iscsi pdu has to be <= tx page size and has to
		 * fit into the Tx PM FIFO.
		 */
		uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
		    t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
		/* on rx, the iscsi pdu has to be < rx page size and the
		   whole pdu + cpl headers has to fit into one sge buffer */
		/* also check the max rx data length programmed in TP */
		uiip->max_rxsz = min(uiip->max_rxsz,
		    ((t3_read_reg(adapter, A_TP_PARA_REG2))
		    >> S_MAXRXDATA) & M_MAXRXDATA);
	case ULP_ISCSI_SET_PARAMS:
		t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
/* Response queue used for RDMA events. */
#define ASYNC_NOTIF_RSPQ 0

/*
 * Handle RDMA ULP control requests: parameter queries, CQ context
 * operations, MC7 memory reads, and control-QP setup.  SGE context
 * operations run under the sge.reg_lock spin mutex because they may be
 * invoked from any context.
 */
cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data)
	case RDMA_GET_PARAMS: {
		struct rdma_info *req = data;

		req->udbell_physbase = rman_get_start(adapter->udbs_res);
		req->udbell_len = rman_get_size(adapter->udbs_res);
		req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT);
		req->tpt_top = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT);
		req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT);
		req->pbl_top = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT);
		req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT);
		req->rqt_top = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT);
		req->kdb_addr = (void *)((unsigned long)rman_get_virtual(adapter->regs_res) + A_SG_KDOORBELL); break;
		struct rdma_cq_op *req = data;

		/* may be called in any context */
		mtx_lock_spin(&adapter->sge.reg_lock);
		ret = t3_sge_cqcntxt_op(adapter, req->id, req->op,
		mtx_unlock_spin(&adapter->sge.reg_lock);
		struct ch_mem_range *t = data;

		/* MC7 reads must be 8-byte aligned in both address and length. */
		if ((t->addr & 7) || (t->len & 7))
		if (t->mem_id == MEM_CM)
		else if (t->mem_id == MEM_PMRX)
			mem = &adapter->pmrx;
		else if (t->mem_id == MEM_PMTX)
			mem = &adapter->pmtx;

		ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf);
	case RDMA_CQ_SETUP: {
		struct rdma_cq_setup *req = data;

		mtx_lock_spin(&adapter->sge.reg_lock);
		ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr,
		    req->size, ASYNC_NOTIF_RSPQ,
		    req->ovfl_mode, req->credits,
		mtx_unlock_spin(&adapter->sge.reg_lock);
	case RDMA_CQ_DISABLE:
		mtx_lock_spin(&adapter->sge.reg_lock);
		ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data);
		mtx_unlock_spin(&adapter->sge.reg_lock);
	case RDMA_CTRL_QP_SETUP: {
		struct rdma_ctrlqp_setup *req = data;

		mtx_lock_spin(&adapter->sge.reg_lock);
		ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0,
		    SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ,
		    req->base_addr, req->size,
		    FW_RI_TID_START, 1, 0);
		mtx_unlock_spin(&adapter->sge.reg_lock);
/*
 * Central control dispatcher for the offload device: answers parameter
 * queries directly and forwards iSCSI/RDMA requests to their dedicated
 * handlers (only while offload is running).
 */
cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data)
	struct adapter *adapter = tdev2adap(tdev);
	struct tid_range *tid;
	struct iff_mac *iffmacp;
	struct ddp_params *ddpp;
	struct adap_ports *ports;
	struct ofld_page_info *rx_page_info;
	struct tp_params *tp = &adapter->params.tp;

	case GET_MAX_OUTSTANDING_WR:
		*(unsigned int *)data = FW_WR_NUM;
		*(unsigned int *)data = WR_FLITS;
	case GET_TX_MAX_CHUNK:
		*(unsigned int *)data = 1 << 20; /* 1MB */
		/* TID space: MC5 size minus routes, filters and servers. */
		tid->num = t3_mc5_size(&adapter->mc5) -
		    adapter->params.mc5.nroutes -
		    adapter->params.mc5.nfilters -
		    adapter->params.mc5.nservers;
		/* Server TIDs sit above the filter and route regions. */
		tid->num = adapter->params.mc5.nservers;
		tid->base = t3_mc5_size(&adapter->mc5) - tid->num -
		    adapter->params.mc5.nfilters -
		    adapter->params.mc5.nroutes;
	case GET_L2T_CAPACITY:
		*(unsigned int *)data = 2048;
		mtup->mtus = adapter->params.mtus;
	case GET_IFF_FROM_MAC:
		iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr,
		    iffmacp->vlan_tag & EVL_VLID_MASK);
		ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT);
		ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT);
		ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK);
		ports->nports = adapter->params.nports;
		for_each_port(adapter, port)
			ports->lldevs[port] = adapter->port[port].ifp;
		t3_port_failover(adapter, port);
		failover_fixup(adapter, port);
		t3_failover_done(adapter, port);
		t3_failover_clear(adapter);
	case GET_RX_PAGE_INFO:
		rx_page_info->page_size = tp->rx_pg_size;
		rx_page_info->num = tp->rx_num_pgs;
	case ULP_ISCSI_GET_PARAMS:
	case ULP_ISCSI_SET_PARAMS:
		if (!offload_running(adapter))
		return cxgb_ulp_iscsi_ctl(adapter, req, data);
	case RDMA_GET_PARAMS:
	case RDMA_CQ_DISABLE:
	case RDMA_CTRL_QP_SETUP:
		if (!offload_running(adapter))
		return cxgb_rdma_ctl(adapter, req, data);
/*
 * Allocate a TOM data structure,
 * initialize its cpl_handlers
 * and register it as a T3C client
 */
t3c_tom_add(struct t3cdev *cdev)
	struct adap_ports *port_info;

	t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);

	/* Install the send/ctl entry points on the low-level device. */
	cdev->send = t3_offload_tx;
	cdev->ctl = cxgb_offload_ctl;

	if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)

	port_info = malloc(sizeof(*port_info), M_CXGB, M_NOWAIT|M_ZERO);
	if (cdev->ctl(cdev, GET_PORTS, port_info) < 0)

	t3_init_wr_tab(wr_len);
	t->client = &t3c_tom_client;

	/* Register TCP offload device */
	tdev->tod_ttid = cdev2type(cdev);
	tdev->tod_lldev = cdev->lldev;

	if (register_toedev(tdev, "toe%d")) {
		printf("unable to register offload device");

	/* Advertise and enable TOE on every port's ifnet. */
	for (i = 0; i < port_info->nports; i++) {
		struct ifnet *ifp = port_info->lldevs[i];

		CTR1(KTR_TOM, "enabling toe on %p", ifp);
		ifp->if_capabilities |= IFCAP_TOE4;
		ifp->if_capenable |= IFCAP_TOE4;

	t->ports = port_info;

	/* Add device to the list of offload devices */

	/* Activate TCP offload device */
	cxgb_offload_activate(TOM_DATA(tdev)->cdev->adapter);

	activate_offload(tdev);
	cxgb_register_listeners();

	/* Error paths. */
	printf("out_free_all fail\n");
	free(port_info, M_CXGB);

	printf("out_free_tom fail\n");
/*
 * CPL_ACT_OPEN_RPL: dispatch to the client owning the active-open TID.
 */
do_act_open_rpl(struct t3cdev *dev, struct mbuf *m)
	struct cpl_act_open_rpl *rpl = cplhdr(m);
	unsigned int atid = G_TID(ntohl(rpl->atid));
	struct toe_tid_entry *toe_tid;

	toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
	if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers &&
	    toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) {

		return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m,

	log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
	    dev->name, CPL_ACT_OPEN_RPL);
	return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
/*
 * Dispatch a CPL carrying a server TID (e.g. PASS_OPEN_RPL) to its client.
 */
do_stid_rpl(struct t3cdev *dev, struct mbuf *m)
	union opcode_tid *p = cplhdr(m);
	unsigned int stid = G_TID(ntohl(p->opcode_tid));
	struct toe_tid_entry *toe_tid;

	toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
	if (toe_tid->ctx && toe_tid->client->handlers &&
	    toe_tid->client->handlers[p->opcode]) {

		return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx);

	log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
	    dev->name, p->opcode);
	return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
/*
 * Dispatch a CPL carrying a hardware (connection) TID to its client.
 */
do_hwtid_rpl(struct t3cdev *dev, struct mbuf *m)
	union opcode_tid *p = cplhdr(m);
	struct toe_tid_entry *toe_tid;

	DPRINTF("do_hwtid_rpl opcode=0x%x\n", p->opcode);
	hwtid = G_TID(ntohl(p->opcode_tid));

	toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
	if (toe_tid->ctx && toe_tid->client->handlers &&
	    toe_tid->client->handlers[p->opcode]) {

		return toe_tid->client->handlers[p->opcode]
		    (dev, m, toe_tid->ctx);

	log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
	    dev->name, p->opcode);
	return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
/*
 * CPL_PASS_ACCEPT_REQ: dispatch an incoming connection request to the
 * client listening on the server TID.
 */
do_cr(struct t3cdev *dev, struct mbuf *m)
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
	struct toe_tid_entry *toe_tid;

	toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
	if (toe_tid->ctx && toe_tid->client->handlers &&
	    toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) {

		return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]
		    (dev, m, toe_tid->ctx);

	log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
	    dev->name, CPL_PASS_ACCEPT_REQ);
	return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
/*
 * CPL_ABORT_REQ_RSS: dispatch to the owning client; if the connection is
 * unknown, synthesize and send a CPL_ABORT_RPL so the hardware cleans up.
 */
do_abort_req_rss(struct t3cdev *dev, struct mbuf *m)
	union opcode_tid *p = cplhdr(m);
	unsigned int hwtid = G_TID(ntohl(p->opcode_tid));
	struct toe_tid_entry *toe_tid;

	toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
	if (toe_tid->ctx && toe_tid->client->handlers &&
	    toe_tid->client->handlers[p->opcode]) {

		return toe_tid->client->handlers[p->opcode]
		    (dev, m, toe_tid->ctx);

	struct cpl_abort_req_rss *req = cplhdr(m);
	struct cpl_abort_rpl *rpl;

	/* Inner 'm' shadows the received mbuf: this is the reply mbuf. */
	struct mbuf *m = m_get(M_NOWAIT, MT_DATA);
		log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n");

	m_set_priority(m, CPL_PRIORITY_DATA);
		htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
	rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req)));
		htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req)));
	rpl->cmd = req->status;
	cxgb_ofld_send(dev, m);

	return (CPL_RET_BUF_DONE);
924 do_act_establish(struct t3cdev *dev, struct mbuf *m)
926 struct cpl_act_establish *req;
928 struct toe_tid_entry *toe_tid;
931 atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
932 toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
933 if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
934 toe_tid->client->handlers[CPL_ACT_ESTABLISH]) {
936 return toe_tid->client->handlers[CPL_ACT_ESTABLISH]
937 (dev, m, toe_tid->ctx);
940 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
941 dev->name, CPL_PASS_ACCEPT_REQ);
942 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
/*
 * CPL_RDMA_TERMINATE: the TID and opcode arrive via mbuf metadata
 * (priority / csum_data) rather than a CPL header; dispatch to the client.
 */
do_term(struct t3cdev *dev, struct mbuf *m)
	unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff;
	unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data));
	struct toe_tid_entry *toe_tid;

	toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
	if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
	    toe_tid->client->handlers[opcode]) {

		return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx);

	log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
	return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
/*
 * Process a received packet with an unknown/unexpected CPL opcode.
 */
do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
	log(LOG_ERR, "%s: received bad CPL command %u\n", cdev->name,
	    0xFF & *mtod(m, unsigned int *));
	return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
978 * Add a new handler to the CPL dispatch table. A NULL handler may be supplied
979 * to unregister an existing handler.
982 t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h)
984 if (opcode < UCHAR_MAX)
985 tom_cpl_handlers[opcode] = h ? h : do_bad_cpl;
987 log(LOG_ERR, "Chelsio T3 TOM: handler registration for "
988 "opcode %u failed\n", opcode);
/*
 * Make a preliminary determination if a connection can be offloaded.  It's OK
 * to fail the offload later if we say we can offload here.  For now this
 * always accepts the offload request unless there are IP options.
 */
can_offload(struct toedev *dev, struct socket *so)
	struct tom_data *tomd = TOM_DATA(dev);
	struct t3cdev *cdev = T3CDEV(dev->tod_lldev);
	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;

	/* Offload iff: no IP options, TOM activated, TID budget not exhausted. */
	return so_sotoinpcb(so)->inp_depend4.inp4_options == NULL &&
	    tomd->conf.activated &&
	    (tomd->conf.max_conn < 0 ||
	    atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
/* Generic TOE control entry point: forward to the underlying t3cdev. */
tom_ctl(struct toedev *dev, unsigned int req, void *data)
	struct tom_data *t = TOM_DATA(dev);
	struct t3cdev *cdev = t->cdev;

		return cdev->ctl(cdev, req, data);

	return (EOPNOTSUPP);
/*
 * Free an active-open TID, returning its entry to the free list
 * (and, presumably, the previously associated context to the caller).
 */
cxgb_free_atid(struct t3cdev *tdev, int atid)
	struct tid_info *t = &(T3C_DATA(tdev))->tid_maps;
	union active_open_entry *p = atid2entry(t, atid);
	void *ctx = p->toe_tid.ctx;

	mtx_lock(&t->atid_lock);
	mtx_unlock(&t->atid_lock);
/*
 * Free a server TID and return it to the free pool.
 */
cxgb_free_stid(struct t3cdev *tdev, int stid)
	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
	union listen_entry *p = stid2entry(t, stid);

	mtx_lock(&t->stid_lock);
	mtx_unlock(&t->stid_lock);
/*
 * Return the listen context associated with a server TID.
 * (Previous comment wrongly said this frees the TID.)
 */
cxgb_get_lctx(struct t3cdev *tdev, int stid)
	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
	union listen_entry *p = stid2entry(t, stid);

	return (p->toe_tid.ctx);
/* Associate a client and context with a hardware TID and count it in use. */
cxgb_insert_tid(struct t3cdev *tdev, struct cxgb_client *client,
    void *ctx, unsigned int tid)
	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;

	t->tid_tab[tid].client = client;
	t->tid_tab[tid].ctx = ctx;
	atomic_add_int(&t->tids_in_use, 1);
/* use ctx as a next pointer in the tid release list */
cxgb_queue_tid_release(struct t3cdev *tdev, unsigned int tid)
	struct t3c_data *td = T3C_DATA (tdev);
	struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid];

	CTR0(KTR_TOM, "queuing tid release\n");

	mtx_lock(&td->tid_release_lock);
	/* Push the entry onto the release list; the task drains it later. */
	p->ctx = td->tid_release_list;
	td->tid_release_list = p;
	taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task);
	mtx_unlock(&td->tid_release_lock);
/*
 * Remove a tid from the TID table.  A client may defer processing its last
 * CPL message if it is locked at the time it arrives, and while the message
 * sits in the client's backlog the TID may be reused for another connection.
 * To handle this we atomically switch the TID association if it still points
 * to the original client context.
 */
cxgb_remove_tid(struct t3cdev *tdev, void *ctx, unsigned int tid)
	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;

	if (tid >= t->ntids)
		panic("tid=%d >= t->ntids=%d", tid, t->ntids);

	/*
	 * NOTE(review): per the comment above, the intent is to swap
	 * ctx -> NULL only while the entry still equals ctx, but the
	 * argument order here (old = NULL, new = ctx) looks inverted —
	 * verify against atomic_cmpset_ptr(9).
	 */
	if (tdev->type == T3A)
		atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx, (long)NULL, (long)ctx);

	m = m_get(M_NOWAIT, MT_DATA);
	if (__predict_true(m != NULL)) {
		mk_tid_release(m, tid);
		CTR1(KTR_CXGB, "releasing tid=%u", tid);

		cxgb_ofld_send(tdev, m);
		t->tid_tab[tid].ctx = NULL;

	/* No mbuf available: defer the release to the task queue. */
	cxgb_queue_tid_release(tdev, tid);

	atomic_add_int(&t->tids_in_use, -1);
/* Allocate an active-open TID from the free list and bind client/ctx to it. */
cxgb_alloc_atid(struct t3cdev *tdev, struct cxgb_client *client,
	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;

	mtx_lock(&t->atid_lock);
	union active_open_entry *p = t->afree;

	/* The atid is the entry's table index offset by the atid base. */
	atid = (p - t->atid_tab) + t->atid_base;

	p->toe_tid.ctx = ctx;
	p->toe_tid.client = client;

	mtx_unlock(&t->atid_lock);
/* Allocate a server TID from the free list and bind client/ctx to it. */
cxgb_alloc_stid(struct t3cdev *tdev, struct cxgb_client *client,
	struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;

	mtx_lock(&t->stid_lock);
	union listen_entry *p = t->sfree;

	/* The stid is the entry's table index offset by the stid base. */
	stid = (p - t->stid_tab) + t->stid_base;

	p->toe_tid.ctx = ctx;
	p->toe_tid.client = client;

	mtx_unlock(&t->stid_lock);
/* Return whether an ifnet belongs to any registered offload adapter port. */
is_offloading(struct ifnet *ifp)
	struct adapter *adapter;

	rw_rlock(&adapter_list_lock);
	TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) {
		for_each_port(adapter, port) {
			if (ifp == adapter->port[port].ifp) {
				rw_runlock(&adapter_list_lock);

	rw_runlock(&adapter_list_lock);
/* route_arp_update_event hook: push ARP updates for offloaded interfaces. */
cxgb_arp_update_event(void *unused, struct rtentry *rt0,
    uint8_t *enaddr, struct sockaddr *sa)
	if (!is_offloading(rt0->rt_ifp))

	cxgb_neigh_update(rt0, enaddr, sa);
/*
 * route_redirect_event hook: propagate a route redirect when both the old
 * and new routes are on offload-capable interfaces.
 */
cxgb_redirect_event(void *unused, int event, struct rtentry *rt0,
    struct rtentry *rt1, struct sockaddr *sa)
	/*
	 * ignore events on non-offloaded interfaces
	 */
	if (!is_offloading(rt0->rt_ifp))

	/*
	 * Cannot redirect to non-offload device.
	 * NOTE(review): adjacent literals concatenate to
	 * "non-offloaddevice" — missing space in the message.
	 */
	if (!is_offloading(rt1->rt_ifp)) {
		log(LOG_WARNING, "%s: Redirect to non-offload"
		    "device ignored.\n", __FUNCTION__);

	/*
	 * avoid LORs by dropping the route lock but keeping a reference
	 */
	cxgb_redirect(rt0, rt1, sa);
	cxgb_neigh_update(rt1, NULL, sa);
1247 cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa)
1250 if (rt->rt_ifp && is_offloading(rt->rt_ifp) && (rt->rt_ifp->if_flags & IFCAP_TOE)) {
1251 struct t3cdev *tdev = T3CDEV(rt->rt_ifp);
1254 t3_l2t_update(tdev, rt, enaddr, sa);
/*
 * Build and send a CPL_SET_TCB_FIELD repointing a connection's TCB at a
 * new L2T entry.  Allocation failure is logged and the update dropped.
 */
set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
	struct cpl_set_tcb_field *req;

	m = m_gethdr(M_NOWAIT, MT_DATA);
		log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__);

	m_set_priority(m, CPL_PRIORITY_CONTROL);
	req = mtod(m, struct cpl_set_tcb_field *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
	/* Update only the L2T index field of the TCB. */
	req->word = htons(W_TCB_L2T_IX);
	req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX));
	req->val = htobe64(V_TCB_L2T_IX(e->idx));
	tdev->send(tdev, m);
/*
 * Handle a route redirect: allocate an L2T entry for the new route, then
 * walk the TID table notifying each client so established connections can
 * migrate to the new L2T index.
 */
cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa)
	struct ifnet *olddev, *newdev;
	struct tid_info *ti;
	struct t3cdev *tdev;
	struct l2t_entry *e;
	struct toe_tid_entry *te;

	olddev = old->rt_ifp;
	newdev = new->rt_ifp;
	if (!is_offloading(olddev))
	if (!is_offloading(newdev)) {
		/* NOTE(review): literals concatenate to "non-offloaddevice". */
		log(LOG_WARNING, "%s: Redirect to non-offload"
		    "device ignored.\n", __FUNCTION__);

	tdev = T3CDEV(olddev);

	/* Both routes must be served by the same offload device. */
	if (tdev != T3CDEV(newdev)) {
		log(LOG_WARNING, "%s: Redirect to different "
		    "offload device ignored.\n", __FUNCTION__);

	/* Add new L2T entry */
	e = t3_l2t_get(tdev, new, new->rt_ifp, sa);
		log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n",

	/* Walk tid table and notify clients of dst change. */
	ti = &(T3C_DATA (tdev))->tid_maps;
	for (tid=0; tid < ti->ntids; tid++) {
		te = lookup_tid(ti, tid);

		if (te->ctx && te->client && te->client->redirect) {
			update_tcb = te->client->redirect(te->ctx, old, new,
				l2t_hold(L2DATA(tdev), e);
				set_l2t_ix(tdev, tid, e);

	/* Drop the reference taken by t3_l2t_get(). */
	l2t_release(L2DATA(tdev), e);
/*
 * Initialize the CPL dispatch table.
 */
init_cpl_handlers(void)
	/* Default every opcode to the bad-CPL catcher. */
	for (i = 0; i < 256; ++i)
		tom_cpl_handlers[i] = do_bad_cpl;

	t3_init_listen_cpl_handlers();
/*
 * Attach the TOM to a TOE device: install method pointers, query DDP and
 * RX page parameters, and set up the DDP page-pod allocator.
 */
t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
	struct tom_data *t = TOM_DATA(dev);
	struct t3cdev *cdev = t->cdev;
	struct ddp_params ddp;
	struct ofld_page_info rx_page_info;

	t3_init_tunables(t);
	mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF);
	CTR2(KTR_TOM, "t3_toe_attach dev=%p entry=%p", dev, entry);
	/* Adjust TOE activation for this module */
	t->conf.activated = activated;

	dev->tod_can_offload = can_offload;
	dev->tod_connect = t3_connect;
	dev->tod_ctl = tom_ctl;

	dev->tod_failover = t3_failover;

	err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp);

	err = cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info);

	t->ddp_llimit = ddp.llimit;
	t->ddp_ulimit = ddp.ulimit;

	t->rx_page_size = rx_page_info.page_size;
	/* OK if this fails, we just can't do DDP */
	t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
	t->ppod_map = malloc(t->nppods, M_DEVBUF, M_NOWAIT|M_ZERO);

	mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF);

	t3_sysctl_register(cdev->adapter, &t->conf);
/* tcp_offload_listen_start hook: offer the new listener to every TOM. */
cxgb_toe_listen_start(void *unused, struct tcpcb *tp)
	struct socket *so = inp_inpcbtosocket(tp->t_inpcb);

	mtx_lock(&cxgb_list_lock);
	TAILQ_FOREACH(p, &cxgb_list, entry) {
		t3_listen_start(&p->tdev, so, p->cdev);
	mtx_unlock(&cxgb_list_lock);
/* tcp_offload_listen_stop hook: tell every TOM the listener went away. */
cxgb_toe_listen_stop(void *unused, struct tcpcb *tp)
	struct socket *so = inp_inpcbtosocket(tp->t_inpcb);

	mtx_lock(&cxgb_list_lock);
	TAILQ_FOREACH(p, &cxgb_list, entry) {
		if (tp->t_state == TCPS_LISTEN)
			t3_listen_stop(&p->tdev, so, p->cdev);
	mtx_unlock(&cxgb_list_lock);
/* inp_apply_all callback: start offload for sockets already listening. */
cxgb_toe_listen_start_handler(struct inpcb *inp, void *arg)
	struct tcpcb *tp = intotcpcb(inp);

	if (tp->t_state == TCPS_LISTEN)
		cxgb_toe_listen_start(NULL, tp);
/* Walk all inpcbs and begin offloading any existing listeners. */
cxgb_register_listeners(void)
	inp_apply_all(cxgb_toe_listen_start_handler, NULL);
	/*
	 * Module initialization: set up CPL dispatch, register the TOM with
	 * the TOE framework, hook routing and listen events, and register as
	 * a client of the cxgb base driver.
	 */
	init_cpl_handlers();
	if (t3_init_cpl_io() < 0) {
		    "Unable to initialize cpl io ops\n");

	t3_init_socket_ops();

	/* Register with the TOE device layer. */

	if (register_tom(&t3_tom_info) != 0) {
		    "Unable to register Chelsio T3 TCP offload module.\n");

	rw_init(&adapter_list_lock, "ofld adap list");
	TAILQ_INIT(&adapter_list);
	EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event,
	    NULL, EVENTHANDLER_PRI_ANY);
	EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event,
	    NULL, EVENTHANDLER_PRI_ANY);

	mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
	/*
	 * NOTE(review): listen_tag is assigned twice below; the first tag
	 * (listen_start) is overwritten and can never be deregistered.
	 */
	listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
	    cxgb_toe_listen_start, NULL, EVENTHANDLER_PRI_ANY);
	listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
	    cxgb_toe_listen_stop, NULL, EVENTHANDLER_PRI_ANY);
	TAILQ_INIT(&cxgb_list);

	/* Install CPL handlers for every opcode this TOM cares about. */
	t3_register_cpl_handler(CPL_PASS_OPEN_RPL, do_stid_rpl);
	t3_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_stid_rpl);
	t3_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_cr);
	t3_register_cpl_handler(CPL_PASS_ESTABLISH, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_ABORT_RPL_RSS, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_ABORT_RPL, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_RX_URG_NOTIFY, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_RX_DATA, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_TX_DATA_ACK, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_TX_DMA_ACK, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
	t3_register_cpl_handler(CPL_PEER_CLOSE, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_CLOSE_CON_RPL, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req_rss);
	t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
	t3_register_cpl_handler(CPL_RDMA_TERMINATE, do_term);
	t3_register_cpl_handler(CPL_RDMA_EC_STATUS, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_RX_DATA_DDP, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_GET_TCB_RPL, do_hwtid_rpl);
	t3_register_cpl_handler(CPL_SET_TCB_RPL, do_hwtid_rpl);

	/* Register to offloading devices */
	cxgb_register_client(&t3c_tom_client);
/* Module event handler; unloading the TOM is not supported. */
t3_tom_load(module_t mod, int cmd, void *arg)
			printf("uhm, ... unloading isn't really supported for toe\n");
/* Module glue: t3_tom depends on the toecore framework and if_cxgb driver. */
static moduledata_t mod_data= {

MODULE_VERSION(t3_tom, 1);
MODULE_DEPEND(t3_tom, toecore, 1, 1, 1);
MODULE_DEPEND(t3_tom, if_cxgb, 1, 1, 1);
DECLARE_MODULE(t3_tom, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);