1 /**************************************************************************
3 Copyright (c) 2007, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/fcntl.h>
38 #include <sys/limits.h>
40 #include <sys/eventhandler.h>
42 #include <sys/module.h>
43 #include <sys/condvar.h>
44 #include <sys/mutex.h>
45 #include <sys/socket.h>
46 #include <sys/syslog.h>
47 #include <sys/taskqueue.h>
50 #include <net/route.h>
52 #include <netinet/in.h>
53 #include <netinet/in_pcb.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/in_var.h>
57 #include <dev/cxgb/cxgb_osdep.h>
58 #include <dev/cxgb/sys/mbufq.h>
60 #include <netinet/in_pcb.h>
62 #include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
63 #include <netinet/tcp.h>
64 #include <netinet/tcp_var.h>
65 #include <netinet/tcp_offload.h>
66 #include <netinet/tcp_fsm.h>
69 #include <cxgb_include.h>
71 #include <dev/cxgb/cxgb_include.h>
74 #include <net/if_vlan_var.h>
75 #include <net/route.h>
78 #include <dev/cxgb/t3cdev.h>
79 #include <dev/cxgb/common/cxgb_firmware_exports.h>
80 #include <dev/cxgb/common/cxgb_tcb.h>
81 #include <dev/cxgb/cxgb_include.h>
82 #include <dev/cxgb/common/cxgb_ctl_defs.h>
83 #include <dev/cxgb/common/cxgb_t3_cpl.h>
84 #include <dev/cxgb/cxgb_offload.h>
85 #include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
86 #include <dev/cxgb/ulp/tom/cxgb_tom.h>
87 #include <dev/cxgb/ulp/tom/cxgb_defs.h>
88 #include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
89 #include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
90 #include <dev/cxgb/ulp/tom/cxgb_tcp.h>
92 TAILQ_HEAD(, adapter) adapter_list;
93 static struct rwlock adapter_list_lock;
95 static TAILQ_HEAD(, tom_data) cxgb_list;
96 static struct mtx cxgb_list_lock;
97 static const unsigned int MAX_ATIDS = 64 * 1024;
98 static const unsigned int ATID_BASE = 0x100000;
100 static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry);
101 static void cxgb_register_listeners(void);
102 static void t3c_tom_add(struct t3cdev *cdev);
105 * Handlers for each CPL opcode
107 static cxgb_cpl_handler_func tom_cpl_handlers[256];
110 static eventhandler_tag listen_tag;
112 static struct offload_id t3_toe_id_tab[] = {
113 { TOE_ID_CHELSIO_T3, 0 },
114 { TOE_ID_CHELSIO_T3B, 0 },
115 { TOE_ID_CHELSIO_T3C, 0 },
119 static struct tom_info t3_tom_info = {
120 .ti_attach = t3_toe_attach,
121 .ti_id_table = t3_toe_id_tab,
122 .ti_name = "Chelsio-T3"
125 struct cxgb_client t3c_tom_client = {
129 .handlers = tom_cpl_handlers,
134 * Add an skb to the deferred skb queue for processing from process context.
137 t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler)
139 struct tom_data *td = TOM_DATA(dev);
141 m_set_handler(m, handler);
142 mtx_lock(&td->deferq.lock);
144 mbufq_tail(&td->deferq, m);
145 if (mbufq_len(&td->deferq) == 1)
146 taskqueue_enqueue(td->tq, &td->deferq_task);
147 mtx_lock(&td->deferq.lock);
155 toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT|M_ZERO);
165 toepcb_init(struct toepcb *toep)
167 toep->tp_refcount = 1;
168 cv_init(&toep->tp_cv, "toep cv");
172 toepcb_hold(struct toepcb *toep)
174 atomic_add_acq_int(&toep->tp_refcount, 1);
/*
 * Drop a reference on a toepcb; when the last reference is dropped the
 * toepcb is destroyed (the branch body is elided in this view).
 * NOTE(review): reading tp_refcount == 1 and then decrementing is not a
 * single atomic operation -- confirm callers serialize the final release,
 * otherwise two concurrent releases can race.
 */
178 toepcb_release(struct toepcb *toep)
180 if (toep->tp_refcount == 1) {
184 atomic_add_acq_int(&toep->tp_refcount, -1);
189 * Add a T3 offload device to the list of devices we are managing.
192 t3cdev_add(struct tom_data *t)
194 mtx_lock(&cxgb_list_lock);
195 TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
196 mtx_unlock(&cxgb_list_lock);
200 cdev2type(struct t3cdev *cdev)
204 switch (cdev->type) {
206 type = TOE_ID_CHELSIO_T3;
209 type = TOE_ID_CHELSIO_T3B;
212 type = TOE_ID_CHELSIO_T3C;
219 * Allocate and initialize the TID tables. Returns 0 on success.
/*
 * One allocation backs all three tables; stid_tab and atid_tab are carved
 * out of the tail of tid_tab: [ tid_tab | stid_tab | atid_tab ].
 */
222 init_tid_tabs(struct tid_info *t, unsigned int ntids,
223 unsigned int natids, unsigned int nstids,
224 unsigned int atid_base, unsigned int stid_base)
226 unsigned long size = ntids * sizeof(*t->tid_tab) +
227 natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab);
229 t->tid_tab = cxgb_alloc_mem(size);
233 t->stid_tab = (union listen_entry *)&t->tid_tab[ntids];
234 t->atid_tab = (union active_open_entry *)&t->stid_tab[nstids];
237 t->stid_base = stid_base;
240 t->atid_base = atid_base;
242 t->stids_in_use = t->atids_in_use = 0;
243 atomic_set_int(&t->tids_in_use, 0);
/* MTX_DUPOK: both locks share lock-order class with other tid locks. */
244 mtx_init(&t->stid_lock, "stid", NULL, MTX_DUPOK|MTX_DEF);
245 mtx_init(&t->atid_lock, "atid", NULL, MTX_DUPOK|MTX_DEF);
248 * Setup the free lists for stid_tab and atid_tab.
/*
 * NOTE(review): the loop headers that chain the free-list entries are
 * elided in this view; the visible lines are the loop bodies linking
 * entry i-1 to entry i, then the free-list heads are set.
 */
252 t->stid_tab[nstids - 1].next = &t->stid_tab[nstids];
253 t->sfree = t->stid_tab;
257 t->atid_tab[natids - 1].next = &t->atid_tab[natids];
258 t->afree = t->atid_tab;
264 free_tid_maps(struct tid_info *t)
266 mtx_destroy(&t->stid_lock);
267 mtx_destroy(&t->atid_lock);
268 cxgb_free_mem(t->tid_tab);
272 add_adapter(adapter_t *adap)
274 rw_wlock(&adapter_list_lock);
275 TAILQ_INSERT_TAIL(&adapter_list, adap, adapter_entry);
276 rw_wunlock(&adapter_list_lock);
280 remove_adapter(adapter_t *adap)
282 rw_wlock(&adapter_list_lock);
283 TAILQ_REMOVE(&adapter_list, adap, adapter_entry);
284 rw_wunlock(&adapter_list_lock);
288 * Populate a TID_RELEASE WR. The mbuf must be already propely sized.
291 mk_tid_release(struct mbuf *m, unsigned int tid)
293 struct cpl_tid_release *req;
295 m_set_priority(m, CPL_PRIORITY_SETUP);
296 req = mtod(m, struct cpl_tid_release *);
297 m->m_pkthdr.len = m->m_len = sizeof(*req);
298 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
299 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
/*
 * Taskqueue handler: drain the deferred TID-release list, sending a
 * TID_RELEASE CPL for each entry.  The lock is dropped around m_get(),
 * which may sleep (M_WAIT), and around the send.
 */
303 t3_process_tid_release_list(void *data, int pending)
306 struct t3cdev *tdev = data;
307 struct t3c_data *td = T3C_DATA (tdev);
309 mtx_lock(&td->tid_release_lock);
310 while (td->tid_release_list) {
311 struct toe_tid_entry *p = td->tid_release_list;
/* ctx doubles as the "next" pointer while an entry is on this list. */
313 td->tid_release_list = (struct toe_tid_entry *)p->ctx;
314 mtx_unlock(&td->tid_release_lock);
315 m = m_get(M_WAIT, MT_DATA);
/* tid index recovered from the entry's offset within tid_tab */
316 mk_tid_release(m, p - td->tid_maps.tid_tab);
317 cxgb_ofld_send(tdev, m);
319 mtx_lock(&td->tid_release_lock);
321 mtx_unlock(&td->tid_release_lock);
325 cxgb_offload_activate(struct adapter *adapter)
327 struct t3cdev *dev = &adapter->tdev;
330 struct tid_range stid_range, tid_range;
331 struct mtutab mtutab;
332 unsigned int l2t_capacity;
334 t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
337 dev->adapter = adapter;
340 if (dev->ctl(dev, GET_TX_MAX_CHUNK, &t->tx_max_chunk) < 0 ||
341 dev->ctl(dev, GET_MAX_OUTSTANDING_WR, &t->max_wrs) < 0 ||
342 dev->ctl(dev, GET_L2T_CAPACITY, &l2t_capacity) < 0 ||
343 dev->ctl(dev, GET_MTUS, &mtutab) < 0 ||
344 dev->ctl(dev, GET_TID_RANGE, &tid_range) < 0 ||
345 dev->ctl(dev, GET_STID_RANGE, &stid_range) < 0) {
346 device_printf(adapter->dev, "%s: dev->ctl check failed\n", __FUNCTION__);
351 L2DATA(dev) = t3_init_l2t(l2t_capacity);
353 device_printf(adapter->dev, "%s: t3_init_l2t failed\n", __FUNCTION__);
356 natids = min(tid_range.num / 2, MAX_ATIDS);
357 err = init_tid_tabs(&t->tid_maps, tid_range.num, natids,
358 stid_range.num, ATID_BASE, stid_range.base);
360 device_printf(adapter->dev, "%s: init_tid_tabs failed\n", __FUNCTION__);
364 t->mtus = mtutab.mtus;
365 t->nmtus = mtutab.size;
367 TASK_INIT(&t->tid_release_task, 0 /* XXX? */, t3_process_tid_release_list, dev);
368 mtx_init(&t->tid_release_lock, "tid release", NULL, MTX_DUPOK|MTX_DEF);
372 dev->recv = process_rx;
373 dev->arp_update = t3_l2t_update;
374 /* Register netevent handler once */
375 if (TAILQ_EMPTY(&adapter_list)) {
376 #if defined(CONFIG_CHELSIO_T3_MODULE)
377 if (prepare_arp_with_t3core())
378 log(LOG_ERR, "Unable to set offload capabilities\n");
381 CTR1(KTR_CXGB, "adding adapter %p", adapter);
382 add_adapter(adapter);
383 device_printf(adapter->dev, "offload started\n");
384 adapter->flags |= CXGB_OFLD_INIT;
388 t3_free_l2t(L2DATA(dev));
396 cxgb_offload_deactivate(struct adapter *adapter)
398 struct t3cdev *tdev = &adapter->tdev;
399 struct t3c_data *t = T3C_DATA(tdev);
401 printf("removing adapter %p\n", adapter);
402 remove_adapter(adapter);
403 if (TAILQ_EMPTY(&adapter_list)) {
404 #if defined(CONFIG_CHELSIO_T3_MODULE)
405 restore_arp_sans_t3core();
408 free_tid_maps(&t->tid_maps);
409 T3C_DATA(tdev) = NULL;
410 t3_free_l2t(L2DATA(tdev));
412 mtx_destroy(&t->tid_release_lock);
/*
 * Hand an mbuf to the T3C driver's transmit hook.  (The original comment
 * said "sk_buff" -- a Linux leftover; FreeBSD passes mbufs here.)  The
 * declaration of r and the return are elided in this view.
 */
420 cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m)
424 r = dev->send(dev, m);
428 static struct ifnet *
429 get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan)
433 for_each_port(adapter, i) {
435 const struct vlan_group *grp;
437 const struct port_info *p = &adapter->port[i];
438 struct ifnet *ifp = p->ifp;
440 if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) {
443 if (vlan && vlan != EVL_VLID_MASK) {
445 dev = grp ? grp->vlan_devices[vlan] : NULL;
457 failover_fixup(adapter_t *adapter, int port)
459 if (adapter->params.rev == 0) {
460 struct ifnet *ifp = adapter->port[port].ifp;
461 struct cmac *mac = &adapter->port[port].mac;
462 if (!(ifp->if_flags & IFF_UP)) {
463 /* Failover triggered by the interface ifdown */
464 t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset,
466 t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset);
468 /* Failover triggered by the interface link down */
469 t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0);
470 t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset);
471 t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset,
478 cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data)
481 struct ulp_iscsi_info *uiip = data;
484 case ULP_ISCSI_GET_PARAMS:
485 uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
486 uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
487 uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
489 * On tx, the iscsi pdu has to be <= tx page size and has to
490 * fit into the Tx PM FIFO.
492 uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
493 t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
494 /* on rx, the iscsi pdu has to be < rx page size and the
495 whole pdu + cpl headers has to fit into one sge buffer */
496 /* also check the max rx data length programmed in TP */
497 uiip->max_rxsz = min(uiip->max_rxsz,
498 ((t3_read_reg(adapter, A_TP_PARA_REG2))
499 >> S_MAXRXDATA) & M_MAXRXDATA);
501 case ULP_ISCSI_SET_PARAMS:
502 t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
510 /* Response queue used for RDMA events. */
511 #define ASYNC_NOTIF_RSPQ 0
514 cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data)
519 case RDMA_GET_PARAMS: {
520 struct rdma_info *req = data;
522 req->udbell_physbase = rman_get_start(adapter->udbs_res);
523 req->udbell_len = rman_get_size(adapter->udbs_res);
524 req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT);
525 req->tpt_top = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT);
526 req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT);
527 req->pbl_top = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT);
528 req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT);
529 req->rqt_top = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT);
530 req->kdb_addr = (void *)((unsigned long)rman_get_virtual(adapter->regs_res) + A_SG_KDOORBELL); break;
533 struct rdma_cq_op *req = data;
535 /* may be called in any context */
536 mtx_lock_spin(&adapter->sge.reg_lock);
537 ret = t3_sge_cqcntxt_op(adapter, req->id, req->op,
539 mtx_unlock_spin(&adapter->sge.reg_lock);
543 struct ch_mem_range *t = data;
546 if ((t->addr & 7) || (t->len & 7))
548 if (t->mem_id == MEM_CM)
550 else if (t->mem_id == MEM_PMRX)
551 mem = &adapter->pmrx;
552 else if (t->mem_id == MEM_PMTX)
553 mem = &adapter->pmtx;
557 ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf);
562 case RDMA_CQ_SETUP: {
563 struct rdma_cq_setup *req = data;
565 mtx_lock_spin(&adapter->sge.reg_lock);
566 ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr,
567 req->size, ASYNC_NOTIF_RSPQ,
568 req->ovfl_mode, req->credits,
570 mtx_unlock_spin(&adapter->sge.reg_lock);
573 case RDMA_CQ_DISABLE:
574 mtx_lock_spin(&adapter->sge.reg_lock);
575 ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data);
576 mtx_unlock_spin(&adapter->sge.reg_lock);
578 case RDMA_CTRL_QP_SETUP: {
579 struct rdma_ctrlqp_setup *req = data;
581 mtx_lock_spin(&adapter->sge.reg_lock);
582 ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0,
583 SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ,
584 req->base_addr, req->size,
585 FW_RI_TID_START, 1, 0);
586 mtx_unlock_spin(&adapter->sge.reg_lock);
596 cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data)
598 struct adapter *adapter = tdev2adap(tdev);
599 struct tid_range *tid;
601 struct iff_mac *iffmacp;
602 struct ddp_params *ddpp;
603 struct adap_ports *ports;
604 struct ofld_page_info *rx_page_info;
605 struct tp_params *tp = &adapter->params.tp;
609 case GET_MAX_OUTSTANDING_WR:
610 *(unsigned int *)data = FW_WR_NUM;
613 *(unsigned int *)data = WR_FLITS;
615 case GET_TX_MAX_CHUNK:
616 *(unsigned int *)data = 1 << 20; /* 1MB */
620 tid->num = t3_mc5_size(&adapter->mc5) -
621 adapter->params.mc5.nroutes -
622 adapter->params.mc5.nfilters -
623 adapter->params.mc5.nservers;
628 tid->num = adapter->params.mc5.nservers;
629 tid->base = t3_mc5_size(&adapter->mc5) - tid->num -
630 adapter->params.mc5.nfilters -
631 adapter->params.mc5.nroutes;
633 case GET_L2T_CAPACITY:
634 *(unsigned int *)data = 2048;
639 mtup->mtus = adapter->params.mtus;
641 case GET_IFF_FROM_MAC:
643 iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr,
644 iffmacp->vlan_tag & EVL_VLID_MASK);
648 ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT);
649 ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT);
650 ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK);
654 ports->nports = adapter->params.nports;
655 for_each_port(adapter, port)
656 ports->lldevs[port] = adapter->port[port].ifp;
660 t3_port_failover(adapter, port);
661 failover_fixup(adapter, port);
665 t3_failover_done(adapter, port);
668 t3_failover_clear(adapter);
670 case GET_RX_PAGE_INFO:
672 rx_page_info->page_size = tp->rx_pg_size;
673 rx_page_info->num = tp->rx_num_pgs;
675 case ULP_ISCSI_GET_PARAMS:
676 case ULP_ISCSI_SET_PARAMS:
677 if (!offload_running(adapter))
679 return cxgb_ulp_iscsi_ctl(adapter, req, data);
680 case RDMA_GET_PARAMS:
683 case RDMA_CQ_DISABLE:
684 case RDMA_CTRL_QP_SETUP:
686 if (!offload_running(adapter))
688 return cxgb_rdma_ctl(adapter, req, data);
696 * Allocate a TOM data structure,
697 * initialize its cpl_handlers
698 * and register it as a T3C client
701 t3c_tom_add(struct t3cdev *cdev)
707 struct adap_ports *port_info;
709 t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
713 cdev->send = t3_offload_tx;
714 cdev->ctl = cxgb_offload_ctl;
716 if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
719 port_info = malloc(sizeof(*port_info), M_CXGB, M_NOWAIT|M_ZERO);
723 if (cdev->ctl(cdev, GET_PORTS, port_info) < 0)
726 t3_init_wr_tab(wr_len);
728 t->client = &t3c_tom_client;
730 /* Register TCP offload device */
732 tdev->tod_ttid = cdev2type(cdev);
733 tdev->tod_lldev = cdev->lldev;
735 if (register_toedev(tdev, "toe%d")) {
736 printf("unable to register offload device");
741 for (i = 0; i < port_info->nports; i++) {
742 struct ifnet *ifp = port_info->lldevs[i];
745 CTR1(KTR_TOM, "enabling toe on %p", ifp);
746 ifp->if_capabilities |= IFCAP_TOE4;
747 ifp->if_capenable |= IFCAP_TOE4;
749 t->ports = port_info;
751 /* Add device to the list of offload devices */
754 /* Activate TCP offload device */
755 cxgb_offload_activate(TOM_DATA(tdev)->cdev->adapter);
757 activate_offload(tdev);
758 cxgb_register_listeners();
762 printf("out_free_all fail\n");
763 free(port_info, M_CXGB);
765 printf("out_free_tom fail\n");
/*
 * Dispatch CPL_ACT_OPEN_RPL to the client that owns the atid; complain
 * and drop if no client handler is registered.
 * NOTE(review): lookup_atid()'s result is dereferenced without a NULL
 * check -- confirm it always returns a valid entry for in-range atids.
 */
773 do_act_open_rpl(struct t3cdev *dev, struct mbuf *m)
775 struct cpl_act_open_rpl *rpl = cplhdr(m);
776 unsigned int atid = G_TID(ntohl(rpl->atid));
777 struct toe_tid_entry *toe_tid;
779 toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
780 if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers &&
781 toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) {
782 return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m,
785 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
786 dev->name, CPL_ACT_OPEN_RPL);
787 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
792 do_stid_rpl(struct t3cdev *dev, struct mbuf *m)
794 union opcode_tid *p = cplhdr(m);
795 unsigned int stid = G_TID(ntohl(p->opcode_tid));
796 struct toe_tid_entry *toe_tid;
798 toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
799 if (toe_tid->ctx && toe_tid->client->handlers &&
800 toe_tid->client->handlers[p->opcode]) {
801 return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx);
803 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
804 dev->name, p->opcode);
805 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
/*
 * Generic dispatcher for CPLs addressed by hardware TID: route to the
 * owning client's handler for this opcode, or log and drop.
 */
810 do_hwtid_rpl(struct t3cdev *dev, struct mbuf *m)
812 union opcode_tid *p = cplhdr(m);
814 struct toe_tid_entry *toe_tid;
816 DPRINTF("do_hwtid_rpl opcode=0x%x\n", p->opcode);
817 hwtid = G_TID(ntohl(p->opcode_tid));
819 toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
820 if (toe_tid->ctx && toe_tid->client->handlers &&
821 toe_tid->client->handlers[p->opcode]) {
822 return toe_tid->client->handlers[p->opcode]
823 (dev, m, toe_tid->ctx);
825 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
826 dev->name, p->opcode);
827 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
832 do_cr(struct t3cdev *dev, struct mbuf *m)
834 struct cpl_pass_accept_req *req = cplhdr(m);
835 unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
836 struct toe_tid_entry *toe_tid;
838 toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
839 if (toe_tid->ctx && toe_tid->client->handlers &&
840 toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) {
841 return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]
842 (dev, m, toe_tid->ctx);
844 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
845 dev->name, CPL_PASS_ACCEPT_REQ);
846 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
/*
 * Dispatch CPL_ABORT_REQ_RSS to the owning client; if the connection has
 * no client (already gone), synthesize and send a CPL_ABORT_RPL ourselves
 * so the hardware can release the connection state.
 */
851 do_abort_req_rss(struct t3cdev *dev, struct mbuf *m)
853 union opcode_tid *p = cplhdr(m);
854 unsigned int hwtid = G_TID(ntohl(p->opcode_tid));
855 struct toe_tid_entry *toe_tid;
857 toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
858 if (toe_tid->ctx && toe_tid->client->handlers &&
859 toe_tid->client->handlers[p->opcode]) {
860 return toe_tid->client->handlers[p->opcode]
861 (dev, m, toe_tid->ctx);
863 struct cpl_abort_req_rss *req = cplhdr(m);
864 struct cpl_abort_rpl *rpl;
/* NOTE(review): this inner m shadows the parameter m -- confirm the
 * elided lines don't need the original mbuf after this point. */
866 struct mbuf *m = m_get(M_NOWAIT, MT_DATA);
868 log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n");
872 m_set_priority(m, CPL_PRIORITY_DATA);
875 htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
876 rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req)));
878 htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req)));
/* echo the abort status back as the reply command */
879 rpl->cmd = req->status;
880 cxgb_ofld_send(dev, m);
882 return (CPL_RET_BUF_DONE);
887 do_act_establish(struct t3cdev *dev, struct mbuf *m)
889 struct cpl_act_establish *req;
891 struct toe_tid_entry *toe_tid;
894 atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
895 toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
896 if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
897 toe_tid->client->handlers[CPL_ACT_ESTABLISH]) {
899 return toe_tid->client->handlers[CPL_ACT_ESTABLISH]
900 (dev, m, toe_tid->ctx);
903 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
904 dev->name, CPL_ACT_ESTABLISH);
905 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
911 do_term(struct t3cdev *dev, struct mbuf *m)
913 unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff;
914 unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data));
915 struct toe_tid_entry *toe_tid;
917 toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
918 if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
919 toe_tid->client->handlers[opcode]) {
920 return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx);
922 log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
924 return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
930 * Process a received packet with an unknown/unexpected CPL opcode.
933 do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
935 log(LOG_ERR, "%s: received bad CPL command %u\n", cdev->name,
936 0xFF & *mtod(m, unsigned int *));
937 return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
941 * Add a new handler to the CPL dispatch table. A NULL handler may be supplied
942 * to unregister an existing handler.
945 t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h)
947 if (opcode < UCHAR_MAX)
948 tom_cpl_handlers[opcode] = h ? h : do_bad_cpl;
950 log(LOG_ERR, "Chelsio T3 TOM: handler registration for "
951 "opcode %u failed\n", opcode);
955 * Make a preliminary determination if a connection can be offloaded. It's OK
956 * to fail the offload later if we say we can offload here. For now this
957 * always accepts the offload request unless there are IP options.
960 can_offload(struct toedev *dev, struct socket *so)
962 struct tom_data *tomd = TOM_DATA(dev);
963 struct t3cdev *cdev = T3CDEV(dev->tod_lldev);
964 struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
966 return so_sotoinpcb(so)->inp_depend4.inp4_options == NULL &&
967 tomd->conf.activated &&
968 (tomd->conf.max_conn < 0 ||
969 atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
973 tom_ctl(struct toedev *dev, unsigned int req, void *data)
975 struct tom_data *t = TOM_DATA(dev);
976 struct t3cdev *cdev = t->cdev;
979 return cdev->ctl(cdev, req, data);
985 * Free an active-open TID.
988 cxgb_free_atid(struct t3cdev *tdev, int atid)
990 struct tid_info *t = &(T3C_DATA(tdev))->tid_maps;
991 union active_open_entry *p = atid2entry(t, atid);
992 void *ctx = p->toe_tid.ctx;
994 mtx_lock(&t->atid_lock);
998 mtx_unlock(&t->atid_lock);
1004 * Free a server TID and return it to the free pool.
1007 cxgb_free_stid(struct t3cdev *tdev, int stid)
1009 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1010 union listen_entry *p = stid2entry(t, stid);
1012 mtx_lock(&t->stid_lock);
1016 mtx_unlock(&t->stid_lock);
1020 * Free a server TID and return it to the free pool.
1023 cxgb_get_lctx(struct t3cdev *tdev, int stid)
1025 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1026 union listen_entry *p = stid2entry(t, stid);
1028 return (p->toe_tid.ctx);
1032 cxgb_insert_tid(struct t3cdev *tdev, struct cxgb_client *client,
1033 void *ctx, unsigned int tid)
1035 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1037 t->tid_tab[tid].client = client;
1038 t->tid_tab[tid].ctx = ctx;
1039 atomic_add_int(&t->tids_in_use, 1);
1042 /* use ctx as a next pointer in the tid release list */
/*
 * Push a tid onto the deferred-release list and kick the release task.
 * NOTE(review): the elided line before taskqueue_enqueue() likely guards
 * it so the task is only queued on the empty -> non-empty transition --
 * confirm against the full source.
 */
1044 cxgb_queue_tid_release(struct t3cdev *tdev, unsigned int tid)
1046 struct t3c_data *td = T3C_DATA (tdev);
1047 struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid];
1049 CTR0(KTR_TOM, "queuing tid release\n");
1051 mtx_lock(&td->tid_release_lock);
1052 p->ctx = td->tid_release_list;
1053 td->tid_release_list = p;
1056 taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task);
1058 mtx_unlock(&td->tid_release_lock);
1062 * Remove a tid from the TID table. A client may defer processing its last
1063 * CPL message if it is locked at the time it arrives, and while the message
1064 * sits in the client's backlog the TID may be reused for another connection.
1065 * To handle this we atomically switch the TID association if it still points
1066 * to the original client context.
1069 cxgb_remove_tid(struct t3cdev *tdev, void *ctx, unsigned int tid)
1071 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1073 if (tid >= t->ntids)
1074 panic("tid=%d >= t->ntids=%d", tid, t->ntids);
/*
 * NOTE(review): per the header comment this should swap the entry's ctx
 * from the caller's ctx to NULL, but the argument order here reads as
 * (old=NULL, new=ctx) for atomic_cmpset_ptr(dst, old, new) -- confirm
 * against the full source; the casts to (long) also look suspect.
 */
1076 if (tdev->type == T3A)
1077 atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx, (long)NULL, (long)ctx);
1081 m = m_get(M_NOWAIT, MT_DATA);
1082 if (__predict_true(m != NULL)) {
1083 mk_tid_release(m, tid);
1084 CTR1(KTR_CXGB, "releasing tid=%u", tid);
1086 cxgb_ofld_send(tdev, m);
1087 t->tid_tab[tid].ctx = NULL;
/* no mbuf available: fall back to the deferred release path */
1089 cxgb_queue_tid_release(tdev, tid);
1091 atomic_add_int(&t->tids_in_use, -1);
1095 cxgb_alloc_atid(struct t3cdev *tdev, struct cxgb_client *client,
1099 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1101 mtx_lock(&t->atid_lock);
1103 union active_open_entry *p = t->afree;
1105 atid = (p - t->atid_tab) + t->atid_base;
1107 p->toe_tid.ctx = ctx;
1108 p->toe_tid.client = client;
1111 mtx_unlock(&t->atid_lock);
1116 cxgb_alloc_stid(struct t3cdev *tdev, struct cxgb_client *client,
1120 struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
1122 mtx_lock(&t->stid_lock);
1124 union listen_entry *p = t->sfree;
1126 stid = (p - t->stid_tab) + t->stid_base;
1128 p->toe_tid.ctx = ctx;
1129 p->toe_tid.client = client;
1132 mtx_unlock(&t->stid_lock);
1138 is_offloading(struct ifnet *ifp)
1140 struct adapter *adapter;
1143 rw_rlock(&adapter_list_lock);
1144 TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) {
1145 for_each_port(adapter, port) {
1146 if (ifp == adapter->port[port].ifp) {
1147 rw_runlock(&adapter_list_lock);
1152 rw_runlock(&adapter_list_lock);
1158 cxgb_arp_update_event(void *unused, struct rtentry *rt0,
1159 uint8_t *enaddr, struct sockaddr *sa)
1162 if (!is_offloading(rt0->rt_ifp))
1167 cxgb_neigh_update(rt0, enaddr, sa);
1173 cxgb_redirect_event(void *unused, int event, struct rtentry *rt0,
1174 struct rtentry *rt1, struct sockaddr *sa)
1177 * ignore events on non-offloaded interfaces
1179 if (!is_offloading(rt0->rt_ifp))
1183 * Cannot redirect to non-offload device.
1185 if (!is_offloading(rt1->rt_ifp)) {
1186 log(LOG_WARNING, "%s: Redirect to non-offload"
1187 "device ignored.\n", __FUNCTION__);
1192 * avoid LORs by dropping the route lock but keeping a reference
1200 cxgb_redirect(rt0, rt1, sa);
1201 cxgb_neigh_update(rt1, NULL, sa);
1210 cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa)
1213 if (rt->rt_ifp && is_offloading(rt->rt_ifp) && (rt->rt_ifp->if_flags & IFCAP_TOE)) {
1214 struct t3cdev *tdev = T3CDEV(rt->rt_ifp);
1217 t3_l2t_update(tdev, rt, enaddr, sa);
1222 set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
1225 struct cpl_set_tcb_field *req;
1227 m = m_gethdr(M_NOWAIT, MT_DATA);
1229 log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__);
1233 m_set_priority(m, CPL_PRIORITY_CONTROL);
1234 req = mtod(m, struct cpl_set_tcb_field *);
1235 m->m_pkthdr.len = m->m_len = sizeof(*req);
1237 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1238 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
1241 req->word = htons(W_TCB_L2T_IX);
1242 req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX));
1243 req->val = htobe64(V_TCB_L2T_IX(e->idx));
1244 tdev->send(tdev, m);
1248 cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa)
1250 struct ifnet *olddev, *newdev;
1251 struct tid_info *ti;
1252 struct t3cdev *tdev;
1255 struct l2t_entry *e;
1256 struct toe_tid_entry *te;
1258 olddev = old->rt_ifp;
1259 newdev = new->rt_ifp;
1260 if (!is_offloading(olddev))
1262 if (!is_offloading(newdev)) {
1263 log(LOG_WARNING, "%s: Redirect to non-offload"
1264 "device ignored.\n", __FUNCTION__);
1267 tdev = T3CDEV(olddev);
1269 if (tdev != T3CDEV(newdev)) {
1270 log(LOG_WARNING, "%s: Redirect to different "
1271 "offload device ignored.\n", __FUNCTION__);
1275 /* Add new L2T entry */
1276 e = t3_l2t_get(tdev, new, new->rt_ifp, sa);
1278 log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n",
1283 /* Walk tid table and notify clients of dst change. */
1284 ti = &(T3C_DATA (tdev))->tid_maps;
1285 for (tid=0; tid < ti->ntids; tid++) {
1286 te = lookup_tid(ti, tid);
1288 if (te->ctx && te->client && te->client->redirect) {
1289 update_tcb = te->client->redirect(te->ctx, old, new,
1292 l2t_hold(L2DATA(tdev), e);
1293 set_l2t_ix(tdev, tid, e);
1297 l2t_release(L2DATA(tdev), e);
1301 * Initialize the CPL dispatch table.
1304 init_cpl_handlers(void)
1308 for (i = 0; i < 256; ++i)
1309 tom_cpl_handlers[i] = do_bad_cpl;
1311 t3_init_listen_cpl_handlers();
1315 t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
1317 struct tom_data *t = TOM_DATA(dev);
1318 struct t3cdev *cdev = t->cdev;
1319 struct ddp_params ddp;
1320 struct ofld_page_info rx_page_info;
1323 t3_init_tunables(t);
1324 mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF);
1325 CTR2(KTR_TOM, "t3_toe_attach dev=%p entry=%p", dev, entry);
1327 dev->tod_can_offload = can_offload;
1328 dev->tod_connect = t3_connect;
1329 dev->tod_ctl = tom_ctl;
1331 dev->tod_failover = t3_failover;
1333 err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp);
1337 err = cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info);
1341 t->ddp_llimit = ddp.llimit;
1342 t->ddp_ulimit = ddp.ulimit;
1344 t->rx_page_size = rx_page_info.page_size;
1345 /* OK if this fails, we just can't do DDP */
1346 t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
1347 t->ppod_map = malloc(t->nppods, M_DEVBUF, M_NOWAIT|M_ZERO);
1349 mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF);
1352 t3_sysctl_register(cdev->adapter, &t->conf);
1357 cxgb_toe_listen_start(void *unused, struct tcpcb *tp)
1359 struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
1362 mtx_lock(&cxgb_list_lock);
1363 TAILQ_FOREACH(p, &cxgb_list, entry) {
1364 t3_listen_start(&p->tdev, so, p->cdev);
1366 mtx_unlock(&cxgb_list_lock);
1370 cxgb_toe_listen_stop(void *unused, struct tcpcb *tp)
1372 struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
1375 mtx_lock(&cxgb_list_lock);
1376 TAILQ_FOREACH(p, &cxgb_list, entry) {
1377 if (tp->t_state == TCPS_LISTEN)
1378 t3_listen_stop(&p->tdev, so, p->cdev);
1380 mtx_unlock(&cxgb_list_lock);
1384 cxgb_toe_listen_start_handler(struct inpcb *inp, void *arg)
1386 struct tcpcb *tp = intotcpcb(inp);
1388 if (tp->t_state == TCPS_LISTEN)
1389 cxgb_toe_listen_start(NULL, tp);
1393 cxgb_register_listeners(void)
1396 inp_apply_all(cxgb_toe_listen_start_handler, NULL);
/*
 * Module initialization body (function header elided in this view):
 * set up CPL dispatch, register with the TOE layer, install route/listen
 * event handlers, then install the per-opcode CPL handlers and register
 * as a T3C client.
 */
1402 init_cpl_handlers();
1403 if (t3_init_cpl_io() < 0) {
1405 "Unable to initialize cpl io ops\n");
1408 t3_init_socket_ops();
1410 /* Register with the TOE device layer. */
1412 if (register_tom(&t3_tom_info) != 0) {
1414 "Unable to register Chelsio T3 TCP offload module.\n");
1418 rw_init(&adapter_list_lock, "ofld adap list");
1419 TAILQ_INIT(&adapter_list);
1420 EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event,
1421 NULL, EVENTHANDLER_PRI_ANY);
1422 EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event,
1423 NULL, EVENTHANDLER_PRI_ANY);
1425 mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
/*
 * NOTE(review): listen_tag is assigned twice below, so the tag returned
 * for the listen_start handler is lost and can never be deregistered --
 * a second tag variable is needed if unload support is ever added.
 */
1426 listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
1427 cxgb_toe_listen_start, NULL, EVENTHANDLER_PRI_ANY);
1428 listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
1429 cxgb_toe_listen_stop, NULL, EVENTHANDLER_PRI_ANY);
1430 TAILQ_INIT(&cxgb_list);
/* Route each CPL opcode to its dispatcher. */
1434 t3_register_cpl_handler(CPL_PASS_OPEN_RPL, do_stid_rpl);
1435 t3_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_stid_rpl);
1436 t3_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_cr);
1437 t3_register_cpl_handler(CPL_PASS_ESTABLISH, do_hwtid_rpl);
1438 t3_register_cpl_handler(CPL_ABORT_RPL_RSS, do_hwtid_rpl);
1439 t3_register_cpl_handler(CPL_ABORT_RPL, do_hwtid_rpl);
1440 t3_register_cpl_handler(CPL_RX_URG_NOTIFY, do_hwtid_rpl);
1441 t3_register_cpl_handler(CPL_RX_DATA, do_hwtid_rpl);
1442 t3_register_cpl_handler(CPL_TX_DATA_ACK, do_hwtid_rpl);
1443 t3_register_cpl_handler(CPL_TX_DMA_ACK, do_hwtid_rpl);
1444 t3_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
1445 t3_register_cpl_handler(CPL_PEER_CLOSE, do_hwtid_rpl);
1446 t3_register_cpl_handler(CPL_CLOSE_CON_RPL, do_hwtid_rpl);
1447 t3_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req_rss);
1448 t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
1449 t3_register_cpl_handler(CPL_RDMA_TERMINATE, do_term);
1450 t3_register_cpl_handler(CPL_RDMA_EC_STATUS, do_hwtid_rpl);
1451 t3_register_cpl_handler(CPL_RX_DATA_DDP, do_hwtid_rpl);
1452 t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl);
1453 t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl);
1454 t3_register_cpl_handler(CPL_GET_TCB_RPL, do_hwtid_rpl);
1455 t3_register_cpl_handler(CPL_SET_TCB_RPL, do_hwtid_rpl);
1457 /* Register to offloading devices */
1458 cxgb_register_client(&t3c_tom_client);
1464 t3_tom_load(module_t mod, int cmd, void *arg)
1475 printf("uhm, ... unloading isn't really supported for toe\n");
1487 static moduledata_t mod_data= {
1492 MODULE_VERSION(t3_tom, 1);
1493 MODULE_DEPEND(t3_tom, toecore, 1, 1, 1);
1494 MODULE_DEPEND(t3_tom, if_cxgb, 1, 1, 1);
1495 DECLARE_MODULE(t3_tom, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);