2 * Copyright (c) 2012 Chelsio Communications, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
38 #include <sys/socket.h>
40 #include <net/ethernet.h>
41 #include <net/if_vlan_var.h>
42 #include <netinet/in.h>
43 #include <netinet/toecore.h>
45 #include "cxgb_include.h"
46 #include "ulp/tom/cxgb_tom.h"
47 #include "ulp/tom/cxgb_l2t.h"
49 #define VLAN_NONE 0xfff
50 #define SA(x) ((struct sockaddr *)(x))
51 #define SIN(x) ((struct sockaddr_in *)(x))
52 #define SINADDR(x) (SIN(x)->sin_addr.s_addr)
55 * Module locking notes: There is a RW lock protecting the L2 table as a
56 * whole plus a mutex per L2T entry. Entry lookups and allocations happen
57 * under the protection of the table lock, individual entry changes happen
58 * while holding that entry's mutex. The table lock nests outside the
59 * entry locks. Allocations of new entries take the table lock as writers so
60 * no other lookups can happen while allocating new entries. Entry updates
61 * take the table lock as readers so multiple entries can be updated in
62 * parallel. An L2T entry can be dropped by decrementing its reference count
63 * and therefore can happen in parallel with entry allocation but no entry
64 * can change state or increment its ref count during allocation as both of
65 * these perform lookups.
67 * When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry.
/*
 * Hash an IPv4 address (key) and interface index into an L2T hash bucket.
 * NOTE(review): the mask assumes d->nentries is a power of 2 — confirm at
 * table-allocation time.
 */
70 static inline unsigned int
71 arp_hash(u32 key, int ifindex, const struct l2t_data *d)
73 return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
77 * Set up an L2T entry and send any packets waiting in the arp queue. Must be
78 * called with the entry locked.
81 setup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e)
84 struct cpl_l2t_write_req *req;
85 struct port_info *pi = &sc->port[e->smt_idx]; /* smt_idx is port_id */
87 mtx_assert(&e->lock, MA_OWNED);
/* Allocate a control-priority mbuf with the CPL request prepended. */
89 m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req);
/* NOTE(review): allocation-failure branch (m == NULL) not visible in this
 * view — confirm the error path logs and returns without touching req. */
91 log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n",
/* Build the CPL_L2T_WRITE_REQ: firmware work request header, then the
 * L2T index, interface, VLAN id/priority, and destination MAC. */
96 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
97 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
98 req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
99 V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
100 V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan)));
101 memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
/* Hand the write request to the offload transmit path. */
103 t3_offload_tx(sc, m);
106 * XXX: We used pi->first_qset to send the L2T_WRITE_REQ. If any mbuf
107 * on the arpq is going out via another queue set associated with the
108 * port then it has a bad race with the L2T_WRITE_REQ. Ideally we
109 * should wait till the reply to the write before draining the arpq.
/* Drain the per-entry arp queue now that the L2T slot is programmed. */
111 while (e->arpq_head) {
113 e->arpq_head = m->m_next;
115 t3_offload_tx(sc, m);
123 * Add a packet to the an L2T entry's queue of packets awaiting resolution.
124 * Must be called with the entry's lock held.
127 arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
129 mtx_assert(&e->lock, MA_OWNED);
/* Append at the tail; mbufs are chained through m_next.  NOTE(review):
 * the empty-queue branch (arpq_head == NULL) is not visible here. */
133 e->arpq_tail->m_next = m;
/*
 * Report an mbuf whose destination could not be L2-resolved.  The mbuf is
 * logged as leaked rather than freed here — the CPL payload pointer is
 * included to aid debugging.
 */
140 resolution_failed_mbuf(struct mbuf *m)
142 log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p",
143 __func__, m, mtod(m, void *));
/*
 * Walk an entry's arp queue and report every queued mbuf as unresolvable.
 * Called with the entry lock held (asserted below).
 */
147 resolution_failed(struct l2t_entry *e)
151 mtx_assert(&e->lock, MA_OWNED);
153 while (e->arpq_head) {
155 e->arpq_head = m->m_next;
157 resolution_failed_mbuf(m);
/*
 * Update an L2T entry with a (possibly NULL) link-layer address and VLAN
 * tag.  Called with the entry lock held.  A NULL lladdr marks the entry
 * FAILED (if still unresolved) or STALE (if it previously held a valid
 * address); a non-NULL lladdr transitions the entry to VALID and, when the
 * MAC is new or changed, reprograms the hardware L2T slot.
 */
163 update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
167 mtx_assert(&e->lock, MA_OWNED);
170 * The entry may be in active use (e->refcount > 0) or not. We update
171 * it even when it's not as this simplifies the case where we decide to
172 * reuse the entry later.
175 if (lladdr == NULL &&
176 (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
178 * Never got a valid L2 address for this one. Just mark it as
179 * failed instead of removing it from the hash (for which we'd
180 * need to wlock the table).
182 e->state = L2T_STATE_FAILED;
/* Flush the arp queue; these mbufs can never be sent. */
183 resolution_failed(e);
186 } else if (lladdr == NULL) {
188 /* Valid or already-stale entry was deleted (or expired) */
190 KASSERT(e->state == L2T_STATE_VALID ||
191 e->state == L2T_STATE_STALE,
192 ("%s: lladdr NULL, state %d", __func__, e->state));
194 e->state = L2T_STATE_STALE;
/* lladdr != NULL: entry is (re)resolved. */
198 if (e->state == L2T_STATE_RESOLVING ||
199 e->state == L2T_STATE_FAILED ||
200 memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {
202 /* unresolved -> resolved; or dmac changed */
204 memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
/* Reprogram hardware and drain any packets queued on the entry. */
206 setup_l2e_send_pending(sc, e);
208 e->state = L2T_STATE_VALID;
/*
 * Ask the TOE core to resolve the entry's IPv4 address on its interface.
 * On EWOULDBLOCK the resolution is in progress and the entry is left
 * untouched; otherwise the entry is updated (lock taken around the update)
 * with the resolved MAC/VLAN, or marked failed when rc != 0.
 */
213 resolve_entry(struct adapter *sc, struct l2t_entry *e)
215 struct tom_data *td = sc->tom_softc;
216 struct toedev *tod = &td->tod;
217 struct sockaddr_in sin = {0};
218 uint8_t dmac[ETHER_ADDR_LEN];
219 uint16_t vtag = EVL_VLID_MASK;
222 sin.sin_family = AF_INET;
223 sin.sin_len = sizeof(struct sockaddr_in);
224 SINADDR(&sin) = e->addr;
/* toe_l2_resolve fills dmac/vtag on success. */
226 rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
227 if (rc == EWOULDBLOCK)
/* NOTE(review): the mtx_lock(&e->lock) pairing this unlock is not
 * visible in this view — confirm it sits just above. */
231 update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
232 mtx_unlock(&e->lock);
/*
 * Slow-path transmit through an L2T entry.  Dispatches on entry state:
 * STALE kicks off revalidation then retries, VALID sends immediately,
 * RESOLVING queues the mbuf (or drains/flushes the queue once resolution
 * completes), FAILED drops the packet with EHOSTUNREACH.
 */
238 t3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
243 case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
245 if (resolve_entry(sc, e) != EWOULDBLOCK)
246 goto again; /* entry updated, re-examine state */
250 case L2T_STATE_VALID: /* fast-path, send the packet on */
252 return (t3_offload_tx(sc, m));
254 case L2T_STATE_RESOLVING:
/* Re-check state under the lock; it may have resolved meanwhile. */
256 if (e->state != L2T_STATE_RESOLVING) {
257 mtx_unlock(&e->lock);
/* NOTE(review): arpq_enqueue of m presumably happens in the elided
 * lines here — confirm against the full source. */
261 mtx_unlock(&e->lock);
263 if (resolve_entry(sc, e) == EWOULDBLOCK)
/* Resolution completed synchronously: flush or fail the queue. */
267 if (e->state == L2T_STATE_VALID && e->arpq_head)
268 setup_l2e_send_pending(sc, e);
269 if (e->state == L2T_STATE_FAILED)
270 resolution_failed(e);
271 mtx_unlock(&e->lock);
274 case L2T_STATE_FAILED:
275 resolution_failed_mbuf(m);
276 return (EHOSTUNREACH);
283 * Allocate a free L2T entry. Must be called with l2t_data.lock held.
285 static struct l2t_entry *
286 alloc_l2e(struct l2t_data *d)
288 struct l2t_entry *end, *e, **p;
290 rw_assert(&d->lock, RA_WLOCKED);
/* Fail fast when no entry has refcnt == 0. */
292 if (!atomic_load_acq_int(&d->nfree))
295 /* there's definitely a free entry */
/* Scan from the rover to the end, then wrap to entry 1 (entry 0 is
 * reserved — see t3_init_l2t). */
296 for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) {
297 if (atomic_load_acq_int(&e->refcnt) == 0)
301 for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e)
305 atomic_add_int(&d->nfree, -1);
308 * The entry we found may be an inactive entry that is
309 * presently in the hash table. We need to remove it.
311 if (e->state != L2T_STATE_UNUSED) {
312 int hash = arp_hash(e->addr, e->ifp->if_index, d);
/* Unlink e from its hash chain before reuse. */
314 for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) {
320 e->state = L2T_STATE_UNUSED;
/*
 * Look up (or allocate) the L2T entry for (address, ifp, smt_idx) and take
 * a reference on it.  The table lock is held for the lookup/allocation;
 * the returned entry starts in RESOLVING state when newly allocated.
 */
327 t3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
329 struct tom_data *td = pi->adapter->tom_softc;
331 struct l2t_data *d = td->l2t;
332 uint32_t addr = SINADDR(sa);
333 int hash = arp_hash(addr, ifp->if_index, d);
334 unsigned int smt_idx = pi->port_id;
/* First try to find an existing entry for this (addr, ifp, port). */
337 for (e = d->l2tab[hash].first; e; e = e->next) {
338 if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
344 /* Need to allocate a new entry */
347 mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
/* Insert at the head of the hash chain and initialize the entry. */
348 e->next = d->l2tab[hash].first;
349 d->l2tab[hash].first = e;
351 e->state = L2T_STATE_RESOLVING;
354 e->smt_idx = smt_idx;
355 atomic_store_rel_int(&e->refcnt, 1);
357 KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented."));
360 mtx_unlock(&e->lock);
364 rw_wunlock(&d->lock);
/*
 * TOE callback: the network stack's neighbor state for (sa, ifp) changed.
 * Find the matching L2T entry (read-locked lookup) and push the new
 * link-layer address / VLAN tag into it; addresses we never offloaded to
 * are ignored.
 */
370 t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
371 uint8_t *lladdr, uint16_t vtag)
373 struct tom_data *td = t3_tomdata(tod);
374 struct adapter *sc = tod->tod_softc;
376 struct l2t_data *d = td->l2t;
377 u32 addr = *(u32 *) &SIN(sa)->sin_addr;
378 int hash = arp_hash(addr, ifp->if_index, d);
381 for (e = d->l2tab[hash].first; e; e = e->next)
382 if (e->addr == addr && e->ifp == ifp) {
/* NOTE(review): entry lock is presumably taken here before the
 * runlock — elided lines; confirm lock order table -> entry. */
386 rw_runlock(&d->lock);
389 * This is of no interest to us. We've never had an offloaded
390 * connection to this destination, and we aren't attempting one right
396 rw_runlock(&d->lock);
398 KASSERT(e->state != L2T_STATE_UNUSED,
399 ("%s: unused entry in the hash.", __func__));
401 update_entry(sc, e, lladdr, vtag);
402 mtx_unlock(&e->lock);
/*
 * Allocate and initialize an L2T with l2t_capacity entries.  Entry 0 is
 * reserved (never handed out), so only capacity - 1 entries are free.
 * Uses M_NOWAIT — the caller must handle allocation failure.
 */
406 t3_init_l2t(unsigned int l2t_capacity)
409 int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
411 d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
415 d->nentries = l2t_capacity;
416 d->rover = &d->l2tab[1]; /* entry 0 is not used */
417 atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
418 rw_init(&d->lock, "L2T");
/* Per-entry init: state, mutex, refcount. */
420 for (i = 0; i < l2t_capacity; ++i) {
422 d->l2tab[i].state = L2T_STATE_UNUSED;
423 mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
424 atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
/*
 * Tear down an L2T: destroy the table rwlock and every per-entry mutex.
 * The caller must guarantee no entries are still referenced.
 */
430 t3_free_l2t(struct l2t_data *d)
434 rw_destroy(&d->lock);
435 for (i = 0; i < d->nentries; ++i)
436 mtx_destroy(&d->l2tab[i].lock);
/*
 * CPL handler for L2T_WRITE_RPL: the hardware's acknowledgment of a
 * setup_l2e_send_pending() write.  Only logs unexpected error status.
 */
442 do_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
444 struct cpl_l2t_write_rpl *rpl = mtod(m, void *);
446 if (rpl->status != CPL_ERR_NONE)
448 "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
449 rpl->status, GET_TID(rpl));
/* Register this file's CPL handler(s) with the adapter. */
456 t3_init_l2t_cpl_handlers(struct adapter *sc)
458 t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);