1 /**************************************************************************
3 Copyright (c) 2007, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
39 #include <sys/mutex.h>
40 #if __FreeBSD_version > 700000
41 #include <sys/rwlock.h>
44 #include <sys/socket.h>
46 #include <net/ethernet.h>
47 #include <net/if_vlan_var.h>
48 #include <net/if_dl.h>
49 #include <net/route.h>
50 #include <netinet/in.h>
51 #include <netinet/if_ether.h>
53 #include <cxgb_include.h>
54 #include <ulp/tom/cxgb_l2t.h>
/* 802.1Q VID 0xfff is reserved; used here to mean "no VLAN tag". */
56 #define VLAN_NONE 0xfff
/* Convenience casts for pulling the link-layer address out of a route. */
57 #define SDL(s) ((struct sockaddr_dl *)s)
58 #define RT_ENADDR(sa) ((u_char *)LLADDR(SDL((sa))))
/* Map the short field name onto the FreeBSD route-metrics expiry slot. */
59 #define rt_expire rt_rmx.rmx_expire
/*
 * NOTE(review): the fields below look like members of a local mirror of
 * struct llinfo_arp whose enclosing declaration is not visible in this
 * excerpt — confirm against the full source before relying on layout.
 */
62 struct callout la_timer;
63 struct rtentry *la_rt;
64 struct mbuf *la_hold; /* last packet until resolved/timeout */
65 u_short la_preempt; /* countdown for pre-expiry arps */
66 u_short la_asked; /* # requests sent */
70 * Module locking notes: There is a RW lock protecting the L2 table as a
71 * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
72 * under the protection of the table lock, individual entry changes happen
73 * while holding that entry's spinlock. The table lock nests outside the
74 * entry locks. Allocations of new entries take the table lock as writers so
75 * no other lookups can happen while allocating new entries. Entry updates
76 * take the table lock as readers so multiple entries can be updated in
77 * parallel. An L2T entry can be dropped by decrementing its reference count
78 * and therefore can happen in parallel with entry allocation but no entry
79 * can change state or increment its ref count during allocation as both of
80 * these perform lookups.
/*
 * Return the 802.1p priority bits of entry e's VLAN tag.
 * NOTE(review): the body is not visible in this excerpt — presumably it
 * extracts the PCP field (e->vlan >> 13); confirm against the full source.
 */
83 static inline unsigned int
84 vlan_prio(const struct l2t_entry *e)
/*
 * Hash an (IPv4 address, interface index) pair into an L2 table bucket.
 * Relies on d->nentries being a power of 2 so the mask acts as a modulus.
 */
89 static inline unsigned int
90 arp_hash(u32 key, int ifindex, const struct l2t_data *d)
92 return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
96 neigh_replace(struct l2t_entry *e, struct rtentry *rt)
108 * Set up an L2T entry and send any packets waiting in the arp queue. The
109 * supplied mbuf is used for the CPL_L2T_WRITE_REQ. Must be called with the
113 setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
116 struct cpl_l2t_write_req *req;
/* Caller passed no mbuf: allocate one for the write request. */
119 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
/* Build the CPL_L2T_WRITE_REQ that programs this entry into the HW table. */
125 req = mtod(m, struct cpl_l2t_write_req *);
126 m->m_pkthdr.len = m->m_len = sizeof(*req);
128 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
129 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
130 req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
131 V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
132 V_L2T_W_PRIO(vlan_prio(e)));
134 memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
/* Control priority so the write request goes ahead of data traffic. */
135 m_set_priority(m, CPL_PRIORITY_CONTROL);
136 cxgb_ofld_send(dev, m);
/* Drain packets that queued up while this entry was resolving. */
137 while (e->arpq_head) {
139 e->arpq_head = m->m_next;
141 cxgb_ofld_send(dev, m);
/* Entry is now programmed and usable by the fast path. */
144 e->state = L2T_STATE_VALID;
150 * Add a packet to an L2T entry's queue of packets awaiting resolution.
151 * Must be called with the entry's lock held.
154 arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
158 e->arpq_tail->m_next = m;
/*
 * Slow-path transmit: revalidate or resolve the L2 entry for this packet's
 * next hop; queue the packet on the entry's arpq while resolution is in
 * progress, send it immediately once the entry is valid.
 */
165 t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e)
167 struct rtentry *rt = e->neigh;
168 struct sockaddr_in sin;
/* Rebuild the next-hop sockaddr from the entry's cached IPv4 address. */
170 bzero(&sin, sizeof(struct sockaddr_in));
171 sin.sin_family = AF_INET;
172 sin.sin_len = sizeof(struct sockaddr_in);
173 sin.sin_addr.s_addr = e->addr;
175 CTR2(KTR_CXGB, "send slow on rt=%p eaddr=0x%08x\n", rt, e->addr);
178 case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
179 arpresolve(rt->rt_ifp, rt, NULL,
180 (struct sockaddr *)&sin, e->dmac);
/* NOTE(review): the matching mtx_lock is not visible in this excerpt;
 * the unlock below implies e->lock is held at this point — confirm. */
182 if (e->state == L2T_STATE_STALE)
183 e->state = L2T_STATE_VALID;
184 mtx_unlock(&e->lock);
/* FALLTHROUGH: a stale entry is still usable for transmit. */
185 case L2T_STATE_VALID: /* fast-path, send the packet on */
186 return cxgb_ofld_send(dev, m);
187 case L2T_STATE_RESOLVING:
189 if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
190 mtx_unlock(&e->lock);
194 mtx_unlock(&e->lock);
196 * Only the first packet added to the arpq should kick off
197 * resolution. However, because the m_gethdr below can fail,
198 * we allow each packet added to the arpq to retry resolution
199 * as a way of recovering from transient memory exhaustion.
200 * A better way would be to use a work request to retry L2T
201 * entries when there's no memory.
/* arpresolve() == 0 means the MAC is already known: program HW now. */
203 if (arpresolve(rt->rt_ifp, rt, NULL,
204 (struct sockaddr *)&sin, e->dmac) == 0) {
205 CTR6(KTR_CXGB, "mac=%x:%x:%x:%x:%x:%x\n",
206 e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
208 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
213 setup_l2e_send_pending(dev, m, e);
216 mtx_unlock(&e->lock);
/*
 * Event-driven variant of the slow send path: nudge an L2T entry toward
 * VALID (revalidating stale entries, retrying resolution for resolving
 * ones) without a caller-supplied payload packet.
 */
223 t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
227 struct sockaddr_in sin;
/* NOTE(review): sin is not zeroed here, unlike t3_l2t_send_slow — a
 * bzero line may have been dropped from this excerpt; confirm. */
228 sin.sin_family = AF_INET;
229 sin.sin_len = sizeof(struct sockaddr_in);
230 sin.sin_addr.s_addr = e->addr;
232 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
238 case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
239 arpresolve(rt->rt_ifp, rt, NULL,
240 (struct sockaddr *)&sin, e->dmac);
242 if (e->state == L2T_STATE_STALE) {
243 e->state = L2T_STATE_VALID;
245 mtx_unlock(&e->lock);
/* Nothing to transmit here: VALID entries need no further action. */
247 case L2T_STATE_VALID: /* fast-path, send the packet on */
249 case L2T_STATE_RESOLVING:
251 if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
252 mtx_unlock(&e->lock);
255 mtx_unlock(&e->lock);
258 * Only the first packet added to the arpq should kick off
259 * resolution. However, because the alloc_skb below can fail,
260 * we allow each packet added to the arpq to retry resolution
261 * as a way of recovering from transient memory exhaustion.
262 * A better way would be to use a work request to retry L2T
263 * entries when there's no memory.
265 arpresolve(rt->rt_ifp, rt, NULL,
266 (struct sockaddr *)&sin, e->dmac);
272 * Allocate a free L2T entry. Must be called with l2t_data.lock held.
274 static struct l2t_entry *
275 alloc_l2e(struct l2t_data *d)
277 struct l2t_entry *end, *e, **p;
/* No free entries at all: fail fast. */
279 if (!atomic_load_acq_int(&d->nfree))
282 /* there's definitely a free entry */
/* Scan from the rover to the end of the table for a refcnt==0 entry... */
283 for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
284 if (atomic_load_acq_int(&e->refcnt) == 0)
/* ...otherwise wrap around and scan from index 1 (entry 0 is reserved). */
287 for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
290 atomic_add_int(&d->nfree, -1);
293 * The entry we found may be an inactive entry that is
294 * presently in the hash table. We need to remove it.
296 if (e->state != L2T_STATE_UNUSED) {
297 int hash = arp_hash(e->addr, e->ifindex, d);
/* Unlink e from its old hash chain before recycling it. */
299 for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
304 e->state = L2T_STATE_UNUSED;
311 * Called when an L2T entry has no more users. The entry is left in the hash
312 * table since it is likely to be reused but we also bump nfree to indicate
313 * that the entry can be reallocated for a different neighbor. We also drop
314 * the existing neighbor reference in case the neighbor is going away and is
315 * waiting on our reference.
317 * Because entries can be reallocated to other neighbors once their ref count
318 * drops to 0 we need to take the entry's lock to avoid races with a new
322 t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
324 struct rtentry *rt = NULL;
/* Refcnt may have been bumped again since the caller dropped its ref;
 * only release the neighbour reference if the entry is truly idle. */
327 if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */
332 mtx_unlock(&e->lock);
/* Make the entry eligible for reallocation by alloc_l2e(). */
333 atomic_add_int(&d->nfree, 1);
340 * Update an L2T entry that was previously used for the same next hop as neigh.
341 * Must be called with softirqs disabled.
344 reuse_entry(struct l2t_entry *e, struct rtentry *neigh)
346 struct llinfo_arp *la;
348 la = (struct llinfo_arp *)neigh->rt_llinfo;
350 mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
351 if (neigh != e->neigh)
352 neigh_replace(e, neigh);
/* Pick the entry's new state: MAC changed or ARP not yet confirmed ->
 * RESOLVING; confirmed with no held packet -> VALID; otherwise STALE. */
354 if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), sizeof(e->dmac)) ||
355 (neigh->rt_expire > time_uptime))
356 e->state = L2T_STATE_RESOLVING;
357 else if (la->la_hold == NULL)
358 e->state = L2T_STATE_VALID;
/* NOTE(review): an "else" line appears to be missing from this excerpt
 * between the previous assignment and the one below — confirm. */
360 e->state = L2T_STATE_STALE;
361 mtx_unlock(&e->lock);
/*
 * Look up — or allocate and hash in — the L2T entry for the next hop
 * described by neigh/sa on port ifp, returning it with a reference held.
 */
365 t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh, struct ifnet *ifp,
369 struct l2t_data *d = L2DATA(dev);
370 u32 addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
371 int ifidx = neigh->rt_ifp->if_index;
372 int hash = arp_hash(addr, ifidx, d);
373 unsigned int smt_idx = ((struct port_info *)ifp->if_softc)->port_id;
/* First look for an existing entry matching (addr, ifindex, smt_idx). */
376 for (e = d->l2tab[hash].first; e; e = e->next)
377 if (e->addr == addr && e->ifindex == ifidx &&
378 e->smt_idx == smt_idx) {
/* We hold the only reference: the route may have changed, resync. */
380 if (atomic_load_acq_int(&e->refcnt) == 1)
381 reuse_entry(e, neigh);
385 /* Need to allocate a new entry */
388 mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
/* Publish the new entry in its hash chain before dropping the table lock. */
389 e->next = d->l2tab[hash].first;
390 d->l2tab[hash].first = e;
391 rw_wunlock(&d->lock);
393 e->state = L2T_STATE_RESOLVING;
396 e->smt_idx = smt_idx;
397 atomic_store_rel_int(&e->refcnt, 1);
401 neigh_replace(e, neigh);
404 * XXX need to add accessor function for vlan tag
/* NOTE(review): "neigh->dev" and VLAN_DEV_INFO are Linux-style; a FreeBSD
 * rtentry has no "dev" member — surrounding #ifdef lines may be missing
 * from this excerpt; confirm against the full source. */
406 if (neigh->rt_ifp->if_vlantrunk)
407 e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
411 mtx_unlock(&e->lock);
417 rw_wunlock(&d->lock);
422 * Called when address resolution fails for an L2T entry to handle packets
423 * on the arpq head. If a packet specifies a failure handler it is invoked,
424 * otherwise the packet is sent to the TOE.
426 * XXX: maybe we should abandon the latter behavior and just require a failure
430 handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq)
434 struct mbuf *m = arpq;
436 struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
/* Per-packet failure callback wins; otherwise fall back to the TOE. */
441 if (cb->arp_failure_handler)
442 cb->arp_failure_handler(dev, m);
445 cxgb_ofld_send(dev, m);
/*
 * ARP update notification: a neighbour's MAC (enaddr) became known (or
 * changed) for the address in sa. Refresh the matching L2T entry, push
 * the new mapping to hardware, and flush or fail any queued packets.
 */
451 t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh,
452 uint8_t *enaddr, struct sockaddr *sa)
455 struct mbuf *arpq = NULL;
456 struct l2t_data *d = L2DATA(dev);
457 u32 addr = *(u32 *) &((struct sockaddr_in *)sa)->sin_addr;
458 int ifidx = neigh->rt_ifp->if_index;
459 int hash = arp_hash(addr, ifidx, d);
460 struct llinfo_arp *la;
463 for (e = d->l2tab[hash].first; e; e = e->next)
464 if (e->addr == addr && e->ifindex == ifidx) {
/* No entry for this neighbour: nothing for the TOE to update. */
468 rw_runlock(&d->lock);
469 CTR1(KTR_CXGB, "t3_l2t_update: addr=0x%08x not found", addr);
/* NOTE(review): the printf calls below look like leftover debug output;
 * consider CTR/log with the file's other tracing — review. */
473 printf("found 0x%08x\n", addr);
475 rw_runlock(&d->lock);
476 memcpy(e->dmac, enaddr, ETHER_ADDR_LEN);
477 printf("mac=%x:%x:%x:%x:%x:%x\n",
478 e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
/* Only entries still referenced by connections need hardware updates. */
480 if (atomic_load_acq_int(&e->refcnt)) {
481 if (neigh != e->neigh)
482 neigh_replace(e, neigh);
484 la = (struct llinfo_arp *)neigh->rt_llinfo;
485 if (e->state == L2T_STATE_RESOLVING) {
/* Resolution gave up (too many ARP tries): fail the queued packets. */
487 if (la->la_asked >= 5 /* arp_maxtries */) {
489 e->arpq_head = e->arpq_tail = NULL;
491 setup_l2e_send_pending(dev, NULL, e);
/* Entry was already programmed: rewrite HW only if the MAC changed. */
493 e->state = L2T_STATE_VALID;
494 if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), 6))
495 setup_l2e_send_pending(dev, NULL, e);
498 mtx_unlock(&e->lock);
/* Outside the entry lock: dispose of any packets taken off the arpq. */
501 handle_failed_resolution(dev, arpq)
/*
 * Allocate and initialize an L2T table with l2t_capacity entries.
 * Entry 0 is reserved (never handed out), so nfree starts at capacity - 1
 * and the rover begins at index 1.
 */
505 t3_init_l2t(unsigned int l2t_capacity)
508 int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
510 d = cxgb_alloc_mem(size);
514 d->nentries = l2t_capacity;
515 d->rover = &d->l2tab[1]; /* entry 0 is not used */
516 atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
517 rw_init(&d->lock, "L2T");
/* Per-entry init: every entry starts unused with its own sleep mutex. */
519 for (i = 0; i < l2t_capacity; ++i) {
521 d->l2tab[i].state = L2T_STATE_UNUSED;
522 mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
523 atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
/*
 * Tear down an L2T table: destroy the table rwlock and every per-entry
 * mutex. NOTE(review): the tail of this function (presumably freeing d
 * itself) runs past the end of this excerpt — confirm.
 */
529 t3_free_l2t(struct l2t_data *d)
533 rw_destroy(&d->lock);
534 for (i = 0; i < d->nentries; ++i)
535 mtx_destroy(&d->l2tab[i].lock);