1 /**************************************************************************
3 Copyright (c) 2007, Chelsio Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
39 #include <sys/mutex.h>
40 #if __FreeBSD_version > 700000
41 #include <sys/rwlock.h>
44 #include <sys/socket.h>
46 #include <net/ethernet.h>
47 #include <net/if_vlan_var.h>
48 #include <net/if_dl.h>
49 #include <net/route.h>
50 #include <netinet/in.h>
51 #include <netinet/if_ether.h>
54 #include <cxgb_include.h>
56 #include <dev/cxgb/cxgb_include.h>
59 #define VLAN_NONE 0xfff
60 #define SDL(s) ((struct sockaddr_dl *)s)
61 #define RT_ENADDR(sa) ((u_char *)LLADDR(SDL((sa))))
62 #define rt_expire rt_rmx.rmx_expire
65 struct callout la_timer;
66 struct rtentry *la_rt;
67 struct mbuf *la_hold; /* last packet until resolved/timeout */
68 u_short la_preempt; /* countdown for pre-expiry arps */
69 u_short la_asked; /* # requests sent */
73 * Module locking notes: There is a RW lock protecting the L2 table as a
74 * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
75 * under the protection of the table lock, individual entry changes happen
76 * while holding that entry's spinlock. The table lock nests outside the
77 * entry locks. Allocations of new entries take the table lock as writers so
78 * no other lookups can happen while allocating new entries. Entry updates
79 * take the table lock as readers so multiple entries can be updated in
80 * parallel. An L2T entry can be dropped by decrementing its reference count
81 * and therefore can happen in parallel with entry allocation but no entry
82 * can change state or increment its ref count during allocation as both of
83 * these perform lookups.
/*
 * Return the 802.1p priority bits for an L2T entry, used to build the
 * V_L2T_W_PRIO field of a CPL_L2T_WRITE_REQ.
 * NOTE(review): the function body is not visible in this excerpt (source
 * lines are missing); presumably it extracts the PCP bits from e->vlan --
 * confirm against the complete file.
 */
86 static inline unsigned int
87 vlan_prio(const struct l2t_entry *e)
/*
 * Hash an (IPv4 address, interface index) pair into an L2T hash-bucket
 * index.  The mask with (d->nentries - 1) only covers every bucket if
 * d->nentries is a power of two -- presumably guaranteed by the caller
 * of t3_init_l2t(); confirm.
 */
92 static inline unsigned int
93 arp_hash(u32 key, int ifindex, const struct l2t_data *d)
95 	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
99 neigh_replace(struct l2t_entry *e, struct rtentry *rt)
111  * Set up an L2T entry and send any packets waiting in the arp queue.  The
112  * supplied mbuf is used for the CPL_L2T_WRITE_REQ.  Must be called with the
/* NOTE(review): several source lines of this function are missing from this
 * excerpt (gaps in the embedded line numbers), including the handling of the
 * caller-supplied 'm' vs. the freshly allocated one below. */
116 setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
119 	struct cpl_l2t_write_req *req;
/* Allocate a fresh control mbuf for the write request; M_NOWAIT because this
 * can run in a context where sleeping is not allowed. */
122 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
128 	req = mtod(m, struct cpl_l2t_write_req *);
129 	m->m_pkthdr.len = m->m_len = sizeof(*req);
/* Build the CPL_L2T_WRITE_REQ: L2T index, SMT index, VLAN id and priority. */
131 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
132 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
133 	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
134 			    V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
135 			    V_L2T_W_PRIO(vlan_prio(e)));
137 	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
138 	m_set_priority(m, CPL_PRIORITY_CONTROL);
139 	cxgb_ofld_send(dev, m);
/* Drain the queue of packets that were waiting for ARP resolution. */
140 	while (e->arpq_head) {
142 		e->arpq_head = m->m_next;
144 		cxgb_ofld_send(dev, m);
/* Entry is now programmed in hardware and usable on the fast path. */
147 	e->state = L2T_STATE_VALID;
153  * Add a packet to an L2T entry's queue of packets awaiting resolution.
154  * Must be called with the entry's lock held.
/* NOTE(review): the list-head/empty-queue handling is missing from this
 * excerpt; only the non-empty-tail append is visible. */
157 arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
161 	e->arpq_tail->m_next = m;
/*
 * Slow-path transmit through an L2T entry: revalidate a stale entry, send
 * directly when valid, or queue the packet and kick off ARP resolution.
 * NOTE(review): the switch(e->state) header, 'goto again' labels and some
 * lock acquisitions are missing from this excerpt -- the case labels below
 * belong to a switch that is not visible here.
 */
168 t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e)
170 	struct rtentry *rt = e->neigh;
171 	struct sockaddr_in sin;
/* Build a sockaddr_in for the destination so arpresolve() can look it up. */
173 	bzero(&sin, sizeof(struct sockaddr_in));
174 	sin.sin_family = AF_INET;
175 	sin.sin_len = sizeof(struct sockaddr_in);
176 	sin.sin_addr.s_addr = e->addr;
178 	CTR2(KTR_CXGB, "send slow on rt=%p eaddr=0x%08x\n", rt, e->addr);
181 	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
182 		arpresolve(rt->rt_ifp, rt, NULL,
183 		    (struct sockaddr *)&sin, e->dmac);
/* Re-check under the entry lock: another thread may have changed state. */
185 		if (e->state == L2T_STATE_STALE)
186 			e->state = L2T_STATE_VALID;
187 		mtx_unlock(&e->lock);
/* Falls through to the VALID fast path once revalidation is kicked off. */
188 	case L2T_STATE_VALID:     /* fast-path, send the packet on */
189 		return cxgb_ofld_send(dev, m);
190 	case L2T_STATE_RESOLVING:
192 		if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
193 			mtx_unlock(&e->lock);
197 		mtx_unlock(&e->lock);
199 		 * Only the first packet added to the arpq should kick off
200 		 * resolution.  However, because the m_gethdr below can fail,
201 		 * we allow each packet added to the arpq to retry resolution
202 		 * as a way of recovering from transient memory exhaustion.
203 		 * A better way would be to use a work request to retry L2T
204 		 * entries when there's no memory.
/* arpresolve() returning 0 means the MAC is already known -- program the
 * hardware entry now and flush the pending queue. */
206 		if (arpresolve(rt->rt_ifp, rt, NULL,
207 			(struct sockaddr *)&sin, e->dmac) == 0) {
208 			CTR6(KTR_CXGB, "mac=%x:%x:%x:%x:%x:%x\n",
209 			    e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
211 			if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
216 				setup_l2e_send_pending(dev, m, e);
219 				mtx_unlock(&e->lock);
/*
 * Event-driven counterpart of t3_l2t_send_slow(): advance an L2T entry's
 * state machine without a caller-supplied payload packet.
 * NOTE(review): the declarations of 'm0' and 'rt', the switch(e->state)
 * header and the sin zero-initialization are missing from this excerpt --
 * the case labels below belong to a switch that is not visible here.
 */
226 t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
230 	struct sockaddr_in sin;
231 	sin.sin_family = AF_INET;
232 	sin.sin_len = sizeof(struct sockaddr_in);
233 	sin.sin_addr.s_addr = e->addr;
235 	if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
241 	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
242 		arpresolve(rt->rt_ifp, rt, NULL,
243 		    (struct sockaddr *)&sin, e->dmac);
/* Re-check under the entry lock before promoting the entry to VALID. */
245 		if (e->state == L2T_STATE_STALE) {
246 			e->state = L2T_STATE_VALID;
248 		mtx_unlock(&e->lock);
250 	case L2T_STATE_VALID:     /* fast-path, send the packet on */
252 	case L2T_STATE_RESOLVING:
254 		if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
255 			mtx_unlock(&e->lock);
258 		mtx_unlock(&e->lock);
261 		 * Only the first packet added to the arpq should kick off
262 		 * resolution.  However, because the m_gethdr above can fail,
263 		 * we allow each packet added to the arpq to retry resolution
264 		 * as a way of recovering from transient memory exhaustion.
265 		 * A better way would be to use a work request to retry L2T
266 		 * entries when there's no memory.
/* Fire-and-forget resolution; completion arrives via t3_l2t_update(). */
268 		arpresolve(rt->rt_ifp, rt, NULL,
269 		    (struct sockaddr *)&sin, e->dmac);
275  * Allocate a free L2T entry.  Must be called with l2t_data.lock held
/* NOTE(review): the early-return for nfree==0, the 'found:' label and the
 * hash-chain unlink statement are missing from this excerpt. */
277 static struct l2t_entry *
278 alloc_l2e(struct l2t_data *d)
280 	struct l2t_entry *end, *e, **p;
282 	if (!atomic_load_acq_int(&d->nfree))
/* Scan from the rover to the end of the table, then wrap to entry 1
 * (entry 0 is reserved -- see t3_init_l2t). */
285 	/* there's definitely a free entry */
286 	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
287 		if (atomic_load_acq_int(&e->refcnt) == 0)
290 	for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
293 	atomic_add_int(&d->nfree, -1);
296 	 * The entry we found may be an inactive entry that is
297 	 * presently in the hash table.  We need to remove it.
299 	if (e->state != L2T_STATE_UNUSED) {
300 		int hash = arp_hash(e->addr, e->ifindex, d);
/* Walk the bucket's singly linked list to find and unlink this entry. */
302 		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
307 		e->state = L2T_STATE_UNUSED;
314  * Called when an L2T entry has no more users.  The entry is left in the hash
315  * table since it is likely to be reused but we also bump nfree to indicate
316  * that the entry can be reallocated for a different neighbor.  We also drop
317  * the existing neighbor reference in case the neighbor is going away and is
318  * waiting on our reference.
320  * Because entries can be reallocated to other neighbors once their ref count
321  * drops to 0 we need to take the entry's lock to avoid races with a new
/* NOTE(review): the mtx_lock(), the neigh/rt hand-off inside the if-body and
 * the RTFREE of 'rt' are missing from this excerpt. */
325 t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
327 	struct rtentry *rt = NULL;
330 	if (atomic_load_acq_int(&e->refcnt) == 0) {  /* hasn't been recycled */
335 	mtx_unlock(&e->lock);
/* Entry is available again regardless of whether it was recycled. */
336 	atomic_add_int(&d->nfree, 1);
343  * Update an L2T entry that was previously used for the same next hop as neigh.
344  * Must be called with softirqs disabled.
/* NOTE(review): "softirqs" is a Linuxism carried over into this FreeBSD
 * driver -- the actual requirement here is presumably "not in a sleepable
 * context"; confirm.  The 'else' between the VALID and STALE assignments is
 * missing from this excerpt. */
347 reuse_entry(struct l2t_entry *e, struct rtentry *neigh)
349 	struct llinfo_arp *la;
351 	la = (struct llinfo_arp *)neigh->rt_llinfo;
353 	mtx_lock(&e->lock);			/* avoid race with t3_l2t_free */
354 	if (neigh != e->neigh)
355 		neigh_replace(e, neigh);
/* Pick the new state: MAC changed or ARP entry not yet expired-valid ->
 * RESOLVING; otherwise VALID if nothing is held pending, else STALE. */
357 	if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), sizeof(e->dmac)) ||
358 	    (neigh->rt_expire > time_uptime))
359 		e->state = L2T_STATE_RESOLVING;
360 	else if (la->la_hold == NULL)
361 		e->state = L2T_STATE_VALID;
363 		e->state = L2T_STATE_STALE;
364 	mtx_unlock(&e->lock);
/*
 * Look up or allocate an L2T entry for (destination address, interface,
 * SMT index).  Takes a reference on the returned entry.
 * NOTE(review): the 'sa' parameter on the continuation line, the rw_wlock
 * of d->lock, the found-entry refcount bump / early return, the alloc_l2e()
 * call and the assignments of e->addr / e->ifindex are missing from this
 * excerpt.
 */
368 t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh, struct ifnet *ifp,
372 	struct l2t_data *d = L2DATA(dev);
373 	u32 addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
374 	int ifidx = neigh->rt_ifp->if_index;
375 	int hash = arp_hash(addr, ifidx, d);
376 	unsigned int smt_idx = ((struct port_info *)ifp->if_softc)->port_id;
/* First see if an entry for this (addr, ifidx, smt_idx) already exists. */
379 	for (e = d->l2tab[hash].first; e; e = e->next)
380 		if (e->addr == addr && e->ifindex == ifidx &&
381 		    e->smt_idx == smt_idx) {
/* refcnt==1 means we are the only user: safe to refresh the neighbour. */
383 			if (atomic_load_acq_int(&e->refcnt) == 1)
384 				reuse_entry(e, neigh);
388 	/* Need to allocate a new entry */
391 		mtx_lock(&e->lock);	    /* avoid race with t3_l2t_free */
/* Insert at the head of the hash bucket, then drop the table lock --
 * the rest of the initialization proceeds under the entry lock only. */
392 		e->next = d->l2tab[hash].first;
393 		d->l2tab[hash].first = e;
394 		rw_wunlock(&d->lock);
396 		e->state = L2T_STATE_RESOLVING;
399 		e->smt_idx = smt_idx;
400 		atomic_store_rel_int(&e->refcnt, 1);
404 		neigh_replace(e, neigh);
407 		 * XXX need to add accessor function for vlan tag
/* NOTE(review): 'neigh->dev' / VLAN_DEV_INFO look like unconverted Linux
 * code -- likely compiled out or dead in the FreeBSD build; confirm. */
409 		if (neigh->rt_ifp->if_vlantrunk)
410 			e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
414 		mtx_unlock(&e->lock);
420 	rw_wunlock(&d->lock);
425  * Called when address resolution fails for an L2T entry to handle packets
426  * on the arpq head.  If a packet specifies a failure handler it is invoked,
427  * otherwise the packet is sent to the TOE.
429  * XXX: maybe we should abandon the latter behavior and just require a failure
/* NOTE(review): the loop that walks the arpq chain (advancing 'm' and
 * detaching m_next) is missing from this excerpt. */
433 handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq)
437 	struct mbuf *m = arpq;
439 	struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
/* Per-packet failure callback takes precedence over the default TOE send. */
444 	if (cb->arp_failure_handler)
445 		cb->arp_failure_handler(dev, m);
448 		cxgb_ofld_send(dev, m);
/*
 * Called when an ARP/neighbour update arrives for 'sa': refresh the matching
 * L2T entry's MAC, push any queued packets to hardware, or hand the queue to
 * the failure path if resolution gave up.
 * NOTE(review): the rw_rlock of d->lock, the found-entry goto, the
 * mtx_lock(&e->lock), the arpq hand-off into the local 'arpq' variable and
 * the early returns are missing from this excerpt.  The printf() calls at
 * 476/480 look like leftover debugging -- candidates for removal or CTR
 * conversion once the full file is in view.
 */
454 t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh,
455     uint8_t *enaddr, struct sockaddr *sa)
458 	struct mbuf *arpq = NULL;
459 	struct l2t_data *d = L2DATA(dev);
460 	u32 addr = *(u32 *) &((struct sockaddr_in *)sa)->sin_addr;
461 	int ifidx = neigh->rt_ifp->if_index;
462 	int hash = arp_hash(addr, ifidx, d);
463 	struct llinfo_arp *la;
466 	for (e = d->l2tab[hash].first; e; e = e->next)
467 		if (e->addr == addr && e->ifindex == ifidx) {
/* No matching entry: nothing to update for this address. */
471 	rw_runlock(&d->lock);
472 	CTR1(KTR_CXGB, "t3_l2t_update: addr=0x%08x not found", addr);
476 	printf("found 0x%08x\n", addr);
478 	rw_runlock(&d->lock);
479 	memcpy(e->dmac, enaddr, ETHER_ADDR_LEN);
480 	printf("mac=%x:%x:%x:%x:%x:%x\n",
481 	    e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
/* Only touch entries that still have users. */
483 	if (atomic_load_acq_int(&e->refcnt)) {
484 		if (neigh != e->neigh)
485 			neigh_replace(e, neigh);
487 		la = (struct llinfo_arp *)neigh->rt_llinfo;
488 		if (e->state == L2T_STATE_RESOLVING) {
/* ARP has retried too many times: give up and fail the queued packets. */
490 			if (la->la_asked >= 5 /* arp_maxtries */) {
492 				e->arpq_head = e->arpq_tail = NULL;
494 				setup_l2e_send_pending(dev, NULL, e);
/* Entry was already programmed; reprogram only if the MAC changed. */
496 			e->state = L2T_STATE_VALID;
497 			if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), 6))
498 				setup_l2e_send_pending(dev, NULL, e);
501 	mtx_unlock(&e->lock);
/* Outside the entry lock: dispose of any packets whose resolution failed. */
504 		handle_failed_resolution(dev, arpq);
/*
 * Allocate and initialize an L2 table with 'l2t_capacity' entries.
 * Entry 0 is reserved (never handed out), so only capacity-1 entries are
 * counted as free.  NOTE(review): the declaration of 'd', the NULL check
 * after cxgb_alloc_mem() and the final 'return d;' are missing from this
 * excerpt.  l2t_capacity is presumably required to be a power of two for
 * arp_hash()'s mask to work -- confirm at the call site.
 */
508 t3_init_l2t(unsigned int l2t_capacity)
511 	int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
513 	d = cxgb_alloc_mem(size);
517 	d->nentries = l2t_capacity;
518 	d->rover = &d->l2tab[1];	/* entry 0 is not used */
519 	atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
520 	rw_init(&d->lock, "L2T");
/* Per-entry init: unused state, its own mutex, zero reference count. */
522 	for (i = 0; i < l2t_capacity; ++i) {
524 		d->l2tab[i].state = L2T_STATE_UNUSED;
525 		mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
526 		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
/*
 * Tear down an L2 table created by t3_init_l2t(): destroy the table rwlock
 * and every per-entry mutex.  NOTE(review): this excerpt ends mid-function;
 * the memory release (presumably cxgb_free_mem(d)) follows in lines not
 * visible here.
 */
532 t3_free_l2t(struct l2t_data *d)
536 	rw_destroy(&d->lock);
537 	for (i = 0; i < d->nentries; ++i)
538 		mtx_destroy(&d->l2tab[i].lock);