]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/sfxge/sfxge.c
Merge llvm, clang, lld, lldb, compiler-rt and libc++ r306325, and update
[FreeBSD/FreeBSD.git] / sys / dev / sfxge / sfxge.c
1 /*-
2  * Copyright (c) 2010-2016 Solarflare Communications Inc.
3  * All rights reserved.
4  *
5  * This software was developed in part by Philip Paeps under contract for
6  * Solarflare Communications, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright notice,
12  *    this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  *    this list of conditions and the following disclaimer in the documentation
15  *    and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * The views and conclusions contained in the software and documentation are
30  * those of the authors and should not be interpreted as representing official
31  * policies, either expressed or implied, of the FreeBSD Project.
32  */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include "opt_rss.h"
38
39 #include <sys/param.h>
40 #include <sys/kernel.h>
41 #include <sys/bus.h>
42 #include <sys/rman.h>
43 #include <sys/lock.h>
44 #include <sys/module.h>
45 #include <sys/mutex.h>
46 #include <sys/smp.h>
47 #include <sys/socket.h>
48 #include <sys/taskqueue.h>
49 #include <sys/sockio.h>
50 #include <sys/sysctl.h>
51 #include <sys/priv.h>
52 #include <sys/syslog.h>
53
54 #include <dev/pci/pcireg.h>
55 #include <dev/pci/pcivar.h>
56
57 #include <net/ethernet.h>
58 #include <net/if.h>
59 #include <net/if_var.h>
60 #include <net/if_media.h>
61 #include <net/if_types.h>
62
63 #ifdef RSS
64 #include <net/rss_config.h>
65 #endif
66
67 #include "common/efx.h"
68
69 #include "sfxge.h"
70 #include "sfxge_rx.h"
71 #include "sfxge_ioc.h"
72 #include "sfxge_version.h"
73
74 #define SFXGE_CAP (IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM |                 \
75                    IFCAP_RXCSUM | IFCAP_TXCSUM |                        \
76                    IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |              \
77                    IFCAP_TSO4 | IFCAP_TSO6 |                            \
78                    IFCAP_JUMBO_MTU |                                    \
79                    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS)
80 #define SFXGE_CAP_ENABLE SFXGE_CAP
81 #define SFXGE_CAP_FIXED (IFCAP_VLAN_MTU |                               \
82                          IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS)
83
84 MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");
85
86
87 SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0,
88             "SFXGE driver parameters");
89
90 #define SFXGE_PARAM_RX_RING     SFXGE_PARAM(rx_ring)
91 static int sfxge_rx_ring_entries = SFXGE_NDESCS;
92 TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
93 SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
94            &sfxge_rx_ring_entries, 0,
95            "Maximum number of descriptors in a receive ring");
96
97 #define SFXGE_PARAM_TX_RING     SFXGE_PARAM(tx_ring)
98 static int sfxge_tx_ring_entries = SFXGE_NDESCS;
99 TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
100 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
101            &sfxge_tx_ring_entries, 0,
102            "Maximum number of descriptors in a transmit ring");
103
104 #define SFXGE_PARAM_RESTART_ATTEMPTS    SFXGE_PARAM(restart_attempts)
105 static int sfxge_restart_attempts = 3;
106 TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts);
107 SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN,
108            &sfxge_restart_attempts, 0,
109            "Maximum number of attempts to bring interface up after reset");
110
111 #if EFSYS_OPT_MCDI_LOGGING
112 #define SFXGE_PARAM_MCDI_LOGGING        SFXGE_PARAM(mcdi_logging)
113 static int sfxge_mcdi_logging = 0;
114 TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging);
115 #endif
116
117 static void
118 sfxge_reset(void *arg, int npending);
119
120 static int
121 sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
122 {
123         efx_drv_limits_t limits;
124         int rc;
125         unsigned int evq_max;
126         uint32_t evq_allocated;
127         uint32_t rxq_allocated;
128         uint32_t txq_allocated;
129
130         /*
131          * Limit the number of event queues to:
132          *  - number of CPUs
133          *  - hardwire maximum RSS channels
134          *  - administratively specified maximum RSS channels
135          */
136 #ifdef RSS
137         /*
138          * Avoid extra limitations so that the number of queues
139          * may be configured at administrator's will
140          */
141         evq_max = MIN(MAX(rss_getnumbuckets(), 1), EFX_MAXRSS);
142 #else
143         evq_max = MIN(mp_ncpus, EFX_MAXRSS);
144 #endif
145         if (sc->max_rss_channels > 0)
146                 evq_max = MIN(evq_max, sc->max_rss_channels);
147
148         memset(&limits, 0, sizeof(limits));
149
150         limits.edl_min_evq_count = 1;
151         limits.edl_max_evq_count = evq_max;
152         limits.edl_min_txq_count = SFXGE_TXQ_NTYPES;
153         limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1;
154         limits.edl_min_rxq_count = 1;
155         limits.edl_max_rxq_count = evq_max;
156
157         efx_nic_set_drv_limits(sc->enp, &limits);
158
159         if ((rc = efx_nic_init(sc->enp)) != 0)
160                 return (rc);
161
162         rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
163                                  &txq_allocated);
164         if (rc != 0) {
165                 efx_nic_fini(sc->enp);
166                 return (rc);
167         }
168
169         KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES,
170                 ("txq_allocated < SFXGE_TXQ_NTYPES"));
171
172         sc->evq_max = MIN(evq_allocated, evq_max);
173         sc->evq_max = MIN(rxq_allocated, sc->evq_max);
174         sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1),
175                           sc->evq_max);
176
177         KASSERT(sc->evq_max <= evq_max,
178                 ("allocated more than maximum requested"));
179
180 #ifdef RSS
181         if (sc->evq_max < rss_getnumbuckets())
182                 device_printf(sc->dev, "The number of allocated queues (%u) "
183                               "is less than the number of RSS buckets (%u); "
184                               "performance degradation might be observed",
185                               sc->evq_max, rss_getnumbuckets());
186 #endif
187
188         /*
189          * NIC is kept initialized in the case of success to be able to
190          * initialize port to find out media types.
191          */
192         return (0);
193 }
194
195 static int
196 sfxge_set_drv_limits(struct sfxge_softc *sc)
197 {
198         efx_drv_limits_t limits;
199
200         memset(&limits, 0, sizeof(limits));
201
202         /* Limits are strict since take into account initial estimation */
203         limits.edl_min_evq_count = limits.edl_max_evq_count =
204             sc->intr.n_alloc;
205         limits.edl_min_txq_count = limits.edl_max_txq_count =
206             sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1;
207         limits.edl_min_rxq_count = limits.edl_max_rxq_count =
208             sc->intr.n_alloc;
209
210         return (efx_nic_set_drv_limits(sc->enp, &limits));
211 }
212
213 static int
214 sfxge_start(struct sfxge_softc *sc)
215 {
216         int rc;
217
218         SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
219
220         if (sc->init_state == SFXGE_STARTED)
221                 return (0);
222
223         if (sc->init_state != SFXGE_REGISTERED) {
224                 rc = EINVAL;
225                 goto fail;
226         }
227
228         /* Set required resource limits */
229         if ((rc = sfxge_set_drv_limits(sc)) != 0)
230                 goto fail;
231
232         if ((rc = efx_nic_init(sc->enp)) != 0)
233                 goto fail;
234
235         /* Start processing interrupts. */
236         if ((rc = sfxge_intr_start(sc)) != 0)
237                 goto fail2;
238
239         /* Start processing events. */
240         if ((rc = sfxge_ev_start(sc)) != 0)
241                 goto fail3;
242
243         /* Fire up the port. */
244         if ((rc = sfxge_port_start(sc)) != 0)
245                 goto fail4;
246
247         /* Start the receiver side. */
248         if ((rc = sfxge_rx_start(sc)) != 0)
249                 goto fail5;
250
251         /* Start the transmitter side. */
252         if ((rc = sfxge_tx_start(sc)) != 0)
253                 goto fail6;
254
255         sc->init_state = SFXGE_STARTED;
256
257         /* Tell the stack we're running. */
258         sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
259         sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
260
261         return (0);
262
263 fail6:
264         sfxge_rx_stop(sc);
265
266 fail5:
267         sfxge_port_stop(sc);
268
269 fail4:
270         sfxge_ev_stop(sc);
271
272 fail3:
273         sfxge_intr_stop(sc);
274
275 fail2:
276         efx_nic_fini(sc->enp);
277
278 fail:
279         device_printf(sc->dev, "sfxge_start: %d\n", rc);
280
281         return (rc);
282 }
283
/*
 * if_init callback: start the adapter under the adapter lock.
 * The result of sfxge_start() is deliberately discarded because the
 * callback has no way to report it.
 */
static void
sfxge_if_init(void *arg)
{
        struct sfxge_softc *sc = arg;

        SFXGE_ADAPTER_LOCK(sc);
        (void)sfxge_start(sc);
        SFXGE_ADAPTER_UNLOCK(sc);
}
295
296 static void
297 sfxge_stop(struct sfxge_softc *sc)
298 {
299         SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
300
301         if (sc->init_state != SFXGE_STARTED)
302                 return;
303
304         sc->init_state = SFXGE_REGISTERED;
305
306         /* Stop the transmitter. */
307         sfxge_tx_stop(sc);
308
309         /* Stop the receiver. */
310         sfxge_rx_stop(sc);
311
312         /* Stop the port. */
313         sfxge_port_stop(sc);
314
315         /* Stop processing events. */
316         sfxge_ev_stop(sc);
317
318         /* Stop processing interrupts. */
319         sfxge_intr_stop(sc);
320
321         efx_nic_fini(sc->enp);
322
323         sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
324 }
325
326
327 static int
328 sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
329 {
330         efx_vpd_value_t value;
331         int rc = 0;
332
333         switch (ioc->u.vpd.op) {
334         case SFXGE_VPD_OP_GET_KEYWORD:
335                 value.evv_tag = ioc->u.vpd.tag;
336                 value.evv_keyword = ioc->u.vpd.keyword;
337                 rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
338                 if (rc != 0)
339                         break;
340                 ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
341                 if (ioc->u.vpd.payload != 0) {
342                         rc = copyout(value.evv_value, ioc->u.vpd.payload,
343                                      ioc->u.vpd.len);
344                 }
345                 break;
346         case SFXGE_VPD_OP_SET_KEYWORD:
347                 if (ioc->u.vpd.len > sizeof(value.evv_value))
348                         return (EINVAL);
349                 value.evv_tag = ioc->u.vpd.tag;
350                 value.evv_keyword = ioc->u.vpd.keyword;
351                 value.evv_length = ioc->u.vpd.len;
352                 rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
353                 if (rc != 0)
354                         break;
355                 rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
356                 if (rc != 0)
357                         break;
358                 rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
359                 if (rc != 0)
360                         break;
361                 rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
362                 break;
363         default:
364                 rc = EOPNOTSUPP;
365                 break;
366         }
367
368         return (rc);
369 }
370
371 static int
372 sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
373 {
374         switch (ioc->op) {
375         case SFXGE_MCDI_IOC:
376                 return (sfxge_mcdi_ioctl(sc, ioc));
377         case SFXGE_NVRAM_IOC:
378                 return (sfxge_nvram_ioctl(sc, ioc));
379         case SFXGE_VPD_IOC:
380                 return (sfxge_vpd_ioctl(sc, ioc));
381         default:
382                 return (EOPNOTSUPP);
383         }
384 }
385
386
387 static int
388 sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
389 {
390         struct sfxge_softc *sc;
391         struct ifreq *ifr;
392         sfxge_ioc_t ioc;
393         int error;
394
395         ifr = (struct ifreq *)data;
396         sc = ifp->if_softc;
397         error = 0;
398
399         switch (command) {
400         case SIOCSIFFLAGS:
401                 SFXGE_ADAPTER_LOCK(sc);
402                 if (ifp->if_flags & IFF_UP) {
403                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
404                                 if ((ifp->if_flags ^ sc->if_flags) &
405                                     (IFF_PROMISC | IFF_ALLMULTI)) {
406                                         sfxge_mac_filter_set(sc);
407                                 }
408                         } else
409                                 sfxge_start(sc);
410                 } else
411                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
412                                 sfxge_stop(sc);
413                 sc->if_flags = ifp->if_flags;
414                 SFXGE_ADAPTER_UNLOCK(sc);
415                 break;
416         case SIOCSIFMTU:
417                 if (ifr->ifr_mtu == ifp->if_mtu) {
418                         /* Nothing to do */
419                         error = 0;
420                 } else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
421                         error = EINVAL;
422                 } else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
423                         ifp->if_mtu = ifr->ifr_mtu;
424                         error = 0;
425                 } else {
426                         /* Restart required */
427                         SFXGE_ADAPTER_LOCK(sc);
428                         sfxge_stop(sc);
429                         ifp->if_mtu = ifr->ifr_mtu;
430                         error = sfxge_start(sc);
431                         SFXGE_ADAPTER_UNLOCK(sc);
432                         if (error != 0) {
433                                 ifp->if_flags &= ~IFF_UP;
434                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
435                                 if_down(ifp);
436                         }
437                 }
438                 break;
439         case SIOCADDMULTI:
440         case SIOCDELMULTI:
441                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
442                         sfxge_mac_filter_set(sc);
443                 break;
444         case SIOCSIFCAP:
445         {
446                 int reqcap = ifr->ifr_reqcap;
447                 int capchg_mask;
448
449                 SFXGE_ADAPTER_LOCK(sc);
450
451                 /* Capabilities to be changed in accordance with request */
452                 capchg_mask = ifp->if_capenable ^ reqcap;
453
454                 /*
455                  * The networking core already rejects attempts to
456                  * enable capabilities we don't have.  We still have
457                  * to reject attempts to disable capabilities that we
458                  * can't (yet) disable.
459                  */
460                 KASSERT((reqcap & ~ifp->if_capabilities) == 0,
461                     ("Unsupported capabilities 0x%x requested 0x%x vs "
462                      "supported 0x%x",
463                      reqcap & ~ifp->if_capabilities,
464                      reqcap , ifp->if_capabilities));
465                 if (capchg_mask & SFXGE_CAP_FIXED) {
466                         error = EINVAL;
467                         SFXGE_ADAPTER_UNLOCK(sc);
468                         break;
469                 }
470
471                 /* Check request before any changes */
472                 if ((capchg_mask & IFCAP_TSO4) &&
473                     (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
474                         error = EAGAIN;
475                         SFXGE_ADAPTER_UNLOCK(sc);
476                         if_printf(ifp, "enable txcsum before tso4\n");
477                         break;
478                 }
479                 if ((capchg_mask & IFCAP_TSO6) &&
480                     (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
481                         error = EAGAIN;
482                         SFXGE_ADAPTER_UNLOCK(sc);
483                         if_printf(ifp, "enable txcsum6 before tso6\n");
484                         break;
485                 }
486
487                 if (reqcap & IFCAP_TXCSUM) {
488                         ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
489                 } else {
490                         ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
491                         if (reqcap & IFCAP_TSO4) {
492                                 reqcap &= ~IFCAP_TSO4;
493                                 if_printf(ifp,
494                                     "tso4 disabled due to -txcsum\n");
495                         }
496                 }
497                 if (reqcap & IFCAP_TXCSUM_IPV6) {
498                         ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
499                 } else {
500                         ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
501                         if (reqcap & IFCAP_TSO6) {
502                                 reqcap &= ~IFCAP_TSO6;
503                                 if_printf(ifp,
504                                     "tso6 disabled due to -txcsum6\n");
505                         }
506                 }
507
508                 /*
509                  * The kernel takes both IFCAP_TSOx and CSUM_TSO into
510                  * account before using TSO. So, we do not touch
511                  * checksum flags when IFCAP_TSOx is modified.
512                  * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
513                  * but both bits are set in IPv4 and IPv6 mbufs.
514                  */
515
516                 ifp->if_capenable = reqcap;
517
518                 SFXGE_ADAPTER_UNLOCK(sc);
519                 break;
520         }
521         case SIOCSIFMEDIA:
522         case SIOCGIFMEDIA:
523                 error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
524                 break;
525 #ifdef SIOCGI2C
526         case SIOCGI2C:
527         {
528                 struct ifi2creq i2c;
529
530                 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
531                 if (error != 0)
532                         break;
533
534                 if (i2c.len > sizeof(i2c.data)) {
535                         error = EINVAL;
536                         break;
537                 }
538
539                 SFXGE_ADAPTER_LOCK(sc);
540                 error = efx_phy_module_get_info(sc->enp, i2c.dev_addr,
541                                                 i2c.offset, i2c.len,
542                                                 &i2c.data[0]);
543                 SFXGE_ADAPTER_UNLOCK(sc);
544                 if (error == 0)
545                         error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
546                 break;
547         }
548 #endif
549         case SIOCGPRIVATE_0:
550                 error = priv_check(curthread, PRIV_DRIVER);
551                 if (error != 0)
552                         break;
553                 error = copyin(ifr->ifr_data, &ioc, sizeof(ioc));
554                 if (error != 0)
555                         return (error);
556                 error = sfxge_private_ioctl(sc, &ioc);
557                 if (error == 0) {
558                         error = copyout(&ioc, ifr->ifr_data, sizeof(ioc));
559                 }
560                 break;
561         default:
562                 error = ether_ioctl(ifp, command, data);
563         }
564
565         return (error);
566 }
567
568 static void
569 sfxge_ifnet_fini(struct ifnet *ifp)
570 {
571         struct sfxge_softc *sc = ifp->if_softc;
572
573         SFXGE_ADAPTER_LOCK(sc);
574         sfxge_stop(sc);
575         SFXGE_ADAPTER_UNLOCK(sc);
576
577         ifmedia_removeall(&sc->media);
578         ether_ifdetach(ifp);
579         if_free(ifp);
580 }
581
582 static int
583 sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
584 {
585         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
586         device_t dev;
587         int rc;
588
589         dev = sc->dev;
590         sc->ifnet = ifp;
591
592         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
593         ifp->if_init = sfxge_if_init;
594         ifp->if_softc = sc;
595         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
596         ifp->if_ioctl = sfxge_if_ioctl;
597
598         ifp->if_capabilities = SFXGE_CAP;
599         ifp->if_capenable = SFXGE_CAP_ENABLE;
600         ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE;
601         ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG;
602         ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
603
604 #ifdef SFXGE_LRO
605         ifp->if_capabilities |= IFCAP_LRO;
606         ifp->if_capenable |= IFCAP_LRO;
607 #endif
608
609         if (encp->enc_hw_tx_insert_vlan_enabled) {
610                 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
611                 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
612         }
613         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
614                            CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
615
616         ether_ifattach(ifp, encp->enc_mac_addr);
617
618         ifp->if_transmit = sfxge_if_transmit;
619         ifp->if_qflush = sfxge_if_qflush;
620
621         ifp->if_get_counter = sfxge_get_counter;
622
623         DBGPRINT(sc->dev, "ifmedia_init");
624         if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
625                 goto fail;
626
627         return (0);
628
629 fail:
630         ether_ifdetach(sc->ifnet);
631         return (rc);
632 }
633
634 void
635 sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
636 {
637         KASSERT(sc->buffer_table_next + n <=
638                 efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
639                 ("buffer table full"));
640
641         *idp = sc->buffer_table_next;
642         sc->buffer_table_next += n;
643 }
644
645 static int
646 sfxge_bar_init(struct sfxge_softc *sc)
647 {
648         efsys_bar_t *esbp = &sc->bar;
649
650         esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR);
651         if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
652             &esbp->esb_rid, RF_ACTIVE)) == NULL) {
653                 device_printf(sc->dev, "Cannot allocate BAR region %d\n",
654                     EFX_MEM_BAR);
655                 return (ENXIO);
656         }
657         esbp->esb_tag = rman_get_bustag(esbp->esb_res);
658         esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
659
660         SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
661
662         return (0);
663 }
664
665 static void
666 sfxge_bar_fini(struct sfxge_softc *sc)
667 {
668         efsys_bar_t *esbp = &sc->bar;
669
670         bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
671             esbp->esb_res);
672         SFXGE_BAR_LOCK_DESTROY(esbp);
673 }
674
675 static int
676 sfxge_create(struct sfxge_softc *sc)
677 {
678         device_t dev;
679         efx_nic_t *enp;
680         int error;
681         char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
682 #if EFSYS_OPT_MCDI_LOGGING
683         char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))];
684 #endif
685
686         dev = sc->dev;
687
688         SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
689
690         sc->max_rss_channels = 0;
691         snprintf(rss_param_name, sizeof(rss_param_name),
692                  SFXGE_PARAM(%d.max_rss_channels),
693                  (int)device_get_unit(dev));
694         TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
695 #if EFSYS_OPT_MCDI_LOGGING
696         sc->mcdi_logging = sfxge_mcdi_logging;
697         snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name),
698                  SFXGE_PARAM(%d.mcdi_logging),
699                  (int)device_get_unit(dev));
700         TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging);
701 #endif
702
703         sc->stats_node = SYSCTL_ADD_NODE(
704                 device_get_sysctl_ctx(dev),
705                 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
706                 OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
707         if (sc->stats_node == NULL) {
708                 error = ENOMEM;
709                 goto fail;
710         }
711
712         TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
713
714         (void) pci_enable_busmaster(dev);
715
716         /* Initialize DMA mappings. */
717         DBGPRINT(sc->dev, "dma_init...");
718         if ((error = sfxge_dma_init(sc)) != 0)
719                 goto fail;
720
721         /* Map the device registers. */
722         DBGPRINT(sc->dev, "bar_init...");
723         if ((error = sfxge_bar_init(sc)) != 0)
724                 goto fail;
725
726         error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
727             &sc->family);
728         KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
729
730         DBGPRINT(sc->dev, "nic_create...");
731
732         /* Create the common code nic object. */
733         SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
734                               device_get_nameunit(sc->dev), "nic");
735         if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
736             &sc->bar, &sc->enp_lock, &enp)) != 0)
737                 goto fail3;
738         sc->enp = enp;
739
740         /* Initialize MCDI to talk to the microcontroller. */
741         DBGPRINT(sc->dev, "mcdi_init...");
742         if ((error = sfxge_mcdi_init(sc)) != 0)
743                 goto fail4;
744
745         /* Probe the NIC and build the configuration data area. */
746         DBGPRINT(sc->dev, "nic_probe...");
747         if ((error = efx_nic_probe(enp)) != 0)
748                 goto fail5;
749
750         if (!ISP2(sfxge_rx_ring_entries) ||
751             (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
752             (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
753                 log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
754                     SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
755                     EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
756                 error = EINVAL;
757                 goto fail_rx_ring_entries;
758         }
759         sc->rxq_entries = sfxge_rx_ring_entries;
760
761         if (!ISP2(sfxge_tx_ring_entries) ||
762             (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
763             (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) {
764                 log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
765                     SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
766                     EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)));
767                 error = EINVAL;
768                 goto fail_tx_ring_entries;
769         }
770         sc->txq_entries = sfxge_tx_ring_entries;
771
772         SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
773                           SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
774                           OID_AUTO, "version", CTLFLAG_RD,
775                           SFXGE_VERSION_STRING, 0,
776                           "Driver version");
777
778         SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
779                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
780                         OID_AUTO, "phy_type", CTLFLAG_RD,
781                         NULL, efx_nic_cfg_get(enp)->enc_phy_type,
782                         "PHY type");
783
784         /* Initialize the NVRAM. */
785         DBGPRINT(sc->dev, "nvram_init...");
786         if ((error = efx_nvram_init(enp)) != 0)
787                 goto fail6;
788
789         /* Initialize the VPD. */
790         DBGPRINT(sc->dev, "vpd_init...");
791         if ((error = efx_vpd_init(enp)) != 0)
792                 goto fail7;
793
794         efx_mcdi_new_epoch(enp);
795
796         /* Reset the NIC. */
797         DBGPRINT(sc->dev, "nic_reset...");
798         if ((error = efx_nic_reset(enp)) != 0)
799                 goto fail8;
800
801         /* Initialize buffer table allocation. */
802         sc->buffer_table_next = 0;
803
804         /*
805          * Guarantee minimum and estimate maximum number of event queues
806          * to take it into account when MSI-X interrupts are allocated.
807          * It initializes NIC and keeps it initialized on success.
808          */
809         if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
810                 goto fail8;
811
812         /* Set up interrupts. */
813         DBGPRINT(sc->dev, "intr_init...");
814         if ((error = sfxge_intr_init(sc)) != 0)
815                 goto fail9;
816
817         /* Initialize event processing state. */
818         DBGPRINT(sc->dev, "ev_init...");
819         if ((error = sfxge_ev_init(sc)) != 0)
820                 goto fail11;
821
822         /* Initialize port state. */
823         DBGPRINT(sc->dev, "port_init...");
824         if ((error = sfxge_port_init(sc)) != 0)
825                 goto fail12;
826
827         /* Initialize receive state. */
828         DBGPRINT(sc->dev, "rx_init...");
829         if ((error = sfxge_rx_init(sc)) != 0)
830                 goto fail13;
831
832         /* Initialize transmit state. */
833         DBGPRINT(sc->dev, "tx_init...");
834         if ((error = sfxge_tx_init(sc)) != 0)
835                 goto fail14;
836
837         sc->init_state = SFXGE_INITIALIZED;
838
839         DBGPRINT(sc->dev, "success");
840         return (0);
841
842 fail14:
843         sfxge_rx_fini(sc);
844
845 fail13:
846         sfxge_port_fini(sc);
847
848 fail12:
849         sfxge_ev_fini(sc);
850
851 fail11:
852         sfxge_intr_fini(sc);
853
854 fail9:
855         efx_nic_fini(sc->enp);
856
857 fail8:
858         efx_vpd_fini(enp);
859
860 fail7:
861         efx_nvram_fini(enp);
862
863 fail6:
864 fail_tx_ring_entries:
865 fail_rx_ring_entries:
866         efx_nic_unprobe(enp);
867
868 fail5:
869         sfxge_mcdi_fini(sc);
870
871 fail4:
872         sc->enp = NULL;
873         efx_nic_destroy(enp);
874         SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
875
876 fail3:
877         sfxge_bar_fini(sc);
878         (void) pci_disable_busmaster(sc->dev);
879
880 fail:
881         DBGPRINT(sc->dev, "failed %d", error);
882         sc->dev = NULL;
883         SFXGE_ADAPTER_LOCK_DESTROY(sc);
884         return (error);
885 }
886
887 static void
888 sfxge_destroy(struct sfxge_softc *sc)
889 {
890         efx_nic_t *enp;
891
892         /* Clean up transmit state. */
893         sfxge_tx_fini(sc);
894
895         /* Clean up receive state. */
896         sfxge_rx_fini(sc);
897
898         /* Clean up port state. */
899         sfxge_port_fini(sc);
900
901         /* Clean up event processing state. */
902         sfxge_ev_fini(sc);
903
904         /* Clean up interrupts. */
905         sfxge_intr_fini(sc);
906
907         /* Tear down common code subsystems. */
908         efx_nic_reset(sc->enp);
909         efx_vpd_fini(sc->enp);
910         efx_nvram_fini(sc->enp);
911         efx_nic_unprobe(sc->enp);
912
913         /* Tear down MCDI. */
914         sfxge_mcdi_fini(sc);
915
916         /* Destroy common code context. */
917         enp = sc->enp;
918         sc->enp = NULL;
919         efx_nic_destroy(enp);
920
921         /* Free DMA memory. */
922         sfxge_dma_fini(sc);
923
924         /* Free mapped BARs. */
925         sfxge_bar_fini(sc);
926
927         (void) pci_disable_busmaster(sc->dev);
928
929         taskqueue_drain(taskqueue_thread, &sc->task_reset);
930
931         /* Destroy the softc lock. */
932         SFXGE_ADAPTER_LOCK_DESTROY(sc);
933 }
934
935 static int
936 sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
937 {
938         struct sfxge_softc *sc = arg1;
939         efx_vpd_value_t value;
940         int rc;
941
942         value.evv_tag = arg2 >> 16;
943         value.evv_keyword = arg2 & 0xffff;
944         if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
945             != 0)
946                 return (rc);
947
948         return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
949 }
950
951 static void
952 sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
953                   efx_vpd_tag_t tag, const char *keyword)
954 {
955         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
956         efx_vpd_value_t value;
957
958         /* Check whether VPD tag/keyword is present */
959         value.evv_tag = tag;
960         value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
961         if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
962                 return;
963
964         SYSCTL_ADD_PROC(
965                 ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD,
966                 sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
967                 sfxge_vpd_handler, "A", "");
968 }
969
970 static int
971 sfxge_vpd_init(struct sfxge_softc *sc)
972 {
973         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
974         struct sysctl_oid *vpd_node;
975         struct sysctl_oid_list *vpd_list;
976         char keyword[3];
977         efx_vpd_value_t value;
978         int rc;
979
980         if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
981                 /*
982                  * Unpriviledged functions deny VPD access.
983                  * Simply skip VPD in this case.
984                  */
985                 if (rc == EACCES)
986                         goto done;
987                 goto fail;
988         }
989         sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
990         if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
991                 goto fail2;
992
993         /* Copy ID (product name) into device description, and log it. */
994         value.evv_tag = EFX_VPD_ID;
995         if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
996                 value.evv_value[value.evv_length] = 0;
997                 device_set_desc_copy(sc->dev, value.evv_value);
998                 device_printf(sc->dev, "%s\n", value.evv_value);
999         }
1000
1001         vpd_node = SYSCTL_ADD_NODE(
1002                 ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
1003                 OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data");
1004         vpd_list = SYSCTL_CHILDREN(vpd_node);
1005
1006         /* Add sysctls for all expected and any vendor-defined keywords. */
1007         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
1008         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
1009         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
1010         keyword[0] = 'V';
1011         keyword[2] = 0;
1012         for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
1013                 sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1014         for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
1015                 sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1016
1017 done:
1018         return (0);
1019
1020 fail2:
1021         free(sc->vpd_data, M_SFXGE);
1022 fail:
1023         return (rc);
1024 }
1025
1026 static void
1027 sfxge_vpd_fini(struct sfxge_softc *sc)
1028 {
1029         free(sc->vpd_data, M_SFXGE);
1030 }
1031
1032 static void
1033 sfxge_reset(void *arg, int npending)
1034 {
1035         struct sfxge_softc *sc;
1036         int rc;
1037         unsigned attempt;
1038
1039         (void)npending;
1040
1041         sc = (struct sfxge_softc *)arg;
1042
1043         SFXGE_ADAPTER_LOCK(sc);
1044
1045         if (sc->init_state != SFXGE_STARTED)
1046                 goto done;
1047
1048         sfxge_stop(sc);
1049         efx_nic_reset(sc->enp);
1050         for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) {
1051                 if ((rc = sfxge_start(sc)) == 0)
1052                         goto done;
1053
1054                 device_printf(sc->dev, "start on reset failed (%d)\n", rc);
1055                 DELAY(100000);
1056         }
1057
1058         device_printf(sc->dev, "reset failed; interface is now stopped\n");
1059
1060 done:
1061         SFXGE_ADAPTER_UNLOCK(sc);
1062 }
1063
1064 void
1065 sfxge_schedule_reset(struct sfxge_softc *sc)
1066 {
1067         taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
1068 }
1069
1070 static int
1071 sfxge_attach(device_t dev)
1072 {
1073         struct sfxge_softc *sc;
1074         struct ifnet *ifp;
1075         int error;
1076
1077         sc = device_get_softc(dev);
1078         sc->dev = dev;
1079
1080         /* Allocate ifnet. */
1081         ifp = if_alloc(IFT_ETHER);
1082         if (ifp == NULL) {
1083                 device_printf(dev, "Couldn't allocate ifnet\n");
1084                 error = ENOMEM;
1085                 goto fail;
1086         }
1087         sc->ifnet = ifp;
1088
1089         /* Initialize hardware. */
1090         DBGPRINT(sc->dev, "create nic");
1091         if ((error = sfxge_create(sc)) != 0)
1092                 goto fail2;
1093
1094         /* Create the ifnet for the port. */
1095         DBGPRINT(sc->dev, "init ifnet");
1096         if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1097                 goto fail3;
1098
1099         DBGPRINT(sc->dev, "init vpd");
1100         if ((error = sfxge_vpd_init(sc)) != 0)
1101                 goto fail4;
1102
1103         /*
1104          * NIC is initialized inside sfxge_create() and kept inialized
1105          * to be able to initialize port to discover media types in
1106          * sfxge_ifnet_init().
1107          */
1108         efx_nic_fini(sc->enp);
1109
1110         sc->init_state = SFXGE_REGISTERED;
1111
1112         DBGPRINT(sc->dev, "success");
1113         return (0);
1114
1115 fail4:
1116         sfxge_ifnet_fini(ifp);
1117 fail3:
1118         efx_nic_fini(sc->enp);
1119         sfxge_destroy(sc);
1120
1121 fail2:
1122         if_free(sc->ifnet);
1123
1124 fail:
1125         DBGPRINT(sc->dev, "failed %d", error);
1126         return (error);
1127 }
1128
1129 static int
1130 sfxge_detach(device_t dev)
1131 {
1132         struct sfxge_softc *sc;
1133
1134         sc = device_get_softc(dev);
1135
1136         sfxge_vpd_fini(sc);
1137
1138         /* Destroy the ifnet. */
1139         sfxge_ifnet_fini(sc->ifnet);
1140
1141         /* Tear down hardware. */
1142         sfxge_destroy(sc);
1143
1144         return (0);
1145 }
1146
1147 static int
1148 sfxge_probe(device_t dev)
1149 {
1150         uint16_t pci_vendor_id;
1151         uint16_t pci_device_id;
1152         efx_family_t family;
1153         int rc;
1154
1155         pci_vendor_id = pci_get_vendor(dev);
1156         pci_device_id = pci_get_device(dev);
1157
1158         DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1159         rc = efx_family(pci_vendor_id, pci_device_id, &family);
1160         if (rc != 0) {
1161                 DBGPRINT(dev, "efx_family fail %d", rc);
1162                 return (ENXIO);
1163         }
1164
1165         if (family == EFX_FAMILY_SIENA) {
1166                 device_set_desc(dev, "Solarflare SFC9000 family");
1167                 return (0);
1168         }
1169
1170         if (family == EFX_FAMILY_HUNTINGTON) {
1171                 device_set_desc(dev, "Solarflare SFC9100 family");
1172                 return (0);
1173         }
1174
1175         if (family == EFX_FAMILY_MEDFORD) {
1176                 device_set_desc(dev, "Solarflare SFC9200 family");
1177                 return (0);
1178         }
1179
1180         DBGPRINT(dev, "impossible controller family %d", family);
1181         return (ENXIO);
1182 }
1183
1184 static device_method_t sfxge_methods[] = {
1185         DEVMETHOD(device_probe,         sfxge_probe),
1186         DEVMETHOD(device_attach,        sfxge_attach),
1187         DEVMETHOD(device_detach,        sfxge_detach),
1188
1189         DEVMETHOD_END
1190 };
1191
1192 static devclass_t sfxge_devclass;
1193
1194 static driver_t sfxge_driver = {
1195         "sfxge",
1196         sfxge_methods,
1197         sizeof(struct sfxge_softc)
1198 };
1199
1200 DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);