]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/sfxge/sfxge.c
Mark more nodes as CTLFLAG_MPSAFE or CTLFLAG_NEEDGIANT (17 of many)
[FreeBSD/FreeBSD.git] / sys / dev / sfxge / sfxge.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010-2016 Solarflare Communications Inc.
5  * All rights reserved.
6  *
7  * This software was developed in part by Philip Paeps under contract for
8  * Solarflare Communications, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright notice,
14  *    this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright notice,
16  *    this list of conditions and the following disclaimer in the documentation
17  *    and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * The views and conclusions contained in the software and documentation are
32  * those of the authors and should not be interpreted as representing official
33  * policies, either expressed or implied, of the FreeBSD Project.
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include "opt_rss.h"
40
41 #include <sys/param.h>
42 #include <sys/kernel.h>
43 #include <sys/bus.h>
44 #include <sys/rman.h>
45 #include <sys/lock.h>
46 #include <sys/module.h>
47 #include <sys/mutex.h>
48 #include <sys/smp.h>
49 #include <sys/socket.h>
50 #include <sys/taskqueue.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
53 #include <sys/priv.h>
54 #include <sys/syslog.h>
55
56 #include <dev/pci/pcireg.h>
57 #include <dev/pci/pcivar.h>
58
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_media.h>
63 #include <net/if_types.h>
64
65 #ifdef RSS
66 #include <net/rss_config.h>
67 #endif
68
69 #include "common/efx.h"
70
71 #include "sfxge.h"
72 #include "sfxge_rx.h"
73 #include "sfxge_ioc.h"
74 #include "sfxge_version.h"
75
/*
 * Interface capability sets.
 *
 *  SFXGE_CAP        - assigned to if_capabilities when the ifnet is set up.
 *  SFXGE_CAP_ENABLE - assigned to if_capenable when the ifnet is set up.
 *  SFXGE_CAP_FIXED  - bits that SIOCSIFCAP refuses to toggle (EINVAL).
 */
#define	SFXGE_CAP							\
	(IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM |				\
	 IFCAP_RXCSUM | IFCAP_TXCSUM |					\
	 IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |			\
	 IFCAP_TSO4 | IFCAP_TSO6 |					\
	 IFCAP_JUMBO_MTU |						\
	 IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS)

#define	SFXGE_CAP_ENABLE	SFXGE_CAP

#define	SFXGE_CAP_FIXED							\
	(IFCAP_VLAN_MTU |						\
	 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS)
85
86 MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");
87
88
89 SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
90     "SFXGE driver parameters");
91
92 #define SFXGE_PARAM_RX_RING     SFXGE_PARAM(rx_ring)
93 static int sfxge_rx_ring_entries = SFXGE_NDESCS;
94 TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
95 SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
96            &sfxge_rx_ring_entries, 0,
97            "Maximum number of descriptors in a receive ring");
98
99 #define SFXGE_PARAM_TX_RING     SFXGE_PARAM(tx_ring)
100 static int sfxge_tx_ring_entries = SFXGE_NDESCS;
101 TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
102 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
103            &sfxge_tx_ring_entries, 0,
104            "Maximum number of descriptors in a transmit ring");
105
106 #define SFXGE_PARAM_RESTART_ATTEMPTS    SFXGE_PARAM(restart_attempts)
107 static int sfxge_restart_attempts = 3;
108 TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts);
109 SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN,
110            &sfxge_restart_attempts, 0,
111            "Maximum number of attempts to bring interface up after reset");
112
113 #if EFSYS_OPT_MCDI_LOGGING
114 #define SFXGE_PARAM_MCDI_LOGGING        SFXGE_PARAM(mcdi_logging)
115 static int sfxge_mcdi_logging = 0;
116 TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging);
117 #endif
118
119 static void
120 sfxge_reset(void *arg, int npending);
121
122 static int
123 sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
124 {
125         efx_drv_limits_t limits;
126         int rc;
127         unsigned int evq_max;
128         uint32_t evq_allocated;
129         uint32_t rxq_allocated;
130         uint32_t txq_allocated;
131
132         /*
133          * Limit the number of event queues to:
134          *  - number of CPUs
135          *  - hardwire maximum RSS channels
136          *  - administratively specified maximum RSS channels
137          */
138 #ifdef RSS
139         /*
140          * Avoid extra limitations so that the number of queues
141          * may be configured at administrator's will
142          */
143         evq_max = MIN(MAX(rss_getnumbuckets(), 1), EFX_MAXRSS);
144 #else
145         evq_max = MIN(mp_ncpus, EFX_MAXRSS);
146 #endif
147         if (sc->max_rss_channels > 0)
148                 evq_max = MIN(evq_max, sc->max_rss_channels);
149
150         memset(&limits, 0, sizeof(limits));
151
152         limits.edl_min_evq_count = 1;
153         limits.edl_max_evq_count = evq_max;
154         limits.edl_min_txq_count = SFXGE_EVQ0_N_TXQ(sc);
155         limits.edl_max_txq_count = evq_max + SFXGE_EVQ0_N_TXQ(sc) - 1;
156         limits.edl_min_rxq_count = 1;
157         limits.edl_max_rxq_count = evq_max;
158
159         efx_nic_set_drv_limits(sc->enp, &limits);
160
161         if ((rc = efx_nic_init(sc->enp)) != 0)
162                 return (rc);
163
164         rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
165                                  &txq_allocated);
166         if (rc != 0) {
167                 efx_nic_fini(sc->enp);
168                 return (rc);
169         }
170
171         KASSERT(txq_allocated >= SFXGE_EVQ0_N_TXQ(sc),
172                 ("txq_allocated < %u", SFXGE_EVQ0_N_TXQ(sc)));
173
174         sc->evq_max = MIN(evq_allocated, evq_max);
175         sc->evq_max = MIN(rxq_allocated, sc->evq_max);
176         sc->evq_max = MIN(txq_allocated - (SFXGE_EVQ0_N_TXQ(sc) - 1),
177                           sc->evq_max);
178
179         KASSERT(sc->evq_max <= evq_max,
180                 ("allocated more than maximum requested"));
181
182 #ifdef RSS
183         if (sc->evq_max < rss_getnumbuckets())
184                 device_printf(sc->dev, "The number of allocated queues (%u) "
185                               "is less than the number of RSS buckets (%u); "
186                               "performance degradation might be observed",
187                               sc->evq_max, rss_getnumbuckets());
188 #endif
189
190         /*
191          * NIC is kept initialized in the case of success to be able to
192          * initialize port to find out media types.
193          */
194         return (0);
195 }
196
197 static int
198 sfxge_set_drv_limits(struct sfxge_softc *sc)
199 {
200         efx_drv_limits_t limits;
201
202         memset(&limits, 0, sizeof(limits));
203
204         /* Limits are strict since take into account initial estimation */
205         limits.edl_min_evq_count = limits.edl_max_evq_count =
206             sc->intr.n_alloc;
207         limits.edl_min_txq_count = limits.edl_max_txq_count =
208             sc->intr.n_alloc + SFXGE_EVQ0_N_TXQ(sc) - 1;
209         limits.edl_min_rxq_count = limits.edl_max_rxq_count =
210             sc->intr.n_alloc;
211
212         return (efx_nic_set_drv_limits(sc->enp, &limits));
213 }
214
215 static int
216 sfxge_start(struct sfxge_softc *sc)
217 {
218         int rc;
219
220         SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
221
222         if (sc->init_state == SFXGE_STARTED)
223                 return (0);
224
225         if (sc->init_state != SFXGE_REGISTERED) {
226                 rc = EINVAL;
227                 goto fail;
228         }
229
230         /* Set required resource limits */
231         if ((rc = sfxge_set_drv_limits(sc)) != 0)
232                 goto fail;
233
234         if ((rc = efx_nic_init(sc->enp)) != 0)
235                 goto fail;
236
237         /* Start processing interrupts. */
238         if ((rc = sfxge_intr_start(sc)) != 0)
239                 goto fail2;
240
241         /* Start processing events. */
242         if ((rc = sfxge_ev_start(sc)) != 0)
243                 goto fail3;
244
245         /* Fire up the port. */
246         if ((rc = sfxge_port_start(sc)) != 0)
247                 goto fail4;
248
249         /* Start the receiver side. */
250         if ((rc = sfxge_rx_start(sc)) != 0)
251                 goto fail5;
252
253         /* Start the transmitter side. */
254         if ((rc = sfxge_tx_start(sc)) != 0)
255                 goto fail6;
256
257         sc->init_state = SFXGE_STARTED;
258
259         /* Tell the stack we're running. */
260         sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
261         sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
262
263         return (0);
264
265 fail6:
266         sfxge_rx_stop(sc);
267
268 fail5:
269         sfxge_port_stop(sc);
270
271 fail4:
272         sfxge_ev_stop(sc);
273
274 fail3:
275         sfxge_intr_stop(sc);
276
277 fail2:
278         efx_nic_fini(sc->enp);
279
280 fail:
281         device_printf(sc->dev, "sfxge_start: %d\n", rc);
282
283         return (rc);
284 }
285
/*
 * if_init callback: start the adapter under the adapter lock.
 * The result of sfxge_start() is deliberately discarded because the
 * callback has no way to return it.
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = (struct sfxge_softc *)arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
297
298 static void
299 sfxge_stop(struct sfxge_softc *sc)
300 {
301         SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
302
303         if (sc->init_state != SFXGE_STARTED)
304                 return;
305
306         sc->init_state = SFXGE_REGISTERED;
307
308         /* Stop the transmitter. */
309         sfxge_tx_stop(sc);
310
311         /* Stop the receiver. */
312         sfxge_rx_stop(sc);
313
314         /* Stop the port. */
315         sfxge_port_stop(sc);
316
317         /* Stop processing events. */
318         sfxge_ev_stop(sc);
319
320         /* Stop processing interrupts. */
321         sfxge_intr_stop(sc);
322
323         efx_nic_fini(sc->enp);
324
325         sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
326 }
327
328
329 static int
330 sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
331 {
332         efx_vpd_value_t value;
333         int rc = 0;
334
335         switch (ioc->u.vpd.op) {
336         case SFXGE_VPD_OP_GET_KEYWORD:
337                 value.evv_tag = ioc->u.vpd.tag;
338                 value.evv_keyword = ioc->u.vpd.keyword;
339                 rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
340                 if (rc != 0)
341                         break;
342                 ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
343                 if (ioc->u.vpd.payload != 0) {
344                         rc = copyout(value.evv_value, ioc->u.vpd.payload,
345                                      ioc->u.vpd.len);
346                 }
347                 break;
348         case SFXGE_VPD_OP_SET_KEYWORD:
349                 if (ioc->u.vpd.len > sizeof(value.evv_value))
350                         return (EINVAL);
351                 value.evv_tag = ioc->u.vpd.tag;
352                 value.evv_keyword = ioc->u.vpd.keyword;
353                 value.evv_length = ioc->u.vpd.len;
354                 rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
355                 if (rc != 0)
356                         break;
357                 rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
358                 if (rc != 0)
359                         break;
360                 rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
361                 if (rc != 0)
362                         break;
363                 rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
364                 break;
365         default:
366                 rc = EOPNOTSUPP;
367                 break;
368         }
369
370         return (rc);
371 }
372
373 static int
374 sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
375 {
376         switch (ioc->op) {
377         case SFXGE_MCDI_IOC:
378                 return (sfxge_mcdi_ioctl(sc, ioc));
379         case SFXGE_NVRAM_IOC:
380                 return (sfxge_nvram_ioctl(sc, ioc));
381         case SFXGE_VPD_IOC:
382                 return (sfxge_vpd_ioctl(sc, ioc));
383         default:
384                 return (EOPNOTSUPP);
385         }
386 }
387
388
389 static int
390 sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
391 {
392         struct sfxge_softc *sc;
393         struct ifreq *ifr;
394         sfxge_ioc_t ioc;
395         int error;
396
397         ifr = (struct ifreq *)data;
398         sc = ifp->if_softc;
399         error = 0;
400
401         switch (command) {
402         case SIOCSIFFLAGS:
403                 SFXGE_ADAPTER_LOCK(sc);
404                 if (ifp->if_flags & IFF_UP) {
405                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
406                                 if ((ifp->if_flags ^ sc->if_flags) &
407                                     (IFF_PROMISC | IFF_ALLMULTI)) {
408                                         sfxge_mac_filter_set(sc);
409                                 }
410                         } else
411                                 sfxge_start(sc);
412                 } else
413                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
414                                 sfxge_stop(sc);
415                 sc->if_flags = ifp->if_flags;
416                 SFXGE_ADAPTER_UNLOCK(sc);
417                 break;
418         case SIOCSIFMTU:
419                 if (ifr->ifr_mtu == ifp->if_mtu) {
420                         /* Nothing to do */
421                         error = 0;
422                 } else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
423                         error = EINVAL;
424                 } else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
425                         ifp->if_mtu = ifr->ifr_mtu;
426                         error = 0;
427                 } else {
428                         /* Restart required */
429                         SFXGE_ADAPTER_LOCK(sc);
430                         sfxge_stop(sc);
431                         ifp->if_mtu = ifr->ifr_mtu;
432                         error = sfxge_start(sc);
433                         SFXGE_ADAPTER_UNLOCK(sc);
434                         if (error != 0) {
435                                 ifp->if_flags &= ~IFF_UP;
436                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
437                                 if_down(ifp);
438                         }
439                 }
440                 break;
441         case SIOCADDMULTI:
442         case SIOCDELMULTI:
443                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
444                         sfxge_mac_filter_set(sc);
445                 break;
446         case SIOCSIFCAP:
447         {
448                 int reqcap = ifr->ifr_reqcap;
449                 int capchg_mask;
450
451                 SFXGE_ADAPTER_LOCK(sc);
452
453                 /* Capabilities to be changed in accordance with request */
454                 capchg_mask = ifp->if_capenable ^ reqcap;
455
456                 /*
457                  * The networking core already rejects attempts to
458                  * enable capabilities we don't have.  We still have
459                  * to reject attempts to disable capabilities that we
460                  * can't (yet) disable.
461                  */
462                 KASSERT((reqcap & ~ifp->if_capabilities) == 0,
463                     ("Unsupported capabilities 0x%x requested 0x%x vs "
464                      "supported 0x%x",
465                      reqcap & ~ifp->if_capabilities,
466                      reqcap , ifp->if_capabilities));
467                 if (capchg_mask & SFXGE_CAP_FIXED) {
468                         error = EINVAL;
469                         SFXGE_ADAPTER_UNLOCK(sc);
470                         break;
471                 }
472
473                 /* Check request before any changes */
474                 if ((capchg_mask & IFCAP_TSO4) &&
475                     (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
476                         error = EAGAIN;
477                         SFXGE_ADAPTER_UNLOCK(sc);
478                         if_printf(ifp, "enable txcsum before tso4\n");
479                         break;
480                 }
481                 if ((capchg_mask & IFCAP_TSO6) &&
482                     (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
483                         error = EAGAIN;
484                         SFXGE_ADAPTER_UNLOCK(sc);
485                         if_printf(ifp, "enable txcsum6 before tso6\n");
486                         break;
487                 }
488
489                 if (reqcap & IFCAP_TXCSUM) {
490                         ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
491                 } else {
492                         ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
493                         if (reqcap & IFCAP_TSO4) {
494                                 reqcap &= ~IFCAP_TSO4;
495                                 if_printf(ifp,
496                                     "tso4 disabled due to -txcsum\n");
497                         }
498                 }
499                 if (reqcap & IFCAP_TXCSUM_IPV6) {
500                         ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
501                 } else {
502                         ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
503                         if (reqcap & IFCAP_TSO6) {
504                                 reqcap &= ~IFCAP_TSO6;
505                                 if_printf(ifp,
506                                     "tso6 disabled due to -txcsum6\n");
507                         }
508                 }
509
510                 /*
511                  * The kernel takes both IFCAP_TSOx and CSUM_TSO into
512                  * account before using TSO. So, we do not touch
513                  * checksum flags when IFCAP_TSOx is modified.
514                  * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
515                  * but both bits are set in IPv4 and IPv6 mbufs.
516                  */
517
518                 ifp->if_capenable = reqcap;
519
520                 SFXGE_ADAPTER_UNLOCK(sc);
521                 break;
522         }
523         case SIOCSIFMEDIA:
524         case SIOCGIFMEDIA:
525                 error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
526                 break;
527 #ifdef SIOCGI2C
528         case SIOCGI2C:
529         {
530                 struct ifi2creq i2c;
531
532                 error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
533                 if (error != 0)
534                         break;
535
536                 if (i2c.len > sizeof(i2c.data)) {
537                         error = EINVAL;
538                         break;
539                 }
540
541                 SFXGE_ADAPTER_LOCK(sc);
542                 error = efx_phy_module_get_info(sc->enp, i2c.dev_addr,
543                                                 i2c.offset, i2c.len,
544                                                 &i2c.data[0]);
545                 SFXGE_ADAPTER_UNLOCK(sc);
546                 if (error == 0)
547                         error = copyout(&i2c, ifr_data_get_ptr(ifr),
548                             sizeof(i2c));
549                 break;
550         }
551 #endif
552         case SIOCGPRIVATE_0:
553                 error = priv_check(curthread, PRIV_DRIVER);
554                 if (error != 0)
555                         break;
556                 error = copyin(ifr_data_get_ptr(ifr), &ioc, sizeof(ioc));
557                 if (error != 0)
558                         return (error);
559                 error = sfxge_private_ioctl(sc, &ioc);
560                 if (error == 0) {
561                         error = copyout(&ioc, ifr_data_get_ptr(ifr),
562                             sizeof(ioc));
563                 }
564                 break;
565         default:
566                 error = ether_ioctl(ifp, command, data);
567         }
568
569         return (error);
570 }
571
572 static void
573 sfxge_ifnet_fini(struct ifnet *ifp)
574 {
575         struct sfxge_softc *sc = ifp->if_softc;
576
577         SFXGE_ADAPTER_LOCK(sc);
578         sfxge_stop(sc);
579         SFXGE_ADAPTER_UNLOCK(sc);
580
581         ifmedia_removeall(&sc->media);
582         ether_ifdetach(ifp);
583         if_free(ifp);
584 }
585
586 static int
587 sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
588 {
589         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
590         device_t dev;
591         int rc;
592
593         dev = sc->dev;
594         sc->ifnet = ifp;
595
596         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
597         ifp->if_init = sfxge_if_init;
598         ifp->if_softc = sc;
599         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
600         ifp->if_ioctl = sfxge_if_ioctl;
601
602         ifp->if_capabilities = SFXGE_CAP;
603         ifp->if_capenable = SFXGE_CAP_ENABLE;
604         ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE;
605         ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG;
606         ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
607
608 #ifdef SFXGE_LRO
609         ifp->if_capabilities |= IFCAP_LRO;
610         ifp->if_capenable |= IFCAP_LRO;
611 #endif
612
613         if (encp->enc_hw_tx_insert_vlan_enabled) {
614                 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
615                 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
616         }
617         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
618                            CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
619
620         ether_ifattach(ifp, encp->enc_mac_addr);
621
622         ifp->if_transmit = sfxge_if_transmit;
623         ifp->if_qflush = sfxge_if_qflush;
624
625         ifp->if_get_counter = sfxge_get_counter;
626
627         DBGPRINT(sc->dev, "ifmedia_init");
628         if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
629                 goto fail;
630
631         return (0);
632
633 fail:
634         ether_ifdetach(sc->ifnet);
635         return (rc);
636 }
637
638 void
639 sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
640 {
641         KASSERT(sc->buffer_table_next + n <=
642                 efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
643                 ("buffer table full"));
644
645         *idp = sc->buffer_table_next;
646         sc->buffer_table_next += n;
647 }
648
649 static int
650 sfxge_bar_init(struct sfxge_softc *sc)
651 {
652         efsys_bar_t *esbp = &sc->bar;
653
654         esbp->esb_rid = PCIR_BAR(sc->mem_bar);
655         if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
656             &esbp->esb_rid, RF_ACTIVE)) == NULL) {
657                 device_printf(sc->dev, "Cannot allocate BAR region %d\n",
658                     sc->mem_bar);
659                 return (ENXIO);
660         }
661         esbp->esb_tag = rman_get_bustag(esbp->esb_res);
662         esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
663
664         SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
665
666         return (0);
667 }
668
669 static void
670 sfxge_bar_fini(struct sfxge_softc *sc)
671 {
672         efsys_bar_t *esbp = &sc->bar;
673
674         bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
675             esbp->esb_res);
676         SFXGE_BAR_LOCK_DESTROY(esbp);
677 }
678
679 static int
680 sfxge_create(struct sfxge_softc *sc)
681 {
682         device_t dev;
683         efx_nic_t *enp;
684         int error;
685         char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
686 #if EFSYS_OPT_MCDI_LOGGING
687         char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))];
688 #endif
689
690         dev = sc->dev;
691
692         SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
693
694         sc->max_rss_channels = 0;
695         snprintf(rss_param_name, sizeof(rss_param_name),
696                  SFXGE_PARAM(%d.max_rss_channels),
697                  (int)device_get_unit(dev));
698         TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
699 #if EFSYS_OPT_MCDI_LOGGING
700         sc->mcdi_logging = sfxge_mcdi_logging;
701         snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name),
702                  SFXGE_PARAM(%d.mcdi_logging),
703                  (int)device_get_unit(dev));
704         TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging);
705 #endif
706
707         sc->stats_node = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
708             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats",
709             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics");
710         if (sc->stats_node == NULL) {
711                 error = ENOMEM;
712                 goto fail;
713         }
714
715         TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
716
717         (void) pci_enable_busmaster(dev);
718
719         /* Initialize DMA mappings. */
720         DBGPRINT(sc->dev, "dma_init...");
721         if ((error = sfxge_dma_init(sc)) != 0)
722                 goto fail;
723
724         error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
725             &sc->family, &sc->mem_bar);
726         KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
727
728         /* Map the device registers. */
729         DBGPRINT(sc->dev, "bar_init...");
730         if ((error = sfxge_bar_init(sc)) != 0)
731                 goto fail;
732
733         DBGPRINT(sc->dev, "nic_create...");
734
735         /* Create the common code nic object. */
736         SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
737                               device_get_nameunit(sc->dev), "nic");
738         if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
739             &sc->bar, &sc->enp_lock, &enp)) != 0)
740                 goto fail3;
741         sc->enp = enp;
742
743         /* Initialize MCDI to talk to the microcontroller. */
744         DBGPRINT(sc->dev, "mcdi_init...");
745         if ((error = sfxge_mcdi_init(sc)) != 0)
746                 goto fail4;
747
748         /* Probe the NIC and build the configuration data area. */
749         DBGPRINT(sc->dev, "nic_probe...");
750         if ((error = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE)) != 0)
751                 goto fail5;
752
753         if (!ISP2(sfxge_rx_ring_entries) ||
754             (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
755             (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
756                 log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
757                     SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
758                     EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
759                 error = EINVAL;
760                 goto fail_rx_ring_entries;
761         }
762         sc->rxq_entries = sfxge_rx_ring_entries;
763
764         if (efx_nic_cfg_get(enp)->enc_features & EFX_FEATURE_TXQ_CKSUM_OP_DESC)
765                 sc->txq_dynamic_cksum_toggle_supported = B_TRUE;
766         else
767                 sc->txq_dynamic_cksum_toggle_supported = B_FALSE;
768
769         if (!ISP2(sfxge_tx_ring_entries) ||
770             (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
771             (sfxge_tx_ring_entries > efx_nic_cfg_get(enp)->enc_txq_max_ndescs)) {
772                 log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
773                     SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
774                     EFX_TXQ_MINNDESCS, efx_nic_cfg_get(enp)->enc_txq_max_ndescs);
775                 error = EINVAL;
776                 goto fail_tx_ring_entries;
777         }
778         sc->txq_entries = sfxge_tx_ring_entries;
779
780         SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
781                           SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
782                           OID_AUTO, "version", CTLFLAG_RD,
783                           SFXGE_VERSION_STRING, 0,
784                           "Driver version");
785
786         SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
787                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
788                         OID_AUTO, "phy_type", CTLFLAG_RD,
789                         NULL, efx_nic_cfg_get(enp)->enc_phy_type,
790                         "PHY type");
791
792         /* Initialize the NVRAM. */
793         DBGPRINT(sc->dev, "nvram_init...");
794         if ((error = efx_nvram_init(enp)) != 0)
795                 goto fail6;
796
797         /* Initialize the VPD. */
798         DBGPRINT(sc->dev, "vpd_init...");
799         if ((error = efx_vpd_init(enp)) != 0)
800                 goto fail7;
801
802         efx_mcdi_new_epoch(enp);
803
804         /* Reset the NIC. */
805         DBGPRINT(sc->dev, "nic_reset...");
806         if ((error = efx_nic_reset(enp)) != 0)
807                 goto fail8;
808
809         /* Initialize buffer table allocation. */
810         sc->buffer_table_next = 0;
811
812         /*
813          * Guarantee minimum and estimate maximum number of event queues
814          * to take it into account when MSI-X interrupts are allocated.
815          * It initializes NIC and keeps it initialized on success.
816          */
817         if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
818                 goto fail8;
819
820         /* Set up interrupts. */
821         DBGPRINT(sc->dev, "intr_init...");
822         if ((error = sfxge_intr_init(sc)) != 0)
823                 goto fail9;
824
825         /* Initialize event processing state. */
826         DBGPRINT(sc->dev, "ev_init...");
827         if ((error = sfxge_ev_init(sc)) != 0)
828                 goto fail11;
829
830         /* Initialize port state. */
831         DBGPRINT(sc->dev, "port_init...");
832         if ((error = sfxge_port_init(sc)) != 0)
833                 goto fail12;
834
835         /* Initialize receive state. */
836         DBGPRINT(sc->dev, "rx_init...");
837         if ((error = sfxge_rx_init(sc)) != 0)
838                 goto fail13;
839
840         /* Initialize transmit state. */
841         DBGPRINT(sc->dev, "tx_init...");
842         if ((error = sfxge_tx_init(sc)) != 0)
843                 goto fail14;
844
845         sc->init_state = SFXGE_INITIALIZED;
846
847         DBGPRINT(sc->dev, "success");
848         return (0);
849
850 fail14:
851         sfxge_rx_fini(sc);
852
853 fail13:
854         sfxge_port_fini(sc);
855
856 fail12:
857         sfxge_ev_fini(sc);
858
859 fail11:
860         sfxge_intr_fini(sc);
861
862 fail9:
863         efx_nic_fini(sc->enp);
864
865 fail8:
866         efx_vpd_fini(enp);
867
868 fail7:
869         efx_nvram_fini(enp);
870
871 fail6:
872 fail_tx_ring_entries:
873 fail_rx_ring_entries:
874         efx_nic_unprobe(enp);
875
876 fail5:
877         sfxge_mcdi_fini(sc);
878
879 fail4:
880         sc->enp = NULL;
881         efx_nic_destroy(enp);
882         SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
883
884 fail3:
885         sfxge_bar_fini(sc);
886         (void) pci_disable_busmaster(sc->dev);
887
888 fail:
889         DBGPRINT(sc->dev, "failed %d", error);
890         sc->dev = NULL;
891         SFXGE_ADAPTER_LOCK_DESTROY(sc);
892         return (error);
893 }
894
895 static void
896 sfxge_destroy(struct sfxge_softc *sc)
897 {
898         efx_nic_t *enp;
899
900         /* Clean up transmit state. */
901         sfxge_tx_fini(sc);
902
903         /* Clean up receive state. */
904         sfxge_rx_fini(sc);
905
906         /* Clean up port state. */
907         sfxge_port_fini(sc);
908
909         /* Clean up event processing state. */
910         sfxge_ev_fini(sc);
911
912         /* Clean up interrupts. */
913         sfxge_intr_fini(sc);
914
915         /* Tear down common code subsystems. */
916         efx_nic_reset(sc->enp);
917         efx_vpd_fini(sc->enp);
918         efx_nvram_fini(sc->enp);
919         efx_nic_unprobe(sc->enp);
920
921         /* Tear down MCDI. */
922         sfxge_mcdi_fini(sc);
923
924         /* Destroy common code context. */
925         enp = sc->enp;
926         sc->enp = NULL;
927         efx_nic_destroy(enp);
928
929         /* Free DMA memory. */
930         sfxge_dma_fini(sc);
931
932         /* Free mapped BARs. */
933         sfxge_bar_fini(sc);
934
935         (void) pci_disable_busmaster(sc->dev);
936
937         taskqueue_drain(taskqueue_thread, &sc->task_reset);
938
939         /* Destroy the softc lock. */
940         SFXGE_ADAPTER_LOCK_DESTROY(sc);
941 }
942
943 static int
944 sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
945 {
946         struct sfxge_softc *sc = arg1;
947         efx_vpd_value_t value;
948         int rc;
949
950         value.evv_tag = arg2 >> 16;
951         value.evv_keyword = arg2 & 0xffff;
952         if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
953             != 0)
954                 return (rc);
955
956         return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
957 }
958
959 static void
960 sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
961                   efx_vpd_tag_t tag, const char *keyword)
962 {
963         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
964         efx_vpd_value_t value;
965
966         /* Check whether VPD tag/keyword is present */
967         value.evv_tag = tag;
968         value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
969         if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
970                 return;
971
972         SYSCTL_ADD_PROC(ctx, list, OID_AUTO, keyword,
973             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
974             sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
975             sfxge_vpd_handler, "A", "");
976 }
977
978 static int
979 sfxge_vpd_init(struct sfxge_softc *sc)
980 {
981         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
982         struct sysctl_oid *vpd_node;
983         struct sysctl_oid_list *vpd_list;
984         char keyword[3];
985         efx_vpd_value_t value;
986         int rc;
987
988         if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
989                 /*
990                  * Unpriviledged functions deny VPD access.
991                  * Simply skip VPD in this case.
992                  */
993                 if (rc == EACCES)
994                         goto done;
995                 goto fail;
996         }
997         sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
998         if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
999                 goto fail2;
1000
1001         /* Copy ID (product name) into device description, and log it. */
1002         value.evv_tag = EFX_VPD_ID;
1003         if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
1004                 value.evv_value[value.evv_length] = 0;
1005                 device_set_desc_copy(sc->dev, value.evv_value);
1006                 device_printf(sc->dev, "%s\n", value.evv_value);
1007         }
1008
1009         vpd_node = SYSCTL_ADD_NODE(ctx,
1010             SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "vpd",
1011             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Vital Product Data");
1012         vpd_list = SYSCTL_CHILDREN(vpd_node);
1013
1014         /* Add sysctls for all expected and any vendor-defined keywords. */
1015         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
1016         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
1017         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
1018         keyword[0] = 'V';
1019         keyword[2] = 0;
1020         for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
1021                 sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1022         for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
1023                 sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1024
1025 done:
1026         return (0);
1027
1028 fail2:
1029         free(sc->vpd_data, M_SFXGE);
1030 fail:
1031         return (rc);
1032 }
1033
1034 static void
1035 sfxge_vpd_fini(struct sfxge_softc *sc)
1036 {
1037         free(sc->vpd_data, M_SFXGE);
1038 }
1039
1040 static void
1041 sfxge_reset(void *arg, int npending)
1042 {
1043         struct sfxge_softc *sc;
1044         int rc;
1045         unsigned attempt;
1046
1047         (void)npending;
1048
1049         sc = (struct sfxge_softc *)arg;
1050
1051         SFXGE_ADAPTER_LOCK(sc);
1052
1053         if (sc->init_state != SFXGE_STARTED)
1054                 goto done;
1055
1056         sfxge_stop(sc);
1057         efx_nic_reset(sc->enp);
1058         for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) {
1059                 if ((rc = sfxge_start(sc)) == 0)
1060                         goto done;
1061
1062                 device_printf(sc->dev, "start on reset failed (%d)\n", rc);
1063                 DELAY(100000);
1064         }
1065
1066         device_printf(sc->dev, "reset failed; interface is now stopped\n");
1067
1068 done:
1069         SFXGE_ADAPTER_UNLOCK(sc);
1070 }
1071
1072 void
1073 sfxge_schedule_reset(struct sfxge_softc *sc)
1074 {
1075         taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
1076 }
1077
1078 static int
1079 sfxge_attach(device_t dev)
1080 {
1081         struct sfxge_softc *sc;
1082         struct ifnet *ifp;
1083         int error;
1084
1085         sc = device_get_softc(dev);
1086         sc->dev = dev;
1087
1088         /* Allocate ifnet. */
1089         ifp = if_alloc(IFT_ETHER);
1090         if (ifp == NULL) {
1091                 device_printf(dev, "Couldn't allocate ifnet\n");
1092                 error = ENOMEM;
1093                 goto fail;
1094         }
1095         sc->ifnet = ifp;
1096
1097         /* Initialize hardware. */
1098         DBGPRINT(sc->dev, "create nic");
1099         if ((error = sfxge_create(sc)) != 0)
1100                 goto fail2;
1101
1102         /* Create the ifnet for the port. */
1103         DBGPRINT(sc->dev, "init ifnet");
1104         if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1105                 goto fail3;
1106
1107         DBGPRINT(sc->dev, "init vpd");
1108         if ((error = sfxge_vpd_init(sc)) != 0)
1109                 goto fail4;
1110
1111         /*
1112          * NIC is initialized inside sfxge_create() and kept inialized
1113          * to be able to initialize port to discover media types in
1114          * sfxge_ifnet_init().
1115          */
1116         efx_nic_fini(sc->enp);
1117
1118         sc->init_state = SFXGE_REGISTERED;
1119
1120         DBGPRINT(sc->dev, "success");
1121         return (0);
1122
1123 fail4:
1124         sfxge_ifnet_fini(ifp);
1125 fail3:
1126         efx_nic_fini(sc->enp);
1127         sfxge_destroy(sc);
1128
1129 fail2:
1130         if_free(sc->ifnet);
1131
1132 fail:
1133         DBGPRINT(sc->dev, "failed %d", error);
1134         return (error);
1135 }
1136
1137 static int
1138 sfxge_detach(device_t dev)
1139 {
1140         struct sfxge_softc *sc;
1141
1142         sc = device_get_softc(dev);
1143
1144         sfxge_vpd_fini(sc);
1145
1146         /* Destroy the ifnet. */
1147         sfxge_ifnet_fini(sc->ifnet);
1148
1149         /* Tear down hardware. */
1150         sfxge_destroy(sc);
1151
1152         return (0);
1153 }
1154
1155 static int
1156 sfxge_probe(device_t dev)
1157 {
1158         uint16_t pci_vendor_id;
1159         uint16_t pci_device_id;
1160         efx_family_t family;
1161         unsigned int mem_bar;
1162         int rc;
1163
1164         pci_vendor_id = pci_get_vendor(dev);
1165         pci_device_id = pci_get_device(dev);
1166
1167         DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1168         rc = efx_family(pci_vendor_id, pci_device_id, &family, &mem_bar);
1169         if (rc != 0) {
1170                 DBGPRINT(dev, "efx_family fail %d", rc);
1171                 return (ENXIO);
1172         }
1173
1174         if (family == EFX_FAMILY_SIENA) {
1175                 device_set_desc(dev, "Solarflare SFC9000 family");
1176                 return (0);
1177         }
1178
1179         if (family == EFX_FAMILY_HUNTINGTON) {
1180                 device_set_desc(dev, "Solarflare SFC9100 family");
1181                 return (0);
1182         }
1183
1184         if (family == EFX_FAMILY_MEDFORD) {
1185                 device_set_desc(dev, "Solarflare SFC9200 family");
1186                 return (0);
1187         }
1188
1189         if (family == EFX_FAMILY_MEDFORD2) {
1190                 device_set_desc(dev, "Solarflare SFC9250 family");
1191                 return (0);
1192         }
1193
1194         DBGPRINT(dev, "impossible controller family %d", family);
1195         return (ENXIO);
1196 }
1197
1198 static device_method_t sfxge_methods[] = {
1199         DEVMETHOD(device_probe,         sfxge_probe),
1200         DEVMETHOD(device_attach,        sfxge_attach),
1201         DEVMETHOD(device_detach,        sfxge_detach),
1202
1203         DEVMETHOD_END
1204 };
1205
1206 static devclass_t sfxge_devclass;
1207
1208 static driver_t sfxge_driver = {
1209         "sfxge",
1210         sfxge_methods,
1211         sizeof(struct sfxge_softc)
1212 };
1213
1214 DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);