]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/sfxge/sfxge.c
Remove alignment requirements for passthrough buffer.
[FreeBSD/FreeBSD.git] / sys / dev / sfxge / sfxge.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010-2016 Solarflare Communications Inc.
5  * All rights reserved.
6  *
7  * This software was developed in part by Philip Paeps under contract for
8  * Solarflare Communications, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright notice,
14  *    this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright notice,
16  *    this list of conditions and the following disclaimer in the documentation
17  *    and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * The views and conclusions contained in the software and documentation are
32  * those of the authors and should not be interpreted as representing official
33  * policies, either expressed or implied, of the FreeBSD Project.
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include "opt_rss.h"
40
41 #include <sys/param.h>
42 #include <sys/kernel.h>
43 #include <sys/bus.h>
44 #include <sys/rman.h>
45 #include <sys/lock.h>
46 #include <sys/module.h>
47 #include <sys/mutex.h>
48 #include <sys/smp.h>
49 #include <sys/socket.h>
50 #include <sys/taskqueue.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
53 #include <sys/priv.h>
54 #include <sys/syslog.h>
55
56 #include <dev/pci/pcireg.h>
57 #include <dev/pci/pcivar.h>
58
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_media.h>
63 #include <net/if_types.h>
64
65 #ifdef RSS
66 #include <net/rss_config.h>
67 #endif
68
69 #include "common/efx.h"
70
71 #include "sfxge.h"
72 #include "sfxge_rx.h"
73 #include "sfxge_ioc.h"
74 #include "sfxge_version.h"
75
76 #define SFXGE_CAP (IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM |                 \
77                    IFCAP_RXCSUM | IFCAP_TXCSUM |                        \
78                    IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |              \
79                    IFCAP_TSO4 | IFCAP_TSO6 |                            \
80                    IFCAP_JUMBO_MTU |                                    \
81                    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS)
82 #define SFXGE_CAP_ENABLE SFXGE_CAP
83 #define SFXGE_CAP_FIXED (IFCAP_VLAN_MTU |                               \
84                          IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS)
85
86 MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");
87
88 SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
89     "SFXGE driver parameters");
90
91 #define SFXGE_PARAM_RX_RING     SFXGE_PARAM(rx_ring)
92 static int sfxge_rx_ring_entries = SFXGE_NDESCS;
93 TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
94 SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
95            &sfxge_rx_ring_entries, 0,
96            "Maximum number of descriptors in a receive ring");
97
98 #define SFXGE_PARAM_TX_RING     SFXGE_PARAM(tx_ring)
99 static int sfxge_tx_ring_entries = SFXGE_NDESCS;
100 TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
101 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
102            &sfxge_tx_ring_entries, 0,
103            "Maximum number of descriptors in a transmit ring");
104
105 #define SFXGE_PARAM_RESTART_ATTEMPTS    SFXGE_PARAM(restart_attempts)
106 static int sfxge_restart_attempts = 3;
107 TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts);
108 SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN,
109            &sfxge_restart_attempts, 0,
110            "Maximum number of attempts to bring interface up after reset");
111
112 #if EFSYS_OPT_MCDI_LOGGING
113 #define SFXGE_PARAM_MCDI_LOGGING        SFXGE_PARAM(mcdi_logging)
114 static int sfxge_mcdi_logging = 0;
115 TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging);
116 #endif
117
118 static void
119 sfxge_reset(void *arg, int npending);
120
121 static int
122 sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
123 {
124         efx_drv_limits_t limits;
125         int rc;
126         unsigned int evq_max;
127         uint32_t evq_allocated;
128         uint32_t rxq_allocated;
129         uint32_t txq_allocated;
130
131         /*
132          * Limit the number of event queues to:
133          *  - number of CPUs
134          *  - hardwire maximum RSS channels
135          *  - administratively specified maximum RSS channels
136          */
137 #ifdef RSS
138         /*
139          * Avoid extra limitations so that the number of queues
140          * may be configured at administrator's will
141          */
142         evq_max = MIN(MAX(rss_getnumbuckets(), 1), EFX_MAXRSS);
143 #else
144         evq_max = MIN(mp_ncpus, EFX_MAXRSS);
145 #endif
146         if (sc->max_rss_channels > 0)
147                 evq_max = MIN(evq_max, sc->max_rss_channels);
148
149         memset(&limits, 0, sizeof(limits));
150
151         limits.edl_min_evq_count = 1;
152         limits.edl_max_evq_count = evq_max;
153         limits.edl_min_txq_count = SFXGE_EVQ0_N_TXQ(sc);
154         limits.edl_max_txq_count = evq_max + SFXGE_EVQ0_N_TXQ(sc) - 1;
155         limits.edl_min_rxq_count = 1;
156         limits.edl_max_rxq_count = evq_max;
157
158         efx_nic_set_drv_limits(sc->enp, &limits);
159
160         if ((rc = efx_nic_init(sc->enp)) != 0)
161                 return (rc);
162
163         rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
164                                  &txq_allocated);
165         if (rc != 0) {
166                 efx_nic_fini(sc->enp);
167                 return (rc);
168         }
169
170         KASSERT(txq_allocated >= SFXGE_EVQ0_N_TXQ(sc),
171                 ("txq_allocated < %u", SFXGE_EVQ0_N_TXQ(sc)));
172
173         sc->evq_max = MIN(evq_allocated, evq_max);
174         sc->evq_max = MIN(rxq_allocated, sc->evq_max);
175         sc->evq_max = MIN(txq_allocated - (SFXGE_EVQ0_N_TXQ(sc) - 1),
176                           sc->evq_max);
177
178         KASSERT(sc->evq_max <= evq_max,
179                 ("allocated more than maximum requested"));
180
181 #ifdef RSS
182         if (sc->evq_max < rss_getnumbuckets())
183                 device_printf(sc->dev, "The number of allocated queues (%u) "
184                               "is less than the number of RSS buckets (%u); "
185                               "performance degradation might be observed",
186                               sc->evq_max, rss_getnumbuckets());
187 #endif
188
189         /*
190          * NIC is kept initialized in the case of success to be able to
191          * initialize port to find out media types.
192          */
193         return (0);
194 }
195
196 static int
197 sfxge_set_drv_limits(struct sfxge_softc *sc)
198 {
199         efx_drv_limits_t limits;
200
201         memset(&limits, 0, sizeof(limits));
202
203         /* Limits are strict since take into account initial estimation */
204         limits.edl_min_evq_count = limits.edl_max_evq_count =
205             sc->intr.n_alloc;
206         limits.edl_min_txq_count = limits.edl_max_txq_count =
207             sc->intr.n_alloc + SFXGE_EVQ0_N_TXQ(sc) - 1;
208         limits.edl_min_rxq_count = limits.edl_max_rxq_count =
209             sc->intr.n_alloc;
210
211         return (efx_nic_set_drv_limits(sc->enp, &limits));
212 }
213
214 static int
215 sfxge_start(struct sfxge_softc *sc)
216 {
217         int rc;
218
219         SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
220
221         if (sc->init_state == SFXGE_STARTED)
222                 return (0);
223
224         if (sc->init_state != SFXGE_REGISTERED) {
225                 rc = EINVAL;
226                 goto fail;
227         }
228
229         /* Set required resource limits */
230         if ((rc = sfxge_set_drv_limits(sc)) != 0)
231                 goto fail;
232
233         if ((rc = efx_nic_init(sc->enp)) != 0)
234                 goto fail;
235
236         /* Start processing interrupts. */
237         if ((rc = sfxge_intr_start(sc)) != 0)
238                 goto fail2;
239
240         /* Start processing events. */
241         if ((rc = sfxge_ev_start(sc)) != 0)
242                 goto fail3;
243
244         /* Fire up the port. */
245         if ((rc = sfxge_port_start(sc)) != 0)
246                 goto fail4;
247
248         /* Start the receiver side. */
249         if ((rc = sfxge_rx_start(sc)) != 0)
250                 goto fail5;
251
252         /* Start the transmitter side. */
253         if ((rc = sfxge_tx_start(sc)) != 0)
254                 goto fail6;
255
256         sc->init_state = SFXGE_STARTED;
257
258         /* Tell the stack we're running. */
259         sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
260         sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
261
262         return (0);
263
264 fail6:
265         sfxge_rx_stop(sc);
266
267 fail5:
268         sfxge_port_stop(sc);
269
270 fail4:
271         sfxge_ev_stop(sc);
272
273 fail3:
274         sfxge_intr_stop(sc);
275
276 fail2:
277         efx_nic_fini(sc->enp);
278
279 fail:
280         device_printf(sc->dev, "sfxge_start: %d\n", rc);
281
282         return (rc);
283 }
284
/*
 * if_init callback: start the adapter under the adapter lock.
 * The callback returns void, so the result of sfxge_start() is discarded.
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = (struct sfxge_softc *)arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
296
297 static void
298 sfxge_stop(struct sfxge_softc *sc)
299 {
300         SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
301
302         if (sc->init_state != SFXGE_STARTED)
303                 return;
304
305         sc->init_state = SFXGE_REGISTERED;
306
307         /* Stop the transmitter. */
308         sfxge_tx_stop(sc);
309
310         /* Stop the receiver. */
311         sfxge_rx_stop(sc);
312
313         /* Stop the port. */
314         sfxge_port_stop(sc);
315
316         /* Stop processing events. */
317         sfxge_ev_stop(sc);
318
319         /* Stop processing interrupts. */
320         sfxge_intr_stop(sc);
321
322         efx_nic_fini(sc->enp);
323
324         sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
325 }
326
327 static int
328 sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
329 {
330         efx_vpd_value_t value;
331         int rc = 0;
332
333         switch (ioc->u.vpd.op) {
334         case SFXGE_VPD_OP_GET_KEYWORD:
335                 value.evv_tag = ioc->u.vpd.tag;
336                 value.evv_keyword = ioc->u.vpd.keyword;
337                 rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
338                 if (rc != 0)
339                         break;
340                 ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
341                 if (ioc->u.vpd.payload != 0) {
342                         rc = copyout(value.evv_value, ioc->u.vpd.payload,
343                                      ioc->u.vpd.len);
344                 }
345                 break;
346         case SFXGE_VPD_OP_SET_KEYWORD:
347                 if (ioc->u.vpd.len > sizeof(value.evv_value))
348                         return (EINVAL);
349                 value.evv_tag = ioc->u.vpd.tag;
350                 value.evv_keyword = ioc->u.vpd.keyword;
351                 value.evv_length = ioc->u.vpd.len;
352                 rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
353                 if (rc != 0)
354                         break;
355                 rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
356                 if (rc != 0)
357                         break;
358                 rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
359                 if (rc != 0)
360                         break;
361                 rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
362                 break;
363         default:
364                 rc = EOPNOTSUPP;
365                 break;
366         }
367
368         return (rc);
369 }
370
371 static int
372 sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
373 {
374         switch (ioc->op) {
375         case SFXGE_MCDI_IOC:
376                 return (sfxge_mcdi_ioctl(sc, ioc));
377         case SFXGE_NVRAM_IOC:
378                 return (sfxge_nvram_ioctl(sc, ioc));
379         case SFXGE_VPD_IOC:
380                 return (sfxge_vpd_ioctl(sc, ioc));
381         default:
382                 return (EOPNOTSUPP);
383         }
384 }
385
386 static int
387 sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
388 {
389         struct sfxge_softc *sc;
390         struct ifreq *ifr;
391         sfxge_ioc_t ioc;
392         int error;
393
394         ifr = (struct ifreq *)data;
395         sc = ifp->if_softc;
396         error = 0;
397
398         switch (command) {
399         case SIOCSIFFLAGS:
400                 SFXGE_ADAPTER_LOCK(sc);
401                 if (ifp->if_flags & IFF_UP) {
402                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
403                                 if ((ifp->if_flags ^ sc->if_flags) &
404                                     (IFF_PROMISC | IFF_ALLMULTI)) {
405                                         sfxge_mac_filter_set(sc);
406                                 }
407                         } else
408                                 sfxge_start(sc);
409                 } else
410                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
411                                 sfxge_stop(sc);
412                 sc->if_flags = ifp->if_flags;
413                 SFXGE_ADAPTER_UNLOCK(sc);
414                 break;
415         case SIOCSIFMTU:
416                 if (ifr->ifr_mtu == ifp->if_mtu) {
417                         /* Nothing to do */
418                         error = 0;
419                 } else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
420                         error = EINVAL;
421                 } else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
422                         ifp->if_mtu = ifr->ifr_mtu;
423                         error = 0;
424                 } else {
425                         /* Restart required */
426                         SFXGE_ADAPTER_LOCK(sc);
427                         sfxge_stop(sc);
428                         ifp->if_mtu = ifr->ifr_mtu;
429                         error = sfxge_start(sc);
430                         SFXGE_ADAPTER_UNLOCK(sc);
431                         if (error != 0) {
432                                 ifp->if_flags &= ~IFF_UP;
433                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
434                                 if_down(ifp);
435                         }
436                 }
437                 break;
438         case SIOCADDMULTI:
439         case SIOCDELMULTI:
440                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
441                         sfxge_mac_filter_set(sc);
442                 break;
443         case SIOCSIFCAP:
444         {
445                 int reqcap = ifr->ifr_reqcap;
446                 int capchg_mask;
447
448                 SFXGE_ADAPTER_LOCK(sc);
449
450                 /* Capabilities to be changed in accordance with request */
451                 capchg_mask = ifp->if_capenable ^ reqcap;
452
453                 /*
454                  * The networking core already rejects attempts to
455                  * enable capabilities we don't have.  We still have
456                  * to reject attempts to disable capabilities that we
457                  * can't (yet) disable.
458                  */
459                 KASSERT((reqcap & ~ifp->if_capabilities) == 0,
460                     ("Unsupported capabilities 0x%x requested 0x%x vs "
461                      "supported 0x%x",
462                      reqcap & ~ifp->if_capabilities,
463                      reqcap , ifp->if_capabilities));
464                 if (capchg_mask & SFXGE_CAP_FIXED) {
465                         error = EINVAL;
466                         SFXGE_ADAPTER_UNLOCK(sc);
467                         break;
468                 }
469
470                 /* Check request before any changes */
471                 if ((capchg_mask & IFCAP_TSO4) &&
472                     (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
473                         error = EAGAIN;
474                         SFXGE_ADAPTER_UNLOCK(sc);
475                         if_printf(ifp, "enable txcsum before tso4\n");
476                         break;
477                 }
478                 if ((capchg_mask & IFCAP_TSO6) &&
479                     (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
480                         error = EAGAIN;
481                         SFXGE_ADAPTER_UNLOCK(sc);
482                         if_printf(ifp, "enable txcsum6 before tso6\n");
483                         break;
484                 }
485
486                 if (reqcap & IFCAP_TXCSUM) {
487                         ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
488                 } else {
489                         ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
490                         if (reqcap & IFCAP_TSO4) {
491                                 reqcap &= ~IFCAP_TSO4;
492                                 if_printf(ifp,
493                                     "tso4 disabled due to -txcsum\n");
494                         }
495                 }
496                 if (reqcap & IFCAP_TXCSUM_IPV6) {
497                         ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
498                 } else {
499                         ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
500                         if (reqcap & IFCAP_TSO6) {
501                                 reqcap &= ~IFCAP_TSO6;
502                                 if_printf(ifp,
503                                     "tso6 disabled due to -txcsum6\n");
504                         }
505                 }
506
507                 /*
508                  * The kernel takes both IFCAP_TSOx and CSUM_TSO into
509                  * account before using TSO. So, we do not touch
510                  * checksum flags when IFCAP_TSOx is modified.
511                  * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
512                  * but both bits are set in IPv4 and IPv6 mbufs.
513                  */
514
515                 ifp->if_capenable = reqcap;
516
517                 SFXGE_ADAPTER_UNLOCK(sc);
518                 break;
519         }
520         case SIOCSIFMEDIA:
521         case SIOCGIFMEDIA:
522                 error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
523                 break;
524 #ifdef SIOCGI2C
525         case SIOCGI2C:
526         {
527                 struct ifi2creq i2c;
528
529                 error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
530                 if (error != 0)
531                         break;
532
533                 if (i2c.len > sizeof(i2c.data)) {
534                         error = EINVAL;
535                         break;
536                 }
537
538                 SFXGE_ADAPTER_LOCK(sc);
539                 error = efx_phy_module_get_info(sc->enp, i2c.dev_addr,
540                                                 i2c.offset, i2c.len,
541                                                 &i2c.data[0]);
542                 SFXGE_ADAPTER_UNLOCK(sc);
543                 if (error == 0)
544                         error = copyout(&i2c, ifr_data_get_ptr(ifr),
545                             sizeof(i2c));
546                 break;
547         }
548 #endif
549         case SIOCGPRIVATE_0:
550                 error = priv_check(curthread, PRIV_DRIVER);
551                 if (error != 0)
552                         break;
553                 error = copyin(ifr_data_get_ptr(ifr), &ioc, sizeof(ioc));
554                 if (error != 0)
555                         return (error);
556                 error = sfxge_private_ioctl(sc, &ioc);
557                 if (error == 0) {
558                         error = copyout(&ioc, ifr_data_get_ptr(ifr),
559                             sizeof(ioc));
560                 }
561                 break;
562         default:
563                 error = ether_ioctl(ifp, command, data);
564         }
565
566         return (error);
567 }
568
569 static void
570 sfxge_ifnet_fini(struct ifnet *ifp)
571 {
572         struct sfxge_softc *sc = ifp->if_softc;
573
574         SFXGE_ADAPTER_LOCK(sc);
575         sfxge_stop(sc);
576         SFXGE_ADAPTER_UNLOCK(sc);
577
578         ifmedia_removeall(&sc->media);
579         ether_ifdetach(ifp);
580         if_free(ifp);
581 }
582
583 static int
584 sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
585 {
586         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
587         device_t dev;
588         int rc;
589
590         dev = sc->dev;
591         sc->ifnet = ifp;
592
593         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
594         ifp->if_init = sfxge_if_init;
595         ifp->if_softc = sc;
596         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
597         ifp->if_ioctl = sfxge_if_ioctl;
598
599         ifp->if_capabilities = SFXGE_CAP;
600         ifp->if_capenable = SFXGE_CAP_ENABLE;
601         ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE;
602         ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG;
603         ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
604
605 #ifdef SFXGE_LRO
606         ifp->if_capabilities |= IFCAP_LRO;
607         ifp->if_capenable |= IFCAP_LRO;
608 #endif
609
610         if (encp->enc_hw_tx_insert_vlan_enabled) {
611                 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
612                 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
613         }
614         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
615                            CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
616
617         ether_ifattach(ifp, encp->enc_mac_addr);
618
619         ifp->if_transmit = sfxge_if_transmit;
620         ifp->if_qflush = sfxge_if_qflush;
621
622         ifp->if_get_counter = sfxge_get_counter;
623
624         DBGPRINT(sc->dev, "ifmedia_init");
625         if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
626                 goto fail;
627
628         return (0);
629
630 fail:
631         ether_ifdetach(sc->ifnet);
632         return (rc);
633 }
634
635 void
636 sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
637 {
638         KASSERT(sc->buffer_table_next + n <=
639                 efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
640                 ("buffer table full"));
641
642         *idp = sc->buffer_table_next;
643         sc->buffer_table_next += n;
644 }
645
646 static int
647 sfxge_bar_init(struct sfxge_softc *sc)
648 {
649         efsys_bar_t *esbp = &sc->bar;
650
651         esbp->esb_rid = PCIR_BAR(sc->mem_bar);
652         if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
653             &esbp->esb_rid, RF_ACTIVE)) == NULL) {
654                 device_printf(sc->dev, "Cannot allocate BAR region %d\n",
655                     sc->mem_bar);
656                 return (ENXIO);
657         }
658         esbp->esb_tag = rman_get_bustag(esbp->esb_res);
659         esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
660
661         SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
662
663         return (0);
664 }
665
666 static void
667 sfxge_bar_fini(struct sfxge_softc *sc)
668 {
669         efsys_bar_t *esbp = &sc->bar;
670
671         bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
672             esbp->esb_res);
673         SFXGE_BAR_LOCK_DESTROY(esbp);
674 }
675
676 static int
677 sfxge_create(struct sfxge_softc *sc)
678 {
679         device_t dev;
680         efx_nic_t *enp;
681         int error;
682         char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
683 #if EFSYS_OPT_MCDI_LOGGING
684         char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))];
685 #endif
686
687         dev = sc->dev;
688
689         SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
690
691         sc->max_rss_channels = 0;
692         snprintf(rss_param_name, sizeof(rss_param_name),
693                  SFXGE_PARAM(%d.max_rss_channels),
694                  (int)device_get_unit(dev));
695         TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
696 #if EFSYS_OPT_MCDI_LOGGING
697         sc->mcdi_logging = sfxge_mcdi_logging;
698         snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name),
699                  SFXGE_PARAM(%d.mcdi_logging),
700                  (int)device_get_unit(dev));
701         TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging);
702 #endif
703
704         sc->stats_node = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
705             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats",
706             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics");
707         if (sc->stats_node == NULL) {
708                 error = ENOMEM;
709                 goto fail;
710         }
711
712         TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
713
714         (void) pci_enable_busmaster(dev);
715
716         /* Initialize DMA mappings. */
717         DBGPRINT(sc->dev, "dma_init...");
718         if ((error = sfxge_dma_init(sc)) != 0)
719                 goto fail;
720
721         error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
722             &sc->family, &sc->mem_bar);
723         KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
724
725         /* Map the device registers. */
726         DBGPRINT(sc->dev, "bar_init...");
727         if ((error = sfxge_bar_init(sc)) != 0)
728                 goto fail;
729
730         DBGPRINT(sc->dev, "nic_create...");
731
732         /* Create the common code nic object. */
733         SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
734                               device_get_nameunit(sc->dev), "nic");
735         if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
736             &sc->bar, &sc->enp_lock, &enp)) != 0)
737                 goto fail3;
738         sc->enp = enp;
739
740         /* Initialize MCDI to talk to the microcontroller. */
741         DBGPRINT(sc->dev, "mcdi_init...");
742         if ((error = sfxge_mcdi_init(sc)) != 0)
743                 goto fail4;
744
745         /* Probe the NIC and build the configuration data area. */
746         DBGPRINT(sc->dev, "nic_probe...");
747         if ((error = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE)) != 0)
748                 goto fail5;
749
750         if (!ISP2(sfxge_rx_ring_entries) ||
751             (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
752             (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
753                 log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
754                     SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
755                     EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
756                 error = EINVAL;
757                 goto fail_rx_ring_entries;
758         }
759         sc->rxq_entries = sfxge_rx_ring_entries;
760
761         if (efx_nic_cfg_get(enp)->enc_features & EFX_FEATURE_TXQ_CKSUM_OP_DESC)
762                 sc->txq_dynamic_cksum_toggle_supported = B_TRUE;
763         else
764                 sc->txq_dynamic_cksum_toggle_supported = B_FALSE;
765
766         if (!ISP2(sfxge_tx_ring_entries) ||
767             (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
768             (sfxge_tx_ring_entries > efx_nic_cfg_get(enp)->enc_txq_max_ndescs)) {
769                 log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
770                     SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
771                     EFX_TXQ_MINNDESCS, efx_nic_cfg_get(enp)->enc_txq_max_ndescs);
772                 error = EINVAL;
773                 goto fail_tx_ring_entries;
774         }
775         sc->txq_entries = sfxge_tx_ring_entries;
776
777         SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
778                           SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
779                           OID_AUTO, "version", CTLFLAG_RD,
780                           SFXGE_VERSION_STRING, 0,
781                           "Driver version");
782
783         SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
784                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
785                         OID_AUTO, "phy_type", CTLFLAG_RD,
786                         NULL, efx_nic_cfg_get(enp)->enc_phy_type,
787                         "PHY type");
788
789         /* Initialize the NVRAM. */
790         DBGPRINT(sc->dev, "nvram_init...");
791         if ((error = efx_nvram_init(enp)) != 0)
792                 goto fail6;
793
794         /* Initialize the VPD. */
795         DBGPRINT(sc->dev, "vpd_init...");
796         if ((error = efx_vpd_init(enp)) != 0)
797                 goto fail7;
798
799         efx_mcdi_new_epoch(enp);
800
801         /* Reset the NIC. */
802         DBGPRINT(sc->dev, "nic_reset...");
803         if ((error = efx_nic_reset(enp)) != 0)
804                 goto fail8;
805
806         /* Initialize buffer table allocation. */
807         sc->buffer_table_next = 0;
808
809         /*
810          * Guarantee minimum and estimate maximum number of event queues
811          * to take it into account when MSI-X interrupts are allocated.
812          * It initializes NIC and keeps it initialized on success.
813          */
814         if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
815                 goto fail8;
816
817         /* Set up interrupts. */
818         DBGPRINT(sc->dev, "intr_init...");
819         if ((error = sfxge_intr_init(sc)) != 0)
820                 goto fail9;
821
822         /* Initialize event processing state. */
823         DBGPRINT(sc->dev, "ev_init...");
824         if ((error = sfxge_ev_init(sc)) != 0)
825                 goto fail11;
826
827         /* Initialize port state. */
828         DBGPRINT(sc->dev, "port_init...");
829         if ((error = sfxge_port_init(sc)) != 0)
830                 goto fail12;
831
832         /* Initialize receive state. */
833         DBGPRINT(sc->dev, "rx_init...");
834         if ((error = sfxge_rx_init(sc)) != 0)
835                 goto fail13;
836
837         /* Initialize transmit state. */
838         DBGPRINT(sc->dev, "tx_init...");
839         if ((error = sfxge_tx_init(sc)) != 0)
840                 goto fail14;
841
842         sc->init_state = SFXGE_INITIALIZED;
843
844         DBGPRINT(sc->dev, "success");
845         return (0);
846
847 fail14:
848         sfxge_rx_fini(sc);
849
850 fail13:
851         sfxge_port_fini(sc);
852
853 fail12:
854         sfxge_ev_fini(sc);
855
856 fail11:
857         sfxge_intr_fini(sc);
858
859 fail9:
860         efx_nic_fini(sc->enp);
861
862 fail8:
863         efx_vpd_fini(enp);
864
865 fail7:
866         efx_nvram_fini(enp);
867
868 fail6:
869 fail_tx_ring_entries:
870 fail_rx_ring_entries:
871         efx_nic_unprobe(enp);
872
873 fail5:
874         sfxge_mcdi_fini(sc);
875
876 fail4:
877         sc->enp = NULL;
878         efx_nic_destroy(enp);
879         SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
880
881 fail3:
882         sfxge_bar_fini(sc);
883         (void) pci_disable_busmaster(sc->dev);
884
885 fail:
886         DBGPRINT(sc->dev, "failed %d", error);
887         sc->dev = NULL;
888         SFXGE_ADAPTER_LOCK_DESTROY(sc);
889         return (error);
890 }
891
892 static void
893 sfxge_destroy(struct sfxge_softc *sc)
894 {
895         efx_nic_t *enp;
896
897         /* Clean up transmit state. */
898         sfxge_tx_fini(sc);
899
900         /* Clean up receive state. */
901         sfxge_rx_fini(sc);
902
903         /* Clean up port state. */
904         sfxge_port_fini(sc);
905
906         /* Clean up event processing state. */
907         sfxge_ev_fini(sc);
908
909         /* Clean up interrupts. */
910         sfxge_intr_fini(sc);
911
912         /* Tear down common code subsystems. */
913         efx_nic_reset(sc->enp);
914         efx_vpd_fini(sc->enp);
915         efx_nvram_fini(sc->enp);
916         efx_nic_unprobe(sc->enp);
917
918         /* Tear down MCDI. */
919         sfxge_mcdi_fini(sc);
920
921         /* Destroy common code context. */
922         enp = sc->enp;
923         sc->enp = NULL;
924         efx_nic_destroy(enp);
925
926         /* Free DMA memory. */
927         sfxge_dma_fini(sc);
928
929         /* Free mapped BARs. */
930         sfxge_bar_fini(sc);
931
932         (void) pci_disable_busmaster(sc->dev);
933
934         taskqueue_drain(taskqueue_thread, &sc->task_reset);
935
936         /* Destroy the softc lock. */
937         SFXGE_ADAPTER_LOCK_DESTROY(sc);
938 }
939
940 static int
941 sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
942 {
943         struct sfxge_softc *sc = arg1;
944         efx_vpd_value_t value;
945         int rc;
946
947         value.evv_tag = arg2 >> 16;
948         value.evv_keyword = arg2 & 0xffff;
949         if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
950             != 0)
951                 return (rc);
952
953         return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
954 }
955
956 static void
957 sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
958                   efx_vpd_tag_t tag, const char *keyword)
959 {
960         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
961         efx_vpd_value_t value;
962
963         /* Check whether VPD tag/keyword is present */
964         value.evv_tag = tag;
965         value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
966         if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
967                 return;
968
969         SYSCTL_ADD_PROC(ctx, list, OID_AUTO, keyword,
970             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
971             sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
972             sfxge_vpd_handler, "A", "");
973 }
974
975 static int
976 sfxge_vpd_init(struct sfxge_softc *sc)
977 {
978         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
979         struct sysctl_oid *vpd_node;
980         struct sysctl_oid_list *vpd_list;
981         char keyword[3];
982         efx_vpd_value_t value;
983         int rc;
984
985         if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
986                 /*
987                  * Unpriviledged functions deny VPD access.
988                  * Simply skip VPD in this case.
989                  */
990                 if (rc == EACCES)
991                         goto done;
992                 goto fail;
993         }
994         sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
995         if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
996                 goto fail2;
997
998         /* Copy ID (product name) into device description, and log it. */
999         value.evv_tag = EFX_VPD_ID;
1000         if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
1001                 value.evv_value[value.evv_length] = 0;
1002                 device_set_desc_copy(sc->dev, value.evv_value);
1003                 device_printf(sc->dev, "%s\n", value.evv_value);
1004         }
1005
1006         vpd_node = SYSCTL_ADD_NODE(ctx,
1007             SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "vpd",
1008             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Vital Product Data");
1009         vpd_list = SYSCTL_CHILDREN(vpd_node);
1010
1011         /* Add sysctls for all expected and any vendor-defined keywords. */
1012         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
1013         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
1014         sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
1015         keyword[0] = 'V';
1016         keyword[2] = 0;
1017         for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
1018                 sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1019         for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
1020                 sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1021
1022 done:
1023         return (0);
1024
1025 fail2:
1026         free(sc->vpd_data, M_SFXGE);
1027 fail:
1028         return (rc);
1029 }
1030
1031 static void
1032 sfxge_vpd_fini(struct sfxge_softc *sc)
1033 {
1034         free(sc->vpd_data, M_SFXGE);
1035 }
1036
1037 static void
1038 sfxge_reset(void *arg, int npending)
1039 {
1040         struct sfxge_softc *sc;
1041         int rc;
1042         unsigned attempt;
1043
1044         (void)npending;
1045
1046         sc = (struct sfxge_softc *)arg;
1047
1048         SFXGE_ADAPTER_LOCK(sc);
1049
1050         if (sc->init_state != SFXGE_STARTED)
1051                 goto done;
1052
1053         sfxge_stop(sc);
1054         efx_nic_reset(sc->enp);
1055         for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) {
1056                 if ((rc = sfxge_start(sc)) == 0)
1057                         goto done;
1058
1059                 device_printf(sc->dev, "start on reset failed (%d)\n", rc);
1060                 DELAY(100000);
1061         }
1062
1063         device_printf(sc->dev, "reset failed; interface is now stopped\n");
1064
1065 done:
1066         SFXGE_ADAPTER_UNLOCK(sc);
1067 }
1068
1069 void
1070 sfxge_schedule_reset(struct sfxge_softc *sc)
1071 {
1072         taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
1073 }
1074
1075 static int
1076 sfxge_attach(device_t dev)
1077 {
1078         struct sfxge_softc *sc;
1079         struct ifnet *ifp;
1080         int error;
1081
1082         sc = device_get_softc(dev);
1083         sc->dev = dev;
1084
1085         /* Allocate ifnet. */
1086         ifp = if_alloc(IFT_ETHER);
1087         if (ifp == NULL) {
1088                 device_printf(dev, "Couldn't allocate ifnet\n");
1089                 error = ENOMEM;
1090                 goto fail;
1091         }
1092         sc->ifnet = ifp;
1093
1094         /* Initialize hardware. */
1095         DBGPRINT(sc->dev, "create nic");
1096         if ((error = sfxge_create(sc)) != 0)
1097                 goto fail2;
1098
1099         /* Create the ifnet for the port. */
1100         DBGPRINT(sc->dev, "init ifnet");
1101         if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1102                 goto fail3;
1103
1104         DBGPRINT(sc->dev, "init vpd");
1105         if ((error = sfxge_vpd_init(sc)) != 0)
1106                 goto fail4;
1107
1108         /*
1109          * NIC is initialized inside sfxge_create() and kept inialized
1110          * to be able to initialize port to discover media types in
1111          * sfxge_ifnet_init().
1112          */
1113         efx_nic_fini(sc->enp);
1114
1115         sc->init_state = SFXGE_REGISTERED;
1116
1117         DBGPRINT(sc->dev, "success");
1118         return (0);
1119
1120 fail4:
1121         sfxge_ifnet_fini(ifp);
1122 fail3:
1123         efx_nic_fini(sc->enp);
1124         sfxge_destroy(sc);
1125
1126 fail2:
1127         if_free(sc->ifnet);
1128
1129 fail:
1130         DBGPRINT(sc->dev, "failed %d", error);
1131         return (error);
1132 }
1133
1134 static int
1135 sfxge_detach(device_t dev)
1136 {
1137         struct sfxge_softc *sc;
1138
1139         sc = device_get_softc(dev);
1140
1141         sfxge_vpd_fini(sc);
1142
1143         /* Destroy the ifnet. */
1144         sfxge_ifnet_fini(sc->ifnet);
1145
1146         /* Tear down hardware. */
1147         sfxge_destroy(sc);
1148
1149         return (0);
1150 }
1151
1152 static int
1153 sfxge_probe(device_t dev)
1154 {
1155         uint16_t pci_vendor_id;
1156         uint16_t pci_device_id;
1157         efx_family_t family;
1158         unsigned int mem_bar;
1159         int rc;
1160
1161         pci_vendor_id = pci_get_vendor(dev);
1162         pci_device_id = pci_get_device(dev);
1163
1164         DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1165         rc = efx_family(pci_vendor_id, pci_device_id, &family, &mem_bar);
1166         if (rc != 0) {
1167                 DBGPRINT(dev, "efx_family fail %d", rc);
1168                 return (ENXIO);
1169         }
1170
1171         if (family == EFX_FAMILY_SIENA) {
1172                 device_set_desc(dev, "Solarflare SFC9000 family");
1173                 return (0);
1174         }
1175
1176         if (family == EFX_FAMILY_HUNTINGTON) {
1177                 device_set_desc(dev, "Solarflare SFC9100 family");
1178                 return (0);
1179         }
1180
1181         if (family == EFX_FAMILY_MEDFORD) {
1182                 device_set_desc(dev, "Solarflare SFC9200 family");
1183                 return (0);
1184         }
1185
1186         if (family == EFX_FAMILY_MEDFORD2) {
1187                 device_set_desc(dev, "Solarflare SFC9250 family");
1188                 return (0);
1189         }
1190
1191         DBGPRINT(dev, "impossible controller family %d", family);
1192         return (ENXIO);
1193 }
1194
1195 static device_method_t sfxge_methods[] = {
1196         DEVMETHOD(device_probe,         sfxge_probe),
1197         DEVMETHOD(device_attach,        sfxge_attach),
1198         DEVMETHOD(device_detach,        sfxge_detach),
1199
1200         DEVMETHOD_END
1201 };
1202
1203 static devclass_t sfxge_devclass;
1204
1205 static driver_t sfxge_driver = {
1206         "sfxge",
1207         sfxge_methods,
1208         sizeof(struct sfxge_softc)
1209 };
1210
1211 DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);