]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/sume/if_sume.c
Driver for 4x10Gb Ethernet reference NIC FPGA design for NetFPGA SUME
[FreeBSD/FreeBSD.git] / sys / dev / sume / if_sume.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015 Bjoern A. Zeeb
5  * Copyright (c) 2020 Denis Salopek
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
9  * ("MRC2"), as part of the DARPA MRC research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include <sys/param.h>
37 #include <sys/bus.h>
38 #include <sys/endian.h>
39 #include <sys/kernel.h>
40 #include <sys/limits.h>
41 #include <sys/module.h>
42 #include <sys/rman.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/sysctl.h>
46 #include <sys/taskqueue.h>
47
48 #include <net/if.h>
49 #include <net/if_media.h>
50 #include <net/if_types.h>
51 #include <net/if_var.h>
52
53 #include <netinet/in.h>
54 #include <netinet/if_ether.h>
55
56 #include <dev/pci/pcivar.h>
57 #include <dev/pci/pcireg.h>
58
59 #include <machine/bus.h>
60
61 #include "adapter.h"
62
63 #define PCI_VENDOR_ID_XILINX    0x10ee
64 #define PCI_DEVICE_ID_SUME      0x7028
65
66 /* SUME bus driver interface */
/* Forward declarations of the device methods referenced by the table below. */
67 static int sume_probe(device_t);
68 static int sume_attach(device_t);
69 static int sume_detach(device_t);
70
/*
 * Device method table: maps the device_probe, device_attach and
 * device_detach methods to this driver's handlers.
 */
71 static device_method_t sume_methods[] = {
72         DEVMETHOD(device_probe,         sume_probe),
73         DEVMETHOD(device_attach,        sume_attach),
74         DEVMETHOD(device_detach,        sume_detach),
75         DEVMETHOD_END
76 };
77
/*
 * Driver description: the driver name "sume", its method table, and the
 * size of struct sume_adapter (presumably the per-device softc -- confirm
 * against the attach code).
 */
78 static driver_t sume_driver = {
79         "sume",
80         sume_methods,
81         sizeof(struct sume_adapter)
82 };
83
84 /*
85  * The DMA engine for SUME generates interrupts for each RX/TX transaction.
86  * Depending on the channel (0 if packet transaction, 1 if register transaction)
87  * the used bits of the interrupt vector will be the lowest or the second lowest
88  * 5 bits.
89  *
90  * When receiving packets from SUME (RX):
91  * (1) SUME received a packet on one of the interfaces.
92  * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
93  *     transaction).
94  * (3) We read the length of the incoming packet and the offset along with the
95  *     'last' flag from the SUME registers.
96  * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
97  *     address buf_addr. For now, this is how it's done:
98  *     - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical
99  *     address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
100  *     and length of incoming data (buf_addr[2]).
101  *     - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
102  *     physical address buf_hw_addr is a block of contiguous memory mapped to
103  *     buf_addr, so we can set the incoming data's physical address (buf_addr[0]
104  *     and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
105  * (5) We notify SUME that the bouncebuffer is ready for the transaction by
106  *     writing the lower/upper physical address buf_hw_addr to the SUME
107  *     registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
108  *     well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
109  * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
110  *     bouncebuffer received).
111  * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
112  *     transaction is done).
113  * (8) SUME can do both steps (6) and (7) using the same interrupt.
114  * (9) We read the first 16 bytes (metadata) of the received data and note the
115  *     incoming interface so we can later forward it to the right one in the OS
116  *     (sume0, sume1, sume2 or sume3).
117  * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf
118  *     and set the mbuf rcvif to the incoming interface.
119  * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
120  *
121  * When sending packets to SUME (TX):
122  * (1) The OS calls sume_if_start() function on TX.
123  * (2) We get the mbuf packet data and copy it to the
124  *     buf_addr+3*sizeof(uint32_t) + metadata 16 bytes.
125  * (3) We create the metadata based on the output interface and copy it to the
126  *     buf_addr+3*sizeof(uint32_t).
127  * (4) We write the offset/last and length of the packet to the SUME registers
128  *     RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
129  * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
130  *     with the physical address and length just as in RX step (4).
131  * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
132  *     registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
133  *     RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
134  * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
135  *     bouncebuffer is read).
136  * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
137  *     transaction is done).
138  * (9) SUME can do both steps (7) and (8) using the same interrupt.
139  *
140  * Internal registers
141  * Every module in the SUME hardware has its own set of internal registers
142  * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
143  * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
144  * offsets to different memory locations of every module are defined in their
145  * corresponding folder inside the library. These registers can be RO/RW and
146  * there is a special method to fetch/change this data over 1 or 2 DMA
147  * transactions. For writing, by calling the sume_module_reg_write(). For
148  * reading, by calling the sume_module_reg_write() and then
149  * sume_module_reg_read(). Check those functions for more information.
150  */
151
/* Declare and define the M_SUME malloc type (short name "sume"). */
152 MALLOC_DECLARE(M_SUME);
153 MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");
154
/* Forward declarations for helpers that are called before they are defined. */
155 static void check_tx_queues(struct sume_adapter *);
156 static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
157     uint64_t);
158
/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * unit-number allocator for the network interfaces -- confirm at its users.
 */
159 static struct unrhdr *unr;
160
161 static struct {
162         uint16_t device;
163         char *desc;
164 } sume_pciids[] = {
165         {PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
166 };
167
168 static inline uint32_t
169 read_reg(struct sume_adapter *adapter, int offset)
170 {
171
172         return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
173 }
174
175 static inline void
176 write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
177 {
178
179         bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
180 }
181
182 static int
183 sume_probe(device_t dev)
184 {
185         int i;
186         uint16_t v = pci_get_vendor(dev);
187         uint16_t d = pci_get_device(dev);
188
189         if (v != PCI_VENDOR_ID_XILINX)
190                 return (ENXIO);
191
192         for (i = 0; i < nitems(sume_pciids); i++) {
193                 if (d == sume_pciids[i].device) {
194                         device_set_desc(dev, sume_pciids[i].desc);
195                         return (BUS_PROBE_DEFAULT);
196                 }
197         }
198
199         return (ENXIO);
200 }
201
202 /*
203  * Building mbuf for packet received from SUME. We expect to receive 'len'
204  * bytes of data (including metadata) written from the bouncebuffer address
205  * buf_addr+3*sizeof(uint32_t). Metadata will tell us which SUME interface
206  * received the packet (sport will be 1, 2, 4 or 8), the packet length (plen),
207  * and the magic word needs to be 0xcafe. When we have the packet data, we
208  * create an mbuf and copy the data to it using m_copyback() function, set the
209  * correct interface to rcvif and return the mbuf to be later sent to the OS
210  * with if_input.
211  */
212 static struct mbuf *
213 sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len)
214 {
215         struct nf_priv *nf_priv;
216         struct mbuf *m;
217         struct ifnet *ifp = NULL;
218         int np;
219         uint16_t dport, plen, magic;
220         device_t dev = adapter->dev;
221         uint8_t *indata = (uint8_t *)
222             adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr +
223             sizeof(struct nf_bb_desc);
224         struct nf_metadata *mdata = (struct nf_metadata *) indata;
225
226         /* The metadata header is 16 bytes. */
227         if (len < sizeof(struct nf_metadata)) {
228                 device_printf(dev, "short frame (%d)\n", len);
229                 adapter->packets_err++;
230                 adapter->bytes_err += len;
231                 return (NULL);
232         }
233
234         dport = le16toh(mdata->dport);
235         plen = le16toh(mdata->plen);
236         magic = le16toh(mdata->magic);
237
238         if (sizeof(struct nf_metadata) + plen > len ||
239             magic != SUME_RIFFA_MAGIC) {
240                 device_printf(dev, "corrupted packet (%zd + %d > %d || magic "
241                     "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen,
242                     len, magic, SUME_RIFFA_MAGIC);
243                 return (NULL);
244         }
245
246         /* We got the packet from one of the even bits */
247         np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1;
248         if (np > SUME_NPORTS) {
249                 device_printf(dev, "invalid destination port 0x%04x (%d)\n",
250                     dport, np);
251                 adapter->packets_err++;
252                 adapter->bytes_err += plen;
253                 return (NULL);
254         }
255         ifp = adapter->ifp[np];
256         nf_priv = ifp->if_softc;
257         nf_priv->stats.rx_packets++;
258         nf_priv->stats.rx_bytes += plen;
259
260         /* If the interface is down, well, we are done. */
261         if (!(ifp->if_flags & IFF_UP)) {
262                 nf_priv->stats.ifc_down_packets++;
263                 nf_priv->stats.ifc_down_bytes += plen;
264                 return (NULL);
265         }
266
267         if (adapter->sume_debug)
268                 printf("Building mbuf with length: %d\n", plen);
269
270         m = m_getm(NULL, plen, M_NOWAIT, MT_DATA);
271         if (m == NULL) {
272                 adapter->packets_err++;
273                 adapter->bytes_err += plen;
274                 return (NULL);
275         }
276
277         /* Copy the data in at the right offset. */
278         m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata)));
279         m->m_pkthdr.rcvif = ifp;
280
281         return (m);
282 }
283
284 /*
285  * SUME interrupt handler for when we get a valid interrupt from the board.
286  * Theoretically, we can receive interrupt for any of the available channels,
287  * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32
288  * bit number, using 5 bits for every channel, the least significant bits
289  * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector
290  * bits for RX/TX are:
291  * RX
292  * bit 0 - new transaction from SUME
293  * bit 1 - SUME received our bouncebuffer address
294  * bit 2 - SUME copied the received data to our bouncebuffer, transaction done
295  * TX
296  * bit 3 - SUME received our bouncebuffer address
297  * bit 4 - SUME copied the data from our bouncebuffer, transaction done
298  *
299  * There are two finite state machines (one for TX, one for RX). We loop
300  * through channels 0 and 1 to check and our current state and which interrupt
301  * bit is set.
302  * TX
303  * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction.
304  * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer
305  * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3
306  * to go to the next state.
307  * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send
308  * our packet). Then we get the length of the sent data and go back to the
309  * IDLE state.
310  * RX
311  * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX
312  * transaction). When we get it, we prepare our bouncebuffer for reading and
313  * trigger the SUME to start the transaction. Go to the next state.
314  * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our
315  * bouncebuffer). Go to the next state.
316  * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
317  * we can build the mbuf and go back to the IDLE state.
318  */
319 static void
320 sume_intr_handler(void *arg)
321 {
322         struct sume_adapter *adapter = arg;
323         uint32_t vect, vect0, len;
324         int ch, loops;
325         device_t dev = adapter->dev;
326         struct mbuf *m = NULL;
327         struct ifnet *ifp = NULL;
328         struct riffa_chnl_dir *send, *recv;
329
330         SUME_LOCK(adapter);
331
332         vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF);
333         if ((vect0 & SUME_INVALID_VECT) != 0) {
334                 SUME_UNLOCK(adapter);
335                 return;
336         }
337
338         /*
339          * We only have one interrupt for all channels and no way
340          * to quickly lookup for which channel(s) we got an interrupt?
341          */
342         for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
343                 vect = vect0 >> (5 * ch);
344                 send = adapter->send[ch];
345                 recv = adapter->recv[ch];
346
347                 loops = 0;
348                 while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
349                     loops <= 5) {
350                         if (adapter->sume_debug)
351                                 device_printf(dev, "TX ch %d state %u vect = "
352                                     "0x%08x\n", ch, send->state, vect);
353                         switch (send->state) {
354                         case SUME_RIFFA_CHAN_STATE_IDLE:
355                                 break;
356                         case SUME_RIFFA_CHAN_STATE_READY:
357                                 if (!(vect & SUME_MSI_TXBUF)) {
358                                         device_printf(dev, "ch %d unexpected "
359                                             "interrupt in send+3 state %u: "
360                                             "vect = 0x%08x\n", ch, send->state,
361                                             vect);
362                                         send->recovery = 1;
363                                         break;
364                                 }
365                                 send->state = SUME_RIFFA_CHAN_STATE_READ;
366                                 vect &= ~SUME_MSI_TXBUF;
367                                 break;
368                         case SUME_RIFFA_CHAN_STATE_READ:
369                                 if (!(vect & SUME_MSI_TXDONE)) {
370                                         device_printf(dev, "ch %d unexpected "
371                                             "interrupt in send+4 state %u: "
372                                             "vect = 0x%08x\n", ch, send->state,
373                                             vect);
374                                         send->recovery = 1;
375                                         break;
376                                 }
377                                 send->state = SUME_RIFFA_CHAN_STATE_LEN;
378
379                                 len = read_reg(adapter, RIFFA_CHNL_REG(ch,
380                                     RIFFA_RX_TNFR_LEN_REG_OFF));
381                                 if (ch == SUME_RIFFA_CHANNEL_DATA) {
382                                         send->state =
383                                             SUME_RIFFA_CHAN_STATE_IDLE;
384                                         check_tx_queues(adapter);
385                                 } else if (ch == SUME_RIFFA_CHANNEL_REG)
386                                         wakeup(&send->event);
387                                 else {
388                                         device_printf(dev, "ch %d unexpected "
389                                             "interrupt in send+4 state %u: "
390                                             "vect = 0x%08x\n", ch, send->state,
391                                             vect);
392                                         send->recovery = 1;
393                                 }
394                                 vect &= ~SUME_MSI_TXDONE;
395                                 break;
396                         case SUME_RIFFA_CHAN_STATE_LEN:
397                                 break;
398                         default:
399                                 device_printf(dev, "unknown TX state!\n");
400                         }
401                         loops++;
402                 }
403
404                 if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
405                     send->recovery)
406                         device_printf(dev, "ch %d ignoring vect = 0x%08x "
407                             "during TX; not in recovery; state = %d loops = "
408                             "%d\n", ch, vect, send->state, loops);
409
410                 loops = 0;
411                 while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
412                     SUME_MSI_RXDONE)) && loops < 5) {
413                         if (adapter->sume_debug)
414                                 device_printf(dev, "RX ch %d state %u vect = "
415                                     "0x%08x\n", ch, recv->state, vect);
416                         switch (recv->state) {
417                         case SUME_RIFFA_CHAN_STATE_IDLE:
418                                 if (!(vect & SUME_MSI_RXQUE)) {
419                                         device_printf(dev, "ch %d unexpected "
420                                             "interrupt in recv+0 state %u: "
421                                             "vect = 0x%08x\n", ch, recv->state,
422                                             vect);
423                                         recv->recovery = 1;
424                                         break;
425                                 }
426                                 uint32_t max_ptr;
427
428                                 /* Clear recovery state. */
429                                 recv->recovery = 0;
430
431                                 /* Get offset and length. */
432                                 recv->offlast = read_reg(adapter,
433                                     RIFFA_CHNL_REG(ch,
434                                     RIFFA_TX_OFFLAST_REG_OFF));
435                                 recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
436                                     RIFFA_TX_LEN_REG_OFF));
437
438                                 /* Boundary checks. */
439                                 max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
440                                     + SUME_RIFFA_OFFSET(recv->offlast)
441                                     + SUME_RIFFA_LEN(recv->len) - 1);
442                                 if (max_ptr <
443                                     (uint32_t)((uintptr_t)recv->buf_addr))
444                                         device_printf(dev, "receive buffer "
445                                             "wrap-around overflow.\n");
446                                 if (SUME_RIFFA_OFFSET(recv->offlast) +
447                                     SUME_RIFFA_LEN(recv->len) >
448                                     adapter->sg_buf_size)
449                                         device_printf(dev, "receive buffer too"
450                                             " small.\n");
451
452                                 /* Fill the bouncebuf "descriptor". */
453                                 sume_fill_bb_desc(adapter, recv,
454                                     SUME_RIFFA_LEN(recv->len));
455
456                                 bus_dmamap_sync(recv->ch_tag, recv->ch_map,
457                                     BUS_DMASYNC_PREREAD |
458                                     BUS_DMASYNC_PREWRITE);
459                                 write_reg(adapter, RIFFA_CHNL_REG(ch,
460                                     RIFFA_TX_SG_ADDR_LO_REG_OFF),
461                                     SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
462                                 write_reg(adapter, RIFFA_CHNL_REG(ch,
463                                     RIFFA_TX_SG_ADDR_HI_REG_OFF),
464                                     SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
465                                 write_reg(adapter, RIFFA_CHNL_REG(ch,
466                                     RIFFA_TX_SG_LEN_REG_OFF),
467                                     4 * recv->num_sg);
468                                 bus_dmamap_sync(recv->ch_tag, recv->ch_map,
469                                     BUS_DMASYNC_POSTREAD |
470                                     BUS_DMASYNC_POSTWRITE);
471
472                                 recv->state = SUME_RIFFA_CHAN_STATE_READY;
473                                 vect &= ~SUME_MSI_RXQUE;
474                                 break;
475                         case SUME_RIFFA_CHAN_STATE_READY:
476                                 if (!(vect & SUME_MSI_RXBUF)) {
477                                         device_printf(dev, "ch %d unexpected "
478                                             "interrupt in recv+1 state %u: "
479                                             "vect = 0x%08x\n", ch, recv->state,
480                                             vect);
481                                         recv->recovery = 1;
482                                         break;
483                                 }
484                                 recv->state = SUME_RIFFA_CHAN_STATE_READ;
485                                 vect &= ~SUME_MSI_RXBUF;
486                                 break;
487                         case SUME_RIFFA_CHAN_STATE_READ:
488                                 if (!(vect & SUME_MSI_RXDONE)) {
489                                         device_printf(dev, "ch %d unexpected "
490                                             "interrupt in recv+2 state %u: "
491                                             "vect = 0x%08x\n", ch, recv->state,
492                                             vect);
493                                         recv->recovery = 1;
494                                         break;
495                                 }
496                                 len = read_reg(adapter, RIFFA_CHNL_REG(ch,
497                                     RIFFA_TX_TNFR_LEN_REG_OFF));
498
499                                 /* Remember, len and recv->len are words. */
500                                 if (ch == SUME_RIFFA_CHANNEL_DATA) {
501                                         m = sume_rx_build_mbuf(adapter, 
502                                             len << 2);
503                                         recv->state =
504                                             SUME_RIFFA_CHAN_STATE_IDLE;
505                                 } else if (ch == SUME_RIFFA_CHANNEL_REG)
506                                         wakeup(&recv->event);
507                                 else {
508                                         device_printf(dev, "ch %d unexpected "
509                                             "interrupt in recv+2 state %u: "
510                                             "vect = 0x%08x\n", ch, recv->state,
511                                             vect);
512                                         recv->recovery = 1;
513                                 }
514                                 vect &= ~SUME_MSI_RXDONE;
515                                 break;
516                         case SUME_RIFFA_CHAN_STATE_LEN:
517                                 break;
518                         default:
519                                 device_printf(dev, "unknown RX state!\n");
520                         }
521                         loops++;
522                 }
523
524                 if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
525                     SUME_MSI_RXDONE)) && recv->recovery) {
526                         device_printf(dev, "ch %d ignoring vect = 0x%08x "
527                             "during RX; not in recovery; state = %d, loops = "
528                             "%d\n", ch, vect, recv->state, loops);
529
530                         /* Clean the unfinished transaction. */
531                         if (ch == SUME_RIFFA_CHANNEL_REG &&
532                             vect & SUME_MSI_RXDONE) {
533                                 read_reg(adapter, RIFFA_CHNL_REG(ch,
534                                     RIFFA_TX_TNFR_LEN_REG_OFF));
535                                 recv->recovery = 0;
536                         }
537                 }
538         }
539         SUME_UNLOCK(adapter);
540
541         if (m != NULL) {
542                 ifp = m->m_pkthdr.rcvif;
543                 (*ifp->if_input)(ifp, m);
544         }
545 }
546
547 /*
548  * As we cannot disable interrupt generation, ignore early interrupts by waiting
549  * for the adapter to go into the 'running' state.
550  */
551 static int
552 sume_intr_filter(void *arg)
553 {
554         struct sume_adapter *adapter = arg;
555
556         if (adapter->running == 0)
557                 return (FILTER_STRAY);
558
559         return (FILTER_SCHEDULE_THREAD);
560 }
561
562 static int
563 sume_probe_riffa_pci(struct sume_adapter *adapter)
564 {
565         device_t dev = adapter->dev;
566         int error, count, capmem;
567         uint32_t reg, devctl, linkctl;
568
569         pci_enable_busmaster(dev);
570
571         adapter->rid = PCIR_BAR(0);
572         adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
573             &adapter->rid, RF_ACTIVE);
574         if (adapter->bar0_addr == NULL) {
575                 device_printf(dev, "unable to allocate bus resource: "
576                     "BAR0 address\n");
577                 return (ENXIO);
578         }
579         adapter->bt = rman_get_bustag(adapter->bar0_addr);
580         adapter->bh = rman_get_bushandle(adapter->bar0_addr);
581         adapter->bar0_len = rman_get_size(adapter->bar0_addr);
582         if (adapter->bar0_len != 1024) {
583                 device_printf(dev, "BAR0 resource length %lu != 1024\n",
584                     adapter->bar0_len);
585                 return (ENXIO);
586         }
587
588         count = pci_msi_count(dev);
589         error = pci_alloc_msi(dev, &count);
590         if (error) {
591                 device_printf(dev, "unable to allocate bus resource: PCI "
592                     "MSI\n");
593                 return (error);
594         }
595
596         adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */
597         adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
598             &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
599         if (adapter->irq.res == NULL) {
600                 device_printf(dev, "unable to allocate bus resource: IRQ "
601                     "memory\n");
602                 return (ENXIO);
603         }
604
605         error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE |
606             INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter,
607             &adapter->irq.tag);
608         if (error) {
609                 device_printf(dev, "failed to setup interrupt for rid %d, name"
610                     " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
611                 return (ENXIO);
612         }
613
614         if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
615                 device_printf(dev, "PCI not PCIe capable\n");
616                 return (ENXIO);
617         }
618
619         devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2);
620         pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl |
621             PCIEM_CTL_EXT_TAG_FIELD), 2);
622
623         devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2);
624         pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl |
625             PCIEM_CTL2_ID_ORDERED_REQ_EN), 2);
626
627         linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2);
628         pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl |
629             PCIEM_LINK_CTL_RCB), 2);
630
631         reg = read_reg(adapter, RIFFA_INFO_REG_OFF);
632         adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf);
633         adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf);
634
635         error = ENODEV;
636         /* Check bus master is enabled. */
637         if (((reg >> 4) & 0x1) != 1) {
638                 device_printf(dev, "bus master not enabled: %d\n",
639                     (reg >> 4) & 0x1);
640                 return (error);
641         }
642         /* Check link parameters are valid. */
643         if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) {
644                 device_printf(dev, "link parameters not valid: %d %d\n",
645                     (reg >> 5) & 0x3f, (reg >> 11) & 0x3);
646                 return (error);
647         }
648         /* Check # of channels are within valid range. */
649         if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) {
650                 device_printf(dev, "number of channels out of range: %d\n",
651                     reg & 0xf);
652                 return (error);
653         }
654         /* Check bus width. */
655         if (((reg >> 19) & 0xf) == 0 ||
656             ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) {
657                 device_printf(dev, "bus width out of range: %d\n",
658                     (reg >> 19) & 0xf);
659                 return (error);
660         }
661
662         device_printf(dev, "[riffa] # of channels: %d\n",
663             reg & 0xf);
664         device_printf(dev, "[riffa] bus interface width: %d\n",
665             ((reg >> 19) & 0xf) << 5);
666         device_printf(dev, "[riffa] bus master enabled: %d\n",
667             (reg >> 4) & 0x1);
668         device_printf(dev, "[riffa] negotiated link width: %d\n",
669             (reg >> 5) & 0x3f);
670         device_printf(dev, "[riffa] negotiated rate width: %d MTs\n",
671             ((reg >> 11) & 0x3) * 2500);
672         device_printf(dev, "[riffa] max downstream payload: %d B\n",
673             128 << ((reg >> 13) & 0x7));
674         device_printf(dev, "[riffa] max upstream payload: %d B\n",
675             128 << ((reg >> 16) & 0x7));
676
677         return (0);
678 }
679
/*
 * Deliberately empty if_init handler.  It exists only because ether_ioctl
 * panics when the interface has no sume_if_init installed.
 */
static void
sume_if_init(void *sc)
{
	/* Nothing to do. */
}
685
686 /* Write the address and length for our incoming / outgoing transaction. */
687 static void
688 sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
689     uint64_t len)
690 {
691         struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr;
692
693         bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc));
694         bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32;
695         bouncebuf->len = len >> 2;
696 }
697
698 /* Module register locked write. */
699 static int
700 sume_modreg_write_locked(struct sume_adapter *adapter)
701 {
702         struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
703
704         /* Let the FPGA know about the transfer. */
705         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
706             RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
707         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
708             RIFFA_RX_LEN_REG_OFF), send->len);  /* words */
709
710         /* Fill the bouncebuf "descriptor". */
711         sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
712
713         /* Update the state before intiating the DMA to avoid races. */
714         send->state = SUME_RIFFA_CHAN_STATE_READY;
715
716         bus_dmamap_sync(send->ch_tag, send->ch_map,
717             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
718         /* DMA. */
719         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
720             RIFFA_RX_SG_ADDR_LO_REG_OFF),
721             SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
722         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
723             RIFFA_RX_SG_ADDR_HI_REG_OFF),
724             SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
725         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
726             RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
727         bus_dmamap_sync(send->ch_tag, send->ch_map,
728             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
729
730         return (0);
731 }
732
733 /*
734  * Request a register read or write (depending on optype).
735  * If optype is set (0x1f) this will result in a register write,
736  * otherwise this will result in a register read request at the given
737  * address and the result will need to be DMAed back.
738  */
739 static int
740 sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr,
741     uint32_t optype)
742 {
743         struct sume_adapter *adapter = nf_priv->adapter;
744         struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
745         struct nf_regop_data *data;
746         int error;
747
748         /*
749          * 1. Make sure the channel is free;  otherwise return EBUSY.
750          * 2. Prepare the memory in the bounce buffer (which we always
751          *    use for regs).
752          * 3. Start the DMA process.
753          * 4. Sleep and wait for result and return success or error.
754          */
755         SUME_LOCK(adapter);
756
757         if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) {
758                 SUME_UNLOCK(adapter);
759                 return (EBUSY);
760         }
761
762         data = (struct nf_regop_data *) (send->buf_addr +
763             sizeof(struct nf_bb_desc));
764         data->addr = htole32(sifr->addr);
765         data->val = htole32(sifr->val);
766         /* Tag to indentify request. */
767         data->rtag = htole32(++send->rtag);
768         data->optype = htole32(optype);
769         send->len = sizeof(struct nf_regop_data) / 4; /* words */
770
771         error = sume_modreg_write_locked(adapter);
772         if (error) {
773                 SUME_UNLOCK(adapter);
774                 return (EFAULT);
775         }
776
777         /* Timeout after 1s. */
778         if (send->state != SUME_RIFFA_CHAN_STATE_LEN)
779                 error = msleep(&send->event, &adapter->lock, 0,
780                     "Waiting recv finish", 1 * hz);
781
782         /* This was a write so we are done; were interrupted, or timed out. */
783         if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) {
784                 send->state = SUME_RIFFA_CHAN_STATE_IDLE;
785                 if (optype == SUME_MR_READ)
786                         error = EWOULDBLOCK;
787                 else
788                         error = 0;
789         } else
790                 error = 0;
791
792         /*
793          * For read requests we will update state once we are done
794          * having read the result to avoid any two outstanding
795          * transactions, or we need a queue and validate tags,
796          * which is a lot of work for a low priority, infrequent
797          * event.
798          */
799
800         SUME_UNLOCK(adapter);
801
802         return (error);
803 }
804
805 /* Module register read. */
806 static int
807 sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
808 {
809         struct sume_adapter *adapter = nf_priv->adapter;
810         struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG];
811         struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
812         struct nf_regop_data *data;
813         int error = 0;
814
815         /*
816          * 0. Sleep waiting for result if needed (unless condition is
817          *    true already).
818          * 1. Read DMA results.
819          * 2. Update state on *TX* to IDLE to allow next read to start.
820          */
821         SUME_LOCK(adapter);
822
823         bus_dmamap_sync(recv->ch_tag, recv->ch_map,
824             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
825         /*
826          * We only need to be woken up at the end of the transaction.
827          * Timeout after 1s.
828          */
829         if (recv->state != SUME_RIFFA_CHAN_STATE_READ)
830                 error = msleep(&recv->event, &adapter->lock, 0,
831                     "Waiting transaction finish", 1 * hz);
832
833         if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) {
834                 SUME_UNLOCK(adapter);
835                 device_printf(adapter->dev, "wait error: %d\n", error);
836                 return (EWOULDBLOCK);
837         }
838
839         bus_dmamap_sync(recv->ch_tag, recv->ch_map,
840             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
841
842         /*
843          * Read reply data and validate address and tag.
844          * Note: we do access the send side without lock but the state
845          * machine does prevent the data from changing.
846          */
847         data = (struct nf_regop_data *) (recv->buf_addr +
848             sizeof(struct nf_bb_desc));
849
850         if (le32toh(data->rtag) != send->rtag)
851                 device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n",
852                     le32toh(data->rtag), send->rtag);
853
854         sifr->val = le32toh(data->val);
855         recv->state = SUME_RIFFA_CHAN_STATE_IDLE;
856
857         /* We are done. */
858         send->state = SUME_RIFFA_CHAN_STATE_IDLE;
859
860         SUME_UNLOCK(adapter);
861
862         return (0);
863 }
864
865 /* Read value from a module register and return it to a sume_ifreq. */
866 static int
867 get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
868 {
869         int error;
870
871         error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
872         if (!error)
873                 error = sume_module_reg_read(nf_priv, sifr);
874
875         return (error);
876 }
877
878 static int
879 sume_if_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
880 {
881         struct ifreq *ifr = (struct ifreq *) data;
882         struct nf_priv *nf_priv = ifp->if_softc;
883         struct sume_ifreq sifr;
884         int error = 0;
885
886         switch (cmd) {
887         case SIOCGIFMEDIA:
888         case SIOCGIFXMEDIA:
889                 error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
890                 break;
891
892         case SUME_IOCTL_CMD_WRITE_REG:
893                 error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
894                 if (error) {
895                         error = EINVAL;
896                         break;
897                 }
898                 error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
899                 break;
900
901         case SUME_IOCTL_CMD_READ_REG:
902                 error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
903                 if (error) {
904                         error = EINVAL;
905                         break;
906                 }
907
908                 error = get_modreg_value(nf_priv, &sifr);
909                 if (error)
910                         break;
911
912                 error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
913                 if (error)
914                         error = EINVAL;
915
916                 break;
917
918         case SIOCSIFFLAGS:
919                 /* Silence tcpdump 'promisc mode not supported' warning. */
920                 if (ifp->if_flags & IFF_PROMISC)
921                         break;
922
923         default:
924                 error = ether_ioctl(ifp, cmd, data);
925                 break;
926         }
927
928         return (error);
929 }
930
931 static int
932 sume_media_change(struct ifnet *ifp)
933 {
934         struct nf_priv *nf_priv = ifp->if_softc;
935         struct ifmedia *ifm = &nf_priv->media;
936
937         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
938                 return (EINVAL);
939
940         if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
941                 ifp->if_baudrate = ifmedia_baudrate(IFM_ETHER | IFM_10G_SR);
942         else
943                 ifp->if_baudrate = ifmedia_baudrate(ifm->ifm_media);
944
945         return (0);
946 }
947
948 static void
949 sume_update_link_status(struct ifnet *ifp)
950 {
951         struct nf_priv *nf_priv = ifp->if_softc;
952         struct sume_adapter *adapter = nf_priv->adapter;
953         struct sume_ifreq sifr;
954         int link_status;
955
956         sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
957         sifr.val = 0;
958
959         if (get_modreg_value(nf_priv, &sifr))
960                 return;
961
962         link_status = SUME_LINK_STATUS(sifr.val);
963
964         if (!link_status && nf_priv->link_up) {
965                 if_link_state_change(ifp, LINK_STATE_DOWN);
966                 nf_priv->link_up = 0;
967                 if (adapter->sume_debug)
968                         device_printf(adapter->dev, "port %d link state "
969                             "changed to DOWN\n", nf_priv->unit);
970         } else if (link_status && !nf_priv->link_up) {
971                 nf_priv->link_up = 1;
972                 if_link_state_change(ifp, LINK_STATE_UP);
973                 if (adapter->sume_debug)
974                         device_printf(adapter->dev, "port %d link state "
975                             "changed to UP\n", nf_priv->unit);
976         }
977 }
978
979 static void
980 sume_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
981 {
982         struct nf_priv *nf_priv = ifp->if_softc;
983         struct ifmedia *ifm = &nf_priv->media;
984
985         if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
986             (ifp->if_flags & IFF_UP))
987                 ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
988         else
989                 ifmr->ifm_active = ifm->ifm_cur->ifm_media;
990
991         ifmr->ifm_status |= IFM_AVALID;
992
993         sume_update_link_status(ifp);
994
995         if (nf_priv->link_up)
996                 ifmr->ifm_status |= IFM_ACTIVE;
997 }
998
999 /*
1000  * Packet to transmit. We take the packet data from the mbuf and copy it to the
1001  * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
1002  * packet data are for metadata: sport/dport (depending on our source
1003  * interface), packet length and magic 0xcafe. We tell the SUME about the
1004  * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
1005  * the information about the start and length of the packet and trigger the
1006  * transaction.
1007  */
1008 static int
1009 sume_if_start_locked(struct ifnet *ifp)
1010 {
1011         struct mbuf *m;
1012         struct nf_priv *nf_priv = ifp->if_softc;
1013         struct sume_adapter *adapter = nf_priv->adapter;
1014         struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
1015         uint8_t *outbuf;
1016         struct nf_metadata *mdata;
1017         int plen = SUME_MIN_PKT_SIZE;
1018
1019         KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1020         KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
1021             ("SUME not in IDLE state"));
1022
1023         IFQ_DEQUEUE(&ifp->if_snd, m);
1024         if (m == NULL)
1025                 return (EINVAL);
1026
1027         /* Packets large enough do not need to be padded */
1028         if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
1029                 plen = m->m_pkthdr.len;
1030
1031         if (adapter->sume_debug)
1032                 device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
1033                     SUME_ETH_DEVICE_NAME, nf_priv->unit);
1034
1035         outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
1036         mdata = (struct nf_metadata *) outbuf;
1037
1038         /* Clear the recovery flag. */
1039         send->recovery = 0;
1040
1041         /* Make sure we fit with the 16 bytes nf_metadata. */
1042         if (m->m_pkthdr.len + sizeof(struct nf_metadata) >
1043             adapter->sg_buf_size) {
1044                 device_printf(adapter->dev, "packet too big for bounce buffer "
1045                     "(%d)\n", m->m_pkthdr.len);
1046                 m_freem(m);
1047                 nf_priv->stats.tx_dropped++;
1048                 return (ENOMEM);
1049         }
1050
1051         bus_dmamap_sync(send->ch_tag, send->ch_map,
1052             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1053
1054         /* Zero out the padded data */
1055         if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
1056                 bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
1057         /* Skip the first 16 bytes for the metadata. */
1058         m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
1059         send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;
1060
1061         /* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
1062         mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
1063         mdata->dport = htole16(1 << (nf_priv->port * 2));
1064         mdata->plen = htole16(plen);
1065         mdata->magic = htole16(SUME_RIFFA_MAGIC);
1066         mdata->t1 = htole32(0);
1067         mdata->t2 = htole32(0);
1068
1069         /* Let the FPGA know about the transfer. */
1070         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1071             RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
1072         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1073             RIFFA_RX_LEN_REG_OFF), send->len);
1074
1075         /* Fill the bouncebuf "descriptor". */
1076         sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
1077
1078         /* Update the state before intiating the DMA to avoid races. */
1079         send->state = SUME_RIFFA_CHAN_STATE_READY;
1080
1081         /* DMA. */
1082         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1083             RIFFA_RX_SG_ADDR_LO_REG_OFF),
1084             SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
1085         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1086             RIFFA_RX_SG_ADDR_HI_REG_OFF),
1087             SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
1088         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1089             RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
1090
1091         bus_dmamap_sync(send->ch_tag, send->ch_map,
1092             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1093
1094         nf_priv->stats.tx_packets++;
1095         nf_priv->stats.tx_bytes += plen;
1096
1097         /* We can free as long as we use the bounce buffer. */
1098         m_freem(m);
1099
1100         adapter->last_ifc = nf_priv->port;
1101
1102         /* Reset watchdog counter. */
1103         adapter->wd_counter = 0;
1104
1105         return (0);
1106 }
1107
1108 static void
1109 sume_if_start(struct ifnet *ifp)
1110 {
1111         struct nf_priv *nf_priv = ifp->if_softc;
1112         struct sume_adapter *adapter = nf_priv->adapter;
1113
1114         if (!adapter->running || !(ifp->if_flags & IFF_UP))
1115                 return;
1116
1117         SUME_LOCK(adapter);
1118         if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state ==
1119             SUME_RIFFA_CHAN_STATE_IDLE)
1120                 sume_if_start_locked(ifp);
1121         SUME_UNLOCK(adapter);
1122 }
1123
1124 /*
1125  * We call this function at the end of every TX transaction to check for
1126  * remaining packets in the TX queues for every UP interface.
1127  */
1128 static void
1129 check_tx_queues(struct sume_adapter *adapter)
1130 {
1131         int i, last_ifc;
1132
1133         KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1134
1135         last_ifc = adapter->last_ifc;
1136
1137         /* Check all interfaces */
1138         for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) {
1139                 struct ifnet *ifp = adapter->ifp[i % SUME_NPORTS];
1140
1141                 if (!(ifp->if_flags & IFF_UP))
1142                         continue;
1143
1144                 if (!sume_if_start_locked(ifp))
1145                         break;
1146         }
1147 }
1148
1149 static int
1150 sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port)
1151 {
1152         struct ifnet *ifp;
1153         struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME,
1154             M_ZERO | M_WAITOK);
1155
1156         ifp = if_alloc(IFT_ETHER);
1157         if (ifp == NULL) {
1158                 device_printf(adapter->dev, "cannot allocate ifnet\n");
1159                 return (ENOMEM);
1160         }
1161
1162         adapter->ifp[port] = ifp;
1163         ifp->if_softc = nf_priv;
1164
1165         nf_priv->adapter = adapter;
1166         nf_priv->unit = alloc_unr(unr);
1167         nf_priv->port = port;
1168         nf_priv->link_up = 0;
1169
1170         if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit);
1171         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1172
1173         ifp->if_init = sume_if_init;
1174         ifp->if_start = sume_if_start;
1175         ifp->if_ioctl = sume_if_ioctl;
1176
1177         uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS;
1178         hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit;
1179         ether_ifattach(ifp, hw_addr);
1180
1181         ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change,
1182             sume_media_status);
1183         ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
1184         ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR);
1185
1186         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1187
1188         return (0);
1189 }
1190
1191 static void
1192 callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err)
1193 {
1194         if (err)
1195                 return;
1196
1197         KASSERT(nseg == 1, ("%d segments returned!", nseg));
1198
1199         *(bus_addr_t *) arg = segs[0].ds_addr;
1200 }
1201
1202 static int
1203 sume_probe_riffa_buffer(const struct sume_adapter *adapter,
1204     struct riffa_chnl_dir ***p, const char *dir)
1205 {
1206         struct riffa_chnl_dir **rp;
1207         bus_addr_t hw_addr;
1208         int error, ch;
1209         device_t dev = adapter->dev;
1210
1211         error = ENOMEM;
1212         *p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
1213             M_SUME, M_ZERO | M_WAITOK);
1214         if (*p == NULL) {
1215                 device_printf(dev, "malloc(%s) failed.\n", dir);
1216                 return (error);
1217         }
1218
1219         rp = *p;
1220         /* Allocate the chnl_dir structs themselves. */
1221         for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1222                 /* One direction. */
1223                 rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
1224                     M_ZERO | M_WAITOK);
1225                 if (rp[ch] == NULL) {
1226                         device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir "
1227                             "failed.\n", dir, ch);
1228                         return (error);
1229                 }
1230
1231                 int err = bus_dma_tag_create(bus_get_dma_tag(dev),
1232                     4, 0,
1233                     BUS_SPACE_MAXADDR,
1234                     BUS_SPACE_MAXADDR,
1235                     NULL, NULL,
1236                     adapter->sg_buf_size,
1237                     1,
1238                     adapter->sg_buf_size,
1239                     0,
1240                     NULL,
1241                     NULL,
1242                     &rp[ch]->ch_tag);
1243
1244                 if (err) {
1245                         device_printf(dev, "bus_dma_tag_create(%s[%d]) "
1246                             "failed.\n", dir, ch);
1247                         return (err);
1248                 }
1249
1250                 err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **)
1251                     &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT |
1252                     BUS_DMA_ZERO, &rp[ch]->ch_map);
1253                 if (err) {
1254                         device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
1255                             dir, ch);
1256                         return (err);
1257                 }
1258
1259                 bzero(rp[ch]->buf_addr, adapter->sg_buf_size);
1260
1261                 err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map,
1262                     rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma,
1263                     &hw_addr, BUS_DMA_NOWAIT);
1264                 if (err) {
1265                         device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
1266                             dir, ch);
1267                         return (err);
1268                 }
1269                 rp[ch]->buf_hw_addr = hw_addr;
1270                 rp[ch]->num_sg = 1;
1271                 rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE;
1272
1273                 rp[ch]->rtag = SUME_INIT_RTAG;
1274         }
1275
1276         return (0);
1277 }
1278
1279 static int
1280 sume_probe_riffa_buffers(struct sume_adapter *adapter)
1281 {
1282         int error;
1283
1284         error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
1285         if (error)
1286                 return (error);
1287
1288         error = sume_probe_riffa_buffer(adapter, &adapter->send, "send");
1289
1290         return (error);
1291 }
1292
1293 static void
1294 sume_sysctl_init(struct sume_adapter *adapter)
1295 {
1296         device_t dev = adapter->dev;
1297         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
1298         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
1299         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
1300         struct sysctl_oid *tmp_tree;
1301         char namebuf[MAX_IFC_NAME_LEN];
1302         int i;
1303
1304         tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW,
1305             0, "SUME top-level tree");
1306         if (tree == NULL) {
1307                 device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1308                 return;
1309         }
1310         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
1311             &adapter->sume_debug, 0, "debug int leaf");
1312
1313         /* total RX error stats */
1314         SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts",
1315             CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
1316         SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes",
1317             CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");
1318
1319         for (i = SUME_NPORTS - 1; i >= 0; i--) {
1320                 struct ifnet *ifp = adapter->ifp[i];
1321                 if (ifp == NULL)
1322                         continue;
1323
1324                 struct nf_priv *nf_priv = ifp->if_softc;
1325
1326                 snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
1327                     SUME_ETH_DEVICE_NAME, nf_priv->unit);
1328                 tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
1329                     CTLFLAG_RW, 0, "SUME ifc tree");
1330                 if (tmp_tree == NULL) {
1331                         device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1332                         return;
1333                 }
1334
1335                 /* Packets dropped by down interface. */
1336                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1337                     "ifc_down_bytes", CTLFLAG_RD,
1338                     &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes");
1339                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1340                     "ifc_down_packets", CTLFLAG_RD,
1341                     &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets");
1342
1343                 /* HW RX stats */
1344                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1345                     "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets,
1346                     0, "hw_rx packets");
1347
1348                 /* HW TX stats */
1349                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1350                     "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets,
1351                     0, "hw_tx packets");
1352
1353                 /* RX stats */
1354                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1355                     "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0,
1356                     "rx bytes");
1357                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1358                     "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0,
1359                     "rx dropped");
1360                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1361                     "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0,
1362                     "rx packets");
1363
1364                 /* TX stats */
1365                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1366                     "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0,
1367                     "tx bytes");
1368                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1369                     "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0,
1370                     "tx dropped");
1371                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1372                     "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0,
1373                     "tx packets");
1374         }
1375 }
1376
1377 static void
1378 sume_local_timer(void *arg)
1379 {
1380         struct sume_adapter *adapter = arg;
1381
1382         if (!adapter->running)
1383                 return;
1384
1385         taskqueue_enqueue(adapter->tq, &adapter->stat_task);
1386
1387         SUME_LOCK(adapter);
1388         if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state !=
1389             SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) {
1390                 /* Resetting interfaces if stuck for 3 seconds. */
1391                 device_printf(adapter->dev, "TX stuck, resetting adapter.\n");
1392                 read_reg(adapter, RIFFA_INFO_REG_OFF);
1393
1394                 adapter->send[SUME_RIFFA_CHANNEL_DATA]->state =
1395                     SUME_RIFFA_CHAN_STATE_IDLE;
1396                 adapter->wd_counter = 0;
1397
1398                 check_tx_queues(adapter);
1399         }
1400         SUME_UNLOCK(adapter);
1401
1402         callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1403 }
1404
1405 static void
1406 sume_get_stats(void *context, int pending)
1407 {
1408         struct sume_adapter *adapter = context;
1409         int i;
1410
1411         for (i = 0; i < SUME_NPORTS; i++) {
1412                 struct ifnet *ifp = adapter->ifp[i];
1413
1414                 if (ifp->if_flags & IFF_UP) {
1415                         struct nf_priv *nf_priv = ifp->if_softc;
1416                         struct sume_ifreq sifr;
1417
1418                         sume_update_link_status(ifp);
1419
1420                         /* Get RX counter. */
1421                         sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
1422                         sifr.val = 0;
1423
1424                         if (!get_modreg_value(nf_priv, &sifr))
1425                                 nf_priv->stats.hw_rx_packets += sifr.val;
1426
1427                         /* Get TX counter. */
1428                         sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
1429                         sifr.val = 0;
1430
1431                         if (!get_modreg_value(nf_priv, &sifr))
1432                                 nf_priv->stats.hw_tx_packets += sifr.val;
1433                 }
1434         }
1435 }
1436
1437 static int
1438 sume_attach(device_t dev)
1439 {
1440         struct sume_adapter *adapter = device_get_softc(dev);
1441         adapter->dev = dev;
1442         int error, i;
1443
1444         mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);
1445
1446         adapter->running = 0;
1447
1448         /* OK finish up RIFFA. */
1449         error = sume_probe_riffa_pci(adapter);
1450         if (error != 0)
1451                 goto error;
1452
1453         error = sume_probe_riffa_buffers(adapter);
1454         if (error != 0)
1455                 goto error;
1456
1457         /* Now do the network interfaces. */
1458         for (i = 0; i < SUME_NPORTS; i++) {
1459                 error = sume_ifp_alloc(adapter, i);
1460                 if (error != 0)
1461                         goto error;
1462         }
1463
1464         /*  Register stats and register sysctls. */
1465         sume_sysctl_init(adapter);
1466
1467         /* Reset the HW. */
1468         read_reg(adapter, RIFFA_INFO_REG_OFF);
1469
1470         /* Ready to go, "enable" IRQ. */
1471         adapter->running = 1;
1472
1473         callout_init(&adapter->timer, 1);
1474         TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter);
1475
1476         adapter->tq = taskqueue_create("sume_stats", M_NOWAIT,
1477             taskqueue_thread_enqueue, &adapter->tq);
1478         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq",
1479             device_get_nameunit(adapter->dev));
1480
1481         callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1482
1483         return (0);
1484
1485 error:
1486         sume_detach(dev);
1487
1488         return (error);
1489 }
1490
1491 static void
1492 sume_remove_riffa_buffer(const struct sume_adapter *adapter,
1493     struct riffa_chnl_dir **pp)
1494 {
1495         int ch;
1496
1497         for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1498                 if (pp[ch] == NULL)
1499                         continue;
1500
1501                 if (pp[ch]->buf_hw_addr != 0) {
1502                         bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr,
1503                             pp[ch]->ch_map);
1504                         pp[ch]->buf_hw_addr = 0;
1505                 }
1506
1507                 free(pp[ch], M_SUME);
1508         }
1509 }
1510
1511 static void
1512 sume_remove_riffa_buffers(struct sume_adapter *adapter)
1513 {
1514         if (adapter->send != NULL) {
1515                 sume_remove_riffa_buffer(adapter, adapter->send);
1516                 free(adapter->send, M_SUME);
1517                 adapter->send = NULL;
1518         }
1519         if (adapter->recv != NULL) {
1520                 sume_remove_riffa_buffer(adapter, adapter->recv);
1521                 free(adapter->recv, M_SUME);
1522                 adapter->recv = NULL;
1523         }
1524 }
1525
1526 static int
1527 sume_detach(device_t dev)
1528 {
1529         struct sume_adapter *adapter = device_get_softc(dev);
1530         int i;
1531         struct nf_priv *nf_priv;
1532
1533         KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not "
1534             "initialized"));
1535         adapter->running = 0;
1536
1537         /* Drain the stats callout and task queue. */
1538         callout_drain(&adapter->timer);
1539
1540         if (adapter->tq) {
1541                 taskqueue_drain(adapter->tq, &adapter->stat_task);
1542                 taskqueue_free(adapter->tq);
1543         }
1544
1545         for (i = 0; i < SUME_NPORTS; i++) {
1546                 struct ifnet *ifp = adapter->ifp[i];
1547                 if (ifp == NULL)
1548                         continue;
1549
1550                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1551                 nf_priv = ifp->if_softc;
1552
1553                 if (ifp->if_flags & IFF_UP)
1554                         if_down(ifp);
1555                 ifmedia_removeall(&nf_priv->media);
1556                 free_unr(unr, nf_priv->unit);
1557
1558                 ifp->if_flags &= ~IFF_UP;
1559                 ether_ifdetach(ifp);
1560                 if_free(ifp);
1561
1562                 free(nf_priv, M_SUME);
1563         }
1564
1565         sume_remove_riffa_buffers(adapter);
1566
1567         if (adapter->irq.tag)
1568                 bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag);
1569         if (adapter->irq.res)
1570                 bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid,
1571                     adapter->irq.res);
1572
1573         pci_release_msi(dev);
1574
1575         if (adapter->bar0_addr)
1576                 bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid,
1577                     adapter->bar0_addr);
1578
1579         mtx_destroy(&adapter->lock);
1580
1581         return (0);
1582 }
1583
1584 static int
1585 mod_event(module_t mod, int cmd, void *arg)
1586 {
1587         switch (cmd) {
1588         case MOD_LOAD:
1589                 unr = new_unrhdr(0, INT_MAX, NULL);
1590                 break;
1591
1592         case MOD_UNLOAD:
1593                 delete_unrhdr(unr);
1594                 break;
1595         }
1596
1597         return (0);
1598 }
/* Device class handle for sume, handed to DRIVER_MODULE() below. */
static devclass_t sume_devclass;

/*
 * Register sume_driver under the name "sume" on the pci bus, with
 * mod_event() as the module event handler and no handler argument, and
 * declare module version 1.
 */
DRIVER_MODULE(sume, pci, sume_driver, sume_devclass, mod_event, 0);
MODULE_VERSION(sume, 1);