]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/sume/if_sume.c
contrib/tzdata: import tzdata 2024a
[FreeBSD/FreeBSD.git] / sys / dev / sume / if_sume.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015 Bjoern A. Zeeb
5  * Copyright (c) 2020 Denis Salopek
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
9  * ("MRC2"), as part of the DARPA MRC research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/param.h>
34 #include <sys/bus.h>
35 #include <sys/endian.h>
36 #include <sys/kernel.h>
37 #include <sys/limits.h>
38 #include <sys/module.h>
39 #include <sys/rman.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <sys/sysctl.h>
43 #include <sys/taskqueue.h>
44
45 #include <net/if.h>
46 #include <net/if_media.h>
47 #include <net/if_types.h>
48 #include <net/if_var.h>
49
50 #include <netinet/in.h>
51 #include <netinet/if_ether.h>
52
53 #include <dev/pci/pcivar.h>
54 #include <dev/pci/pcireg.h>
55
56 #include <machine/bus.h>
57
58 #include "adapter.h"
59
60 #define PCI_VENDOR_ID_XILINX    0x10ee  /* only vendor ID accepted by sume_probe() */
61 #define PCI_DEVICE_ID_SUME      0x7028  /* device ID listed in sume_pciids[] */
62
63 /* SUME bus driver interface: entry points referenced by sume_methods[] below. */
64 static int sume_probe(device_t);
65 static int sume_attach(device_t);
66 static int sume_detach(device_t);
67
68 static device_method_t sume_methods[] = {      /* device method table */
69         DEVMETHOD(device_probe,         sume_probe),
70         DEVMETHOD(device_attach,        sume_attach),
71         DEVMETHOD(device_detach,        sume_detach),
72         DEVMETHOD_END   /* terminates the table */
73 };
74
75 static driver_t sume_driver = {        /* positional initializer */
76         "sume", /* driver name */
77         sume_methods,   /* the method table above */
78         sizeof(struct sume_adapter)     /* size of the per-device state */
79 };
80
81 /*
82  * The DMA engine for SUME generates interrupts for each RX/TX transaction.
83  * Depending on the channel (0 if packet transaction, 1 if register transaction)
84  * the used bits of the interrupt vector will be the lowest or the second lowest
85  * 5 bits.
86  *
87  * When receiving packets from SUME (RX):
88  * (1) SUME received a packet on one of the interfaces.
89  * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
90  *     transaction).
91  * (3) We read the length of the incoming packet and the offset along with the
92  *     'last' flag from the SUME registers.
93  * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
94  *     address buf_addr. For now, this is how it's done:
95  *     - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical
96  *     address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
97  *     and length of incoming data (buf_addr[2]).
98  *     - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
99  *     physical address buf_hw_addr is a block of contiguous memory mapped to
100  *     buf_addr, so we can set the incoming data's physical address (buf_addr[0]
101  *     and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
102  * (5) We notify SUME that the bouncebuffer is ready for the transaction by
103  *     writing the lower/upper physical address buf_hw_addr to the SUME
104  *     registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
105  *     well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
106  * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
107  *     bouncebuffer received).
108  * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
109  *     transaction is done).
110  * (8) SUME can do both steps (6) and (7) using the same interrupt.
111  * (9) We read the first 16 bytes (metadata) of the received data and note the
112  *     incoming interface so we can later forward it to the right one in the OS
113  *     (sume0, sume1, sume2 or sume3).
114  * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf
115  *     and set the mbuf rcvif to the incoming interface.
116  * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
117  *
118  * When sending packets to SUME (TX):
119  * (1) The OS calls sume_if_start() function on TX.
120  * (2) We get the mbuf packet data and copy it to the
121  *     buf_addr+3*sizeof(uint32_t) + metadata 16 bytes.
122  * (3) We create the metadata based on the output interface and copy it to the
123  *     buf_addr+3*sizeof(uint32_t).
124  * (4) We write the offset/last and length of the packet to the SUME registers
125  *     RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
126  * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
127  *     with the physical address and length just as in RX step (4).
128  * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
129  *     registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
130  *     RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
131  * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
132  *     bouncebuffer is read).
133  * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
134  *     transaction is done).
135  * (9) SUME can do both steps (7) and (8) using the same interrupt.
136  *
137  * Internal registers
138  * Every module in the SUME hardware has its own set of internal registers
139  * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
140  * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
141  * offsets to different memory locations of every module are defined in their
142  * corresponding folder inside the library. These registers can be RO/RW and
143  * there is a special method to fetch/change this data over 1 or 2 DMA
144  * transactions. For writing, by calling the sume_module_reg_write(). For
145  * reading, by calling the sume_module_reg_write() and then
146  * sume_module_reg_read(). Check those functions for more information.
147  */
148
149 MALLOC_DECLARE(M_SUME);       /* malloc type used by this driver */
150 MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");
151
152 static void check_tx_queues(struct sume_adapter *);    /* called from sume_intr_handler() */
153 static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
154     uint64_t);
155
156 static struct unrhdr *unr;     /* NOTE(review): presumably a unit-number allocator; no use is visible here */
157
158 static struct {
159         uint16_t device;
160         char *desc;
161 } sume_pciids[] = {
162         {PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
163 };
164
165 static inline uint32_t
166 read_reg(struct sume_adapter *adapter, int offset)
167 {
168
169         return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
170 }
171
172 static inline void
173 write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
174 {
175
176         bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
177 }
178
179 static int
180 sume_probe(device_t dev)
181 {
182         int i;
183         uint16_t v = pci_get_vendor(dev);
184         uint16_t d = pci_get_device(dev);
185
186         if (v != PCI_VENDOR_ID_XILINX)
187                 return (ENXIO);
188
189         for (i = 0; i < nitems(sume_pciids); i++) {
190                 if (d == sume_pciids[i].device) {
191                         device_set_desc(dev, sume_pciids[i].desc);
192                         return (BUS_PROBE_DEFAULT);
193                 }
194         }
195
196         return (ENXIO);
197 }
198
199 /*
200  * Building mbuf for packet received from SUME. We expect to receive 'len'
201  * bytes of data (including metadata) written from the bouncebuffer address
202  * buf_addr+3*sizeof(uint32_t). Metadata will tell us which SUME interface
203  * received the packet (sport will be 1, 2, 4 or 8), the packet length (plen),
204  * and the magic word needs to be 0xcafe. When we have the packet data, we
205  * create an mbuf and copy the data to it using m_copyback() function, set the
206  * correct interface to rcvif and return the mbuf to be later sent to the OS
207  * with if_input.
208  */
209 static struct mbuf *
210 sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len)
211 {
212         struct nf_priv *nf_priv;
213         struct mbuf *m;
214         if_t ifp = NULL;
215         int np;
216         uint16_t dport, plen, magic;
217         device_t dev = adapter->dev;
218         uint8_t *indata = (uint8_t *)
219             adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr +
220             sizeof(struct nf_bb_desc);
221         struct nf_metadata *mdata = (struct nf_metadata *) indata;
222
223         /* The metadata header is 16 bytes. */
224         if (len < sizeof(struct nf_metadata)) {
225                 device_printf(dev, "short frame (%d)\n", len);
226                 adapter->packets_err++;
227                 adapter->bytes_err += len;
228                 return (NULL);
229         }
230
231         dport = le16toh(mdata->dport);
232         plen = le16toh(mdata->plen);
233         magic = le16toh(mdata->magic);
234
235         if (sizeof(struct nf_metadata) + plen > len ||
236             magic != SUME_RIFFA_MAGIC) {
237                 device_printf(dev, "corrupted packet (%zd + %d > %d || magic "
238                     "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen,
239                     len, magic, SUME_RIFFA_MAGIC);
240                 return (NULL);
241         }
242
243         /* We got the packet from one of the even bits */
244         np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1;
245         if (np > SUME_NPORTS) {
246                 device_printf(dev, "invalid destination port 0x%04x (%d)\n",
247                     dport, np);
248                 adapter->packets_err++;
249                 adapter->bytes_err += plen;
250                 return (NULL);
251         }
252         ifp = adapter->ifp[np];
253         nf_priv = if_getsoftc(ifp);
254         nf_priv->stats.rx_packets++;
255         nf_priv->stats.rx_bytes += plen;
256
257         /* If the interface is down, well, we are done. */
258         if (!(if_getflags(ifp) & IFF_UP)) {
259                 nf_priv->stats.ifc_down_packets++;
260                 nf_priv->stats.ifc_down_bytes += plen;
261                 return (NULL);
262         }
263
264         if (adapter->sume_debug)
265                 printf("Building mbuf with length: %d\n", plen);
266
267         m = m_getm(NULL, plen, M_NOWAIT, MT_DATA);
268         if (m == NULL) {
269                 adapter->packets_err++;
270                 adapter->bytes_err += plen;
271                 return (NULL);
272         }
273
274         /* Copy the data in at the right offset. */
275         m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata)));
276         m->m_pkthdr.rcvif = ifp;
277
278         return (m);
279 }
280
281 /*
282  * SUME interrupt handler for when we get a valid interrupt from the board.
283  * Theoretically, we can receive interrupt for any of the available channels,
284  * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32
285  * bit number, using 5 bits for every channel, the least significant bits
286  * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector
287  * bits for RX/TX are:
288  * RX
289  * bit 0 - new transaction from SUME
290  * bit 1 - SUME received our bouncebuffer address
291  * bit 2 - SUME copied the received data to our bouncebuffer, transaction done
292  * TX
293  * bit 3 - SUME received our bouncebuffer address
294  * bit 4 - SUME copied the data from our bouncebuffer, transaction done
295  *
296  * There are two finite state machines (one for TX, one for RX). We loop
297  * through channels 0 and 1 to check our current state and which interrupt
298  * bit is set.
299  * TX
300  * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction.
301  * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer
302  * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3
303  * to go to the next state.
304  * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send
305  * our packet). Then we get the length of the sent data and go back to the
306  * IDLE state (data channel) or wake the sleeper in LEN state (reg channel).
307  * RX
308  * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX
309  * transaction). When we get it, we prepare our bouncebuffer for reading and
310  * trigger the SUME to start the transaction. Go to the next state.
311  * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our
312  * bouncebuffer). Go to the next state.
313  * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
314  * we build the mbuf and go IDLE (data channel) or wake the sleeper (reg channel).
315  */
316 static void
317 sume_intr_handler(void *arg)
318 {
319         struct sume_adapter *adapter = arg;
320         uint32_t vect, vect0, len;
321         int ch, loops;
322         device_t dev = adapter->dev;
323         struct mbuf *m = NULL; /* set only by the data channel RX path */
324         if_t ifp = NULL;
325         struct riffa_chnl_dir *send, *recv;
326
327         SUME_LOCK(adapter);
328
329         vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF);
330         if ((vect0 & SUME_INVALID_VECT) != 0) { /* nothing is processed for such a vector */
331                 SUME_UNLOCK(adapter);
332                 return;
333         }
334
335         /*
336          * We only have one interrupt for all channels and no quick way
337          * to look up which channel(s) raised it, so scan them all.
338          */
339         for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
340                 vect = vect0 >> (5 * ch); /* 5 vector bits per channel */
341                 send = adapter->send[ch];
342                 recv = adapter->recv[ch];
343
344                 loops = 0;
345                 while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
346                     loops <= 5) { /* bounded: at most 6 passes */
347                         if (adapter->sume_debug)
348                                 device_printf(dev, "TX ch %d state %u vect = "
349                                     "0x%08x\n", ch, send->state, vect);
350                         switch (send->state) {
351                         case SUME_RIFFA_CHAN_STATE_IDLE:
352                                 break; /* no bit consumed; only the loops bound ends the loop */
353                         case SUME_RIFFA_CHAN_STATE_READY:
354                                 if (!(vect & SUME_MSI_TXBUF)) {
355                                         device_printf(dev, "ch %d unexpected "
356                                             "interrupt in send+3 state %u: "
357                                             "vect = 0x%08x\n", ch, send->state,
358                                             vect);
359                                         send->recovery = 1;
360                                         break;
361                                 }
362                                 send->state = SUME_RIFFA_CHAN_STATE_READ;
363                                 vect &= ~SUME_MSI_TXBUF;
364                                 break;
365                         case SUME_RIFFA_CHAN_STATE_READ:
366                                 if (!(vect & SUME_MSI_TXDONE)) {
367                                         device_printf(dev, "ch %d unexpected "
368                                             "interrupt in send+4 state %u: "
369                                             "vect = 0x%08x\n", ch, send->state,
370                                             vect);
371                                         send->recovery = 1;
372                                         break;
373                                 }
374                                 send->state = SUME_RIFFA_CHAN_STATE_LEN;
375
376                                 len = read_reg(adapter, RIFFA_CHNL_REG(ch,
377                                     RIFFA_RX_TNFR_LEN_REG_OFF)); /* value is not used afterwards */
378                                 if (ch == SUME_RIFFA_CHANNEL_DATA) {
379                                         send->state =
380                                             SUME_RIFFA_CHAN_STATE_IDLE;
381                                         check_tx_queues(adapter);
382                                 } else if (ch == SUME_RIFFA_CHANNEL_REG)
383                                         wakeup(&send->event); /* state stays LEN here */
384                                 else {
385                                         device_printf(dev, "ch %d unexpected "
386                                             "interrupt in send+4 state %u: "
387                                             "vect = 0x%08x\n", ch, send->state,
388                                             vect);
389                                         send->recovery = 1;
390                                 }
391                                 vect &= ~SUME_MSI_TXDONE;
392                                 break;
393                         case SUME_RIFFA_CHAN_STATE_LEN:
394                                 break;
395                         default:
396                                 device_printf(dev, "unknown TX state!\n");
397                         }
398                         loops++;
399                 }
400
401                 if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
402                     send->recovery) /* TX bits left unconsumed while the recovery flag is set */
403                         device_printf(dev, "ch %d ignoring vect = 0x%08x "
404                             "during TX; not in recovery; state = %d loops = "
405                             "%d\n", ch, vect, send->state, loops);
406
407                 loops = 0;
408                 while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
409                     SUME_MSI_RXDONE)) && loops < 5) { /* bounded: at most 5 passes */
410                         if (adapter->sume_debug)
411                                 device_printf(dev, "RX ch %d state %u vect = "
412                                     "0x%08x\n", ch, recv->state, vect);
413                         switch (recv->state) {
414                         case SUME_RIFFA_CHAN_STATE_IDLE:
415                                 if (!(vect & SUME_MSI_RXQUE)) {
416                                         device_printf(dev, "ch %d unexpected "
417                                             "interrupt in recv+0 state %u: "
418                                             "vect = 0x%08x\n", ch, recv->state,
419                                             vect);
420                                         recv->recovery = 1;
421                                         break;
422                                 }
423                                 uint32_t max_ptr;
424
425                                 /* Clear recovery state. */
426                                 recv->recovery = 0;
427
428                                 /* Get offset and length. */
429                                 recv->offlast = read_reg(adapter,
430                                     RIFFA_CHNL_REG(ch,
431                                     RIFFA_TX_OFFLAST_REG_OFF));
432                                 recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
433                                     RIFFA_TX_LEN_REG_OFF));
434
435                                 /* Boundary checks: both only log, the transfer still proceeds. */
436                                 max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
437                                     + SUME_RIFFA_OFFSET(recv->offlast)
438                                     + SUME_RIFFA_LEN(recv->len) - 1);
439                                 if (max_ptr <
440                                     (uint32_t)((uintptr_t)recv->buf_addr))
441                                         device_printf(dev, "receive buffer "
442                                             "wrap-around overflow.\n");
443                                 if (SUME_RIFFA_OFFSET(recv->offlast) +
444                                     SUME_RIFFA_LEN(recv->len) >
445                                     adapter->sg_buf_size)
446                                         device_printf(dev, "receive buffer too"
447                                             " small.\n");
448
449                                 /* Fill the bouncebuf "descriptor". */
450                                 sume_fill_bb_desc(adapter, recv,
451                                     SUME_RIFFA_LEN(recv->len));
452
453                                 bus_dmamap_sync(recv->ch_tag, recv->ch_map,
454                                     BUS_DMASYNC_PREREAD |
455                                     BUS_DMASYNC_PREWRITE);
456                                 write_reg(adapter, RIFFA_CHNL_REG(ch,
457                                     RIFFA_TX_SG_ADDR_LO_REG_OFF),
458                                     SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
459                                 write_reg(adapter, RIFFA_CHNL_REG(ch,
460                                     RIFFA_TX_SG_ADDR_HI_REG_OFF),
461                                     SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
462                                 write_reg(adapter, RIFFA_CHNL_REG(ch,
463                                     RIFFA_TX_SG_LEN_REG_OFF),
464                                     4 * recv->num_sg);
465                                 bus_dmamap_sync(recv->ch_tag, recv->ch_map,
466                                     BUS_DMASYNC_POSTREAD |
467                                     BUS_DMASYNC_POSTWRITE);
468
469                                 recv->state = SUME_RIFFA_CHAN_STATE_READY;
470                                 vect &= ~SUME_MSI_RXQUE;
471                                 break;
472                         case SUME_RIFFA_CHAN_STATE_READY:
473                                 if (!(vect & SUME_MSI_RXBUF)) {
474                                         device_printf(dev, "ch %d unexpected "
475                                             "interrupt in recv+1 state %u: "
476                                             "vect = 0x%08x\n", ch, recv->state,
477                                             vect);
478                                         recv->recovery = 1;
479                                         break;
480                                 }
481                                 recv->state = SUME_RIFFA_CHAN_STATE_READ;
482                                 vect &= ~SUME_MSI_RXBUF;
483                                 break;
484                         case SUME_RIFFA_CHAN_STATE_READ:
485                                 if (!(vect & SUME_MSI_RXDONE)) {
486                                         device_printf(dev, "ch %d unexpected "
487                                             "interrupt in recv+2 state %u: "
488                                             "vect = 0x%08x\n", ch, recv->state,
489                                             vect);
490                                         recv->recovery = 1;
491                                         break;
492                                 }
493                                 len = read_reg(adapter, RIFFA_CHNL_REG(ch,
494                                     RIFFA_TX_TNFR_LEN_REG_OFF));
495
496                                 /* Remember, len and recv->len are words; len << 2 converts to bytes. */
497                                 if (ch == SUME_RIFFA_CHANNEL_DATA) {
498                                         m = sume_rx_build_mbuf(adapter, 
499                                             len << 2);
500                                         recv->state =
501                                             SUME_RIFFA_CHAN_STATE_IDLE;
502                                 } else if (ch == SUME_RIFFA_CHANNEL_REG)
503                                         wakeup(&recv->event); /* state stays READ here */
504                                 else {
505                                         device_printf(dev, "ch %d unexpected "
506                                             "interrupt in recv+2 state %u: "
507                                             "vect = 0x%08x\n", ch, recv->state,
508                                             vect);
509                                         recv->recovery = 1;
510                                 }
511                                 vect &= ~SUME_MSI_RXDONE;
512                                 break;
513                         case SUME_RIFFA_CHAN_STATE_LEN:
514                                 break;
515                         default:
516                                 device_printf(dev, "unknown RX state!\n");
517                         }
518                         loops++;
519                 }
520
521                 if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
522                     SUME_MSI_RXDONE)) && recv->recovery) { /* RX bits left unconsumed while the recovery flag is set */
523                         device_printf(dev, "ch %d ignoring vect = 0x%08x "
524                             "during RX; not in recovery; state = %d, loops = "
525                             "%d\n", ch, vect, recv->state, loops);
526
527                         /* Clean the unfinished transaction (register channel only). */
528                         if (ch == SUME_RIFFA_CHANNEL_REG &&
529                             vect & SUME_MSI_RXDONE) {
530                                 read_reg(adapter, RIFFA_CHNL_REG(ch,
531                                     RIFFA_TX_TNFR_LEN_REG_OFF)); /* result discarded */
532                                 recv->recovery = 0;
533                         }
534                 }
535         }
536         SUME_UNLOCK(adapter);
537
538         if (m != NULL) { /* hand the packet up after the adapter lock is dropped */
539                 ifp = m->m_pkthdr.rcvif;
540                 if_input(ifp, m);
541         }
542 }
543
544 /*
545  * As we cannot disable interrupt generation, ignore early interrupts by waiting
546  * for the adapter to go into the 'running' state.
547  */
548 static int
549 sume_intr_filter(void *arg)
550 {
551         struct sume_adapter *adapter = arg;
552
553         if (adapter->running == 0)
554                 return (FILTER_STRAY);
555
556         return (FILTER_SCHEDULE_THREAD);
557 }
558
559 static int
560 sume_probe_riffa_pci(struct sume_adapter *adapter)
561 {
562         device_t dev = adapter->dev;
563         int error, count, capmem;
564         uint32_t reg, devctl, linkctl;
565
566         pci_enable_busmaster(dev);
567
568         adapter->rid = PCIR_BAR(0);
569         adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
570             &adapter->rid, RF_ACTIVE);
571         if (adapter->bar0_addr == NULL) {
572                 device_printf(dev, "unable to allocate bus resource: "
573                     "BAR0 address\n");
574                 return (ENXIO);
575         }
576         adapter->bt = rman_get_bustag(adapter->bar0_addr);
577         adapter->bh = rman_get_bushandle(adapter->bar0_addr);
578         adapter->bar0_len = rman_get_size(adapter->bar0_addr);
579         if (adapter->bar0_len != 1024) {
580                 device_printf(dev, "BAR0 resource length %lu != 1024\n",
581                     adapter->bar0_len);
582                 return (ENXIO);
583         }
584
585         count = pci_msi_count(dev);
586         error = pci_alloc_msi(dev, &count);
587         if (error) {
588                 device_printf(dev, "unable to allocate bus resource: PCI "
589                     "MSI\n");
590                 return (error);
591         }
592
593         adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */
594         adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
595             &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
596         if (adapter->irq.res == NULL) {
597                 device_printf(dev, "unable to allocate bus resource: IRQ "
598                     "memory\n");
599                 return (ENXIO);
600         }
601
602         error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE |
603             INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter,
604             &adapter->irq.tag);
605         if (error) {
606                 device_printf(dev, "failed to setup interrupt for rid %d, name"
607                     " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
608                 return (ENXIO);
609         }
610
611         if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
612                 device_printf(dev, "PCI not PCIe capable\n");
613                 return (ENXIO);
614         }
615
616         devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2);
617         pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl |
618             PCIEM_CTL_EXT_TAG_FIELD), 2);
619
620         devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2);
621         pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl |
622             PCIEM_CTL2_ID_ORDERED_REQ_EN), 2);
623
624         linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2);
625         pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl |
626             PCIEM_LINK_CTL_RCB), 2);
627
628         reg = read_reg(adapter, RIFFA_INFO_REG_OFF);
629         adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf);
630         adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf);
631
632         error = ENODEV;
633         /* Check bus master is enabled. */
634         if (((reg >> 4) & 0x1) != 1) {
635                 device_printf(dev, "bus master not enabled: %d\n",
636                     (reg >> 4) & 0x1);
637                 return (error);
638         }
639         /* Check link parameters are valid. */
640         if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) {
641                 device_printf(dev, "link parameters not valid: %d %d\n",
642                     (reg >> 5) & 0x3f, (reg >> 11) & 0x3);
643                 return (error);
644         }
645         /* Check # of channels are within valid range. */
646         if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) {
647                 device_printf(dev, "number of channels out of range: %d\n",
648                     reg & 0xf);
649                 return (error);
650         }
651         /* Check bus width. */
652         if (((reg >> 19) & 0xf) == 0 ||
653             ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) {
654                 device_printf(dev, "bus width out of range: %d\n",
655                     (reg >> 19) & 0xf);
656                 return (error);
657         }
658
659         device_printf(dev, "[riffa] # of channels: %d\n",
660             reg & 0xf);
661         device_printf(dev, "[riffa] bus interface width: %d\n",
662             ((reg >> 19) & 0xf) << 5);
663         device_printf(dev, "[riffa] bus master enabled: %d\n",
664             (reg >> 4) & 0x1);
665         device_printf(dev, "[riffa] negotiated link width: %d\n",
666             (reg >> 5) & 0x3f);
667         device_printf(dev, "[riffa] negotiated rate width: %d MTs\n",
668             ((reg >> 11) & 0x3) * 2500);
669         device_printf(dev, "[riffa] max downstream payload: %d B\n",
670             128 << ((reg >> 13) & 0x7));
671         device_printf(dev, "[riffa] max upstream payload: %d B\n",
672             128 << ((reg >> 16) & 0x7));
673
674         return (0);
675 }
676
/*
 * Deliberately empty interface init routine.  It is registered only
 * because ether_ioctl() panics when no init function is present.
 */
static void
sume_if_init(void *sc)
{
        /* Nothing to do. */
}
682
683 /* Write the address and length for our incoming / outgoing transaction. */
684 static void
685 sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
686     uint64_t len)
687 {
688         struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr;
689
690         bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc));
691         bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32;
692         bouncebuf->len = len >> 2;
693 }
694
695 /* Module register locked write. */
696 static int
697 sume_modreg_write_locked(struct sume_adapter *adapter)
698 {
699         struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
700
701         /* Let the FPGA know about the transfer. */
702         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
703             RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
704         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
705             RIFFA_RX_LEN_REG_OFF), send->len);  /* words */
706
707         /* Fill the bouncebuf "descriptor". */
708         sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
709
710         /* Update the state before intiating the DMA to avoid races. */
711         send->state = SUME_RIFFA_CHAN_STATE_READY;
712
713         bus_dmamap_sync(send->ch_tag, send->ch_map,
714             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
715         /* DMA. */
716         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
717             RIFFA_RX_SG_ADDR_LO_REG_OFF),
718             SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
719         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
720             RIFFA_RX_SG_ADDR_HI_REG_OFF),
721             SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
722         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
723             RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
724         bus_dmamap_sync(send->ch_tag, send->ch_map,
725             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
726
727         return (0);
728 }
729
730 /*
731  * Request a register read or write (depending on optype).
732  * If optype is set (0x1f) this will result in a register write,
733  * otherwise this will result in a register read request at the given
734  * address and the result will need to be DMAed back.
735  */
736 static int
737 sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr,
738     uint32_t optype)
739 {
740         struct sume_adapter *adapter = nf_priv->adapter;
741         struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
742         struct nf_regop_data *data;
743         int error;
744
745         /*
746          * 1. Make sure the channel is free;  otherwise return EBUSY.
747          * 2. Prepare the memory in the bounce buffer (which we always
748          *    use for regs).
749          * 3. Start the DMA process.
750          * 4. Sleep and wait for result and return success or error.
751          */
752         SUME_LOCK(adapter);
753
754         if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) {
755                 SUME_UNLOCK(adapter);
756                 return (EBUSY);
757         }
758
759         data = (struct nf_regop_data *) (send->buf_addr +
760             sizeof(struct nf_bb_desc));
761         data->addr = htole32(sifr->addr);
762         data->val = htole32(sifr->val);
763         /* Tag to indentify request. */
764         data->rtag = htole32(++send->rtag);
765         data->optype = htole32(optype);
766         send->len = sizeof(struct nf_regop_data) / 4; /* words */
767
768         error = sume_modreg_write_locked(adapter);
769         if (error) {
770                 SUME_UNLOCK(adapter);
771                 return (EFAULT);
772         }
773
774         /* Timeout after 1s. */
775         if (send->state != SUME_RIFFA_CHAN_STATE_LEN)
776                 error = msleep(&send->event, &adapter->lock, 0,
777                     "Waiting recv finish", 1 * hz);
778
779         /* This was a write so we are done; were interrupted, or timed out. */
780         if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) {
781                 send->state = SUME_RIFFA_CHAN_STATE_IDLE;
782                 if (optype == SUME_MR_READ)
783                         error = EWOULDBLOCK;
784                 else
785                         error = 0;
786         } else
787                 error = 0;
788
789         /*
790          * For read requests we will update state once we are done
791          * having read the result to avoid any two outstanding
792          * transactions, or we need a queue and validate tags,
793          * which is a lot of work for a low priority, infrequent
794          * event.
795          */
796
797         SUME_UNLOCK(adapter);
798
799         return (error);
800 }
801
802 /* Module register read. */
803 static int
804 sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
805 {
806         struct sume_adapter *adapter = nf_priv->adapter;
807         struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG];
808         struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
809         struct nf_regop_data *data;
810         int error = 0;
811
812         /*
813          * 0. Sleep waiting for result if needed (unless condition is
814          *    true already).
815          * 1. Read DMA results.
816          * 2. Update state on *TX* to IDLE to allow next read to start.
817          */
818         SUME_LOCK(adapter);
819
820         bus_dmamap_sync(recv->ch_tag, recv->ch_map,
821             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
822         /*
823          * We only need to be woken up at the end of the transaction.
824          * Timeout after 1s.
825          */
826         if (recv->state != SUME_RIFFA_CHAN_STATE_READ)
827                 error = msleep(&recv->event, &adapter->lock, 0,
828                     "Waiting transaction finish", 1 * hz);
829
830         if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) {
831                 SUME_UNLOCK(adapter);
832                 device_printf(adapter->dev, "wait error: %d\n", error);
833                 return (EWOULDBLOCK);
834         }
835
836         bus_dmamap_sync(recv->ch_tag, recv->ch_map,
837             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
838
839         /*
840          * Read reply data and validate address and tag.
841          * Note: we do access the send side without lock but the state
842          * machine does prevent the data from changing.
843          */
844         data = (struct nf_regop_data *) (recv->buf_addr +
845             sizeof(struct nf_bb_desc));
846
847         if (le32toh(data->rtag) != send->rtag)
848                 device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n",
849                     le32toh(data->rtag), send->rtag);
850
851         sifr->val = le32toh(data->val);
852         recv->state = SUME_RIFFA_CHAN_STATE_IDLE;
853
854         /* We are done. */
855         send->state = SUME_RIFFA_CHAN_STATE_IDLE;
856
857         SUME_UNLOCK(adapter);
858
859         return (0);
860 }
861
862 /* Read value from a module register and return it to a sume_ifreq. */
863 static int
864 get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
865 {
866         int error;
867
868         error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
869         if (!error)
870                 error = sume_module_reg_read(nf_priv, sifr);
871
872         return (error);
873 }
874
875 static int
876 sume_if_ioctl(if_t ifp, unsigned long cmd, caddr_t data)
877 {
878         struct ifreq *ifr = (struct ifreq *) data;
879         struct nf_priv *nf_priv = if_getsoftc(ifp);
880         struct sume_ifreq sifr;
881         int error = 0;
882
883         switch (cmd) {
884         case SIOCGIFMEDIA:
885         case SIOCGIFXMEDIA:
886                 error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
887                 break;
888
889         case SUME_IOCTL_CMD_WRITE_REG:
890                 error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
891                 if (error) {
892                         error = EINVAL;
893                         break;
894                 }
895                 error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
896                 break;
897
898         case SUME_IOCTL_CMD_READ_REG:
899                 error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
900                 if (error) {
901                         error = EINVAL;
902                         break;
903                 }
904
905                 error = get_modreg_value(nf_priv, &sifr);
906                 if (error)
907                         break;
908
909                 error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
910                 if (error)
911                         error = EINVAL;
912
913                 break;
914
915         case SIOCSIFFLAGS:
916                 /* Silence tcpdump 'promisc mode not supported' warning. */
917                 if (if_getflags(ifp) & IFF_PROMISC)
918                         break;
919
920         default:
921                 error = ether_ioctl(ifp, cmd, data);
922                 break;
923         }
924
925         return (error);
926 }
927
928 static int
929 sume_media_change(if_t ifp)
930 {
931         struct nf_priv *nf_priv = if_getsoftc(ifp);
932         struct ifmedia *ifm = &nf_priv->media;
933
934         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
935                 return (EINVAL);
936
937         if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
938                 if_setbaudrate(ifp, ifmedia_baudrate(IFM_ETHER | IFM_10G_SR));
939         else
940                 if_setbaudrate(ifp, ifmedia_baudrate(ifm->ifm_media));
941
942         return (0);
943 }
944
945 static void
946 sume_update_link_status(if_t ifp)
947 {
948         struct nf_priv *nf_priv = if_getsoftc(ifp);
949         struct sume_adapter *adapter = nf_priv->adapter;
950         struct sume_ifreq sifr;
951         int link_status;
952
953         sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
954         sifr.val = 0;
955
956         if (get_modreg_value(nf_priv, &sifr))
957                 return;
958
959         link_status = SUME_LINK_STATUS(sifr.val);
960
961         if (!link_status && nf_priv->link_up) {
962                 if_link_state_change(ifp, LINK_STATE_DOWN);
963                 nf_priv->link_up = 0;
964                 if (adapter->sume_debug)
965                         device_printf(adapter->dev, "port %d link state "
966                             "changed to DOWN\n", nf_priv->unit);
967         } else if (link_status && !nf_priv->link_up) {
968                 nf_priv->link_up = 1;
969                 if_link_state_change(ifp, LINK_STATE_UP);
970                 if (adapter->sume_debug)
971                         device_printf(adapter->dev, "port %d link state "
972                             "changed to UP\n", nf_priv->unit);
973         }
974 }
975
976 static void
977 sume_media_status(if_t ifp, struct ifmediareq *ifmr)
978 {
979         struct nf_priv *nf_priv = if_getsoftc(ifp);
980         struct ifmedia *ifm = &nf_priv->media;
981
982         if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
983             (if_getflags(ifp) & IFF_UP))
984                 ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
985         else
986                 ifmr->ifm_active = ifm->ifm_cur->ifm_media;
987
988         ifmr->ifm_status |= IFM_AVALID;
989
990         sume_update_link_status(ifp);
991
992         if (nf_priv->link_up)
993                 ifmr->ifm_status |= IFM_ACTIVE;
994 }
995
996 /*
997  * Packet to transmit. We take the packet data from the mbuf and copy it to the
998  * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
999  * packet data are for metadata: sport/dport (depending on our source
1000  * interface), packet length and magic 0xcafe. We tell the SUME about the
1001  * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
1002  * the information about the start and length of the packet and trigger the
1003  * transaction.
1004  */
1005 static int
1006 sume_if_start_locked(if_t ifp)
1007 {
1008         struct mbuf *m;
1009         struct nf_priv *nf_priv = if_getsoftc(ifp);
1010         struct sume_adapter *adapter = nf_priv->adapter;
1011         struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
1012         uint8_t *outbuf;
1013         struct nf_metadata *mdata;
1014         int plen = SUME_MIN_PKT_SIZE;
1015
1016         KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1017         KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
1018             ("SUME not in IDLE state"));
1019
1020         m = if_dequeue(ifp);
1021         if (m == NULL)
1022                 return (EINVAL);
1023
1024         /* Packets large enough do not need to be padded */
1025         if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
1026                 plen = m->m_pkthdr.len;
1027
1028         if (adapter->sume_debug)
1029                 device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
1030                     SUME_ETH_DEVICE_NAME, nf_priv->unit);
1031
1032         outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
1033         mdata = (struct nf_metadata *) outbuf;
1034
1035         /* Clear the recovery flag. */
1036         send->recovery = 0;
1037
1038         /* Make sure we fit with the 16 bytes nf_metadata. */
1039         if (m->m_pkthdr.len + sizeof(struct nf_metadata) >
1040             adapter->sg_buf_size) {
1041                 device_printf(adapter->dev, "packet too big for bounce buffer "
1042                     "(%d)\n", m->m_pkthdr.len);
1043                 m_freem(m);
1044                 nf_priv->stats.tx_dropped++;
1045                 return (ENOMEM);
1046         }
1047
1048         bus_dmamap_sync(send->ch_tag, send->ch_map,
1049             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1050
1051         /* Zero out the padded data */
1052         if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
1053                 bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
1054         /* Skip the first 16 bytes for the metadata. */
1055         m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
1056         send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;
1057
1058         /* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
1059         mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
1060         mdata->dport = htole16(1 << (nf_priv->port * 2));
1061         mdata->plen = htole16(plen);
1062         mdata->magic = htole16(SUME_RIFFA_MAGIC);
1063         mdata->t1 = htole32(0);
1064         mdata->t2 = htole32(0);
1065
1066         /* Let the FPGA know about the transfer. */
1067         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1068             RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
1069         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1070             RIFFA_RX_LEN_REG_OFF), send->len);
1071
1072         /* Fill the bouncebuf "descriptor". */
1073         sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
1074
1075         /* Update the state before intiating the DMA to avoid races. */
1076         send->state = SUME_RIFFA_CHAN_STATE_READY;
1077
1078         /* DMA. */
1079         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1080             RIFFA_RX_SG_ADDR_LO_REG_OFF),
1081             SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
1082         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1083             RIFFA_RX_SG_ADDR_HI_REG_OFF),
1084             SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
1085         write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1086             RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
1087
1088         bus_dmamap_sync(send->ch_tag, send->ch_map,
1089             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1090
1091         nf_priv->stats.tx_packets++;
1092         nf_priv->stats.tx_bytes += plen;
1093
1094         /* We can free as long as we use the bounce buffer. */
1095         m_freem(m);
1096
1097         adapter->last_ifc = nf_priv->port;
1098
1099         /* Reset watchdog counter. */
1100         adapter->wd_counter = 0;
1101
1102         return (0);
1103 }
1104
1105 static void
1106 sume_if_start(if_t ifp)
1107 {
1108         struct nf_priv *nf_priv = if_getsoftc(ifp);
1109         struct sume_adapter *adapter = nf_priv->adapter;
1110
1111         if (!adapter->running || !(if_getflags(ifp) & IFF_UP))
1112                 return;
1113
1114         SUME_LOCK(adapter);
1115         if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state ==
1116             SUME_RIFFA_CHAN_STATE_IDLE)
1117                 sume_if_start_locked(ifp);
1118         SUME_UNLOCK(adapter);
1119 }
1120
1121 /*
1122  * We call this function at the end of every TX transaction to check for
1123  * remaining packets in the TX queues for every UP interface.
1124  */
1125 static void
1126 check_tx_queues(struct sume_adapter *adapter)
1127 {
1128         int i, last_ifc;
1129
1130         KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1131
1132         last_ifc = adapter->last_ifc;
1133
1134         /* Check all interfaces */
1135         for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) {
1136                 if_t ifp = adapter->ifp[i % SUME_NPORTS];
1137
1138                 if (!(if_getflags(ifp) & IFF_UP))
1139                         continue;
1140
1141                 if (!sume_if_start_locked(ifp))
1142                         break;
1143         }
1144 }
1145
1146 static int
1147 sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port)
1148 {
1149         if_t ifp;
1150         struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME,
1151             M_ZERO | M_WAITOK);
1152
1153         ifp = if_alloc(IFT_ETHER);
1154         if (ifp == NULL) {
1155                 device_printf(adapter->dev, "cannot allocate ifnet\n");
1156                 return (ENOMEM);
1157         }
1158
1159         adapter->ifp[port] = ifp;
1160         if_setsoftc(ifp, nf_priv);
1161
1162         nf_priv->adapter = adapter;
1163         nf_priv->unit = alloc_unr(unr);
1164         nf_priv->port = port;
1165         nf_priv->link_up = 0;
1166
1167         if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit);
1168         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1169
1170         if_setinitfn(ifp, sume_if_init);
1171         if_setstartfn(ifp, sume_if_start);
1172         if_setioctlfn(ifp, sume_if_ioctl);
1173
1174         uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS;
1175         hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit;
1176         ether_ifattach(ifp, hw_addr);
1177
1178         ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change,
1179             sume_media_status);
1180         ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
1181         ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR);
1182
1183         if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
1184
1185         return (0);
1186 }
1187
1188 static void
1189 callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err)
1190 {
1191         if (err)
1192                 return;
1193
1194         KASSERT(nseg == 1, ("%d segments returned!", nseg));
1195
1196         *(bus_addr_t *) arg = segs[0].ds_addr;
1197 }
1198
1199 static int
1200 sume_probe_riffa_buffer(const struct sume_adapter *adapter,
1201     struct riffa_chnl_dir ***p, const char *dir)
1202 {
1203         struct riffa_chnl_dir **rp;
1204         bus_addr_t hw_addr;
1205         int error, ch;
1206         device_t dev = adapter->dev;
1207
1208         error = ENOMEM;
1209         *p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
1210             M_SUME, M_ZERO | M_WAITOK);
1211         if (*p == NULL) {
1212                 device_printf(dev, "malloc(%s) failed.\n", dir);
1213                 return (error);
1214         }
1215
1216         rp = *p;
1217         /* Allocate the chnl_dir structs themselves. */
1218         for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1219                 /* One direction. */
1220                 rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
1221                     M_ZERO | M_WAITOK);
1222                 if (rp[ch] == NULL) {
1223                         device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir "
1224                             "failed.\n", dir, ch);
1225                         return (error);
1226                 }
1227
1228                 int err = bus_dma_tag_create(bus_get_dma_tag(dev),
1229                     4, 0,
1230                     BUS_SPACE_MAXADDR,
1231                     BUS_SPACE_MAXADDR,
1232                     NULL, NULL,
1233                     adapter->sg_buf_size,
1234                     1,
1235                     adapter->sg_buf_size,
1236                     0,
1237                     NULL,
1238                     NULL,
1239                     &rp[ch]->ch_tag);
1240
1241                 if (err) {
1242                         device_printf(dev, "bus_dma_tag_create(%s[%d]) "
1243                             "failed.\n", dir, ch);
1244                         return (err);
1245                 }
1246
1247                 err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **)
1248                     &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT |
1249                     BUS_DMA_ZERO, &rp[ch]->ch_map);
1250                 if (err) {
1251                         device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
1252                             dir, ch);
1253                         return (err);
1254                 }
1255
1256                 bzero(rp[ch]->buf_addr, adapter->sg_buf_size);
1257
1258                 err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map,
1259                     rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma,
1260                     &hw_addr, BUS_DMA_NOWAIT);
1261                 if (err) {
1262                         device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
1263                             dir, ch);
1264                         return (err);
1265                 }
1266                 rp[ch]->buf_hw_addr = hw_addr;
1267                 rp[ch]->num_sg = 1;
1268                 rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE;
1269
1270                 rp[ch]->rtag = SUME_INIT_RTAG;
1271         }
1272
1273         return (0);
1274 }
1275
1276 static int
1277 sume_probe_riffa_buffers(struct sume_adapter *adapter)
1278 {
1279         int error;
1280
1281         error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
1282         if (error)
1283                 return (error);
1284
1285         error = sume_probe_riffa_buffer(adapter, &adapter->send, "send");
1286
1287         return (error);
1288 }
1289
1290 static void
1291 sume_sysctl_init(struct sume_adapter *adapter)
1292 {
1293         device_t dev = adapter->dev;
1294         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
1295         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
1296         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
1297         struct sysctl_oid *tmp_tree;
1298         char namebuf[MAX_IFC_NAME_LEN];
1299         int i;
1300
1301         tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW,
1302             0, "SUME top-level tree");
1303         if (tree == NULL) {
1304                 device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1305                 return;
1306         }
1307         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
1308             &adapter->sume_debug, 0, "debug int leaf");
1309
1310         /* total RX error stats */
1311         SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts",
1312             CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
1313         SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes",
1314             CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");
1315
1316         for (i = SUME_NPORTS - 1; i >= 0; i--) {
1317                 if_t ifp = adapter->ifp[i];
1318                 if (ifp == NULL)
1319                         continue;
1320
1321                 struct nf_priv *nf_priv = if_getsoftc(ifp);
1322
1323                 snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
1324                     SUME_ETH_DEVICE_NAME, nf_priv->unit);
1325                 tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
1326                     CTLFLAG_RW, 0, "SUME ifc tree");
1327                 if (tmp_tree == NULL) {
1328                         device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1329                         return;
1330                 }
1331
1332                 /* Packets dropped by down interface. */
1333                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1334                     "ifc_down_bytes", CTLFLAG_RD,
1335                     &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes");
1336                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1337                     "ifc_down_packets", CTLFLAG_RD,
1338                     &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets");
1339
1340                 /* HW RX stats */
1341                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1342                     "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets,
1343                     0, "hw_rx packets");
1344
1345                 /* HW TX stats */
1346                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1347                     "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets,
1348                     0, "hw_tx packets");
1349
1350                 /* RX stats */
1351                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1352                     "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0,
1353                     "rx bytes");
1354                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1355                     "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0,
1356                     "rx dropped");
1357                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1358                     "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0,
1359                     "rx packets");
1360
1361                 /* TX stats */
1362                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1363                     "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0,
1364                     "tx bytes");
1365                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1366                     "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0,
1367                     "tx dropped");
1368                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1369                     "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0,
1370                     "tx packets");
1371         }
1372 }
1373
1374 static void
1375 sume_local_timer(void *arg)
1376 {
1377         struct sume_adapter *adapter = arg;
1378
1379         if (!adapter->running)
1380                 return;
1381
1382         taskqueue_enqueue(adapter->tq, &adapter->stat_task);
1383
1384         SUME_LOCK(adapter);
1385         if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state !=
1386             SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) {
1387                 /* Resetting interfaces if stuck for 3 seconds. */
1388                 device_printf(adapter->dev, "TX stuck, resetting adapter.\n");
1389                 read_reg(adapter, RIFFA_INFO_REG_OFF);
1390
1391                 adapter->send[SUME_RIFFA_CHANNEL_DATA]->state =
1392                     SUME_RIFFA_CHAN_STATE_IDLE;
1393                 adapter->wd_counter = 0;
1394
1395                 check_tx_queues(adapter);
1396         }
1397         SUME_UNLOCK(adapter);
1398
1399         callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1400 }
1401
1402 static void
1403 sume_get_stats(void *context, int pending)
1404 {
1405         struct sume_adapter *adapter = context;
1406         int i;
1407
1408         for (i = 0; i < SUME_NPORTS; i++) {
1409                 if_t ifp = adapter->ifp[i];
1410
1411                 if (if_getflags(ifp) & IFF_UP) {
1412                         struct nf_priv *nf_priv = if_getsoftc(ifp);
1413                         struct sume_ifreq sifr;
1414
1415                         sume_update_link_status(ifp);
1416
1417                         /* Get RX counter. */
1418                         sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
1419                         sifr.val = 0;
1420
1421                         if (!get_modreg_value(nf_priv, &sifr))
1422                                 nf_priv->stats.hw_rx_packets += sifr.val;
1423
1424                         /* Get TX counter. */
1425                         sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
1426                         sifr.val = 0;
1427
1428                         if (!get_modreg_value(nf_priv, &sifr))
1429                                 nf_priv->stats.hw_tx_packets += sifr.val;
1430                 }
1431         }
1432 }
1433
1434 static int
1435 sume_attach(device_t dev)
1436 {
1437         struct sume_adapter *adapter = device_get_softc(dev);
1438         adapter->dev = dev;
1439         int error, i;
1440
1441         mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);
1442
1443         adapter->running = 0;
1444
1445         /* OK finish up RIFFA. */
1446         error = sume_probe_riffa_pci(adapter);
1447         if (error != 0)
1448                 goto error;
1449
1450         error = sume_probe_riffa_buffers(adapter);
1451         if (error != 0)
1452                 goto error;
1453
1454         /* Now do the network interfaces. */
1455         for (i = 0; i < SUME_NPORTS; i++) {
1456                 error = sume_ifp_alloc(adapter, i);
1457                 if (error != 0)
1458                         goto error;
1459         }
1460
1461         /*  Register stats and register sysctls. */
1462         sume_sysctl_init(adapter);
1463
1464         /* Reset the HW. */
1465         read_reg(adapter, RIFFA_INFO_REG_OFF);
1466
1467         /* Ready to go, "enable" IRQ. */
1468         adapter->running = 1;
1469
1470         callout_init(&adapter->timer, 1);
1471         TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter);
1472
1473         adapter->tq = taskqueue_create("sume_stats", M_NOWAIT,
1474             taskqueue_thread_enqueue, &adapter->tq);
1475         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq",
1476             device_get_nameunit(adapter->dev));
1477
1478         callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1479
1480         return (0);
1481
1482 error:
1483         sume_detach(dev);
1484
1485         return (error);
1486 }
1487
1488 static void
1489 sume_remove_riffa_buffer(const struct sume_adapter *adapter,
1490     struct riffa_chnl_dir **pp)
1491 {
1492         int ch;
1493
1494         for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1495                 if (pp[ch] == NULL)
1496                         continue;
1497
1498                 if (pp[ch]->buf_hw_addr != 0) {
1499                         bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr,
1500                             pp[ch]->ch_map);
1501                         pp[ch]->buf_hw_addr = 0;
1502                 }
1503
1504                 free(pp[ch], M_SUME);
1505         }
1506 }
1507
1508 static void
1509 sume_remove_riffa_buffers(struct sume_adapter *adapter)
1510 {
1511         if (adapter->send != NULL) {
1512                 sume_remove_riffa_buffer(adapter, adapter->send);
1513                 free(adapter->send, M_SUME);
1514                 adapter->send = NULL;
1515         }
1516         if (adapter->recv != NULL) {
1517                 sume_remove_riffa_buffer(adapter, adapter->recv);
1518                 free(adapter->recv, M_SUME);
1519                 adapter->recv = NULL;
1520         }
1521 }
1522
1523 static int
1524 sume_detach(device_t dev)
1525 {
1526         struct sume_adapter *adapter = device_get_softc(dev);
1527         int i;
1528         struct nf_priv *nf_priv;
1529
1530         KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not "
1531             "initialized"));
1532         adapter->running = 0;
1533
1534         /* Drain the stats callout and task queue. */
1535         callout_drain(&adapter->timer);
1536
1537         if (adapter->tq) {
1538                 taskqueue_drain(adapter->tq, &adapter->stat_task);
1539                 taskqueue_free(adapter->tq);
1540         }
1541
1542         for (i = 0; i < SUME_NPORTS; i++) {
1543                 if_t ifp = adapter->ifp[i];
1544                 if (ifp == NULL)
1545                         continue;
1546
1547                 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1548                 nf_priv = if_getsoftc(ifp);
1549
1550                 if (if_getflags(ifp) & IFF_UP)
1551                         if_down(ifp);
1552                 ifmedia_removeall(&nf_priv->media);
1553                 free_unr(unr, nf_priv->unit);
1554
1555                 if_setflagbits(ifp, 0, IFF_UP);
1556                 ether_ifdetach(ifp);
1557                 if_free(ifp);
1558
1559                 free(nf_priv, M_SUME);
1560         }
1561
1562         sume_remove_riffa_buffers(adapter);
1563
1564         if (adapter->irq.tag)
1565                 bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag);
1566         if (adapter->irq.res)
1567                 bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid,
1568                     adapter->irq.res);
1569
1570         pci_release_msi(dev);
1571
1572         if (adapter->bar0_addr)
1573                 bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid,
1574                     adapter->bar0_addr);
1575
1576         mtx_destroy(&adapter->lock);
1577
1578         return (0);
1579 }
1580
1581 static int
1582 mod_event(module_t mod, int cmd, void *arg)
1583 {
1584         switch (cmd) {
1585         case MOD_LOAD:
1586                 unr = new_unrhdr(0, INT_MAX, NULL);
1587                 break;
1588
1589         case MOD_UNLOAD:
1590                 delete_unrhdr(unr);
1591                 break;
1592         }
1593
1594         return (0);
1595 }
1596
1597 DRIVER_MODULE(sume, pci, sume_driver, mod_event, NULL);
1598 MODULE_VERSION(sume, 1);