[FreeBSD/FreeBSD.git] / sys / dev / xen / netfront / netfront.c
1 /*
2  *
3  * Copyright (c) 2004-2006 Kip Macy
4  * All rights reserved.
5  *
6  *
7  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
8  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
9  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
10  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
11  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
12  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
13  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
14  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
15  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
16  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17  */
18
19
20 #include <sys/cdefs.h>
21 __FBSDID("$FreeBSD$");
22
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/sockio.h>
26 #include <sys/mbuf.h>
27 #include <sys/malloc.h>
28 #include <sys/module.h>
29 #include <sys/kernel.h>
30 #include <sys/socket.h>
31 #include <sys/queue.h>
32 #include <sys/sx.h>
33
34 #include <net/if.h>
35 #include <net/if_arp.h>
36 #include <net/ethernet.h>
37 #include <net/if_dl.h>
38 #include <net/if_media.h>
39
40 #include <net/bpf.h>
41
42 #include <net/if_types.h>
43 #include <net/if.h>
44
45 #include <netinet/in_systm.h>
46 #include <netinet/in.h>
47 #include <netinet/ip.h>
48 #include <netinet/if_ether.h>
49
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52
53 #include <machine/clock.h>      /* for DELAY */
54 #include <machine/bus.h>
55 #include <machine/resource.h>
56 #include <machine/frame.h>
57 #include <machine/vmparam.h>
58
59 #include <sys/bus.h>
60 #include <sys/rman.h>
61
62 #include <machine/intr_machdep.h>
63
64 #include <machine/xen/xen-os.h>
65 #include <machine/xen/hypervisor.h>
66 #include <machine/xen/xen_intr.h>
67 #include <machine/xen/evtchn.h>
68 #include <xen/gnttab.h>
69 #include <xen/interface/memory.h>
70 #include <dev/xen/netfront/mbufq.h>
71 #include <machine/xen/features.h>
72 #include <xen/interface/io/netif.h>
73 #include <xen/xenbus/xenbusvar.h>
74
75 #include "xenbus_if.h"
76
77 #define GRANT_INVALID_REF       0
78
79 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
80 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
81
82 #ifdef CONFIG_XEN
83 static int MODPARM_rx_copy = 0;
84 module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
85 MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
86 static int MODPARM_rx_flip = 0;
87 module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
88 MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
89 #else
90 static const int MODPARM_rx_copy = 1;
91 static const int MODPARM_rx_flip = 0;
92 #endif
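/*
 * Copy mode asks the backend to grant-copy packet data into buffers that
 * this frontend owns and has granted writable; flip mode instead transfers
 * page ownership for every received packet (see the copying_receiver logic
 * in network_alloc_rx_buffers() and xennet_get_responses() below).  Without
 * CONFIG_XEN the constants above make the driver request copy mode whenever
 * the backend advertises feature-rx-copy.
 */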
93
94 #define RX_COPY_THRESHOLD 256
95
96 #define net_ratelimit() 0
97
98 struct netfront_info;
99 struct netfront_rx_info;
100
101 static void xn_txeof(struct netfront_info *);
102 static void xn_rxeof(struct netfront_info *);
103 static void network_alloc_rx_buffers(struct netfront_info *);
104
105 static void xn_tick_locked(struct netfront_info *);
106 static void xn_tick(void *);
107
108 static void xn_intr(void *);
109 static void xn_start_locked(struct ifnet *);
110 static void xn_start(struct ifnet *);
111 static int  xn_ioctl(struct ifnet *, u_long, caddr_t);
112 static void xn_ifinit_locked(struct netfront_info *);
113 static void xn_ifinit(void *);
114 static void xn_stop(struct netfront_info *);
115 #ifdef notyet
116 static void xn_watchdog(struct ifnet *);
117 #endif
118
119 static void show_device(struct netfront_info *sc);
120 #ifdef notyet
121 static void netfront_closing(device_t dev);
122 #endif
123 static void netif_free(struct netfront_info *info);
124 static int netfront_detach(device_t dev);
125
126 static int talk_to_backend(device_t dev, struct netfront_info *info);
127 static int create_netdev(device_t dev);
128 static void netif_disconnect_backend(struct netfront_info *info);
129 static int setup_device(device_t dev, struct netfront_info *info);
130 static void end_access(int ref, void *page);
131
132 /* Xenolinux helper functions */
133 int network_connect(struct netfront_info *);
134
135 static void xn_free_rx_ring(struct netfront_info *);
136
137 static void xn_free_tx_ring(struct netfront_info *);
138
139 static int xennet_get_responses(struct netfront_info *np,
140         struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf **list,
141         int *pages_flipped_p);
142
143 #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
144
145 #define INVALID_P2M_ENTRY (~0UL)
146
147 /*
148  * Mbuf pointers. We need these to keep track of the virtual addresses
149  * of our mbuf chains since we can only convert from virtual to physical,
150  * not the other way around.  The size must track the free index arrays.
151  */
152 struct xn_chain_data {
153                 struct mbuf             *xn_tx_chain[NET_TX_RING_SIZE+1];
154                 struct mbuf             *xn_rx_chain[NET_RX_RING_SIZE+1];
155 };
156
157
158 struct net_device_stats
159 {
160         u_long  rx_packets;             /* total packets received       */
161         u_long  tx_packets;             /* total packets transmitted    */
162         u_long  rx_bytes;               /* total bytes received         */
163         u_long  tx_bytes;               /* total bytes transmitted      */
164         u_long  rx_errors;              /* bad packets received         */
165         u_long  tx_errors;              /* packet transmit problems     */
166         u_long  rx_dropped;             /* no space in linux buffers    */
167         u_long  tx_dropped;             /* no space available in linux  */
168         u_long  multicast;              /* multicast packets received   */
169         u_long  collisions;
170
171         /* detailed rx_errors: */
172         u_long  rx_length_errors;
173         u_long  rx_over_errors;         /* receiver ring buff overflow  */
174         u_long  rx_crc_errors;          /* recved pkt with crc error    */
175         u_long  rx_frame_errors;        /* recv'd frame alignment error */
176         u_long  rx_fifo_errors;         /* recv'r fifo overrun          */
177         u_long  rx_missed_errors;       /* receiver missed packet       */
178
179         /* detailed tx_errors */
180         u_long  tx_aborted_errors;
181         u_long  tx_carrier_errors;
182         u_long  tx_fifo_errors;
183         u_long  tx_heartbeat_errors;
184         u_long  tx_window_errors;
185         
186         /* for cslip etc */
187         u_long  rx_compressed;
188         u_long  tx_compressed;
189 };
190
191 struct netfront_info {
192                 
193         struct ifnet *xn_ifp;
194
195         struct net_device_stats stats;
196         u_int tx_full;
197
198         netif_tx_front_ring_t tx;
199         netif_rx_front_ring_t rx;
200
201         struct mtx   tx_lock;
202         struct mtx   rx_lock;
203         struct sx    sc_lock;
204
205         u_int handle;
206         u_int irq;
207         u_int copying_receiver;
208         u_int carrier;
209                 
210         /* Receive-ring batched refills. */
211 #define RX_MIN_TARGET 32
212 #define RX_MAX_TARGET NET_RX_RING_SIZE
213         int rx_min_target, rx_max_target, rx_target;
214
215         /*
216          * {tx,rx}_mbufs store outstanding mbufs. The first entry in each
217          * array is an index into a chain of free entries.
218          */
219
220         grant_ref_t gref_tx_head;
221         grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; 
222         grant_ref_t gref_rx_head;
223         grant_ref_t grant_rx_ref[NET_RX_RING_SIZE + 1];
224
225 #define TX_MAX_TARGET min(NET_RX_RING_SIZE, 256)
226         device_t xbdev;
227         int tx_ring_ref;
228         int rx_ring_ref;
229         uint8_t mac[ETHER_ADDR_LEN];
230         struct xn_chain_data    xn_cdata;       /* mbufs */
231         struct mbuf_head xn_rx_batch;   /* head of the batch queue */
232
233         int                     xn_if_flags;
234         struct callout          xn_stat_ch;
235
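        /*
         * Scratch space for the page-flipping receive path: one multicall
         * entry per ring slot plus one extra for the trailing memory_op,
         * filled in by network_alloc_rx_buffers() and xn_rxeof().
         */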
236         u_long rx_pfn_array[NET_RX_RING_SIZE];
237         multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
238         mmu_update_t rx_mmu[NET_RX_RING_SIZE];
239 };
240
241 #define rx_mbufs xn_cdata.xn_rx_chain
242 #define tx_mbufs xn_cdata.xn_tx_chain
243
244 #define XN_LOCK_INIT(_sc, _name) \
245         mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \
246         mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF);  \
247         sx_init(&(_sc)->sc_lock, #_name"_rx")
248
249 #define XN_RX_LOCK(_sc)           mtx_lock(&(_sc)->rx_lock)
250 #define XN_RX_UNLOCK(_sc)         mtx_unlock(&(_sc)->rx_lock)
251
252 #define XN_TX_LOCK(_sc)           mtx_lock(&(_sc)->tx_lock)
253 #define XN_TX_UNLOCK(_sc)         mtx_unlock(&(_sc)->tx_lock)
254
255 #define XN_LOCK(_sc)           sx_xlock(&(_sc)->sc_lock); 
256 #define XN_UNLOCK(_sc)         sx_xunlock(&(_sc)->sc_lock); 
257
258 #define XN_LOCK_ASSERT(_sc)    sx_assert(&(_sc)->sc_lock, SX_LOCKED); 
259 #define XN_RX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->rx_lock, MA_OWNED); 
260 #define XN_TX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->tx_lock, MA_OWNED); 
261 #define XN_LOCK_DESTROY(_sc)   mtx_destroy(&(_sc)->rx_lock); \
262                                mtx_destroy(&(_sc)->tx_lock); \
263                                sx_destroy(&(_sc)->sc_lock);
264
265 struct netfront_rx_info {
266         struct netif_rx_response rx;
267         struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
268 };
269
270 #define netfront_carrier_on(netif)      ((netif)->carrier = 1)
271 #define netfront_carrier_off(netif)     ((netif)->carrier = 0)
272 #define netfront_carrier_ok(netif)      ((netif)->carrier)
273
274 /*
275  * Access macros for acquiring and freeing slots in the tx_mbufs[] and
276  * rx_mbufs[] chain arrays.
277  */
278
279
280
281
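/*
 * The free list is threaded through the mbuf pointer arrays themselves:
 * entry 0 holds the index of the first free slot and each free slot stores
 * the index of the next one, cast to a pointer.  This is also why
 * netif_release_tx_bufs() treats any "pointer" below KERNBASE as a free-list
 * index rather than a real mbuf.
 */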
282 static inline void
283 add_id_to_freelist(struct mbuf **list, unsigned short id)
284 {
285         list[id] = list[0];
286         list[0]  = (void *)(u_long)id;
287 }
288
289 static inline unsigned short
290 get_id_from_freelist(struct mbuf **list)
291 {
292         u_int id = (u_int)(u_long)list[0];
293         list[0] = list[id];
294         return (id);
295 }
296
297 static inline int
298 xennet_rxidx(RING_IDX idx)
299 {
300         return idx & (NET_RX_RING_SIZE - 1);
301 }
302
303 static inline struct mbuf *
304 xennet_get_rx_mbuf(struct netfront_info *np,
305                                                 RING_IDX ri)
306 {
307         int i = xennet_rxidx(ri);
308         struct mbuf *m;
309
310         m = np->rx_mbufs[i];
311         np->rx_mbufs[i] = NULL;
312         return (m);
313 }
314
315 static inline grant_ref_t
316 xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
317 {
318         int i = xennet_rxidx(ri);
319         grant_ref_t ref = np->grant_rx_ref[i];
320         np->grant_rx_ref[i] = GRANT_INVALID_REF;
321         return ref;
322 }
323
324 #ifdef DEBUG
325
326 #endif
327 #define IPRINTK(fmt, args...) \
328     printf("[XEN] " fmt, ##args)
329 #define WPRINTK(fmt, args...) \
330     printf("[XEN] " fmt, ##args)
331 #define DPRINTK(fmt, args...) \
332     printf("[XEN] %s: " fmt, __func__, ##args)
333
334
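/*
 * Coalesce an outgoing mbuf chain into a single jumbo-page cluster so that
 * the whole frame can be granted to the backend as one page.  Note that the
 * m_cljget() result is not checked; the copy below assumes the cluster
 * allocation succeeded.  ext_arg1 caches the physical frame number of the
 * new buffer.
 */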
335 static __inline struct mbuf* 
336 makembuf (struct mbuf *buf)
337 {
338         struct mbuf *m = NULL;
339         
340         MGETHDR(m, M_DONTWAIT, MT_DATA);
341
342         if (m == NULL)
343                 return (NULL);
344
345         M_MOVE_PKTHDR(m, buf);
346
347         m_cljget(m, M_DONTWAIT, MJUMPAGESIZE);
348         m->m_pkthdr.len = buf->m_pkthdr.len;
349         m->m_len = buf->m_len;
350         m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m, caddr_t));
351
352         m->m_ext.ext_arg1 = (caddr_t *)(uintptr_t)(vtophys(mtod(m, caddr_t)) >> PAGE_SHIFT);
353
354         return (m);
355 }
356
357 /**
358  * Read the 'mac' node at the given device's node in the store, and parse that
359  * as colon-separated octets, placing the result in the given mac array.  mac
360  * must be a preallocated array of length ETHER_ADDR_LEN.
361  * Return 0 on success, or errno on error.
362  */
363 static int 
364 xen_net_read_mac(device_t dev, uint8_t mac[])
365 {
366         char *s;
367         int i;
368         char *e;
369         char *macstr = xenbus_read(XBT_NIL, xenbus_get_node(dev), "mac", NULL);
370         if (IS_ERR(macstr)) {
371                 return PTR_ERR(macstr);
372         }
373         s = macstr;
374         for (i = 0; i < ETHER_ADDR_LEN; i++) {
375                 mac[i] = strtoul(s, &e, 16);
376                 if (s == e || (e[0] != ':' && e[0] != 0)) {
377                         free(macstr, M_DEVBUF);
378                         return ENOENT;
379                 }
380                 s = &e[1];
381         }
382         free(macstr, M_DEVBUF);
383         return 0;
384 }
385
386 /**
387  * Entry point to this code when a new device is created.  The probe only
388  * checks that the xenbus device type is "vif"; allocation of the basic
389  * structures and of the ring buffers for communication with the backend
390  * happens later, in netfront_attach() and talk_to_backend().
391  */
392 static int 
393 netfront_probe(device_t dev)
394 {
395
396         if (!strcmp(xenbus_get_type(dev), "vif")) {
397                 device_set_desc(dev, "Virtual Network Interface");
398                 return (0);
399         }
400
401         return (ENXIO);
402 }
403
404 static int
405 netfront_attach(device_t dev)
406 {       
407         int err;
408
409         err = create_netdev(dev);
410         if (err) {
411                 xenbus_dev_fatal(dev, err, "creating netdev");
412                 return err;
413         }
414
415         return 0;
416 }
417
418
419 /**
420  * We are reconnecting to the backend, due to a suspend/resume, or a backend
421  * driver restart.  We tear down our netif structure and recreate it, but
422  * leave the device-layer structures intact so that this is transparent to the
423  * rest of the kernel.
424  */
425 static int 
426 netfront_resume(device_t dev)
427 {
428         struct netfront_info *info = device_get_softc(dev);
429         
430         DPRINTK("%s\n", xenbus_get_node(dev));
431         
432         netif_disconnect_backend(info);
433         return (0);
434 }
435
436
437 /* Common code used when first setting up, and when resuming. */
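/*
 * All of the frontend's xenstore keys are written within one transaction;
 * if xenbus_transaction_end() reports EAGAIN, the whole set of writes is
 * retried from the "again" label.
 */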
438 static int 
439 talk_to_backend(device_t dev, struct netfront_info *info)
440 {
441         const char *message;
442         struct xenbus_transaction xbt;
443         const char *node = xenbus_get_node(dev);
444         int err;
445
446         err = xen_net_read_mac(dev, info->mac);
447         if (err) {
448                 xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
449                 goto out;
450         }
451
452         /* Create shared ring, alloc event channel. */
453         err = setup_device(dev, info);
454         if (err)
455                 goto out;
456         
457  again:
458         err = xenbus_transaction_start(&xbt);
459         if (err) {
460                 xenbus_dev_fatal(dev, err, "starting transaction");
461                 goto destroy_ring;
462         }
463         err = xenbus_printf(xbt, node, "tx-ring-ref","%u",
464                             info->tx_ring_ref);
465         if (err) {
466                 message = "writing tx ring-ref";
467                 goto abort_transaction;
468         }
469         err = xenbus_printf(xbt, node, "rx-ring-ref","%u",
470                             info->rx_ring_ref);
471         if (err) {
472                 message = "writing rx ring-ref";
473                 goto abort_transaction;
474         }
475         err = xenbus_printf(xbt, node,
476                 "event-channel", "%u", irq_to_evtchn_port(info->irq));
477         if (err) {
478                 message = "writing event-channel";
479                 goto abort_transaction;
480         }
481         err = xenbus_printf(xbt, node, "request-rx-copy", "%u",
482                             info->copying_receiver);
483         if (err) {
484                 message = "writing request-rx-copy";
485                 goto abort_transaction;
486         }
487         err = xenbus_printf(xbt, node, "feature-rx-notify", "%d", 1);
488         if (err) {
489                 message = "writing feature-rx-notify";
490                 goto abort_transaction;
491         }
492         err = xenbus_printf(xbt, node, "feature-no-csum-offload", "%d", 1);
493         if (err) {
494                 message = "writing feature-no-csum-offload";
495                 goto abort_transaction;
496         }
497         err = xenbus_printf(xbt, node, "feature-sg", "%d", 1);
498         if (err) {
499                 message = "writing feature-sg";
500                 goto abort_transaction;
501         }
502 #ifdef HAVE_TSO
503         err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1);
504         if (err) {
505                 message = "writing feature-gso-tcpv4";
506                 goto abort_transaction;
507         }
508 #endif
509
510         err = xenbus_transaction_end(xbt, 0);
511         if (err) {
512                 if (err == EAGAIN)
513                         goto again;
514                 xenbus_dev_fatal(dev, err, "completing transaction");
515                 goto destroy_ring;
516         }
517         
518         return 0;
519         
520  abort_transaction:
521         xenbus_transaction_end(xbt, 1);
522         xenbus_dev_fatal(dev, err, "%s", message);
523  destroy_ring:
524         netif_free(info);
525  out:
526         return err;
527 }
528
529
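/*
 * Allocate the shared tx/rx ring pages, grant them to the backend and bind
 * an event-channel interrupt handler.  On success xenbus_grant_ring() and
 * bind_listening_port_to_irqhandler() return the grant reference and the
 * irq respectively, which is why their return values are stored directly
 * below.
 */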
530 static int 
531 setup_device(device_t dev, struct netfront_info *info)
532 {
533         netif_tx_sring_t *txs;
534         netif_rx_sring_t *rxs;
535         int err;
536         struct ifnet *ifp;
537         
538         ifp = info->xn_ifp;
539
540         info->tx_ring_ref = GRANT_INVALID_REF;
541         info->rx_ring_ref = GRANT_INVALID_REF;
542         info->rx.sring = NULL;
543         info->tx.sring = NULL;
544         info->irq = 0;
545
546         txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
547         if (!txs) {
548                 err = ENOMEM;
549                 xenbus_dev_fatal(dev, err, "allocating tx ring page");
550                 goto fail;
551         }
552         SHARED_RING_INIT(txs);
553         FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
554         err = xenbus_grant_ring(dev, virt_to_mfn(txs));
555         if (err < 0)
556                 goto fail;
557         info->tx_ring_ref = err;
558
559         rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
560         if (!rxs) {
561                 err = ENOMEM;
562                 xenbus_dev_fatal(dev, err, "allocating rx ring page");
563                 goto fail;
564         }
565         SHARED_RING_INIT(rxs);
566         FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
567
568         err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
569         if (err < 0)
570                 goto fail;
571         info->rx_ring_ref = err;
572
573 #if 0   
574         network_connect(info);
575 #endif
576         err = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
577                 "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, NULL);
578
579         if (err <= 0) {
580                 xenbus_dev_fatal(dev, err,
581                                  "bind_evtchn_to_irqhandler failed");
582                 goto fail;
583         }
584         info->irq = err;
585         
586         show_device(info);
587         
588         return 0;
589         
590  fail:
591         netif_free(info);
592         return err;
593 }
594
595 /**
596  * Callback received when the backend's state changes.
597  */
598 static void
599 netfront_backend_changed(device_t dev, XenbusState newstate)
600 {
601         struct netfront_info *sc = device_get_softc(dev);
602                 
603         DPRINTK("newstate=%d\n", newstate);
604
605         switch (newstate) {
606         case XenbusStateInitialising:
607         case XenbusStateInitialised:
608         case XenbusStateConnected:
609         case XenbusStateUnknown:
610         case XenbusStateClosed:
611         case XenbusStateReconfigured:
612         case XenbusStateReconfiguring:
613                 break;
614         case XenbusStateInitWait:
615                 if (xenbus_get_state(dev) != XenbusStateInitialising)
616                         break;
617                 if (network_connect(sc) != 0)
618                         break;
619                 xenbus_set_state(dev, XenbusStateConnected);
620 #ifdef notyet           
621                 (void)send_fake_arp(netdev);
622 #endif          
623                 break;
624         case XenbusStateClosing:
625                 xenbus_set_state(dev, XenbusStateClosed);
626                 break;
627         }
628 }
629
630 static void
631 xn_free_rx_ring(struct netfront_info *sc)
632 {
633 #if 0
634         int i;
635         
636         for (i = 0; i < NET_RX_RING_SIZE; i++) {
637                 if (sc->xn_cdata.xn_rx_chain[i] != NULL) {
638                         m_freem(sc->xn_cdata.xn_rx_chain[i]);
639                         sc->xn_cdata.xn_rx_chain[i] = NULL;
640                 }
641         }
642         
643         sc->rx.rsp_cons = 0;
644         sc->xn_rx_if->req_prod = 0;
645         sc->xn_rx_if->event = sc->rx.rsp_cons ;
646 #endif
647 }
648
649 static void
650 xn_free_tx_ring(struct netfront_info *sc)
651 {
652 #if 0
653         int i;
654         
655         for (i = 0; i < NET_TX_RING_SIZE; i++) {
656                 if (sc->xn_cdata.xn_tx_chain[i] != NULL) {
657                         m_freem(sc->xn_cdata.xn_tx_chain[i]);
658                         sc->xn_cdata.xn_tx_chain[i] = NULL;
659                 }
660         }
661         
662         return;
663 #endif
664 }
665
666 static inline int
667 netfront_tx_slot_available(struct netfront_info *np)
668 {
669         return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
670                 (TX_MAX_TARGET - /* MAX_SKB_FRAGS */ 24 - 2));
671 }
672 static void
673 netif_release_tx_bufs(struct netfront_info *np)
674 {
675         struct mbuf *m;
676         int i;
677
678         for (i = 1; i <= NET_TX_RING_SIZE; i++) {
679                 m = np->xn_cdata.xn_tx_chain[i];
680
681                 if (((u_long)m) < KERNBASE)
682                         continue;
683                 gnttab_grant_foreign_access_ref(np->grant_tx_ref[i],
684                     xenbus_get_otherend_id(np->xbdev),
685                     virt_to_mfn(mtod(m, vm_offset_t)),
686                     GNTMAP_readonly);
687                 gnttab_release_grant_reference(&np->gref_tx_head,
688                     np->grant_tx_ref[i]);
689                 np->grant_tx_ref[i] = GRANT_INVALID_REF;
690                 add_id_to_freelist(np->tx_mbufs, i);
691                 m_freem(m);
692         }
693 }
694
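/*
 * Refill the receive ring.  mbufs are allocated greedily into xn_rx_batch
 * and only posted once the batch is large enough to be worthwhile.  In copy
 * mode each buffer is simply granted to the backend for writing; in flip
 * mode the page's p2m entry is invalidated, its mapping zapped, and the page
 * handed back to Xen via XENMEM_decrease_reservation so the backend can
 * transfer packet pages in its place.
 */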
695 static void
696 network_alloc_rx_buffers(struct netfront_info *sc)
697 {
698         int otherend_id = xenbus_get_otherend_id(sc->xbdev);
699         unsigned short id;
700         struct mbuf *m_new;
701         int i, batch_target, notify;
702         RING_IDX req_prod;
703         struct xen_memory_reservation reservation;
704         grant_ref_t ref;
705         int nr_flips;
706         netif_rx_request_t *req;
707         vm_offset_t vaddr;
708         u_long pfn;
709         
710         req_prod = sc->rx.req_prod_pvt;
711
712         if (unlikely(sc->carrier == 0))
713                 return;
714         
715         /*
716          * Allocate skbuffs greedily, even though we batch updates to the
717          * receive ring. This creates a less bursty demand on the memory
718          * allocator, so should reduce the chance of failed allocation
719          * requests both for ourself and for other kernel subsystems.
720          */
721         batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
722         for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
723                 MGETHDR(m_new, M_DONTWAIT, MT_DATA);
724                 if (m_new == NULL) 
725                         goto no_mbuf;
726
727                 m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE);
728                 if ((m_new->m_flags & M_EXT) == 0) {
729                         m_freem(m_new);
730
731 no_mbuf:
732                         if (i != 0)
733                                 goto refill;
734                         /*
735                          * XXX set timer
736                          */
737                         break;
738                 }
739                 m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;
740                 
741                 /* queue the mbufs allocated */
742                 mbufq_tail(&sc->xn_rx_batch, m_new);
743         }
744         
745         /* Is the batch large enough to be worthwhile? */
746         if (i < (sc->rx_target/2)) {
747                 if (req_prod > sc->rx.sring->req_prod)
748                         goto push;
749                 return;
750         }
751         /* Adjust floating fill target if we risked running out of buffers. */
752         if ( ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) &&
753              ((sc->rx_target *= 2) > sc->rx_max_target) )
754                 sc->rx_target = sc->rx_max_target;
755
756 refill:
757         for (nr_flips = i = 0; ; i++) {
758                 if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
759                         break;
760
761                 m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
762                                 vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);
763
764                 id = xennet_rxidx(req_prod + i);
765
766                 KASSERT(sc->xn_cdata.xn_rx_chain[id] == NULL,
767                     ("non-NULL xn_rx_chain"));
768                 sc->xn_cdata.xn_rx_chain[id] = m_new;
769
770                 ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
771                 KASSERT((short)ref >= 0, ("negative ref"));
772                 sc->grant_rx_ref[id] = ref;
773
774                 vaddr = mtod(m_new, vm_offset_t);
775                 pfn = vtophys(vaddr) >> PAGE_SHIFT;
776                 req = RING_GET_REQUEST(&sc->rx, req_prod + i);
777
778                 if (sc->copying_receiver == 0) {
779                         gnttab_grant_foreign_transfer_ref(ref,
780                             otherend_id, pfn);
781                         sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
782                         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
783                                 /* Remove this page before passing
784                                  * back to Xen.
785                                  */
786                                 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
787                                 MULTI_update_va_mapping(&sc->rx_mcl[i],
788                                     vaddr, 0, 0);
789                         }
790                         nr_flips++;
791                 } else {
792                         gnttab_grant_foreign_access_ref(ref,
793                             otherend_id,
794                             PFNTOMFN(pfn), 0);
795                 }
796                 req->id = id;
797                 req->gref = ref;
798                 
799                 sc->rx_pfn_array[i] =
800                     vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
801         } 
802         
803         KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
804         KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
805         /*
806          * We may have allocated buffers which have entries outstanding
807          * in the page update queue -- make sure we flush those first!
808          */
809         PT_UPDATES_FLUSH();
810         if (nr_flips != 0) {
811 #ifdef notyet
812                 /* Tell the balloon driver what is going on. */
813                 balloon_update_driver_allowance(i);
814 #endif
815                 set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array);
816                 reservation.nr_extents   = i;
817                 reservation.extent_order = 0;
818                 reservation.address_bits = 0;
819                 reservation.domid        = DOMID_SELF;
820
821                 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
822
823                         /* After all PTEs have been zapped, flush the TLB. */
824                         sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
825                             UVMF_TLB_FLUSH|UVMF_ALL;
826         
827                         /* Give away a batch of pages. */
828                         sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
829                         sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
830                         sc->rx_mcl[i].args[1] =  (u_long)&reservation;
831                         /* Zap PTEs and give away pages in one big multicall. */
832                         (void)HYPERVISOR_multicall(sc->rx_mcl, i+1);
833
834                         /* Check return status of HYPERVISOR_dom_mem_op(). */
835                         if (unlikely(sc->rx_mcl[i].result != i))
836                                 panic("Unable to reduce memory reservation\n");
837                 } else {
838                         if (HYPERVISOR_memory_op(
839                             XENMEM_decrease_reservation, &reservation)
840                             != i)
841                                 panic("Unable to reduce memory "
842                                     "reservation\n");
843                 }
844         } else {
845                 wmb();
846         }
847                         
848         /* Above is a suitable barrier to ensure backend will see requests. */
849         sc->rx.req_prod_pvt = req_prod + i;
850 push:
851         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
852         if (notify)
853                 notify_remote_via_irq(sc->irq);
854 }
855
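/*
 * Receive-side completion: drain responses from the rx ring, reassembling
 * multi-fragment packets via xennet_get_responses(), perform any M->P and
 * remapping work for flipped pages in a single multicall, then pass the
 * mbufs to the stack and replenish the ring.
 */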
856 static void
857 xn_rxeof(struct netfront_info *np)
858 {
859         struct ifnet *ifp;
860         struct netfront_rx_info rinfo;
861         struct netif_rx_response *rx = &rinfo.rx;
862         struct netif_extra_info *extras = rinfo.extras;
863         RING_IDX i, rp;
864         multicall_entry_t *mcl;
865         struct mbuf *m;
866         struct mbuf_head rxq, errq;
867         int err, pages_flipped = 0, work_to_do;
868
869         do {
870                 XN_RX_LOCK_ASSERT(np);
871                 if (!netfront_carrier_ok(np))
872                         return;
873
874                 mbufq_init(&errq);
875                 mbufq_init(&rxq);
876
877                 ifp = np->xn_ifp;
878         
879                 rp = np->rx.sring->rsp_prod;
880                 rmb();  /* Ensure we see queued responses up to 'rp'. */
881
882                 i = np->rx.rsp_cons;
883                 while ((i != rp)) {
884                         memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
885                         memset(extras, 0, sizeof(rinfo.extras));
886
887                         m = NULL;
888                         err = xennet_get_responses(np, &rinfo, rp, &m,
889                             &pages_flipped);
890
891                         if (unlikely(err)) {
892                                 if (m)
893                                         mbufq_tail(&errq, m);
894                                 np->stats.rx_errors++;
895                                 i = np->rx.rsp_cons;
896                                 continue;
897                         }
898
899                         m->m_pkthdr.rcvif = ifp;
900                         if ( rx->flags & NETRXF_data_validated ) {
901                                 /* Tell the stack the checksums are okay */
902                                 /*
903                                  * XXX this isn't necessarily the case - need to add
904                                  * check
905                                  */
906                                 
907                                 m->m_pkthdr.csum_flags |=
908                                         (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
909                                             | CSUM_PSEUDO_HDR);
910                                 m->m_pkthdr.csum_data = 0xffff;
911                         }
912
913                         np->stats.rx_packets++;
914                         np->stats.rx_bytes += m->m_pkthdr.len;
915
916                         mbufq_tail(&rxq, m);
917                         np->rx.rsp_cons = ++i;
918                 }
919
920                 if (pages_flipped) {
921                         /* Some pages are no longer absent... */
922 #ifdef notyet
923                         balloon_update_driver_allowance(-pages_flipped);
924 #endif
925                         /* Do all the remapping work, and M->P updates, in one big
926                          * hypercall.
927                          */
928                         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
929                                 mcl = np->rx_mcl + pages_flipped;
930                                 mcl->op = __HYPERVISOR_mmu_update;
931                                 mcl->args[0] = (u_long)np->rx_mmu;
932                                 mcl->args[1] = pages_flipped;
933                                 mcl->args[2] = 0;
934                                 mcl->args[3] = DOMID_SELF;
935                                 (void)HYPERVISOR_multicall(np->rx_mcl,
936                                     pages_flipped + 1);
937                         }
938                 }
939         
940                 while ((m = mbufq_dequeue(&errq)))
941                         m_freem(m);
942
943                 /* 
944                  * Process all the mbufs after the remapping is complete.
945                  * Break the mbuf chain first though.
946                  */
947                 while ((m = mbufq_dequeue(&rxq)) != NULL) {
948                         ifp->if_ipackets++;
949                         
950                         /*
951                          * Do we really need to drop the rx lock?
952                          */
953                         XN_RX_UNLOCK(np);
954                         /* Pass it up. */
955                         (*ifp->if_input)(ifp, m);
956                         XN_RX_LOCK(np);
957                 }
958         
959                 np->rx.rsp_cons = i;
960
961 #if 0
962                 /* If we get a callback with very few responses, reduce fill target. */
963                 /* NB. Note exponential increase, linear decrease. */
964                 if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > 
965                         ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target))
966                         np->rx_target = np->rx_min_target;
967 #endif
968         
969                 network_alloc_rx_buffers(np);
970
971                 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do);
972         } while (work_to_do);
973 }
974
975 static void 
976 xn_txeof(struct netfront_info *np)
977 {
978         RING_IDX i, prod;
979         unsigned short id;
980         struct ifnet *ifp;
981         struct mbuf *m;
982         
983         XN_TX_LOCK_ASSERT(np);
984         
985         if (!netfront_carrier_ok(np))
986                 return;
987         
988         ifp = np->xn_ifp;
989         ifp->if_timer = 0;
990         
991         do {
992                 prod = np->tx.sring->rsp_prod;
993                 rmb(); /* Ensure we see responses up to 'rp'. */
994                 
995                 for (i = np->tx.rsp_cons; i != prod; i++) {
996                         id = RING_GET_RESPONSE(&np->tx, i)->id;
997                         m = np->xn_cdata.xn_tx_chain[id]; 
998                         
999                         ifp->if_opackets++;
1000                         KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
1001                         M_ASSERTVALID(m);
1002                         if (unlikely(gnttab_query_foreign_access(
1003                             np->grant_tx_ref[id]) != 0)) {
1004                                 printf("network_tx_buf_gc: warning "
1005                                     "-- grant still in use by backend "
1006                                     "domain.\n");
1007                                 goto out; 
1008                         }
1009                         gnttab_end_foreign_access_ref(
1010                                 np->grant_tx_ref[id]);
1011                         gnttab_release_grant_reference(
1012                                 &np->gref_tx_head, np->grant_tx_ref[id]);
1013                         np->grant_tx_ref[id] = GRANT_INVALID_REF;
1014                         
1015                         np->xn_cdata.xn_tx_chain[id] = NULL;
1016                         add_id_to_freelist(np->xn_cdata.xn_tx_chain, id);
1017                         m_freem(m);
1018                 }
1019                 np->tx.rsp_cons = prod;
1020                 
1021                 /*
1022                  * Set a new event, then check for race with update of
1023                  * tx_cons. Note that it is essential to schedule a
1024                  * callback, no matter how few buffers are pending. Even if
1025                  * there is space in the transmit ring, higher layers may
1026                  * be blocked because too much data is outstanding: in such
1027                  * cases notification from Xen is likely to be the only kick
1028                  * that we'll get.
1029                  */
1030                 np->tx.sring->rsp_event =
1031                     prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
1032
1033                 mb();
1034                 
1035         } while (prod != np->tx.sring->rsp_prod);
1036         
1037  out: 
1038         if (np->tx_full &&
1039             ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
1040                 np->tx_full = 0;
1041 #if 0
1042                 if (np->user_state == UST_OPEN)
1043                         netif_wake_queue(dev);
1044 #endif
1045         }
1046
1047 }
1048
1049 static void
1050 xn_intr(void *xsc)
1051 {
1052         struct netfront_info *np = xsc;
1053         struct ifnet *ifp = np->xn_ifp;
1054
1055 #if 0
1056         if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
1057             likely(netfront_carrier_ok(np)) &&
1058             ifp->if_drv_flags & IFF_DRV_RUNNING))
1059                 return;
1060 #endif
1061         if (np->tx.rsp_cons != np->tx.sring->rsp_prod) {
1062                 XN_TX_LOCK(np);
1063                 xn_txeof(np);
1064                 XN_TX_UNLOCK(np);                       
1065         }       
1066
1067         XN_RX_LOCK(np);
1068         xn_rxeof(np);
1069         XN_RX_UNLOCK(np);
1070
1071         if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1072             !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1073                 xn_start(ifp);
1074 }
1075
1076
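/*
 * Recycle an mbuf and its grant reference back into the rx ring at the next
 * unused request slot; used by the error paths below when a response cannot
 * be consumed.
 */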
1077 static void
1078 xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
1079         grant_ref_t ref)
1080 {
1081         int new = xennet_rxidx(np->rx.req_prod_pvt);
1082
1083         KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL"));
1084         np->rx_mbufs[new] = m;
1085         np->grant_rx_ref[new] = ref;
1086         RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
1087         RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
1088         np->rx.req_prod_pvt++;
1089 }
1090
1091 static int
1092 xennet_get_extras(struct netfront_info *np,
1093     struct netif_extra_info *extras, RING_IDX rp)
1094 {
1095         struct netif_extra_info *extra;
1096         RING_IDX cons = np->rx.rsp_cons;
1097
1098         int err = 0;
1099
1100         do {
1101                 struct mbuf *m;
1102                 grant_ref_t ref;
1103
1104                 if (unlikely(cons + 1 == rp)) {
1105 #if 0                   
1106                         if (net_ratelimit())
1107                                 WPRINTK("Missing extra info\n");
1108 #endif                  
1109                         err = -EINVAL;
1110                         break;
1111                 }
1112
1113                 extra = (struct netif_extra_info *)
1114                 RING_GET_RESPONSE(&np->rx, ++cons);
1115
1116                 if (unlikely(!extra->type ||
1117                         extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1118 #if 0                           
1119                         if (net_ratelimit())
1120                                 WPRINTK("Invalid extra type: %d\n",
1121                                         extra->type);
1122 #endif                  
1123                         err = -EINVAL;
1124                 } else {
1125                         memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
1126                 }
1127
1128                 m = xennet_get_rx_mbuf(np, cons);
1129                 ref = xennet_get_rx_ref(np, cons);
1130                 xennet_move_rx_slot(np, m, ref);
1131         } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
1132
1133         np->rx.rsp_cons = cons;
1134         return err;
1135 }
1136
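/*
 * Collect one packet's worth of responses (the lead slot plus any
 * NETRXF_more_data fragments) into a single mbuf chain.  "frags" counts the
 * ring slots consumed; on error rsp_cons is advanced past them so that the
 * caller can resynchronize.
 */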
1137 static int
1138 xennet_get_responses(struct netfront_info *np,
1139         struct netfront_rx_info *rinfo, RING_IDX rp,
1140         struct mbuf  **list,
1141         int *pages_flipped_p)
1142 {
1143         int pages_flipped = *pages_flipped_p;
1144         struct mmu_update *mmu;
1145         struct multicall_entry *mcl;
1146         struct netif_rx_response *rx = &rinfo->rx;
1147         struct netif_extra_info *extras = rinfo->extras;
1148         RING_IDX cons = np->rx.rsp_cons;
1149         struct mbuf *m, *m0, *m_prev;
1150         grant_ref_t ref = xennet_get_rx_ref(np, cons);
1151         int max = 5 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */;
1152         int frags = 1;
1153         int err = 0;
1154         u_long ret;
1155
1156         m0 = m = m_prev = xennet_get_rx_mbuf(np, cons);
1157
1158         
1159         if (rx->flags & NETRXF_extra_info) {
1160                 err = xennet_get_extras(np, extras, rp);
1161                 cons = np->rx.rsp_cons;
1162         }
1163
1164
1165         if (m0 != NULL) {
1166                 m0->m_pkthdr.len = 0;
1167                 m0->m_next = NULL;
1168         }
1169         
1170         for (;;) {
1171                 u_long mfn;
1172
1173 #if 0           
1174                 printf("rx->status=%hd rx->offset=%hu frags=%u\n",
1175                         rx->status, rx->offset, frags);
1176 #endif
1177                 if (unlikely(rx->status < 0 ||
1178                         rx->offset + rx->status > PAGE_SIZE)) {
1179 #if 0                                           
1180                         if (net_ratelimit())
1181                                 WPRINTK("rx->offset: %x, size: %u\n",
1182                                         rx->offset, rx->status);
1183 #endif                                          
1184                         xennet_move_rx_slot(np, m, ref);
1185                         err = -EINVAL;
1186                         goto next;
1187                 }
1188                 
1189                 /*
1190                  * This definitely indicates a bug, either in this driver or in
1191                  * the backend driver. In the future this should flag the bad
1192                  * situation to the system controller to reboot the backend.
1193                  */
1194                 if (ref == GRANT_INVALID_REF) {
1195 #if 0                           
1196                         if (net_ratelimit())
1197                                 WPRINTK("Bad rx response id %d.\n", rx->id);
1198 #endif                  
1199                         err = -EINVAL;
1200                         goto next;
1201                 }
1202
1203                 if (!np->copying_receiver) {
1204                         /* Memory pressure, insufficient buffer
1205                          * headroom, ...
1206                          */
1207                         if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
1208                                 if (net_ratelimit())
1209                                         WPRINTK("Unfulfilled rx req "
1210                                                 "(id=%d, st=%d).\n",
1211                                                 rx->id, rx->status);
1212                                 xennet_move_rx_slot(np, m, ref);
1213                                 err = -ENOMEM;
1214                                 goto next;
1215                         }
1216
1217                         if (!xen_feature( XENFEAT_auto_translated_physmap)) {
1218                                 /* Remap the page. */
1219                                 void *vaddr = mtod(m, void *);
1220                                 uint32_t pfn;
1221
1222                                 mcl = np->rx_mcl + pages_flipped;
1223                                 mmu = np->rx_mmu + pages_flipped;
1224
1225                                 MULTI_update_va_mapping(mcl, (u_long)vaddr,
1226                                     (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW |
1227                                     PG_V | PG_M | PG_A, 0);
1228                                 pfn = (uint32_t)m->m_ext.ext_arg1;
1229                                 mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
1230                                     MMU_MACHPHYS_UPDATE;
1231                                 mmu->val = pfn;
1232
1233                                 set_phys_to_machine(pfn, mfn);
1234                         }
1235                         pages_flipped++;
1236                 } else {
1237                         ret = gnttab_end_foreign_access_ref(ref);
1238                         KASSERT(ret, ("ret != 0"));
1239                 }
1240
1241                 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1242
1243 next:
1244                 if (m != NULL) {
1245                         m->m_len = rx->status;
1246                         m->m_data += rx->offset;
1247                         m0->m_pkthdr.len += rx->status;
1248                 }
1249                 
1250                 if (!(rx->flags & NETRXF_more_data))
1251                         break;
1252
1253                 if (cons + frags == rp) {
1254                         if (net_ratelimit())
1255                                 WPRINTK("Need more frags\n");
1256                         err = -ENOENT;
1257                         break;
1258                 }
1259                 m_prev = m;
1260                 
1261                 rx = RING_GET_RESPONSE(&np->rx, cons + frags);
1262                 m = xennet_get_rx_mbuf(np, cons + frags);
1263
1264                 m_prev->m_next = m;
1265                 m->m_next = NULL;
1266                 ref = xennet_get_rx_ref(np, cons + frags);
1267                 frags++;
1268         }
1269         *list = m0;
1270
1271         if (unlikely(frags > max)) {
1272                 if (net_ratelimit())
1273                         WPRINTK("Too many frags\n");
1274                 err = -E2BIG;
1275         }
1276
1277         if (unlikely(err))
1278                 np->rx.rsp_cons = cons + frags;
1279
1280         *pages_flipped_p = pages_flipped;
1281
1282         return err;
1283 }
1284
1285 static void
1286 xn_tick_locked(struct netfront_info *sc) 
1287 {
1288         XN_RX_LOCK_ASSERT(sc);
1289         callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
1290
1291         /* XXX placeholder for printing debug information */
1292      
1293 }
1294
1295
1296 static void
1297 xn_tick(void *xsc) 
1298 {
1299         struct netfront_info *sc;
1300     
1301         sc = xsc;
1302         XN_RX_LOCK(sc);
1303         xn_tick_locked(sc);
1304         XN_RX_UNLOCK(sc);
1305      
1306 }
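
/*
 * Transmit path.  Each packet dequeued from the interface send queue is
 * coalesced into a single cluster by makembuf(), granted read-only to the
 * backend and described by a single tx ring request; the original mbuf
 * chain is freed immediately since its contents have been copied.
 */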
1307 static void
1308 xn_start_locked(struct ifnet *ifp) 
1309 {
1310         int otherend_id;
1311         unsigned short id;
1312         struct mbuf *m_head, *new_m;
1313         struct netfront_info *sc;
1314         netif_tx_request_t *tx;
1315         RING_IDX i;
1316         grant_ref_t ref;
1317         u_long mfn, tx_bytes;
1318         int notify;
1319
1320         sc = ifp->if_softc;
1321         otherend_id = xenbus_get_otherend_id(sc->xbdev);
1322         tx_bytes = 0;
1323
1324         if (!netfront_carrier_ok(sc))
1325                 return;
1326         
1327         for (i = sc->tx.req_prod_pvt; TRUE; i++) {
1328                 IF_DEQUEUE(&ifp->if_snd, m_head);
1329                 if (m_head == NULL) 
1330                         break;
1331                 
1332                 if (!netfront_tx_slot_available(sc)) {
1333                         IF_PREPEND(&ifp->if_snd, m_head);
1334                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1335                         break;
1336                 }
1337                 
1338                 id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
1339
1340                 /*
1341                  * Start packing the mbufs in this chain into
1342                  * the fragment pointers. Stop when we run out
1343                  * of fragments or hit the end of the mbuf chain.
1344                  */
1345                 new_m = makembuf(m_head);
1346                 tx = RING_GET_REQUEST(&sc->tx, i);
1347                 tx->id = id;
1348                 ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
1349                 KASSERT((short)ref >= 0, ("Negative ref"));
1350                 mfn = virt_to_mfn(mtod(new_m, vm_offset_t));
1351                 gnttab_grant_foreign_access_ref(ref, otherend_id,
1352                     mfn, GNTMAP_readonly);
1353                 tx->gref = sc->grant_tx_ref[id] = ref;
1354                 tx->size = new_m->m_pkthdr.len;
1355 #if 0
1356                 tx->flags = (skb->ip_summed == CHECKSUM_HW) ? NETTXF_csum_blank : 0;
1357 #endif
1358                 tx->flags = 0;
1359                 new_m->m_next = NULL;
1360                 new_m->m_nextpkt = NULL;
1361
1362                 m_freem(m_head);
1363
1364                 sc->xn_cdata.xn_tx_chain[id] = new_m;
1365                 BPF_MTAP(ifp, new_m);
1366
1367                 sc->stats.tx_bytes += new_m->m_pkthdr.len;
1368                 sc->stats.tx_packets++;
1369         }
1370
1371         sc->tx.req_prod_pvt = i;
1372         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
1373         if (notify)
1374                 notify_remote_via_irq(sc->irq);
1375
1376         xn_txeof(sc);
1377
1378         if (RING_FULL(&sc->tx)) {
1379                 sc->tx_full = 1;
1380 #if 0
1381                 netif_stop_queue(dev);
1382 #endif
1383         }
1384
1385         return;
1386 }    
1387
1388 static void
1389 xn_start(struct ifnet *ifp)
1390 {
1391         struct netfront_info *sc;
1392         sc = ifp->if_softc;
1393         XN_TX_LOCK(sc);
1394         xn_start_locked(ifp);
1395         XN_TX_UNLOCK(sc);
1396 }
1397
1398 /* equivalent of network_open() in Linux */
1399 static void 
1400 xn_ifinit_locked(struct netfront_info *sc) 
1401 {
1402         struct ifnet *ifp;
1403         
1404         XN_LOCK_ASSERT(sc);
1405         
1406         ifp = sc->xn_ifp;
1407         
1408         if (ifp->if_drv_flags & IFF_DRV_RUNNING) 
1409                 return;
1410         
1411         xn_stop(sc);
1412         
1413         network_alloc_rx_buffers(sc);
1414         sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1;
1415         
1416         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1417         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1418         
1419         callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
1420
1421 }
1422
1423
1424 static void 
1425 xn_ifinit(void *xsc)
1426 {
1427         struct netfront_info *sc = xsc;
1428     
1429         XN_LOCK(sc);
1430         xn_ifinit_locked(sc);
1431         XN_UNLOCK(sc);
1432
1433 }
1434
1435
1436 static int
1437 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1438 {
1439         struct netfront_info *sc = ifp->if_softc;
1440         struct ifreq *ifr = (struct ifreq *) data;
1441         struct ifaddr *ifa = (struct ifaddr *)data;
1442
1443         int mask, error = 0;
1444         switch(cmd) {
1445         case SIOCSIFADDR:
1446         case SIOCGIFADDR:
1447                 XN_LOCK(sc);
1448                 if (ifa->ifa_addr->sa_family == AF_INET) {
1449                         ifp->if_flags |= IFF_UP;
1450                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 
1451                                 xn_ifinit_locked(sc);
1452                         arp_ifinit(ifp, ifa);
1453                         XN_UNLOCK(sc);
1454                 } else {
1455                         XN_UNLOCK(sc);
1456                         error = ether_ioctl(ifp, cmd, data);
1457                 }
1458                 break;
1459         case SIOCSIFMTU:
1460                 /* XXX can we alter the MTU on a VN ?*/
1461 #ifdef notyet
1462                 if (ifr->ifr_mtu > XN_JUMBO_MTU)
1463                         error = EINVAL;
1464                 else 
1465 #endif
1466                 {
1467                         ifp->if_mtu = ifr->ifr_mtu;
1468                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1469                         xn_ifinit(sc);
1470                 }
1471                 break;
1472         case SIOCSIFFLAGS:
1473                 XN_LOCK(sc);
1474                 if (ifp->if_flags & IFF_UP) {
1475                         /*
1476                          * If only the state of the PROMISC flag changed,
1477                          * then just use the 'set promisc mode' command
1478                          * instead of reinitializing the entire NIC. Doing
1479                          * a full re-init means reloading the firmware and
1480                          * waiting for it to start up, which may take a
1481                          * second or two.
1482                          */
1483 #ifdef notyet
1484                         /* No promiscuous mode with Xen */
1485                         if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1486                             ifp->if_flags & IFF_PROMISC &&
1487                             !(sc->xn_if_flags & IFF_PROMISC)) {
1488                                 XN_SETBIT(sc, XN_RX_MODE,
1489                                           XN_RXMODE_RX_PROMISC);
1490                         } else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1491                                    !(ifp->if_flags & IFF_PROMISC) &&
1492                                    sc->xn_if_flags & IFF_PROMISC) {
1493                                 XN_CLRBIT(sc, XN_RX_MODE,
1494                                           XN_RXMODE_RX_PROMISC);
1495                         } else
1496 #endif
1497                                 xn_ifinit_locked(sc);
1498                 } else {
1499                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1500                                 xn_stop(sc);
1501                         }
1502                 }
1503                 sc->xn_if_flags = ifp->if_flags;
1504                 XN_UNLOCK(sc);
1505                 error = 0;
1506                 break;
1507         case SIOCSIFCAP:
1508                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1509                 if (mask & IFCAP_HWCSUM) {
1510                         if (IFCAP_HWCSUM & ifp->if_capenable)
1511                                 ifp->if_capenable &= ~IFCAP_HWCSUM;
1512                         else
1513                                 ifp->if_capenable |= IFCAP_HWCSUM;
1514                 }
1515                 error = 0;
1516                 break;
1517         case SIOCADDMULTI:
1518         case SIOCDELMULTI:
1519 #ifdef notyet
1520                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1521                         XN_LOCK(sc);
1522                         xn_setmulti(sc);
1523                         XN_UNLOCK(sc);
1524                         error = 0;
1525                 }
1526 #endif
1527                 /* FALLTHROUGH */
1528         case SIOCSIFMEDIA:
1529         case SIOCGIFMEDIA:
1530                 error = EINVAL;
1531                 break;
1532         default:
1533                 error = ether_ioctl(ifp, cmd, data);
1534         }
1535     
1536         return (error);
1537 }
1538
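/*
 * Bring the interface down: stop the xn_stat_ch callout, release the
 * RX and TX rings via xn_free_rx_ring()/xn_free_tx_ring(), and clear
 * the RUNNING and OACTIVE flags.  The caller must hold the softc lock
 * (asserted below).
 */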
1539 static void
1540 xn_stop(struct netfront_info *sc)
1541 {       
1542         struct ifnet *ifp;
1543
1544         XN_LOCK_ASSERT(sc);
1545     
1546         ifp = sc->xn_ifp;
1547
1548         callout_stop(&sc->xn_stat_ch);
1549
1550         xn_free_rx_ring(sc);
1551         xn_free_tx_ring(sc);
1552     
1553         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1554 }
1555
1556 /* START of Xenolinux helper functions adapted to FreeBSD */
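/*
 * (Re)establish the connection to the backend: negotiate the RX
 * copy/flip mode, re-run the backend handshake via talk_to_backend(),
 * requeue any surviving receive buffers, and kick the backend.
 * Presumably invoked from the xenbus backend-state handling (see
 * netfront_backend_changed in the method table at the end of this
 * file) once the backend is ready.
 */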
1557 int
1558 network_connect(struct netfront_info *np)
1559 {
1560         int i, requeue_idx, err;
1561         grant_ref_t ref;
1562         netif_rx_request_t *req;
1563         u_int feature_rx_copy, feature_rx_flip;
1564
1565         err = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
1566                            "feature-rx-copy", "%u", &feature_rx_copy);
1567         if (err != 1)
1568                 feature_rx_copy = 0;
1569         err = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
1570                            "feature-rx-flip", "%u", &feature_rx_flip);
1571         if (err != 1)
1572                 feature_rx_flip = 1;
1573
1574         /*
1575          * Copy packets on receive path if:
1576          *  (a) This was requested by user, and the backend supports it; or
1577          *  (b) Flipping was requested, but this is unsupported by the backend.
1578          */
1579         np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
1580                                 (MODPARM_rx_flip && !feature_rx_flip));
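        /*
         * "Copy" means the backend grant-copies packet data into our
         * buffers; "flip" means whole pages are transferred between
         * domains with grant transfers.  The requeue loop below sets up
         * the grants accordingly.
         */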
1581
1582         /* Recovery procedure: */
1583         err = talk_to_backend(np->xbdev, np);
1584         if (err)
1585                 return (err);
1586         XN_LOCK(np);
1587         
1588         /* Step 1: Reinitialise variables. */
1589         netif_release_tx_bufs(np);
1590
1591         /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
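        /*
         * Slots whose mbuf has already been consumed are skipped, so
         * requeue_idx compacts the surviving buffers to the front of the
         * ring before their grants are re-established.
         */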
1592         for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1593                 struct mbuf *m;
1594
1595                 if (np->rx_mbufs[i] == NULL)
1596                         continue;
1597
1598                 m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i);
1599                 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1600                 req = RING_GET_REQUEST(&np->rx, requeue_idx);
1601
1602                 if (!np->copying_receiver) {
1603                         gnttab_grant_foreign_transfer_ref(ref,
1604                             xenbus_get_otherend_id(np->xbdev),
1605                             vtophys(mtod(m, vm_offset_t)));
1606                 } else {
1607                         gnttab_grant_foreign_access_ref(ref,
1608                             xenbus_get_otherend_id(np->xbdev),
1609                             vtophys(mtod(m, vm_offset_t)), 0);
1610                 }
1611                 req->gref = ref;
1612                 req->id   = requeue_idx;
1613
1614                 requeue_idx++;
1615         }
1616
1617         np->rx.req_prod_pvt = requeue_idx;
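        /*
         * req_prod_pvt is the ring's private producer index; the new
         * requests become visible to the backend once they are pushed
         * (presumably in network_alloc_rx_buffers() below).
         */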
1618         
1619         /* Step 3: All public and private state should now be sane.  Get
1620          * ready to start sending and receiving packets and give the driver
1621          * domain a kick because we've probably just requeued some
1622          * packets.
1623          */
1624         netfront_carrier_on(np);
1625         notify_remote_via_irq(np->irq);
1626         XN_TX_LOCK(np);
1627         xn_txeof(np);
1628         XN_TX_UNLOCK(np);
1629         network_alloc_rx_buffers(np);
1630         XN_UNLOCK(np);
1631
1632         return (0);
1633 }
1634
1635 static void 
1636 show_device(struct netfront_info *sc)
1637 {
1638 #ifdef DEBUG
1639         if (sc) {
1640                 IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
1641                         sc->xn_ifno,
1642                         be_state_name[sc->xn_backend_state],
1643                         sc->xn_user_state ? "open" : "closed",
1644                         sc->xn_evtchn,
1645                         sc->xn_irq,
1646                         sc->xn_tx_if,
1647                         sc->xn_rx_if);
1648         } else {
1649                 IPRINTK("<vif NULL>\n");
1650         }
1651 #endif
1652 }
1653
1654 /** Create a network device.
1655  * @param dev the newbus device for the new network interface
1656  */
1657 int 
1658 create_netdev(device_t dev)
1659 {
1660         int i;
1661         struct netfront_info *np;
1662         int err;
1663         struct ifnet *ifp;
1664
1665         np = device_get_softc(dev);
1666         
1667         np->xbdev         = dev;
1668     
1669         XN_LOCK_INIT(np, xennetif);
1670         np->rx_target     = RX_MIN_TARGET;
1671         np->rx_min_target = RX_MIN_TARGET;
1672         np->rx_max_target = RX_MAX_TARGET;
1673         
1674         /*
              * Initialise tx_mbufs as a free chain containing every entry:
              * each slot stores the index of the next free slot.  The
              * rx_mbufs slots simply start out empty.
              */
1675         for (i = 0; i <= NET_TX_RING_SIZE; i++) {
1676                 np->tx_mbufs[i] = (void *) ((u_long) i+1);
1677                 np->grant_tx_ref[i] = GRANT_INVALID_REF;        
1678         }
1679         for (i = 0; i <= NET_RX_RING_SIZE; i++) {
1680                 np->rx_mbufs[i] = NULL;
1681                 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1682         }
1683         /* A grant for every tx ring slot */
1684         if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1685                                           &np->gref_tx_head) < 0) {
1686                 printf("#### netfront can't alloc tx grant refs\n");
1687                 err = ENOMEM;
1688                 goto exit;
1689         }
1690         /* A grant for every rx ring slot */
1691         if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1692                                           &np->gref_rx_head) < 0) {
1693                 printf("#### netfront can't alloc rx grant refs\n");
1694                 gnttab_free_grant_references(np->gref_tx_head);
1695                 err = ENOMEM;
1696                 goto out;       /* the tx grant refs were already freed above */
1697         }
1698         
1699         err = xen_net_read_mac(dev, np->mac);
1700         if (err) {
1701                 xenbus_dev_fatal(dev, err, "parsing %s/mac",
1702                     xenbus_get_node(dev));
1703                 goto out;
1704         }
1705         
1706         /* Set up ifnet structure */
1707         ifp = np->xn_ifp = if_alloc(IFT_ETHER);
1708         ifp->if_softc = np;
1709         if_initname(ifp, "xn",  device_get_unit(dev));
1710         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
1711         ifp->if_ioctl = xn_ioctl;
1712         ifp->if_output = ether_output;
1713         ifp->if_start = xn_start;
1714 #ifdef notyet
1715         ifp->if_watchdog = xn_watchdog;
1716 #endif
1717         ifp->if_init = xn_ifinit;
1718         ifp->if_mtu = ETHERMTU;
1719         ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
1720         
1721 #ifdef notyet
1722         ifp->if_hwassist = XN_CSUM_FEATURES;
1723         ifp->if_capabilities = IFCAP_HWCSUM;
1724         ifp->if_capenable = ifp->if_capabilities;
1725 #endif    
1726         
1727         ether_ifattach(ifp, np->mac);
1728         callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
1729         netfront_carrier_off(np);
1730
1731         return (0);
1732
1733 exit:
1734         gnttab_free_grant_references(np->gref_tx_head);
1735 out:
1736         panic("do something smart");
1737
1738 }
1739
1740 /**
1741  * Handle the change of state of the backend to Closing.  We must delete our
1742  * device-layer structures now, to ensure that writes are flushed through to
1743  * the backend.  Once this is done, we can switch to Closed in
1744  * acknowledgement.
1745  */
1746 #if 0
1747 static void netfront_closing(device_t dev)
1748 {
1749 #if 0
1750         struct netfront_info *info = dev->dev_driver_data;
1751
1752         DPRINTK("netfront_closing: %s removed\n", dev->nodename);
1753
1754         close_netdev(info);
1755 #endif
1756         xenbus_switch_state(dev, XenbusStateClosed);
1757 }
1758 #endif
1759
1760 static int netfront_detach(device_t dev)
1761 {
1762         struct netfront_info *info = device_get_softc(dev);
1763
1764         DPRINTK("%s\n", xenbus_get_node(dev));
1765
1766         netif_free(info);
1767
1768         return (0);
1769 }
1770
1771
1772 static void netif_free(struct netfront_info *info)
1773 {
1774         netif_disconnect_backend(info);
1775 #if 0
1776         close_netdev(info);
1777 #endif
1778 }
1779
1780
1781
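/*
 * Detach from the backend: stop the interface and revoke the grant
 * references for the shared TX and RX rings.  Tearing down the
 * interrupt binding is still unimplemented here (the #else branch
 * panics).
 */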
1782 static void netif_disconnect_backend(struct netfront_info *info)
1783 {
1784         xn_stop(info);
1785         end_access(info->tx_ring_ref, info->tx.sring);
1786         end_access(info->rx_ring_ref, info->rx.sring);
1787         info->tx_ring_ref = GRANT_INVALID_REF;
1788         info->rx_ring_ref = GRANT_INVALID_REF;
1789         info->tx.sring = NULL;
1790         info->rx.sring = NULL;
1791
1792 #if 0
1793         if (info->irq)
1794                 unbind_from_irqhandler(info->irq, info->netdev);
1795 #else 
1796         panic("FIX ME");
1797 #endif
1798         info->irq = 0;
1799 }
1800
1801
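/*
 * Revoke a grant reference unless it is the GRANT_INVALID_REF sentinel
 * used for slots that were never granted.
 */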
1802 static void end_access(int ref, void *page)
1803 {
1804         if (ref != GRANT_INVALID_REF)
1805                 gnttab_end_foreign_access(ref, page);
1806 }
1807
1808 /* ** Driver registration ** */
1809 static device_method_t netfront_methods[] = { 
1810         /* Device interface */ 
1811         DEVMETHOD(device_probe,         netfront_probe), 
1812         DEVMETHOD(device_attach,        netfront_attach), 
1813         DEVMETHOD(device_detach,        netfront_detach), 
1814         DEVMETHOD(device_shutdown,      bus_generic_shutdown), 
1815         DEVMETHOD(device_suspend,       bus_generic_suspend), 
1816         DEVMETHOD(device_resume,        netfront_resume), 
1817  
1818         /* Xenbus interface */
1819         DEVMETHOD(xenbus_backend_changed, netfront_backend_changed),
1820
1821         { 0, 0 } 
1822 }; 
1823
1824 static driver_t netfront_driver = { 
1825         "xn", 
1826         netfront_methods, 
1827         sizeof(struct netfront_info),                      
1828 }; 
1829 devclass_t netfront_devclass; 
1830  
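/*
 * Register the driver with the xenbus bus so that netfront_probe() and
 * netfront_attach() are invoked for each virtual-interface (vif) node
 * that the xenbus code enumerates.
 */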
1831 DRIVER_MODULE(xe, xenbus, netfront_driver, netfront_devclass, 0, 0);