/*
 *
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#if __FreeBSD_version >= 700000
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#endif

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/clock.h>      /* for DELAY */
#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/frame.h>
#include <machine/vmparam.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <machine/intr_machdep.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <xen/interface/io/netif.h>
#include <xen/xenbus/xenbusvar.h>

#include <dev/xen/netfront/mbufq.h>

#include "xenbus_if.h"

#define XN_CSUM_FEATURES        (CSUM_TCP | CSUM_UDP | CSUM_TSO)

#define GRANT_INVALID_REF       0

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

#if __FreeBSD_version >= 700000
/*
 * Should the driver do LRO on the RX end?  This can be toggled on the
 * fly, but the interface must be reset (down/up) for the change to
 * take effect.
 */
static int xn_enable_lro = 1;
TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);
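
/*
 * Usage (illustrative, not from the original source): setting
 * hw.xn.enable_lro=0 in /boot/loader.conf disables LRO at boot; the
 * per-device "enable_lro" sysctl added in netfront_attach() toggles it
 * at runtime, after which the interface must be cycled down/up.
 */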
#else

#define IFCAP_TSO4      0
#define CSUM_TSO        0

#endif

#ifdef CONFIG_XEN
static int MODPARM_rx_copy = 0;
module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
static int MODPARM_rx_flip = 0;
module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
#else
static const int MODPARM_rx_copy = 1;
static const int MODPARM_rx_flip = 0;
#endif

#define MAX_SKB_FRAGS   (65536/PAGE_SIZE + 2)
#define RX_COPY_THRESHOLD 256

#define net_ratelimit() 0

struct netfront_info;
struct netfront_rx_info;

static void xn_txeof(struct netfront_info *);
static void xn_rxeof(struct netfront_info *);
static void network_alloc_rx_buffers(struct netfront_info *);

static void xn_tick_locked(struct netfront_info *);
static void xn_tick(void *);

static void xn_intr(void *);
static void xn_start_locked(struct ifnet *);
static void xn_start(struct ifnet *);
static int  xn_ioctl(struct ifnet *, u_long, caddr_t);
static void xn_ifinit_locked(struct netfront_info *);
static void xn_ifinit(void *);
static void xn_stop(struct netfront_info *);
#ifdef notyet
static void xn_watchdog(struct ifnet *);
#endif

static void show_device(struct netfront_info *sc);
#ifdef notyet
static void netfront_closing(device_t dev);
#endif
static void netif_free(struct netfront_info *info);
static int netfront_detach(device_t dev);

static int talk_to_backend(device_t dev, struct netfront_info *info);
static int create_netdev(device_t dev);
static void netif_disconnect_backend(struct netfront_info *info);
static int setup_device(device_t dev, struct netfront_info *info);
static void end_access(int ref, void *page);

/* Xenolinux helper functions */
int network_connect(struct netfront_info *);

static void xn_free_rx_ring(struct netfront_info *);

static void xn_free_tx_ring(struct netfront_info *);

static int xennet_get_responses(struct netfront_info *np,
        struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf **list,
        int *pages_flipped_p);

#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)

#define INVALID_P2M_ENTRY (~0UL)

/*
 * Mbuf pointers. We need these to keep track of the virtual addresses
 * of our mbuf chains since we can only convert from virtual to physical,
 * not the other way around.  The size must track the free index arrays.
 */
struct xn_chain_data {
        struct mbuf             *xn_tx_chain[NET_TX_RING_SIZE+1];
        int                     xn_tx_chain_cnt;
        struct mbuf             *xn_rx_chain[NET_RX_RING_SIZE+1];
};


struct net_device_stats
{
        u_long  rx_packets;             /* total packets received       */
        u_long  tx_packets;             /* total packets transmitted    */
        u_long  rx_bytes;               /* total bytes received         */
        u_long  tx_bytes;               /* total bytes transmitted      */
        u_long  rx_errors;              /* bad packets received         */
        u_long  tx_errors;              /* packet transmit problems     */
        u_long  rx_dropped;             /* no space in linux buffers    */
        u_long  tx_dropped;             /* no space available in linux  */
        u_long  multicast;              /* multicast packets received   */
        u_long  collisions;

        /* detailed rx_errors: */
        u_long  rx_length_errors;
        u_long  rx_over_errors;         /* receiver ring buff overflow  */
        u_long  rx_crc_errors;          /* recved pkt with crc error    */
        u_long  rx_frame_errors;        /* recv'd frame alignment error */
        u_long  rx_fifo_errors;         /* recv'r fifo overrun          */
        u_long  rx_missed_errors;       /* receiver missed packet       */

        /* detailed tx_errors */
        u_long  tx_aborted_errors;
        u_long  tx_carrier_errors;
        u_long  tx_fifo_errors;
        u_long  tx_heartbeat_errors;
        u_long  tx_window_errors;

        /* for cslip etc */
        u_long  rx_compressed;
        u_long  tx_compressed;
};

struct netfront_info {

        struct ifnet *xn_ifp;
#if __FreeBSD_version >= 700000
        struct lro_ctrl xn_lro;
#endif

        struct net_device_stats stats;
        u_int tx_full;

        netif_tx_front_ring_t tx;
        netif_rx_front_ring_t rx;

        struct mtx   tx_lock;
        struct mtx   rx_lock;
        struct sx    sc_lock;

        u_int handle;
        u_int irq;
        u_int copying_receiver;
        u_int carrier;

        /* Receive-ring batched refills. */
#define RX_MIN_TARGET 32
#define RX_MAX_TARGET NET_RX_RING_SIZE
        int rx_min_target, rx_max_target, rx_target;

        /*
         * {tx,rx}_mbufs store outstanding mbufs.  The first entry in each
         * array is an index into a chain of free entries.
         */

        grant_ref_t gref_tx_head;
        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
        grant_ref_t gref_rx_head;
        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE + 1];

#define TX_MAX_TARGET min(NET_RX_RING_SIZE, 256)
        device_t xbdev;
        int tx_ring_ref;
        int rx_ring_ref;
        uint8_t mac[ETHER_ADDR_LEN];
        struct xn_chain_data    xn_cdata;       /* mbufs */
        struct mbuf_head xn_rx_batch;   /* head of the batch queue */

        int                     xn_if_flags;
        struct callout          xn_stat_ch;

        u_long rx_pfn_array[NET_RX_RING_SIZE];
        multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
        mmu_update_t rx_mmu[NET_RX_RING_SIZE];
};

#define rx_mbufs xn_cdata.xn_rx_chain
#define tx_mbufs xn_cdata.xn_tx_chain

#define XN_LOCK_INIT(_sc, _name) \
        mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \
        mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF);  \
        sx_init(&(_sc)->sc_lock, #_name"_sc")

#define XN_RX_LOCK(_sc)           mtx_lock(&(_sc)->rx_lock)
#define XN_RX_UNLOCK(_sc)         mtx_unlock(&(_sc)->rx_lock)

#define XN_TX_LOCK(_sc)           mtx_lock(&(_sc)->tx_lock)
#define XN_TX_UNLOCK(_sc)         mtx_unlock(&(_sc)->tx_lock)

#define XN_LOCK(_sc)           sx_xlock(&(_sc)->sc_lock);
#define XN_UNLOCK(_sc)         sx_xunlock(&(_sc)->sc_lock);

#define XN_LOCK_ASSERT(_sc)    sx_assert(&(_sc)->sc_lock, SX_LOCKED);
#define XN_RX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->rx_lock, MA_OWNED);
#define XN_TX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->tx_lock, MA_OWNED);
#define XN_LOCK_DESTROY(_sc)   mtx_destroy(&(_sc)->rx_lock); \
                               mtx_destroy(&(_sc)->tx_lock); \
                               sx_destroy(&(_sc)->sc_lock);

struct netfront_rx_info {
        struct netif_rx_response rx;
        struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

#define netfront_carrier_on(netif)      ((netif)->carrier = 1)
#define netfront_carrier_off(netif)     ((netif)->carrier = 0)
#define netfront_carrier_ok(netif)      ((netif)->carrier)

/*
 * Helpers for acquiring and freeing slots in the tx/rx mbuf chains;
 * entry 0 of each chain is the head of a freelist of indices.
 */

static inline void
add_id_to_freelist(struct mbuf **list, unsigned short id)
{
        KASSERT(id != 0,
            ("add_id_to_freelist: the head item (0) must always be free."));
        list[id] = list[0];
        list[0]  = (void *)(u_long)id;
}

static inline unsigned short
get_id_from_freelist(struct mbuf **list)
{
        u_int id = (u_int)(u_long)list[0];
        KASSERT(id != 0,
            ("get_id_from_freelist: the head item (0) must always remain free."));
        list[0] = list[id];
        return (id);
}
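
/*
 * A minimal usage sketch of the freelist above (illustration only, not
 * part of the original driver): slot 0 is never handed out, it stores
 * the index of the next free slot encoded as a pointer.
 */
#if 0
static void
freelist_example(struct mbuf **chain)  /* NET_TX_RING_SIZE + 1 entries */
{
        unsigned short i, id;

        chain[0] = NULL;                        /* empty freelist */

        /* Thread every usable slot onto the freelist rooted at chain[0]. */
        for (i = 1; i <= NET_TX_RING_SIZE; i++)
                add_id_to_freelist(chain, i);

        id = get_id_from_freelist(chain);       /* claim a slot ...     */
        chain[id] = NULL;                       /* ... use it ...       */
        add_id_to_freelist(chain, id);          /* ... and return it.   */
}
#endif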

static inline int
xennet_rxidx(RING_IDX idx)
{
        return idx & (NET_RX_RING_SIZE - 1);
}

static inline struct mbuf *
xennet_get_rx_mbuf(struct netfront_info *np,
                   RING_IDX ri)
{
        int i = xennet_rxidx(ri);
        struct mbuf *m;

        m = np->rx_mbufs[i];
        np->rx_mbufs[i] = NULL;
        return (m);
}

static inline grant_ref_t
xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
{
        int i = xennet_rxidx(ri);
        grant_ref_t ref = np->grant_rx_ref[i];
        np->grant_rx_ref[i] = GRANT_INVALID_REF;
        return ref;
}

#define IPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#define WPRINTK(fmt, args...) \
    printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) \
    printf("[XEN] %s: " fmt, __func__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

/**
 * Read the 'mac' node at the given device's node in the store, and parse
 * that as colon-separated octets, placing the result in the given mac
 * array.  mac must be a preallocated array of length ETHER_ADDR_LEN.
 * Return 0 on success, or errno on error.
 */
static int
xen_net_read_mac(device_t dev, uint8_t mac[])
{
        int error, i;
        char *s, *e, *macstr;

        error = xenbus_read(XBT_NIL, xenbus_get_node(dev), "mac", NULL,
            (void **) &macstr);
        if (error)
                return (error);

        s = macstr;
        for (i = 0; i < ETHER_ADDR_LEN; i++) {
                mac[i] = strtoul(s, &e, 16);
                if (s == e || (e[0] != ':' && e[0] != 0)) {
                        free(macstr, M_DEVBUF);
                        return (ENOENT);
                }
                s = &e[1];
        }
        free(macstr, M_DEVBUF);
        return (0);
}
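
/*
 * For illustration: the xenstore node parsed above typically looks like
 * "mac" = "00:16:3e:xx:xx:xx" (00:16:3e is the Xen OUI; the remaining
 * octets vary per guest), and each colon-separated field is consumed by
 * one strtoul() pass.
 */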

/**
 * Entry point to this code when a new device is created.  Identify
 * devices of type "vif"; allocation of the basic structures and ring
 * buffers, and the switch to the Connected state, happen later via
 * netfront_attach() and talk_to_backend().
 */
static int
netfront_probe(device_t dev)
{

        if (!strcmp(xenbus_get_type(dev), "vif")) {
                device_set_desc(dev, "Virtual Network Interface");
                return (0);
        }

        return (ENXIO);
}

static int
netfront_attach(device_t dev)
{
        int err;

        err = create_netdev(dev);
        if (err) {
                xenbus_dev_fatal(dev, err, "creating netdev");
                return (err);
        }

#if __FreeBSD_version >= 700000
        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
            &xn_enable_lro, 0, "Large Receive Offload");
#endif

        return (0);
}


/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int
netfront_resume(device_t dev)
{
        struct netfront_info *info = device_get_softc(dev);

        netif_disconnect_backend(info);
        return (0);
}


/* Common code used when first setting up, and when resuming. */
static int
talk_to_backend(device_t dev, struct netfront_info *info)
{
        const char *message;
        struct xenbus_transaction xbt;
        const char *node = xenbus_get_node(dev);
        int err;

        err = xen_net_read_mac(dev, info->mac);
        if (err) {
                xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
                goto out;
        }

        /* Create shared ring, alloc event channel. */
        err = setup_device(dev, info);
        if (err)
                goto out;

 again:
        err = xenbus_transaction_start(&xbt);
        if (err) {
                xenbus_dev_fatal(dev, err, "starting transaction");
                goto destroy_ring;
        }
        err = xenbus_printf(xbt, node, "tx-ring-ref", "%u",
                            info->tx_ring_ref);
        if (err) {
                message = "writing tx ring-ref";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, node, "rx-ring-ref", "%u",
                            info->rx_ring_ref);
        if (err) {
                message = "writing rx ring-ref";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, node,
                "event-channel", "%u", irq_to_evtchn_port(info->irq));
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, node, "request-rx-copy", "%u",
                            info->copying_receiver);
        if (err) {
                message = "writing request-rx-copy";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, node, "feature-rx-notify", "%d", 1);
        if (err) {
                message = "writing feature-rx-notify";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, node, "feature-sg", "%d", 1);
        if (err) {
                message = "writing feature-sg";
                goto abort_transaction;
        }
#if __FreeBSD_version >= 700000
        err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1);
        if (err) {
                message = "writing feature-gso-tcpv4";
                goto abort_transaction;
        }
#endif

        err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == EAGAIN)
                        goto again;
                xenbus_dev_fatal(dev, err, "completing transaction");
                goto destroy_ring;
        }

        return (0);

 abort_transaction:
        xenbus_transaction_end(xbt, 1);
        xenbus_dev_fatal(dev, err, "%s", message);
 destroy_ring:
        netif_free(info);
 out:
        return (err);
}
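
/*
 * After a successful transaction the frontend's xenstore directory
 * contains entries like the following (values illustrative):
 *
 *      tx-ring-ref = "8"               rx-ring-ref = "9"
 *      event-channel = "15"            request-rx-copy = "1"
 *      feature-rx-notify = "1"         feature-sg = "1"
 *      feature-gso-tcpv4 = "1"
 */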


static int
setup_device(device_t dev, struct netfront_info *info)
{
        netif_tx_sring_t *txs;
        netif_rx_sring_t *rxs;
        int error;
        struct ifnet *ifp;

        ifp = info->xn_ifp;

        info->tx_ring_ref = GRANT_INVALID_REF;
        info->rx_ring_ref = GRANT_INVALID_REF;
        info->rx.sring = NULL;
        info->tx.sring = NULL;
        info->irq = 0;

        txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
        if (!txs) {
                error = ENOMEM;
                xenbus_dev_fatal(dev, error, "allocating tx ring page");
                goto fail;
        }
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
        error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref);
        if (error)
                goto fail;

        rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
        if (!rxs) {
                error = ENOMEM;
                xenbus_dev_fatal(dev, error, "allocating rx ring page");
                goto fail;
        }
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

        error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref);
        if (error)
                goto fail;

        error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
            "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);

        if (error) {
                xenbus_dev_fatal(dev, error,
                                 "bind_listening_port_to_irqhandler failed");
                goto fail;
        }

        show_device(info);

        return (0);

 fail:
        netif_free(info);
        return (error);
}
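
/*
 * Shared-ring layout created above (illustrative summary): each ring is
 * a single page holding a netif_{tx,rx}_sring_t header followed by a
 * power-of-two array of request/response slots; FRONT_RING_INIT() just
 * points the private netif_*_front_ring_t bookkeeping at that page, and
 * xenbus_grant_ring() grants the backend access to it.
 */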

/**
 * If this interface has an ipv4 address, send an arp for it.  This
 * helps to get the network going again after migrating hosts.
 */
static void
netfront_send_fake_arp(device_t dev, struct netfront_info *info)
{
        struct ifnet *ifp;
        struct ifaddr *ifa;

        ifp = info->xn_ifp;
        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        arp_ifinit(ifp, ifa);
                }
        }
}

/**
 * Callback received when the backend's state changes.
 */
static int
netfront_backend_changed(device_t dev, XenbusState newstate)
{
        struct netfront_info *sc = device_get_softc(dev);

        DPRINTK("newstate=%d\n", newstate);

        switch (newstate) {
        case XenbusStateInitialising:
        case XenbusStateInitialised:
        case XenbusStateConnected:
        case XenbusStateUnknown:
        case XenbusStateClosed:
        case XenbusStateReconfigured:
        case XenbusStateReconfiguring:
                break;
        case XenbusStateInitWait:
                if (xenbus_get_state(dev) != XenbusStateInitialising)
                        break;
                if (network_connect(sc) != 0)
                        break;
                xenbus_set_state(dev, XenbusStateConnected);
                netfront_send_fake_arp(dev, sc);
                break;
        case XenbusStateClosing:
                xenbus_set_state(dev, XenbusStateClosed);
                break;
        }
        return (0);
}
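
/*
 * Typical handshake, as implemented above (sequence illustrative): the
 * frontend starts in XenbusStateInitialising; when the backend reaches
 * XenbusStateInitWait the frontend connects the rings via
 * network_connect(), moves itself to XenbusStateConnected, and sends a
 * gratuitous ARP so peers relearn the interface after a migration.
 */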

static void
xn_free_rx_ring(struct netfront_info *sc)
{
#if 0
        int i;

        for (i = 0; i < NET_RX_RING_SIZE; i++) {
                if (sc->xn_cdata.xn_rx_chain[i] != NULL) {
                        m_freem(sc->xn_cdata.xn_rx_chain[i]);
                        sc->xn_cdata.xn_rx_chain[i] = NULL;
                }
        }

        sc->rx.rsp_cons = 0;
        sc->xn_rx_if->req_prod = 0;
        sc->xn_rx_if->event = sc->rx.rsp_cons;
#endif
}

static void
xn_free_tx_ring(struct netfront_info *sc)
{
#if 0
        int i;

        for (i = 0; i < NET_TX_RING_SIZE; i++) {
                if (sc->xn_cdata.xn_tx_chain[i] != NULL) {
                        m_freem(sc->xn_cdata.xn_tx_chain[i]);
                        sc->xn_cdata.xn_tx_chain[i] = NULL;
                }
        }

        return;
#endif
}

/*
 * Do some brief math on the number of descriptors available to
 * determine how many slots are available.
 *
 * Firstly - wouldn't something with RING_FREE_REQUESTS() be more applicable?
 * Secondly - MAX_SKB_FRAGS is a Linux construct which may not apply here.
 * Thirdly - it isn't used here anyway; the magic constant '24' is possibly
 *   wrong?
 * The "2" is presumably to ensure there are also enough slots available for
 * the ring entries used for "options" (eg, the TSO entry before a packet
 * is queued); I'm not sure why it's 2 and not 1.  Perhaps to make sure there's
 * a "free" node in the tx mbuf list (node 0) to represent the freelist?
 *
 * This only figures out whether any xenbus ring descriptors are available;
 * it doesn't at all reflect how many tx mbuf ring descriptors are also
 * available.
 */
static inline int
netfront_tx_slot_available(struct netfront_info *np)
{
        return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
                (TX_MAX_TARGET - /* MAX_SKB_FRAGS */ 24 - 2));
}
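
/*
 * A sketch of the RING_FREE_REQUESTS() variant suggested in the comment
 * above (untested illustration, not part of the original driver):
 * reserve room for a maximal chain of fragments plus one extra ring
 * entry for TSO metadata.
 */
#if 0
static inline int
netfront_tx_slot_available(struct netfront_info *np)
{
        return (RING_FREE_REQUESTS(&np->tx) > (MAX_SKB_FRAGS + 1));
}
#endif
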
static void
netif_release_tx_bufs(struct netfront_info *np)
{
        struct mbuf *m;
        int i;

        for (i = 1; i <= NET_TX_RING_SIZE; i++) {
                m = np->xn_cdata.xn_tx_chain[i];

                if (((u_long)m) < KERNBASE)
                        continue;
                gnttab_grant_foreign_access_ref(np->grant_tx_ref[i],
                    xenbus_get_otherend_id(np->xbdev),
                    virt_to_mfn(mtod(m, vm_offset_t)),
                    GNTMAP_readonly);
                gnttab_release_grant_reference(&np->gref_tx_head,
                    np->grant_tx_ref[i]);
                np->grant_tx_ref[i] = GRANT_INVALID_REF;
                add_id_to_freelist(np->tx_mbufs, i);
                np->xn_cdata.xn_tx_chain_cnt--;
                if (np->xn_cdata.xn_tx_chain_cnt < 0) {
                        panic("netif_release_tx_bufs: tx_chain_cnt must be >= 0");
                }
                m_freem(m);
        }
}

static void
network_alloc_rx_buffers(struct netfront_info *sc)
{
        int otherend_id = xenbus_get_otherend_id(sc->xbdev);
        unsigned short id;
        struct mbuf *m_new;
        int i, batch_target, notify;
        RING_IDX req_prod;
        struct xen_memory_reservation reservation;
        grant_ref_t ref;
        int nr_flips;
        netif_rx_request_t *req;
        vm_offset_t vaddr;
        u_long pfn;

        req_prod = sc->rx.req_prod_pvt;

        if (unlikely(sc->carrier == 0))
                return;

        /*
         * Allocate mbufs greedily, even though we batch updates to the
         * receive ring.  This creates a less bursty demand on the memory
         * allocator, so should reduce the chance of failed allocation
         * requests both for ourselves and for other kernel subsystems.
         */
        batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
        for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
                MGETHDR(m_new, M_DONTWAIT, MT_DATA);
                if (m_new == NULL)
                        goto no_mbuf;

                m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE);
                if ((m_new->m_flags & M_EXT) == 0) {
                        m_freem(m_new);

no_mbuf:
                        if (i != 0)
                                goto refill;
                        /*
                         * XXX set timer
                         */
                        break;
                }
                m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;

                /* queue the mbufs allocated */
                mbufq_tail(&sc->xn_rx_batch, m_new);
        }

        /* Is the batch large enough to be worthwhile? */
        if (i < (sc->rx_target/2)) {
                if (req_prod > sc->rx.sring->req_prod)
                        goto push;
                return;
        }
        /* Adjust floating fill target if we risked running out of buffers. */
        if (((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) &&
            ((sc->rx_target *= 2) > sc->rx_max_target))
                sc->rx_target = sc->rx_max_target;

refill:
        for (nr_flips = i = 0; ; i++) {
                if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
                        break;

                m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
                                vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);

                id = xennet_rxidx(req_prod + i);

                KASSERT(sc->xn_cdata.xn_rx_chain[id] == NULL,
                    ("non-NULL xn_rx_chain"));
                sc->xn_cdata.xn_rx_chain[id] = m_new;

                ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
                KASSERT((short)ref >= 0, ("negative ref"));
                sc->grant_rx_ref[id] = ref;

                vaddr = mtod(m_new, vm_offset_t);
                pfn = vtophys(vaddr) >> PAGE_SHIFT;
                req = RING_GET_REQUEST(&sc->rx, req_prod + i);

                if (sc->copying_receiver == 0) {
                        gnttab_grant_foreign_transfer_ref(ref,
                            otherend_id, pfn);
                        sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
                        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                                /* Remove this page before passing
                                 * back to Xen.
                                 */
                                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
                                MULTI_update_va_mapping(&sc->rx_mcl[i],
                                    vaddr, 0, 0);
                        }
                        nr_flips++;
                } else {
                        gnttab_grant_foreign_access_ref(ref,
                            otherend_id,
                            PFNTOMFN(pfn), 0);
                }
                req->id = id;
                req->gref = ref;

                sc->rx_pfn_array[i] =
                    vtomach(mtod(m_new, vm_offset_t)) >> PAGE_SHIFT;
        }

        KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
        KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
        /*
         * We may have allocated buffers which have entries outstanding
         * in the page update queue -- make sure we flush those first!
         */
        PT_UPDATES_FLUSH();
        if (nr_flips != 0) {
#ifdef notyet
                /* Tell the balloon driver what is going on. */
                balloon_update_driver_allowance(i);
#endif
                set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array);
                reservation.nr_extents   = i;
                reservation.extent_order = 0;
                reservation.address_bits = 0;
                reservation.domid        = DOMID_SELF;

                if (!xen_feature(XENFEAT_auto_translated_physmap)) {

                        /* After all PTEs have been zapped, flush the TLB. */
                        sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
                            UVMF_TLB_FLUSH|UVMF_ALL;

                        /* Give away a batch of pages. */
                        sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
                        sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
                        sc->rx_mcl[i].args[1] = (u_long)&reservation;
                        /* Zap PTEs and give away pages in one big multicall. */
                        (void)HYPERVISOR_multicall(sc->rx_mcl, i+1);

                        /* Check return status of HYPERVISOR_dom_mem_op(). */
                        if (unlikely(sc->rx_mcl[i].result != i))
                                panic("Unable to reduce memory reservation\n");
                } else {
                        if (HYPERVISOR_memory_op(
                            XENMEM_decrease_reservation, &reservation) != i)
                                panic("Unable to reduce memory "
                                    "reservation\n");
                }
        } else {
                wmb();
        }

        /* Above is a suitable barrier to ensure backend will see requests. */
        sc->rx.req_prod_pvt = req_prod + i;
push:
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
        if (notify)
                notify_remote_via_irq(sc->irq);
}
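
/*
 * In summary (illustration, not original commentary): in copy mode each
 * refilled slot is granted to the backend with
 * gnttab_grant_foreign_access_ref(), while in flip mode the underlying
 * page is surrendered outright via gnttab_grant_foreign_transfer_ref()
 * plus XENMEM_decrease_reservation, and a replacement page is mapped
 * back in by xennet_get_responses() when the response arrives.
 */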

static void
xn_rxeof(struct netfront_info *np)
{
        struct ifnet *ifp;
#if __FreeBSD_version >= 700000
        struct lro_ctrl *lro = &np->xn_lro;
        struct lro_entry *queued;
#endif
        struct netfront_rx_info rinfo;
        struct netif_rx_response *rx = &rinfo.rx;
        struct netif_extra_info *extras = rinfo.extras;
        RING_IDX i, rp;
        multicall_entry_t *mcl;
        struct mbuf *m;
        struct mbuf_head rxq, errq;
        int err, pages_flipped = 0, work_to_do;

        do {
                XN_RX_LOCK_ASSERT(np);
                if (!netfront_carrier_ok(np))
                        return;

                mbufq_init(&errq);
                mbufq_init(&rxq);

                ifp = np->xn_ifp;

                rp = np->rx.sring->rsp_prod;
                rmb();  /* Ensure we see queued responses up to 'rp'. */

                i = np->rx.rsp_cons;
                while (i != rp) {
                        memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
                        memset(extras, 0, sizeof(rinfo.extras));

                        m = NULL;
                        err = xennet_get_responses(np, &rinfo, rp, &m,
                            &pages_flipped);

                        if (unlikely(err)) {
                                if (m)
                                        mbufq_tail(&errq, m);
                                np->stats.rx_errors++;
                                i = np->rx.rsp_cons;
                                continue;
                        }

                        m->m_pkthdr.rcvif = ifp;
                        if (rx->flags & NETRXF_data_validated) {
                                /* Tell the stack the checksums are okay */
                                /*
                                 * XXX this isn't necessarily the case - need to add
                                 * check
                                 */

                                m->m_pkthdr.csum_flags |=
                                        (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
                                            | CSUM_PSEUDO_HDR);
                                m->m_pkthdr.csum_data = 0xffff;
                        }

                        np->stats.rx_packets++;
                        np->stats.rx_bytes += m->m_pkthdr.len;

                        mbufq_tail(&rxq, m);
                        np->rx.rsp_cons = ++i;
                }

                if (pages_flipped) {
                        /* Some pages are no longer absent... */
#ifdef notyet
                        balloon_update_driver_allowance(-pages_flipped);
#endif
                        /* Do all the remapping work, and M->P updates, in one big
                         * hypercall.
                         */
                        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                                mcl = np->rx_mcl + pages_flipped;
                                mcl->op = __HYPERVISOR_mmu_update;
                                mcl->args[0] = (u_long)np->rx_mmu;
                                mcl->args[1] = pages_flipped;
                                mcl->args[2] = 0;
                                mcl->args[3] = DOMID_SELF;
                                (void)HYPERVISOR_multicall(np->rx_mcl,
                                    pages_flipped + 1);
                        }
                }

                while ((m = mbufq_dequeue(&errq)))
                        m_freem(m);

                /*
                 * Process all the mbufs after the remapping is complete.
                 * Break the mbuf chain first though.
                 */
                while ((m = mbufq_dequeue(&rxq)) != NULL) {
                        ifp->if_ipackets++;

                        /*
                         * Do we really need to drop the rx lock?
                         */
                        XN_RX_UNLOCK(np);
#if __FreeBSD_version >= 700000
                        /* Use LRO if possible */
                        if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
                            lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
                                /*
                                 * If LRO fails, pass up to the stack
                                 * directly.
                                 */
                                (*ifp->if_input)(ifp, m);
                        }
#else
                        (*ifp->if_input)(ifp, m);
#endif
                        XN_RX_LOCK(np);
                }

                np->rx.rsp_cons = i;

#if __FreeBSD_version >= 700000
                /*
                 * Flush any outstanding LRO work
                 */
                while (!SLIST_EMPTY(&lro->lro_active)) {
                        queued = SLIST_FIRST(&lro->lro_active);
                        SLIST_REMOVE_HEAD(&lro->lro_active, next);
                        tcp_lro_flush(lro, queued);
                }
#endif

#if 0
                /* If we get a callback with very few responses, reduce fill target. */
                /* NB. Note exponential increase, linear decrease. */
                if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
                    ((3*np->rx_target) / 4)) &&
                    (--np->rx_target < np->rx_min_target))
                        np->rx_target = np->rx_min_target;
#endif

                network_alloc_rx_buffers(np);

                RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do);
        } while (work_to_do);
}

static void
xn_txeof(struct netfront_info *np)
{
        RING_IDX i, prod;
        unsigned short id;
        struct ifnet *ifp;
        netif_tx_response_t *txr;
        struct mbuf *m;

        XN_TX_LOCK_ASSERT(np);

        if (!netfront_carrier_ok(np))
                return;

        ifp = np->xn_ifp;
        ifp->if_timer = 0;

        do {
                prod = np->tx.sring->rsp_prod;
                rmb(); /* Ensure we see responses up to 'prod'. */

                for (i = np->tx.rsp_cons; i != prod; i++) {
                        txr = RING_GET_RESPONSE(&np->tx, i);
                        if (txr->status == NETIF_RSP_NULL)
                                continue;

                        id = txr->id;
                        m = np->xn_cdata.xn_tx_chain[id];
                        KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
                        M_ASSERTVALID(m);

                        /*
                         * Increment packet count if this is the last
                         * mbuf of the chain.
                         */
                        if (!m->m_next)
                                ifp->if_opackets++;
                        if (unlikely(gnttab_query_foreign_access(
                            np->grant_tx_ref[id]) != 0)) {
                                printf("network_tx_buf_gc: warning "
                                    "-- grant still in use by backend "
                                    "domain.\n");
                                goto out;
                        }
                        gnttab_end_foreign_access_ref(
                                np->grant_tx_ref[id]);
                        gnttab_release_grant_reference(
                                &np->gref_tx_head, np->grant_tx_ref[id]);
                        np->grant_tx_ref[id] = GRANT_INVALID_REF;

                        np->xn_cdata.xn_tx_chain[id] = NULL;
                        add_id_to_freelist(np->xn_cdata.xn_tx_chain, id);
                        np->xn_cdata.xn_tx_chain_cnt--;
                        if (np->xn_cdata.xn_tx_chain_cnt < 0) {
                                panic("xn_txeof: tx_chain_cnt must be >= 0");
                        }
                        m_free(m);
                        /*
                         * Only mark the queue active if we've freed up
                         * at least one slot to try.
                         */
                        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
                }
                np->tx.rsp_cons = prod;

                /*
                 * Set a new event, then check for race with update of
                 * tx_cons.  Note that it is essential to schedule a
                 * callback, no matter how few buffers are pending.  Even if
                 * there is space in the transmit ring, higher layers may
                 * be blocked because too much data is outstanding: in such
                 * cases notification from Xen is likely to be the only kick
                 * that we'll get.
                 */
                np->tx.sring->rsp_event =
                    prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;

                mb();
        } while (prod != np->tx.sring->rsp_prod);

 out:
        if (np->tx_full &&
            ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
                np->tx_full = 0;
#if 0
                if (np->user_state == UST_OPEN)
                        netif_wake_queue(dev);
#endif
        }

}
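
/*
 * Worked example for the rsp_event heuristic above (numbers invented):
 * with prod = 10 and req_prod = 18, rsp_event becomes
 * 10 + ((18 - 10) >> 1) + 1 = 15, so the backend interrupts us again
 * once roughly half of the outstanding requests have completed rather
 * than after every single packet.
 */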

static void
xn_intr(void *xsc)
{
        struct netfront_info *np = xsc;
        struct ifnet *ifp = np->xn_ifp;

#if 0
        if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
            likely(netfront_carrier_ok(np)) &&
            ifp->if_drv_flags & IFF_DRV_RUNNING))
                return;
#endif
        if (np->tx.rsp_cons != np->tx.sring->rsp_prod) {
                XN_TX_LOCK(np);
                xn_txeof(np);
                XN_TX_UNLOCK(np);
        }

        XN_RX_LOCK(np);
        xn_rxeof(np);
        XN_RX_UNLOCK(np);

        if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
            !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                xn_start(ifp);
}


static void
xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
        grant_ref_t ref)
{
        int new = xennet_rxidx(np->rx.req_prod_pvt);

        KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL"));
        np->rx_mbufs[new] = m;
        np->grant_rx_ref[new] = ref;
        RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
        RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
        np->rx.req_prod_pvt++;
}

static int
xennet_get_extras(struct netfront_info *np,
    struct netif_extra_info *extras, RING_IDX rp)
{
        struct netif_extra_info *extra;
        RING_IDX cons = np->rx.rsp_cons;

        int err = 0;

        do {
                struct mbuf *m;
                grant_ref_t ref;

                if (unlikely(cons + 1 == rp)) {
#if 0
                        if (net_ratelimit())
                                WPRINTK("Missing extra info\n");
#endif
                        err = -EINVAL;
                        break;
                }

                extra = (struct netif_extra_info *)
                    RING_GET_RESPONSE(&np->rx, ++cons);

                if (unlikely(!extra->type ||
                    extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
#if 0
                        if (net_ratelimit())
                                WPRINTK("Invalid extra type: %d\n",
                                        extra->type);
#endif
                        err = -EINVAL;
                } else {
                        memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
                }

                m = xennet_get_rx_mbuf(np, cons);
                ref = xennet_get_rx_ref(np, cons);
                xennet_move_rx_slot(np, m, ref);
        } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

        np->rx.rsp_cons = cons;
        return (err);
}

static int
xennet_get_responses(struct netfront_info *np,
        struct netfront_rx_info *rinfo, RING_IDX rp,
        struct mbuf **list,
        int *pages_flipped_p)
{
        int pages_flipped = *pages_flipped_p;
        struct mmu_update *mmu;
        struct multicall_entry *mcl;
        struct netif_rx_response *rx = &rinfo->rx;
        struct netif_extra_info *extras = rinfo->extras;
        RING_IDX cons = np->rx.rsp_cons;
        struct mbuf *m, *m0, *m_prev;
        grant_ref_t ref = xennet_get_rx_ref(np, cons);
        int max = 5 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */;
        int frags = 1;
        int err = 0;
        u_long ret;

        m0 = m = m_prev = xennet_get_rx_mbuf(np, cons);

        if (rx->flags & NETRXF_extra_info) {
                err = xennet_get_extras(np, extras, rp);
                cons = np->rx.rsp_cons;
        }

        if (m0 != NULL) {
                m0->m_pkthdr.len = 0;
                m0->m_next = NULL;
        }

        for (;;) {
                u_long mfn;

#if 0
                printf("rx->status=%hd rx->offset=%hu frags=%u\n",
                        rx->status, rx->offset, frags);
#endif
                if (unlikely(rx->status < 0 ||
                    rx->offset + rx->status > PAGE_SIZE)) {
#if 0
                        if (net_ratelimit())
                                WPRINTK("rx->offset: %x, size: %u\n",
                                        rx->offset, rx->status);
#endif
                        xennet_move_rx_slot(np, m, ref);
                        err = -EINVAL;
                        goto next;
                }

                /*
                 * This definitely indicates a bug, either in this driver or in
                 * the backend driver.  In the future this should flag the bad
                 * situation to the system controller to reboot the backend.
                 */
                if (ref == GRANT_INVALID_REF) {
#if 0
                        if (net_ratelimit())
                                WPRINTK("Bad rx response id %d.\n", rx->id);
#endif
                        err = -EINVAL;
                        goto next;
                }

                if (!np->copying_receiver) {
                        /* Memory pressure, insufficient buffer
                         * headroom, ...
                         */
                        if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
                                if (net_ratelimit())
                                        WPRINTK("Unfulfilled rx req "
                                                "(id=%d, st=%d).\n",
                                                rx->id, rx->status);
                                xennet_move_rx_slot(np, m, ref);
                                err = -ENOMEM;
                                goto next;
                        }

                        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                                /* Remap the page. */
                                void *vaddr = mtod(m, void *);
                                uint32_t pfn;

                                mcl = np->rx_mcl + pages_flipped;
                                mmu = np->rx_mmu + pages_flipped;

                                MULTI_update_va_mapping(mcl, (u_long)vaddr,
                                    (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW |
                                    PG_V | PG_M | PG_A, 0);
                                pfn = (uintptr_t)m->m_ext.ext_arg1;
                                mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
                                    MMU_MACHPHYS_UPDATE;
                                mmu->val = pfn;

                                set_phys_to_machine(pfn, mfn);
                        }
                        pages_flipped++;
                } else {
                        ret = gnttab_end_foreign_access_ref(ref);
                        KASSERT(ret, ("ret != 0"));
                }

                gnttab_release_grant_reference(&np->gref_rx_head, ref);

next:
                if (m == NULL)
                        break;

                m->m_len = rx->status;
                m->m_data += rx->offset;
                m0->m_pkthdr.len += rx->status;

                if (!(rx->flags & NETRXF_more_data))
                        break;

                if (cons + frags == rp) {
                        if (net_ratelimit())
                                WPRINTK("Need more frags\n");
                        err = -ENOENT;
                        break;
                }
                m_prev = m;

                rx = RING_GET_RESPONSE(&np->rx, cons + frags);
                m = xennet_get_rx_mbuf(np, cons + frags);

                m_prev->m_next = m;
                m->m_next = NULL;
                ref = xennet_get_rx_ref(np, cons + frags);
                frags++;
        }
        *list = m0;

        if (unlikely(frags > max)) {
                if (net_ratelimit())
                        WPRINTK("Too many frags\n");
                err = -E2BIG;
        }

        if (unlikely(err))
                np->rx.rsp_cons = cons + frags;

        *pages_flipped_p = pages_flipped;

        return (err);
}
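
/*
 * Note (illustrative summary, not original commentary): on error
 * xennet_get_responses() returns a negative errno-style value and
 * advances rx.rsp_cons past the entire fragment run, so the caller in
 * xn_rxeof() can simply resynchronize its cursor and continue.
 */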

static void
xn_tick_locked(struct netfront_info *sc)
{
        XN_RX_LOCK_ASSERT(sc);
        callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);

        /* XXX placeholder for printing debug information */

}


static void
xn_tick(void *xsc)
{
        struct netfront_info *sc;

        sc = xsc;
        XN_RX_LOCK(sc);
        xn_tick_locked(sc);
        XN_RX_UNLOCK(sc);

}

static void
xn_start_locked(struct ifnet *ifp)
{
        int otherend_id;
        unsigned short id;
        struct mbuf *m_head, *m;
        struct netfront_info *sc;
        netif_tx_request_t *tx;
        netif_extra_info_t *extra;
        RING_IDX i;
        grant_ref_t ref;
        u_long mfn, tx_bytes;
        int notify, nfrags;

        sc = ifp->if_softc;
        otherend_id = xenbus_get_otherend_id(sc->xbdev);
        tx_bytes = 0;

        if (!netfront_carrier_ok(sc))
                return;

        for (i = sc->tx.req_prod_pvt; TRUE; i++) {
                IF_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;

                /*
                 * netfront_tx_slot_available() tries to do some math to
                 * ensure that there'll be enough xenbus ring slots available
                 * for the maximum number of packet fragments (and a couple more
                 * for what I guess are TSO and other ring entry items.)
                 */
                if (!netfront_tx_slot_available(sc)) {
                        IF_PREPEND(&ifp->if_snd, m_head);
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }

                /*
                 * Defragment the mbuf if necessary.
                 */
                for (m = m_head, nfrags = 0; m; m = m->m_next)
                        nfrags++;
                if (nfrags > MAX_SKB_FRAGS) {
                        m = m_defrag(m_head, M_DONTWAIT);
                        if (!m) {
                                m_freem(m_head);
                                break;
                        }
                        m_head = m;
                }

                /* Determine how many fragments now exist */
                for (m = m_head, nfrags = 0; m; m = m->m_next)
                        nfrags++;

                /*
                 * Don't attempt to queue this packet if there aren't
                 * enough free entries in the chain.
                 *
                 * There isn't a 1:1 correspondence between the mbuf TX ring
                 * and the xenbus TX ring.
                 * xn_txeof() may need to be called to free up some slots.
                 *
                 * It is quite possible that this can be later eliminated if
                 * it turns out that partial packets can be pushed into
                 * the ringbuffer, with fragments pushed in when further slots
                 * free up.
                 *
                 * It is also quite possible that the driver will lock up
                 * if the TX queue fills up with no RX traffic, and
                 * the mbuf ring is exhausted.  The queue may need
                 * a swift kick to continue.
                 */
1466
1467                 /*
1468                  * It is not +1 like the allocation because we need to keep
1469                  * slot [0] free for the freelist head
1470                  */
1471                 if (sc->xn_cdata.xn_tx_chain_cnt + nfrags >= NET_TX_RING_SIZE) {
1472                         printf("xn_start_locked: xn_tx_chain_cnt (%d) + nfrags %d >= NET_TX_RING_SIZE (%d); must be full!\n",
1473                             (int) sc->xn_cdata.xn_tx_chain_cnt,
1474                             (int) nfrags, (int) NET_TX_RING_SIZE);
1475                         IF_PREPEND(&ifp->if_snd, m_head);
1476                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1477                         break;
1478                 }
1479
1480                 /*
1481                  * Make sure there's actually space available in the
1482                  * Xen TX ring for this. Overcompensate for the possibility
1483                  * of having a TCP offload fragment just in case for now
1484                  * (the +1) rather than adding logic to accurately calculate
1485                  * the required size.
1486                  */
1487                 if (RING_FREE_REQUESTS(&sc->tx) < (nfrags + 1)) {
1488                         printf("xn_start_locked: free ring slots (%d) < (nfrags + 1) (%d); must be full!\n",
1489                             (int) RING_FREE_REQUESTS(&sc->tx),
1490                             (int) (nfrags + 1));
1491                         IF_PREPEND(&ifp->if_snd, m_head);
1492                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1493                         break;
1494                 }
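                /*
                 * RING_FREE_REQUESTS() comes from the shared-ring macros
                 * in <xen/interface/io/ring.h>; it expands to roughly
                 *
                 *	RING_SIZE(&sc->tx) -
                 *	    (sc->tx.req_prod_pvt - sc->tx.rsp_cons)
                 *
                 * i.e. the ring size minus the requests posted but not
                 * yet answered by the backend.
                 */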
1495
1496                 /*
1497                  * Start packing the mbufs in this chain into
1498                  * the fragment pointers. Stop when we run out
1499                  * of fragments or hit the end of the mbuf chain.
1500                  */
1501                 m = m_head;
1502                 extra = NULL;
1503                 for (m = m_head; m; m = m->m_next) {
1504                         tx = RING_GET_REQUEST(&sc->tx, i);
1505                         id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
1506                         if (id == 0)
1507                                 panic("xn_start_locked: was allocated the freelist head!\n");
1508                         sc->xn_cdata.xn_tx_chain_cnt++;
1509                         if (sc->xn_cdata.xn_tx_chain_cnt >= NET_TX_RING_SIZE+1)
1510                                 panic("xn_start_locked: tx_chain_cnt must be < NET_TX_RING_SIZE+1\n");
1511                         sc->xn_cdata.xn_tx_chain[id] = m;
1512                         tx->id = id;
1513                         ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
1514                         KASSERT((short)ref >= 0, ("Negative ref"));
1515                         mfn = virt_to_mfn(mtod(m, vm_offset_t));
1516                         gnttab_grant_foreign_access_ref(ref, otherend_id,
1517                             mfn, GNTMAP_readonly);
1518                         tx->gref = sc->grant_tx_ref[id] = ref;
1519                         tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
1520                         tx->flags = 0;
1521                         if (m == m_head) {
1522                                 /*
1523                                  * The first fragment has the entire packet
1524                                  * size, subsequent fragments have just the
1525                                  * fragment size. The backend works out the
1526                                  * true size of the first fragment by
1527                                  * subtracting the sizes of the other
1528                                  * fragments.
1529                                  */
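                                /*
                                 * For example, a 1500-byte packet split
                                 * across mbufs of 1000 and 500 bytes is
                                 * posted with sizes 1500 and 500; the
                                 * backend recovers the first fragment's
                                 * true size as 1500 - 500 = 1000.
                                 */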
1530                                 tx->size = m->m_pkthdr.len;
1531
1532                                 /*
1533                                  * The first fragment contains the
1534                                  * checksum flags and is optionally
1535                                  * followed by extra data for TSO etc.
1536                                  */
1537                                 if (m->m_pkthdr.csum_flags
1538                                     & CSUM_DELAY_DATA) {
1539                                         tx->flags |= (NETTXF_csum_blank
1540                                             | NETTXF_data_validated);
1541                                 }
1542 #if __FreeBSD_version >= 700000
1543                                 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1544                                         struct netif_extra_info *gso =
1545                                                 (struct netif_extra_info *)
1546                                                 RING_GET_REQUEST(&sc->tx, ++i);
1547
1548                                         if (extra)
1549                                                 extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
1550                                         else
1551                                                 tx->flags |= NETTXF_extra_info;
1552
1553                                         gso->u.gso.size = m->m_pkthdr.tso_segsz;
1554                                         gso->u.gso.type =
1555                                                 XEN_NETIF_GSO_TYPE_TCPV4;
1556                                         gso->u.gso.pad = 0;
1557                                         gso->u.gso.features = 0;
1558
1559                                         gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
1560                                         gso->flags = 0;
1561                                         extra = gso;
1562                                 }
1563 #endif
1564                         } else {
1565                                 tx->size = m->m_len;
1566                         }
1567                         if (m->m_next) {
1568                                 tx->flags |= NETTXF_more_data;
1569                                 i++;
1570                         }
1571                 }
1572
1573                 BPF_MTAP(ifp, m_head);
1574
1575                 sc->stats.tx_bytes += m_head->m_pkthdr.len;
1576                 sc->stats.tx_packets++;
1577         }
1578
1579         sc->tx.req_prod_pvt = i;
1580         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
1581         if (notify)
1582                 notify_remote_via_irq(sc->irq);
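        /*
         * Roughly what the macro above does (see the canonical version in
         * <xen/interface/io/ring.h>): publish req_prod_pvt to the shared
         * ring with a write barrier, then request a notification only if
         * the backend is waiting on one of the newly pushed requests:
         *
         *	notify = (RING_IDX)(new - sring->req_event) <
         *	    (RING_IDX)(new - old);
         */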
1583
1584         xn_txeof(sc);
1585
1586         if (RING_FULL(&sc->tx)) {
1587                 sc->tx_full = 1;
1588 #if 0
1589                 netif_stop_queue(dev);
1590 #endif
1591         }
1592
1593         return;
1594 }
1595
1596 static void
1597 xn_start(struct ifnet *ifp)
1598 {
1599         struct netfront_info *sc;
1600         sc = ifp->if_softc;
1601         XN_TX_LOCK(sc);
1602         xn_start_locked(ifp);
1603         XN_TX_UNLOCK(sc);
1604 }
1605
1606 /* equivalent of network_open() in Linux */
1607 static void 
1608 xn_ifinit_locked(struct netfront_info *sc) 
1609 {
1610         struct ifnet *ifp;
1611         
1612         XN_LOCK_ASSERT(sc);
1613         
1614         ifp = sc->xn_ifp;
1615         
1616         if (ifp->if_drv_flags & IFF_DRV_RUNNING) 
1617                 return;
1618         
1619         xn_stop(sc);
1620         
1621         network_alloc_rx_buffers(sc);
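        /*
         * Point rsp_event one past the last response we have consumed;
         * this re-arms event delivery, so the backend raises an interrupt
         * for the very next RX response it produces.
         */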
1622         sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1;
1623         
1624         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1625         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1626         
1627         callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
1628
1629 }
1630
1631
1632 static void 
1633 xn_ifinit(void *xsc)
1634 {
1635         struct netfront_info *sc = xsc;
1636     
1637         XN_LOCK(sc);
1638         xn_ifinit_locked(sc);
1639         XN_UNLOCK(sc);
1640
1641 }
1642
1643
1644 static int
1645 xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1646 {
1647         struct netfront_info *sc = ifp->if_softc;
1648         struct ifreq *ifr = (struct ifreq *) data;
1649         struct ifaddr *ifa = (struct ifaddr *)data;
1650
1651         int mask, error = 0;
1652         switch (cmd) {
1653         case SIOCSIFADDR:
1654         case SIOCGIFADDR:
1655                 XN_LOCK(sc);
1656                 if (ifa->ifa_addr->sa_family == AF_INET) {
1657                         ifp->if_flags |= IFF_UP;
1658                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 
1659                                 xn_ifinit_locked(sc);
1660                         arp_ifinit(ifp, ifa);
1661                         XN_UNLOCK(sc);  
1662                 } else {
1663                         XN_UNLOCK(sc);  
1664                         error = ether_ioctl(ifp, cmd, data);
1665                 }
1666                 break;
1667         case SIOCSIFMTU:
1668                 /* XXX can we alter the MTU on a VN? */
1669 #ifdef notyet
1670                 if (ifr->ifr_mtu > XN_JUMBO_MTU)
1671                         error = EINVAL;
1672                 else 
1673 #endif
1674                 {
1675                         ifp->if_mtu = ifr->ifr_mtu;
1676                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1677                         xn_ifinit(sc);
1678                 }
1679                 break;
1680         case SIOCSIFFLAGS:
1681                 XN_LOCK(sc);
1682                 if (ifp->if_flags & IFF_UP) {
1683                         /*
1684                          * If only the state of the PROMISC flag changed,
1685                          * then just use the 'set promisc mode' command
1686                          * instead of reinitializing the entire NIC: a
1687                          * full re-init needlessly tears down and rebuilds
1688                          * the rings shared with the backend, which can
1689                          * take a second or two.
1690                          */
1691 #ifdef notyet
1692                         /* No promiscuous mode with Xen */
1693                         if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1694                             ifp->if_flags & IFF_PROMISC &&
1695                             !(sc->xn_if_flags & IFF_PROMISC)) {
1696                                 XN_SETBIT(sc, XN_RX_MODE,
1697                                           XN_RXMODE_RX_PROMISC);
1698                         } else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1699                                    !(ifp->if_flags & IFF_PROMISC) &&
1700                                    sc->xn_if_flags & IFF_PROMISC) {
1701                                 XN_CLRBIT(sc, XN_RX_MODE,
1702                                           XN_RXMODE_RX_PROMISC);
1703                         } else
1704 #endif
1705                                 xn_ifinit_locked(sc);
1706                 } else {
1707                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1708                                 xn_stop(sc);
1709                         }
1710                 }
1711                 sc->xn_if_flags = ifp->if_flags;
1712                 XN_UNLOCK(sc);
1713                 error = 0;
1714                 break;
1715         case SIOCSIFCAP:
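                /*
                 * ifr_reqcap is the caller's desired capability set;
                 * XORing it with if_capenable leaves only the bits that
                 * are being toggled.
                 */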
1716                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1717                 if (mask & IFCAP_TXCSUM) {
1718                         if (IFCAP_TXCSUM & ifp->if_capenable) {
1719                                 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1720                                 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1721                                     | CSUM_IP | CSUM_TSO);
1722                         } else {
1723                                 ifp->if_capenable |= IFCAP_TXCSUM;
1724                                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
1725                                     | CSUM_IP);
1726                         }
1727                 }
1728                 if (mask & IFCAP_RXCSUM) {
1729                         ifp->if_capenable ^= IFCAP_RXCSUM;
1730                 }
1731 #if __FreeBSD_version >= 700000
1732                 if (mask & IFCAP_TSO4) {
1733                         if (IFCAP_TSO4 & ifp->if_capenable) {
1734                                 ifp->if_capenable &= ~IFCAP_TSO4;
1735                                 ifp->if_hwassist &= ~CSUM_TSO;
1736                         } else if (IFCAP_TXCSUM & ifp->if_capenable) {
1737                                 ifp->if_capenable |= IFCAP_TSO4;
1738                                 ifp->if_hwassist |= CSUM_TSO;
1739                         } else {
1740                                 IPRINTK("Xen requires tx checksum offload"
1741                                     " be enabled to use TSO\n");
1742                                 error = EINVAL;
1743                         }
1744                 }
1745                 if (mask & IFCAP_LRO) {
1746                         ifp->if_capenable ^= IFCAP_LRO;
1747                 }
1749 #endif
1750                 error = 0;
1751                 break;
1752         case SIOCADDMULTI:
1753         case SIOCDELMULTI:
1754 #ifdef notyet
1755                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1756                         XN_LOCK(sc);
1757                         xn_setmulti(sc);
1758                         XN_UNLOCK(sc);
1759                         error = 0;
1760                 }
1761 #endif
1762                 /* FALLTHROUGH */
1763         case SIOCSIFMEDIA:
1764         case SIOCGIFMEDIA:
1765                 error = EINVAL;
1766                 break;
1767         default:
1768                 error = ether_ioctl(ifp, cmd, data);
1769         }
1770     
1771         return (error);
1772 }
1773
1774 static void
1775 xn_stop(struct netfront_info *sc)
1776 {       
1777         struct ifnet *ifp;
1778
1779         XN_LOCK_ASSERT(sc);
1780     
1781         ifp = sc->xn_ifp;
1782
1783         callout_stop(&sc->xn_stat_ch);
1784
1785         xn_free_rx_ring(sc);
1786         xn_free_tx_ring(sc);
1787     
1788         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1789 }
1790
1791 /* START of Xenolinux helper functions adapted to FreeBSD */
1792 int
1793 network_connect(struct netfront_info *np)
1794 {
1795         int i, requeue_idx, error;
1796         grant_ref_t ref;
1797         netif_rx_request_t *req;
1798         u_int feature_rx_copy, feature_rx_flip;
1799
1800         error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
1801             "feature-rx-copy", NULL, "%u", &feature_rx_copy);
1802         if (error)
1803                 feature_rx_copy = 0;
1804         error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
1805             "feature-rx-flip", NULL, "%u", &feature_rx_flip);
1806         if (error)
1807                 feature_rx_flip = 1;
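        /*
         * A missing key is read as "no copy support" but "flip supported":
         * backends that predate the feature-rx-flip node implement only
         * the original page-flipping receive path.
         */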
1808
1809         /*
1810          * Copy packets on receive path if:
1811          *  (a) This was requested by user, and the backend supports it; or
1812          *  (b) Flipping was requested, but this is unsupported by the backend.
1813          */
1814         np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
1815                                 (MODPARM_rx_flip && !feature_rx_flip));
1816
1817         XN_LOCK(np);
1818         /* Recovery procedure: */
1819         error = talk_to_backend(np->xbdev, np);
1820         if (error) {
1821                 XN_UNLOCK(np);
1822                 return (error);
1823         }
1822         
1823         /* Step 1: Reinitialise variables. */
1824         netif_release_tx_bufs(np);
1825
1826         /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1827         for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1828                 struct mbuf *m;
1829                 u_long pfn;
1830
1831                 if (np->rx_mbufs[i] == NULL)
1832                         continue;
1833
1834                 m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i);
1835                 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1836                 req = RING_GET_REQUEST(&np->rx, requeue_idx);
1837                 pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT;
1838
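                /*
                 * Flipping receivers grant the backend the right to
                 * transfer a machine page into this pfn, while copying
                 * receivers grant it write access to our own page, which
                 * is why PFNTOMFN() is needed to name the machine frame.
                 */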
1839                 if (!np->copying_receiver) {
1840                         gnttab_grant_foreign_transfer_ref(ref,
1841                             xenbus_get_otherend_id(np->xbdev),
1842                             pfn);
1843                 } else {
1844                         gnttab_grant_foreign_access_ref(ref,
1845                             xenbus_get_otherend_id(np->xbdev),
1846                             PFNTOMFN(pfn), 0);
1847                 }
1848                 req->gref = ref;
1849                 req->id   = requeue_idx;
1850
1851                 requeue_idx++;
1852         }
1853
1854         np->rx.req_prod_pvt = requeue_idx;
1855         
1856         /*
1857          * Step 3: All public and private state should now be sane.  Get
1858          * ready to start sending and receiving packets and kick the
1859          * backend, since we have probably just requeued some packets.
1860          */
1861         netfront_carrier_on(np);
1862         notify_remote_via_irq(np->irq);
1863         XN_TX_LOCK(np);
1864         xn_txeof(np);
1865         XN_TX_UNLOCK(np);
1866         network_alloc_rx_buffers(np);
1867         XN_UNLOCK(np);
1868
1869         return (0);
1870 }
1871
1872 static void 
1873 show_device(struct netfront_info *sc)
1874 {
1875 #ifdef DEBUG
1876         if (sc) {
1877                 IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
1878                         sc->xn_ifno,
1879                         be_state_name[sc->xn_backend_state],
1880                         sc->xn_user_state ? "open" : "closed",
1881                         sc->xn_evtchn,
1882                         sc->xn_irq,
1883                         sc->xn_tx_if,
1884                         sc->xn_rx_if);
1885         } else {
1886                 IPRINTK("<vif NULL>\n");
1887         }
1888 #endif
1889 }
1890
1891 /** Create a network device.
1892  * @param dev the newbus device for the new interface
1893  */
1894 int 
1895 create_netdev(device_t dev)
1896 {
1897         int i;
1898         struct netfront_info *np;
1899         int err;
1900         struct ifnet *ifp;
1901
1902         np = device_get_softc(dev);
1903         
1904         np->xbdev         = dev;
1905     
1906         XN_LOCK_INIT(np, xennetif);
1907         np->rx_target     = RX_MIN_TARGET;
1908         np->rx_min_target = RX_MIN_TARGET;
1909         np->rx_max_target = RX_MAX_TARGET;
1910         
1911         /* Initialise tx_mbufs as a free chain and clear the rx_mbufs table. */
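        /*
         * The tx freelist is threaded through tx_mbufs itself: entry i
         * stores i + 1, cast to a pointer, as the index of the next free
         * slot, with entry 0 acting as the list head.
         */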
1912         for (i = 0; i <= NET_TX_RING_SIZE; i++) {
1913                 np->tx_mbufs[i] = (void *) ((u_long) i+1);
1914                 np->grant_tx_ref[i] = GRANT_INVALID_REF;        
1915         }
1916         for (i = 0; i <= NET_RX_RING_SIZE; i++) {
1917                 np->rx_mbufs[i] = NULL;
1918                 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1919         }
1920         /* A grant for every tx ring slot */
1921         if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1922                                           &np->gref_tx_head) < 0) {
1923                 printf("#### netfront can't alloc tx grant refs\n");
1924                 err = ENOMEM;
1925                 goto out;	/* no grant references have been allocated yet */
1926         }
1927         /* A grant for every rx ring slot */
1928         if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1929                                           &np->gref_rx_head) < 0) {
1930                 printf("#### netfront can't alloc rx grant refs\n");
1931                 /* np->gref_tx_head is released at the "exit" label below. */
1932                 err = ENOMEM;
1933                 goto exit;
1934         }
1935         
1936         err = xen_net_read_mac(dev, np->mac);
1937         if (err) {
1938                 xenbus_dev_fatal(dev, err, "parsing %s/mac",
1939                     xenbus_get_node(dev));
1940                 goto out;
1941         }
1942         
1943         /* Set up ifnet structure */
1944         ifp = np->xn_ifp = if_alloc(IFT_ETHER);
1945         ifp->if_softc = np;
1946         if_initname(ifp, "xn", device_get_unit(dev));
1947         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1948         ifp->if_ioctl = xn_ioctl;
1949         ifp->if_output = ether_output;
1950         ifp->if_start = xn_start;
1951 #ifdef notyet
1952         ifp->if_watchdog = xn_watchdog;
1953 #endif
1954         ifp->if_init = xn_ifinit;
1955         ifp->if_mtu = ETHERMTU;
1956         ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
1957         
1958         ifp->if_hwassist = XN_CSUM_FEATURES;
1959         ifp->if_capabilities = IFCAP_HWCSUM;
1960 #if __FreeBSD_version >= 700000
1961         ifp->if_capabilities |= IFCAP_TSO4;
1962         if (xn_enable_lro) {
1963                 err = tcp_lro_init(&np->xn_lro);
1964                 if (err) {
1965                         device_printf(dev, "LRO initialization failed\n");
1966                         goto exit;
1967                 }
1968                 np->xn_lro.ifp = ifp;
1969                 ifp->if_capabilities |= IFCAP_LRO;
1970         }
1971 #endif
1972         ifp->if_capenable = ifp->if_capabilities;
1973         
1974         ether_ifattach(ifp, np->mac);
1975         callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
1976         netfront_carrier_off(np);
1977
1978         return (0);
1979
1980 exit:
1981         gnttab_free_grant_references(np->gref_tx_head);
1982 out:
1983         panic("do something smart");
1984
1985 }
1986
1987 /**
1988  * Handle the change of state of the backend to Closing.  We must delete our
1989  * device-layer structures now, to ensure that writes are flushed through to
1990  * the backend.  Once this is done, we can switch to Closed in
1991  * acknowledgement.
1992  */
1993 #if 0
1994 static void netfront_closing(device_t dev)
1995 {
1996 #if 0
1997         struct netfront_info *info = dev->dev_driver_data;
1998
1999         DPRINTK("netfront_closing: %s removed\n", dev->nodename);
2000
2001         close_netdev(info);
2002 #endif
2003         xenbus_switch_state(dev, XenbusStateClosed);
2004 }
2005 #endif
2006
2007 static int netfront_detach(device_t dev)
2008 {
2009         struct netfront_info *info = device_get_softc(dev);
2010
2011         DPRINTK("%s\n", xenbus_get_node(dev));
2012
2013         netif_free(info);
2014
2015         return (0);
2016 }
2017
2018
2019 static void netif_free(struct netfront_info *info)
2020 {
2021         netif_disconnect_backend(info);
2022 #if 0
2023         close_netdev(info);
2024 #endif
2025 }
2026
2027 static void netif_disconnect_backend(struct netfront_info *info)
2028 {
2029         XN_RX_LOCK(info);
2030         XN_TX_LOCK(info);
2031         netfront_carrier_off(info);
2032         XN_TX_UNLOCK(info);
2033         XN_RX_UNLOCK(info);
2034
2035         end_access(info->tx_ring_ref, info->tx.sring);
2036         end_access(info->rx_ring_ref, info->rx.sring);
2037         info->tx_ring_ref = GRANT_INVALID_REF;
2038         info->rx_ring_ref = GRANT_INVALID_REF;
2039         info->tx.sring = NULL;
2040         info->rx.sring = NULL;
2041
2042         if (info->irq)
2043                 unbind_from_irqhandler(info->irq);
2044
2045         info->irq = 0;
2046 }
2047
2048
2049 static void end_access(int ref, void *page)
2050 {
2051         if (ref != GRANT_INVALID_REF)
2052                 gnttab_end_foreign_access(ref, page);
2053 }
2054
2055 /* ** Driver registration ** */
2056 static device_method_t netfront_methods[] = { 
2057         /* Device interface */ 
2058         DEVMETHOD(device_probe,         netfront_probe), 
2059         DEVMETHOD(device_attach,        netfront_attach), 
2060         DEVMETHOD(device_detach,        netfront_detach), 
2061         DEVMETHOD(device_shutdown,      bus_generic_shutdown), 
2062         DEVMETHOD(device_suspend,       bus_generic_suspend), 
2063         DEVMETHOD(device_resume,        netfront_resume), 
2064  
2065         /* Xenbus interface */
2066         DEVMETHOD(xenbus_backend_changed, netfront_backend_changed),
2067
2068         { 0, 0 } 
2069 }; 
2070
2071 static driver_t netfront_driver = { 
2072         "xn", 
2073         netfront_methods, 
2074         sizeof(struct netfront_info),                      
2075 }; 
2076 devclass_t netfront_devclass; 
2077  
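/*
 * Glue the driver into newbus: netfront instances attach below the xenbus
 * bus.  Note that the module is registered under the name "xe" even though
 * the driver and its interfaces are named "xn".
 */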
2078 DRIVER_MODULE(xe, xenbus, netfront_driver, netfront_devclass, 0, 0);