1 /*-
2  * Copyright (c) 2009-2012 Microsoft Corp.
3  * Copyright (c) 2010-2012 Citrix Inc.
4  * Copyright (c) 2012 NetApp Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30
31 /**
32  * HyperV vmbus network VSC (virtual services client) module
33  *
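 * Implements the NVSP protocol used by the hn(4) driver to exchange
 * RNDIS packets with the host-side NetVSP over a VMBus channel.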
34  */
35
36
37 #include <sys/param.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/lock.h>
41 #include <net/if.h>
42 #include <net/if_arp.h>
43 #include <machine/bus.h>
44 #include <machine/atomic.h>
45
46 #include <dev/hyperv/include/hyperv.h>
47 #include "hv_net_vsc.h"
48 #include "hv_rndis.h"
49 #include "hv_rndis_filter.h"
50
51
52 /*
53  * Forward declarations
54  */
55 static void hv_nv_on_channel_callback(void *context);
56 static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
57 static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
58 static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
59 static int  hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
60 static int  hv_nv_connect_to_vsp(struct hv_device *device);
61 static void hv_nv_on_send_completion(struct hv_device *device,
62                                      hv_vm_packet_descriptor *pkt);
63 static void hv_nv_on_receive(struct hv_device *device,
64                              hv_vm_packet_descriptor *pkt);
65 static void hv_nv_send_receive_completion(struct hv_device *device,
66                                           uint64_t tid);
67
68
69 /*
70  * Allocate a netvsc_dev and attach it to the hn(4) driver softc.
71  */
72 static inline netvsc_dev *
73 hv_nv_alloc_net_device(struct hv_device *device)
74 {
75         netvsc_dev *net_dev;
76         hn_softc_t *sc = device_get_softc(device->device);
77
78         net_dev = malloc(sizeof(netvsc_dev), M_DEVBUF, M_NOWAIT | M_ZERO);
79         if (net_dev == NULL) {
80                 return (NULL);
81         }
82
83         net_dev->dev = device;
84         net_dev->destroy = FALSE;
85         sc->net_dev = net_dev;
86
87         return (net_dev);
88 }
89
90 /*
91  * Return the netvsc_dev for outbound use; NULL once teardown has begun.
92  */
93 static inline netvsc_dev *
94 hv_nv_get_outbound_net_device(struct hv_device *device)
95 {
96         hn_softc_t *sc = device_get_softc(device->device);
97         netvsc_dev *net_dev = sc->net_dev;
98
99         if ((net_dev != NULL) && net_dev->destroy) {
100                 return (NULL);
101         }
102
103         return (net_dev);
104 }
105
106 /*
107  * Return the netvsc_dev for inbound use; see the teardown check below.
108  */
109 static inline netvsc_dev *
110 hv_nv_get_inbound_net_device(struct hv_device *device)
111 {
112         hn_softc_t *sc = device_get_softc(device->device);
113         netvsc_dev *net_dev = sc->net_dev;
114
115         if (net_dev == NULL) {
116                 return (net_dev);
117         }
118         /*
119          * When the device is being destroyed, we only
120          * permit incoming packets if there are
121          * outstanding sends.
122          */
123         if (net_dev->destroy && net_dev->num_outstanding_sends == 0) {
124                 return (NULL);
125         }
126
127         return (net_dev);
128 }
129
130 /*
131  * Net VSC initialize receive buffer with net VSP
132  * 
133  * Net VSP:  Network virtual service provider, the host-side peer of
134  *     this VSC (part of the Hyper-V extensible switch / synthetic data path).
135  */
136 static int 
137 hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
138 {
139         netvsc_dev *net_dev;
140         nvsp_msg *init_pkt;
141         int ret = 0;
142
143         net_dev = hv_nv_get_outbound_net_device(device);
144         if (!net_dev) {
145                 return (ENODEV);
146         }
147
148         net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF,
149             M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
150         if (net_dev->rx_buf == NULL) {
151                 ret = ENOMEM;
152                 goto cleanup;
153         }
154
155         /*
156          * Establish the GPADL handle for this buffer on this channel.
157          * Note:  This call uses the vmbus connection rather than the
158          * channel to establish the gpadl handle. 
159          * GPADL:  Guest physical address descriptor list.
160          */
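        /*
         * Once the GPADL is established, the host deposits incoming frames
         * directly into this buffer; hv_nv_on_receive() below reads them
         * out using the byte_offset/byte_count ranges carried in each
         * transfer-page packet.
         */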
161         ret = hv_vmbus_channel_establish_gpadl(
162                 device->channel, net_dev->rx_buf,
163                 net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle);
164         if (ret != 0) {
165                 goto cleanup;
166         }
167         
168         /* sema_wait(&ext->channel_init_sema); KYS CHECK */
169
170         /* Notify the NetVsp of the gpadl handle */
171         init_pkt = &net_dev->channel_init_packet;
172
173         memset(init_pkt, 0, sizeof(nvsp_msg));
174
175         init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf;
176         init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
177             net_dev->rx_buf_gpadl_handle;
178         init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
179             NETVSC_RECEIVE_BUFFER_ID;
180
181         /* Send the gpadl notification request */
182
183         ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
184             sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
185             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
186             HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
187         if (ret != 0) {
188                 goto cleanup;
189         }
190
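        /*
         * Note: this wait is satisfied by hv_nv_on_send_completion(), which
         * copies the host's reply into net_dev->channel_init_packet and
         * posts channel_init_sema when the completion packet arrives.
         */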
191         sema_wait(&net_dev->channel_init_sema);
192
193         /* Check the response */
194         if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status
195             != nvsp_status_success) {
196                 ret = EINVAL;
197                 goto cleanup;
198         }
199
200         net_dev->rx_section_count =
201             init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;
202
203         net_dev->rx_sections = malloc(net_dev->rx_section_count *
204             sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT);
205         if (net_dev->rx_sections == NULL) {
206                 ret = ENOMEM;
207                 goto cleanup;
208         }
209         memcpy(net_dev->rx_sections, 
210             init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
211             net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));
212
213
214         /*
215          * For first release, there should only be 1 section that represents
216          * the entire receive buffer
217          */
218         if (net_dev->rx_section_count != 1
219             || net_dev->rx_sections->offset != 0) {
220                 ret = EINVAL;
221                 goto cleanup;
222         }
223
224         goto exit;
225
226 cleanup:
227         hv_nv_destroy_rx_buffer(net_dev);
228         
229 exit:
230         return (ret);
231 }
232
233 /*
234  * Net VSC initialize send buffer with net VSP
235  */
236 static int 
237 hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
238 {
239         netvsc_dev *net_dev;
240         nvsp_msg *init_pkt;
241         int ret = 0;
242
243         net_dev = hv_nv_get_outbound_net_device(device);
244         if (!net_dev) {
245                 return (ENODEV);
246         }
247
248         net_dev->send_buf  = contigmalloc(net_dev->send_buf_size, M_DEVBUF,
249             M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
250         if (net_dev->send_buf == NULL) {
251                 ret = ENOMEM;
252                 goto cleanup;
253         }
254
255         /*
256          * Establish the gpadl handle for this buffer on this channel.
257          * Note:  This call uses the vmbus connection rather than the
258          * channel to establish the gpadl handle. 
259          */
260         ret = hv_vmbus_channel_establish_gpadl(device->channel,
261             net_dev->send_buf, net_dev->send_buf_size,
262             &net_dev->send_buf_gpadl_handle);
263         if (ret != 0) {
264                 goto cleanup;
265         }
266
267         /* Notify the NetVsp of the gpadl handle */
268
269         init_pkt = &net_dev->channel_init_packet;
270
271         memset(init_pkt, 0, sizeof(nvsp_msg));
272
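        /*
         * Note: the vers_1 send_rx_buf message fields are reused below for
         * the send-buffer request; the two requests are assumed to share
         * the same gpadl_handle/id layout (see hv_net_vsc.h).
         */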
273         init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf;
274         init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
275             net_dev->send_buf_gpadl_handle;
276         init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
277             NETVSC_SEND_BUFFER_ID;
278
279         /* Send the gpadl notification request */
280
281         ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
282             sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
283             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
284             HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
285         if (ret != 0) {
286                 goto cleanup;
287         }
288
289         sema_wait(&net_dev->channel_init_sema);
290
291         /* Check the response */
292         if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status
293             != nvsp_status_success) {
294                 ret = EINVAL;
295                 goto cleanup;
296         }
297
298         net_dev->send_section_size =
299             init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size;
300
301         goto exit;
302
303 cleanup:
304         hv_nv_destroy_send_buffer(net_dev);
305         
306 exit:
307         return (ret);
308 }
309
310 /*
311  * Net VSC destroy receive buffer
312  */
313 static int
314 hv_nv_destroy_rx_buffer(netvsc_dev *net_dev)
315 {
316         nvsp_msg *revoke_pkt;
317         int ret = 0;
318
319         /*
320          * If we got a section count, it means we received a
321          * send_rx_buf_complete msg 
322          * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
323          * we need to send a revoke msg here
324          */
325         if (net_dev->rx_section_count) {
326                 /* Send the revoke receive buffer */
327                 revoke_pkt = &net_dev->revoke_packet;
328                 memset(revoke_pkt, 0, sizeof(nvsp_msg));
329
330                 revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf;
331                 revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id =
332                     NETVSC_RECEIVE_BUFFER_ID;
333
334                 ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
335                     revoke_pkt, sizeof(nvsp_msg),
336                     (uint64_t)(uintptr_t)revoke_pkt,
337                     HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
338
339                 /*
340                  * If we failed here, we might as well return and have a leak 
341                  * rather than continue and a bugchk
342                  */
343                 if (ret != 0) {
344                         return (ret);
345                 }
346         }
347                 
348         /* Tear down the gpadl on the vsp end */
349         if (net_dev->rx_buf_gpadl_handle) {
350                 ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
351                     net_dev->rx_buf_gpadl_handle);
352                 /*
353                  * If we failed here, we might as well return and have a leak 
354                  * rather than continue and a bugchk
355                  */
356                 if (ret != 0) {
357                         return (ret);
358                 }
359                 net_dev->rx_buf_gpadl_handle = 0;
360         }
361
362         if (net_dev->rx_buf) {
363                 /* Free up the receive buffer */
364                 contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF);
365                 net_dev->rx_buf = NULL;
366         }
367
368         if (net_dev->rx_sections) {
369                 free(net_dev->rx_sections, M_DEVBUF);
370                 net_dev->rx_sections = NULL;
371                 net_dev->rx_section_count = 0;
372         }
373
374         return (ret);
375 }
376
377 /*
378  * Net VSC destroy send buffer
379  */
380 static int
381 hv_nv_destroy_send_buffer(netvsc_dev *net_dev)
382 {
383         nvsp_msg *revoke_pkt;
384         int ret = 0;
385
386         /*
387          * If we got a section size, it means we received a
388          * send_send_buf_complete msg
389          * (ie sent a nvsp_msg_1_type_send_send_buf msg), therefore
390          * we need to send a revoke msg here
391          */
392         if (net_dev->send_section_size) {
393                 /* Send the revoke send buffer */
394                 revoke_pkt = &net_dev->revoke_packet;
395                 memset(revoke_pkt, 0, sizeof(nvsp_msg));
396
397                 revoke_pkt->hdr.msg_type =
398                     nvsp_msg_1_type_revoke_send_buf;
399                 revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id =
400                     NETVSC_SEND_BUFFER_ID;
401
402                 ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
403                     revoke_pkt, sizeof(nvsp_msg),
404                     (uint64_t)(uintptr_t)revoke_pkt,
405                     HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
406                 /*
407                  * If we failed here, we might as well return and have a leak 
408                  * rather than continue and a bugchk
409                  */
410                 if (ret != 0) {
411                         return (ret);
412                 }
413         }
414                 
415         /* Tear down the gpadl on the vsp end */
416         if (net_dev->send_buf_gpadl_handle) {
417                 ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
418                     net_dev->send_buf_gpadl_handle);
419
420                 /*
421                  * If we failed here, we might as well return and have a leak 
422                  * rather than continue and a bugchk
423                  */
424                 if (ret != 0) {
425                         return (ret);
426                 }
427                 net_dev->send_buf_gpadl_handle = 0;
428         }
429
430         if (net_dev->send_buf) {
431                 /* Free up the send buffer */
432                 contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF);
433                 net_dev->send_buf = NULL;
434         }
435
436         return (ret);
437 }
438
439
440 /*
441  * Attempt to negotiate the caller-specified NVSP version
442  *
443  * For NVSP v2, Server 2008 R2 does not set
444  * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
445  * to the negotiated version, so we cannot rely on that.
446  */
447 static int
448 hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev,
449                               uint32_t nvsp_ver)
450 {
451         nvsp_msg *init_pkt;
452         int ret;
453
454         init_pkt = &net_dev->channel_init_packet;
455         memset(init_pkt, 0, sizeof(nvsp_msg));
456         init_pkt->hdr.msg_type = nvsp_msg_type_init;
457
458         /*
459          * Specify parameter as the only acceptable protocol version
460          */
461         init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver;
462         init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver;
463
464         /* Send the init request */
465         ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
466             sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
467             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
468             HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
469         if (ret != 0)
470                 return (-1);
471
472         sema_wait(&net_dev->channel_init_sema);
473
474         if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success)
475                 return (EINVAL);
476
477         return (0);
478 }
479
480 /*
481  * Send NDIS version 2 config packet containing MTU.
482  *
483  * Not valid for NDIS version 1.
484  */
485 static int
486 hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu)
487 {
488         netvsc_dev *net_dev;
489         nvsp_msg *init_pkt;
490         int ret;
491
492         net_dev = hv_nv_get_outbound_net_device(device);
493         if (!net_dev)
494                 return (ENODEV);
495
496         /*
497          * Set up configuration packet, write MTU
498          * Indicate we are capable of handling VLAN tags
499          */
500         init_pkt = &net_dev->channel_init_packet;
501         memset(init_pkt, 0, sizeof(nvsp_msg));
502         init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config;
503         init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu;
504         init_pkt->
505                 msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q
506                 = 1;
507
508         /* Send the configuration packet */
509         ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
510             sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
511             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
512         if (ret != 0)
513                 return (EINVAL);
514
515         return (0);
516 }
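
/*
 * NVSP bring-up sequence performed by hv_nv_connect_to_vsp() below:
 *   1. Negotiate the NVSP protocol version (v2 first, falling back to v1).
 *   2. For NVSP v2 and later, send the NDIS config message carrying the MTU.
 *   3. Send the NDIS version message.
 *   4. Post the receive buffer and send buffer GPADLs to the NetVSP.
 */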
517
518 /*
519  * Net VSC connect to VSP
520  */
521 static int
522 hv_nv_connect_to_vsp(struct hv_device *device)
523 {
524         netvsc_dev *net_dev;
525         nvsp_msg *init_pkt;
526         uint32_t nvsp_vers;
527         uint32_t ndis_version;
528         int ret = 0;
529         device_t dev = device->device;
530         hn_softc_t *sc = device_get_softc(dev);
531         struct ifnet *ifp = sc->arpcom.ac_ifp;
532
533         net_dev = hv_nv_get_outbound_net_device(device);
534         if (!net_dev) {
535                 return (ENODEV);
536         }
537
538         /*
539          * Negotiate the NVSP version.  Try NVSP v2 first.
540          */
541         nvsp_vers = NVSP_PROTOCOL_VERSION_2;
542         ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
543         if (ret != 0) {
544                 /* NVSP v2 failed, try NVSP v1 */
545                 nvsp_vers = NVSP_PROTOCOL_VERSION_1;
546                 ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
547                 if (ret != 0) {
548                         /* NVSP v1 failed, return bad status */
549                         return (ret);
550                 }
551         }
552         net_dev->nvsp_version = nvsp_vers;
553
554         /*
555          * Set the MTU if supported by this NVSP protocol version.
556          * This needs to be right after the NVSP init message per Haiyang.
557          */
558         if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2)
559                 ret = hv_nv_send_ndis_config(device, ifp->if_mtu);
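        /*
         * Note: a failure from hv_nv_send_ndis_config() is not treated as
         * fatal here; ret is overwritten by the NDIS version send below.
         */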
560
561         /*
562          * Send the NDIS version
563          */
564         init_pkt = &net_dev->channel_init_packet;
565
566         memset(init_pkt, 0, sizeof(nvsp_msg));
567
568         /*
569          * Updated to version 5.1, minimum, for VLAN per Haiyang
570          */
571         ndis_version = NDIS_VERSION;
572
573         init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers;
574         init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers =
575             (ndis_version & 0xFFFF0000) >> 16;
576         init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers =
577             ndis_version & 0xFFFF;
578
579         /* Send the init request */
580
581         ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
582             sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
583             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
584         if (ret != 0) {
585                 goto cleanup;
586         }
587         /*
588          * TODO:  BUGBUG - We have to wait for the above msg since the netvsp
589          * uses KMCL, which acknowledges packets (completion packets),
590          * because our VMBus always sets the
591          * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag.
592          */
593         /* sema_wait(&NetVscChannel->channel_init_sema); */
594
595         /* Post the big receive buffer to NetVSP */
596         ret = hv_nv_init_rx_buffer_with_net_vsp(device);
597         if (ret == 0)
598                 ret = hv_nv_init_send_buffer_with_net_vsp(device);
599
600 cleanup:
601         return (ret);
602 }
603
604 /*
605  * Net VSC disconnect from VSP
606  */
607 static void
608 hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
609 {
610         hv_nv_destroy_rx_buffer(net_dev);
611         hv_nv_destroy_send_buffer(net_dev);
612 }
613
614 /*
615  * Net VSC on device add
616  * 
617  * Callback when the device belonging to this driver is added
618  */
619 netvsc_dev *
620 hv_nv_on_device_add(struct hv_device *device, void *additional_info)
621 {
622         netvsc_dev *net_dev;
623         netvsc_packet *packet;
624         netvsc_packet *next_packet;
625         int i, ret = 0;
626
627         net_dev = hv_nv_alloc_net_device(device);
628         if (!net_dev)
629                 goto cleanup;
630
631         /* Initialize the NetVSC channel extension */
632         net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
633         mtx_init(&net_dev->rx_pkt_list_lock, "HV-RPL", NULL,
634             MTX_SPIN | MTX_RECURSE);
635
636         net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
637
638         /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
639         STAILQ_INIT(&net_dev->myrx_packet_list);
640
641         /* 
642          * malloc a sufficient number of netvsc_packet buffers to hold
643          * a packet list.  Add them to the netvsc device packet queue.
644          */
645         for (i=0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
646                 packet = malloc(sizeof(netvsc_packet) +
647                     (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)),
648                     M_DEVBUF, M_NOWAIT | M_ZERO);
649                 if (!packet) {
650                         break;
651                 }
652                 STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet,
653                     mylist_entry);
654         }
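        /*
         * Allocation failures in the loop above are tolerated; the receive
         * path simply runs with however many packet buffers were obtained.
         */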
655
656         sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");
657
658         /*
659          * Open the channel
660          */
661         ret = hv_vmbus_channel_open(device->channel,
662             NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
663             NULL, 0, hv_nv_on_channel_callback, device);
664         if (ret != 0)
665                 goto cleanup;
666
667         /*
668          * Connect with the NetVsp
669          */
670         ret = hv_nv_connect_to_vsp(device);
671         if (ret != 0)
672                 goto close;
673
674         return (net_dev);
675
676 close:
677         /* Now, we can close the channel safely */
678
679         hv_vmbus_channel_close(device->channel);
680
681 cleanup:
682         /*
683          * Free the packet buffers on the netvsc device packet queue.
684          * Release other resources.
685          */
686         if (net_dev) {
687                 sema_destroy(&net_dev->channel_init_sema);
688
689                 packet = STAILQ_FIRST(&net_dev->myrx_packet_list);
690                 while (packet != NULL) {
691                         next_packet = STAILQ_NEXT(packet, mylist_entry);
692                         free(packet, M_DEVBUF);
693                         packet = next_packet;
694                 }
695                 /* Reset the list to initial state */
696                 STAILQ_INIT(&net_dev->myrx_packet_list);
697
698                 mtx_destroy(&net_dev->rx_pkt_list_lock);
699
700                 free(net_dev, M_DEVBUF);
701         }
702
703         return (NULL);
704 }
705
706 /*
707  * Net VSC on device remove
708  */
709 int
710 hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
711 {
712         netvsc_packet *net_vsc_pkt;
713         netvsc_packet *next_net_vsc_pkt;
714         hn_softc_t *sc = device_get_softc(device->device);
715         netvsc_dev *net_dev = sc->net_dev;
716         
717         /* Stop outbound traffic, i.e. sends and receive completions */
718         mtx_lock(&device->channel->inbound_lock);
719         net_dev->destroy = TRUE;
720         mtx_unlock(&device->channel->inbound_lock);
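        /*
         * With net_dev->destroy set, hv_nv_get_outbound_net_device() now
         * returns NULL, so no new sends can be started; the inbound path
         * keeps running only until the outstanding sends complete.
         */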
721
722         /* Wait for all send completions */
723         while (net_dev->num_outstanding_sends) {
724                 DELAY(100);
725         }
726
727         hv_nv_disconnect_from_vsp(net_dev);
728
729         /* At this point, no one should be accessing net_dev except in here */
730
731         /* Now, we can close the channel safely */
732
733         if (!destroy_channel) {
734                 device->channel->state =
735                     HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
736         }
737
738         hv_vmbus_channel_close(device->channel);
739
740         /* Release all resources */
741         net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
742         while (net_vsc_pkt != NULL) {
743                 next_net_vsc_pkt = STAILQ_NEXT(net_vsc_pkt, mylist_entry);
744                 free(net_vsc_pkt, M_DEVBUF);
745                 net_vsc_pkt = next_net_vsc_pkt;
746         }
747
748         /* Reset the list to initial state */
749         STAILQ_INIT(&net_dev->myrx_packet_list);
750
751         mtx_destroy(&net_dev->rx_pkt_list_lock);
752         sema_destroy(&net_dev->channel_init_sema);
753         free(net_dev, M_DEVBUF);
754
755         return (0);
756 }
757
758 /*
759  * Net VSC on send completion
760  */
761 static void 
762 hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt)
763 {
764         netvsc_dev *net_dev;
765         nvsp_msg *nvsp_msg_pkt;
766         netvsc_packet *net_vsc_pkt;
767
768         net_dev = hv_nv_get_inbound_net_device(device);
769         if (!net_dev) {
770                 return;
771         }
772
773         nvsp_msg_pkt =
774             (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
775
776         if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete
777                 || nvsp_msg_pkt->hdr.msg_type
778                         == nvsp_msg_1_type_send_rx_buf_complete
779                 || nvsp_msg_pkt->hdr.msg_type
780                         == nvsp_msg_1_type_send_send_buf_complete) {
781                 /* Copy the response back */
782                 memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
783                     sizeof(nvsp_msg));                  
784                 sema_post(&net_dev->channel_init_sema);
785         } else if (nvsp_msg_pkt->hdr.msg_type ==
786                                    nvsp_msg_1_type_send_rndis_pkt_complete) {
787                 /* Get the send context */
788                 net_vsc_pkt =
789                     (netvsc_packet *)(unsigned long)pkt->transaction_id;
790
791                 /* Notify the layer above us */
792                 net_vsc_pkt->compl.send.on_send_completion(
793                     net_vsc_pkt->compl.send.send_completion_context);
794
795                 atomic_subtract_int(&net_dev->num_outstanding_sends, 1);
796         }
797 }
798
799 /*
800  * Net VSC on send
801  * Sends a packet on the specified Hyper-V device.
802  * Returns 0 on success, non-zero on failure.
803  */
804 int
805 hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
806 {
807         netvsc_dev *net_dev;
808         nvsp_msg send_msg;
809         int ret;
810
811         net_dev = hv_nv_get_outbound_net_device(device);
812         if (!net_dev)
813                 return (ENODEV);
814
815         send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
816         if (pkt->is_data_pkt) {
817                 /* 0 is RMC_DATA */
818                 send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0;
819         } else {
820                 /* 1 is RMC_CONTROL */
821                 send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1;
822         }
823
824         /* Not using send buffer section */
825         send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx =
826             0xFFFFFFFF;
827         send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0;
828
829         if (pkt->page_buf_count) {
830                 ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel,
831                     pkt->page_buffers, pkt->page_buf_count,
832                     &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt);
833         } else {
834                 ret = hv_vmbus_channel_send_packet(device->channel,
835                     &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt,
836                     HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
837                     HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
838         }
839
840         /* Record outstanding send only if send_packet() succeeded */
841         if (ret == 0)
842                 atomic_add_int(&net_dev->num_outstanding_sends, 1);
843
844         return (ret);
845 }
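
/*
 * Illustrative caller sketch (not part of this file): the RNDIS filter
 * layer is expected to fill in a netvsc_packet along these lines before
 * calling hv_nv_on_send().  The completion handler name is a placeholder;
 * only the fields shown are taken from this file.
 *
 *      pkt->is_data_pkt = TRUE;
 *      pkt->page_buf_count = nsegs;
 *      pkt->page_buffers[0].pfn    = ...;   // guest page frame number
 *      pkt->page_buffers[0].offset = ...;
 *      pkt->page_buffers[0].length = ...;
 *      pkt->compl.send.on_send_completion = my_send_done;
 *      pkt->compl.send.send_completion_context = pkt;
 *      error = hv_nv_on_send(device, pkt);
 */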
846
847 /*
848  * Net VSC on receive
849  *
850  * In the FreeBSD Hyper-V virtual world, this function deals exclusively
851  * with virtual addresses.
852  */
853 static void 
854 hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt)
855 {
856         netvsc_dev *net_dev;
857         hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
858         nvsp_msg *nvsp_msg_pkt;
859         netvsc_packet *net_vsc_pkt = NULL;
860         unsigned long start;
861         xfer_page_packet *xfer_page_pkt = NULL;
862         STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head =
863             STAILQ_HEAD_INITIALIZER(mylist_head);
864         int count = 0;
865         int i = 0;
866
867         net_dev = hv_nv_get_inbound_net_device(device);
868         if (!net_dev)
869                 return;
870
871         /*
872          * All inbound packets other than send completion should be
873          * xfer page packet.
874          */
875         if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)
876                 return;
877
878         nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt
879                 + (pkt->data_offset8 << 3));
880
881         /* Make sure this is a valid nvsp packet */
882         if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt)
883                 return;
884         
885         vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt;
886
887         if (vm_xfer_page_pkt->transfer_page_set_id
888                 != NETVSC_RECEIVE_BUFFER_ID) {
889                 return;
890         }
891
892         STAILQ_INIT(&mylist_head);
893
894         /*
895          * Grab free packets (range count + 1) to represent this xfer page
896          * packet.  +1 to represent the xfer page packet itself.  We grab it
897          * here so that we know exactly how many we can fulfill.
898          */
899         mtx_lock_spin(&net_dev->rx_pkt_list_lock);
900         while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) {     
901                 net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
902                 STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry);
903
904                 STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry);
905
906                 if (++count == vm_xfer_page_pkt->range_count + 1)
907                         break;
908         }
909
910         mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
911
912         /*
913          * We need at least 2 netvsc pkts (1 to represent the xfer page
914          * packet itself and at least 1 for a range) in order to handle
915          * any of the xfer page packet ranges.
916          */
917         if (count < 2) {
918                 /* Return netvsc packet to the freelist */
919                 mtx_lock_spin(&net_dev->rx_pkt_list_lock);
920                 for (i=count; i != 0; i--) {
921                         net_vsc_pkt = STAILQ_FIRST(&mylist_head);
922                         STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
923
924                         STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
925                             net_vsc_pkt, mylist_entry);
926                 }
927                 mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
928
929                 hv_nv_send_receive_completion(device,
930                     vm_xfer_page_pkt->d.transaction_id);
931
932                 return;
933         }
934
935         /* Take the first packet in the list */
936         xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head);
937         STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
938
939         /* This is how many data packets we can supply */
940         xfer_page_pkt->count = count - 1;
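        /*
         * This count is decremented in hv_nv_on_receive_completion(); once
         * it reaches zero, the transfer-page receive completion is sent
         * back to the host.
         */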
941
942         /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
943         for (i=0; i < (count - 1); i++) {
944                 net_vsc_pkt = STAILQ_FIRST(&mylist_head);
945                 STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
946
947                 /*
948                  * Initialize the netvsc packet
949                  */
950                 net_vsc_pkt->xfer_page_pkt = xfer_page_pkt;
951                 net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt;
952                 net_vsc_pkt->device = device;
953                 /* Save this so that we can send it back */
954                 net_vsc_pkt->compl.rx.rx_completion_tid =
955                     vm_xfer_page_pkt->d.transaction_id;
956
957                 net_vsc_pkt->tot_data_buf_len =
958                     vm_xfer_page_pkt->ranges[i].byte_count;
959                 net_vsc_pkt->page_buf_count = 1;
960
961                 net_vsc_pkt->page_buffers[0].length =
962                     vm_xfer_page_pkt->ranges[i].byte_count;
963
964                 /* The virtual address of the packet in the receive buffer */
965                 start = ((unsigned long)net_dev->rx_buf +
966                     vm_xfer_page_pkt->ranges[i].byte_offset);
967                 start = ((unsigned long)start) & ~(PAGE_SIZE - 1);
968
969                 /* Page number of the virtual page containing packet start */
970                 net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
971
972                 /* Calculate the page relative offset */
973                 net_vsc_pkt->page_buffers[0].offset =
974                     vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
975
976                 /*
977                  * In this implementation, we are dealing with virtual
978                  * addresses exclusively.  Since we aren't using physical
979                  * addresses at all, we don't care if a packet crosses a
980                  * page boundary.  For this reason, the original code to
981                  * check for and handle page crossings has been removed.
982                  */
983
984                 /*
985                  * Pass it to the upper layer.  The receive completion call
986                  * has been moved into this function.
987                  */
988                 hv_rf_on_receive(device, net_vsc_pkt);
989
990                 /*
991                  * Moved completion call back here so that all received 
992                  * messages (not just data messages) will trigger a response
993                  * message back to the host.
994                  */
995                 hv_nv_on_receive_completion(net_vsc_pkt);
996         }
997 }
998
999 /*
1000  * Net VSC send receive completion
1001  */
1002 static void
1003 hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid)
1004 {
1005         nvsp_msg rx_comp_msg;
1006         int retries = 0;
1007         int ret = 0;
1008         
1009         rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
1010
1011         /* Pass in the status */
1012         rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status =
1013             nvsp_status_success;
1014
1015 retry_send_cmplt:
1016         /* Send the completion */
1017         ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg,
1018             sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
1019         if (ret == 0) {
1020                 /* success */
1021                 /* no-op */
1022         } else if (ret == EAGAIN) {
1023                 /* no more room... wait a bit and attempt to retry 3 times */
1024                 retries++;
1025
1026                 if (retries < 4) {
1027                         DELAY(100);
1028                         goto retry_send_cmplt;
1029                 }
1030         }
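        /* After the retries are exhausted the completion is silently dropped. */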
1031 }
1032
1033 /*
1034  * Net VSC on receive completion
1035  *
1036  * Send a receive completion packet to RNDIS device (ie NetVsp)
1037  */
1038 void
1039 hv_nv_on_receive_completion(void *context)
1040 {
1041         netvsc_packet *packet = (netvsc_packet *)context;
1042         struct hv_device *device = (struct hv_device *)packet->device;
1043         netvsc_dev    *net_dev;
1044         uint64_t       tid = 0;
1045         boolean_t send_rx_completion = FALSE;
1046
1047         /*
1048          * Even though it seems logical to do a hv_nv_get_outbound_net_device()
1049          * here to send out receive completion, we are using
1050          * hv_nv_get_inbound_net_device() since we may have disabled
1051          * outbound traffic already.
1052          */
1053         net_dev = hv_nv_get_inbound_net_device(device);
1054         if (net_dev == NULL)
1055                 return;
1056         
1057         /* Overload the rx free-list lock to also protect the xfer page count. */
1058         mtx_lock_spin(&net_dev->rx_pkt_list_lock);
1059
1060         packet->xfer_page_pkt->count--;
1061
1062         /*
1063          * Last one in the line that represent 1 xfer page packet.
1064          * Return the xfer page packet itself to the free list.
1065          */
1066         if (packet->xfer_page_pkt->count == 0) {
1067                 send_rx_completion = TRUE;
1068                 tid = packet->compl.rx.rx_completion_tid;
1069                 STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
1070                     (netvsc_packet *)(packet->xfer_page_pkt), mylist_entry);
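                /*
                 * The xfer page packet was carved out of a netvsc_packet
                 * taken from this same free list in hv_nv_on_receive(), so
                 * it can simply be returned to the list here.
                 */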
1071         }
1072
1073         /* Put the packet back on the free list */
1074         STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, mylist_entry);
1075         mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
1076
1077         /* Send a receive completion for the xfer page packet */
1078         if (send_rx_completion)
1079                 hv_nv_send_receive_completion(device, tid);
1080 }
1081
1082 /*
1083  * Net VSC on channel callback
1084  */
1085 static void
1086 hv_nv_on_channel_callback(void *context)
1087 {
1088         /* Fixme:  Magic number */
1089         const int net_pkt_size = 2048;
1090         struct hv_device *device = (struct hv_device *)context;
1091         netvsc_dev *net_dev;
1092         uint32_t bytes_rxed;
1093         uint64_t request_id;
1094         uint8_t  *packet;
1095         hv_vm_packet_descriptor *desc;
1096         uint8_t *buffer;
1097         int     bufferlen = net_pkt_size;
1098         int     ret = 0;
1099
1100         packet = malloc(net_pkt_size * sizeof(uint8_t), M_DEVBUF, M_NOWAIT);
1101         if (!packet)
1102                 return;
1103
1104         buffer = packet;
1105
1106         net_dev = hv_nv_get_inbound_net_device(device);
1107         if (net_dev == NULL)
1108                 goto out;
1109
1110         do {
1111                 ret = hv_vmbus_channel_recv_packet_raw(device->channel,
1112                     buffer, bufferlen, &bytes_rxed, &request_id);
1113                 if (ret == 0) {
1114                         if (bytes_rxed > 0) {
1115                                 desc = (hv_vm_packet_descriptor *)buffer;
1116                                 switch (desc->type) {
1117                                 case HV_VMBUS_PACKET_TYPE_COMPLETION:
1118                                         hv_nv_on_send_completion(device, desc);
1119                                         break;
1120                                 case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
1121                                         hv_nv_on_receive(device, desc);
1122                                         break;
1123                                 default:
1124                                         break;
1125                                 }
1126                         } else {
1127                                 break;
1128                         }
1129                 } else if (ret == ENOBUFS) {
1130                         /* Handle large packet */
1131                         free(buffer, M_DEVBUF);
1132                         buffer = malloc(bytes_rxed, M_DEVBUF, M_NOWAIT);
1133                         if (buffer == NULL) {
1134                                 break;
1135                         }
1136                         bufferlen = bytes_rxed;
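                        /*
                         * The enlarged buffer is kept for the remaining
                         * iterations and freed at the "out" label below.
                         */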
1137                 }
1138         } while (1);
1139
1140 out:
1141         free(buffer, M_DEVBUF);
1142 }
1143