/*
 * sys/dev/hyperv/netvsc/hv_net_vsc.c
 * FreeBSD stable/10 (head r256279 copied to stable/10 as part of the
 * 10.0-RELEASE cycle).
 */
1 /*-
2  * Copyright (c) 2009-2012 Microsoft Corp.
3  * Copyright (c) 2010-2012 Citrix Inc.
4  * Copyright (c) 2012 NetApp Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 /**
30  * HyperV vmbus network VSC (virtual services client) module
31  *
32  */
33
34
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/socket.h>
38 #include <sys/lock.h>
39 #include <net/if.h>
40 #include <net/if_arp.h>
41 #include <machine/bus.h>
42 #include <machine/atomic.h>
43
44 #include <dev/hyperv/include/hyperv.h>
45 #include "hv_net_vsc.h"
46 #include "hv_rndis.h"
47 #include "hv_rndis_filter.h"
48
49
50 /*
51  * Forward declarations
52  */
53 static void hv_nv_on_channel_callback(void *context);
54 static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
55 static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
56 static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
57 static int  hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
58 static int  hv_nv_connect_to_vsp(struct hv_device *device);
59 static void hv_nv_on_send_completion(struct hv_device *device,
60                                      hv_vm_packet_descriptor *pkt);
61 static void hv_nv_on_receive(struct hv_device *device,
62                              hv_vm_packet_descriptor *pkt);
63 static void hv_nv_send_receive_completion(struct hv_device *device,
64                                           uint64_t tid);
65
66
67 /*
68  *
69  */
70 static inline netvsc_dev *
71 hv_nv_alloc_net_device(struct hv_device *device)
72 {
73         netvsc_dev *net_dev;
74         hn_softc_t *sc = device_get_softc(device->device);
75
76         net_dev = malloc(sizeof(netvsc_dev), M_DEVBUF, M_NOWAIT | M_ZERO);
77         if (net_dev == NULL) {
78                 return (NULL);
79         }
80
81         net_dev->dev = device;
82         net_dev->destroy = FALSE;
83         sc->net_dev = net_dev;
84
85         return (net_dev);
86 }
87
88 /*
89  *
90  */
91 static inline netvsc_dev *
92 hv_nv_get_outbound_net_device(struct hv_device *device)
93 {
94         hn_softc_t *sc = device_get_softc(device->device);
95         netvsc_dev *net_dev = sc->net_dev;;
96
97         if ((net_dev != NULL) && net_dev->destroy) {
98                 return (NULL);
99         }
100
101         return (net_dev);
102 }
103
104 /*
105  *
106  */
107 static inline netvsc_dev *
108 hv_nv_get_inbound_net_device(struct hv_device *device)
109 {
110         hn_softc_t *sc = device_get_softc(device->device);
111         netvsc_dev *net_dev = sc->net_dev;;
112
113         if (net_dev == NULL) {
114                 return (net_dev);
115         }
116         /*
117          * When the device is being destroyed; we only
118          * permit incoming packets if and only if there
119          * are outstanding sends.
120          */
121         if (net_dev->destroy && net_dev->num_outstanding_sends == 0) {
122                 return (NULL);
123         }
124
125         return (net_dev);
126 }
127
128 /*
129  * Net VSC initialize receive buffer with net VSP
130  * 
131  * Net VSP:  Network virtual services client, also known as the
132  *     Hyper-V extensible switch and the synthetic data path.
133  */
134 static int 
135 hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
136 {
137         netvsc_dev *net_dev;
138         nvsp_msg *init_pkt;
139         int ret = 0;
140
141         net_dev = hv_nv_get_outbound_net_device(device);
142         if (!net_dev) {
143                 return (ENODEV);
144         }
145
146         net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF,
147             M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
148         if (net_dev->rx_buf == NULL) {
149                 ret = ENOMEM;
150                 goto cleanup;
151         }
152
153         /*
154          * Establish the GPADL handle for this buffer on this channel.
155          * Note:  This call uses the vmbus connection rather than the
156          * channel to establish the gpadl handle. 
157          * GPADL:  Guest physical address descriptor list.
158          */
159         ret = hv_vmbus_channel_establish_gpadl(
160                 device->channel, net_dev->rx_buf,
161                 net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle);
162         if (ret != 0) {
163                 goto cleanup;
164         }
165         
166         /* sema_wait(&ext->channel_init_sema); KYS CHECK */
167
168         /* Notify the NetVsp of the gpadl handle */
169         init_pkt = &net_dev->channel_init_packet;
170
171         memset(init_pkt, 0, sizeof(nvsp_msg));
172
173         init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf;
174         init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
175             net_dev->rx_buf_gpadl_handle;
176         init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
177             NETVSC_RECEIVE_BUFFER_ID;
178
179         /* Send the gpadl notification request */
180
181         ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
182             sizeof(nvsp_msg), (uint64_t)init_pkt,
183             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
184             HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
185         if (ret != 0) {
186                 goto cleanup;
187         }
188
189         sema_wait(&net_dev->channel_init_sema);
190
191         /* Check the response */
192         if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status
193             != nvsp_status_success) {
194                 ret = EINVAL;
195                 goto cleanup;
196         }
197
198         net_dev->rx_section_count =
199             init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;
200
201         net_dev->rx_sections = malloc(net_dev->rx_section_count *
202             sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT);
203         if (net_dev->rx_sections == NULL) {
204                 ret = EINVAL;
205                 goto cleanup;
206         }
207         memcpy(net_dev->rx_sections, 
208             init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
209             net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));
210
211
212         /*
213          * For first release, there should only be 1 section that represents
214          * the entire receive buffer
215          */
216         if (net_dev->rx_section_count != 1
217             || net_dev->rx_sections->offset != 0) {
218                 ret = EINVAL;
219                 goto cleanup;
220         }
221
222         goto exit;
223
224 cleanup:
225         hv_nv_destroy_rx_buffer(net_dev);
226         
227 exit:
228         return (ret);
229 }
230
/*
 * Net VSC initialize send buffer with net VSP
 *
 * Allocates the shared send buffer, establishes a GPADL handle for it,
 * notifies the NetVSP, and records the section size the host reports
 * back.  Returns 0 or an errno; on failure, any partially created
 * state is torn down via hv_nv_destroy_send_buffer().
 */
static int 
hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
{
	netvsc_dev *net_dev;
	nvsp_msg *init_pkt;
	int ret = 0;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (!net_dev) {
		return (ENODEV);
	}

	net_dev->send_buf  = contigmalloc(net_dev->send_buf_size, M_DEVBUF,
	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	if (net_dev->send_buf == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/*
	 * Establish the gpadl handle for this buffer on this channel.
	 * Note:  This call uses the vmbus connection rather than the
	 * channel to establish the gpadl handle. 
	 */
	ret = hv_vmbus_channel_establish_gpadl(device->channel,
	    net_dev->send_buf, net_dev->send_buf_size,
	    &net_dev->send_buf_gpadl_handle);
	if (ret != 0) {
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */

	init_pkt = &net_dev->channel_init_packet;

	memset(init_pkt, 0, sizeof(nvsp_msg));

	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf;
	/*
	 * NOTE(review): the handle/id are written through the
	 * vers_1_msgs.send_rx_buf view of the message union rather than a
	 * send_send_buf view; presumably both layouts are identical —
	 * confirm against the nvsp_msg definition in hv_net_vsc.h.
	 */
	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
	    net_dev->send_buf_gpadl_handle;
	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
	    NETVSC_SEND_BUFFER_ID;

	/* Send the gpadl notification request */

	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		goto cleanup;
	}

	/*
	 * The channel callback copies the host's completion into
	 * channel_init_packet and posts channel_init_sema.
	 */
	sema_wait(&net_dev->channel_init_sema);

	/* Check the response */
	if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status
	    != nvsp_status_success) {
		ret = EINVAL;
		goto cleanup;
	}

	net_dev->send_section_size =
	    init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size;

	goto exit;

cleanup:
	hv_nv_destroy_send_buffer(net_dev);
	
exit:
	return (ret);
}
307
/*
 * Net VSC destroy receive buffer
 *
 * Reverses hv_nv_init_rx_buffer_with_net_vsp(): revokes the buffer
 * with the host (if it was announced), tears down the GPADL, and
 * frees the buffer and section array.  Each step is conditional on
 * the corresponding state being set, so this is safe to call on a
 * partially initialized net_dev.
 */
static int
hv_nv_destroy_rx_buffer(netvsc_dev *net_dev)
{
	nvsp_msg *revoke_pkt;
	int ret = 0;

	/*
	 * If we got a section count, it means we received a
	 * send_rx_buf_complete msg 
	 * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
	 * we need to send a revoke msg here
	 */
	if (net_dev->rx_section_count) {
		/* Send the revoke receive buffer */
		revoke_pkt = &net_dev->revoke_packet;
		memset(revoke_pkt, 0, sizeof(nvsp_msg));

		revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf;
		revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id =
		    NETVSC_RECEIVE_BUFFER_ID;

		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
		    revoke_pkt, sizeof(nvsp_msg),
		    (uint64_t)revoke_pkt,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);

		/*
		 * If we failed here, we might as well return and have a leak 
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
	}
		
	/* Tear down the gpadl on the vsp end */
	if (net_dev->rx_buf_gpadl_handle) {
		/* "gpdal" spelling matches the vmbus API name */
		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
		    net_dev->rx_buf_gpadl_handle);
		/*
		 * If we failed here, we might as well return and have a leak 
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
		net_dev->rx_buf_gpadl_handle = 0;
	}

	if (net_dev->rx_buf) {
		/* Free up the receive buffer */
		contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF);
		net_dev->rx_buf = NULL;
	}

	if (net_dev->rx_sections) {
		free(net_dev->rx_sections, M_DEVBUF);
		net_dev->rx_sections = NULL;
		net_dev->rx_section_count = 0;
	}

	return (ret);
}
374
/*
 * Net VSC destroy send buffer
 *
 * Reverses hv_nv_init_send_buffer_with_net_vsp(): revokes the buffer
 * with the host (if it was announced), tears down the GPADL, and
 * frees the buffer.  Each step is conditional on the corresponding
 * state being set, so this is safe on a partially initialized net_dev.
 */
static int
hv_nv_destroy_send_buffer(netvsc_dev *net_dev)
{
	nvsp_msg *revoke_pkt;
	int ret = 0;

	/*
	 * If we got a section size, it means we received a
	 * send_send_buf_complete msg
	 * (ie sent nvsp_msg_1_type_send_send_buf msg) therefore,
	 * we need to send a revoke msg here
	 */
	if (net_dev->send_section_size) {
		/* Send the revoke send buffer */
		revoke_pkt = &net_dev->revoke_packet;
		memset(revoke_pkt, 0, sizeof(nvsp_msg));

		revoke_pkt->hdr.msg_type =
		    nvsp_msg_1_type_revoke_send_buf;
		revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id =
		    NETVSC_SEND_BUFFER_ID;

		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
		    revoke_pkt, sizeof(nvsp_msg),
		    (uint64_t)revoke_pkt,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
		/*
		 * If we failed here, we might as well return and have a leak 
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
	}
		
	/* Tear down the gpadl on the vsp end */
	if (net_dev->send_buf_gpadl_handle) {
		/* "gpdal" spelling matches the vmbus API name */
		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
		    net_dev->send_buf_gpadl_handle);

		/*
		 * If we failed here, we might as well return and have a leak 
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
		net_dev->send_buf_gpadl_handle = 0;
	}

	if (net_dev->send_buf) {
		/* Free up the send buffer */
		contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF);
		net_dev->send_buf = NULL;
	}

	return (ret);
}
436
437
438 /*
439  * Attempt to negotiate the caller-specified NVSP version
440  *
441  * For NVSP v2, Server 2008 R2 does not set
442  * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
443  * to the negotiated version, so we cannot rely on that.
444  */
445 static int
446 hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev,
447                               uint32_t nvsp_ver)
448 {
449         nvsp_msg *init_pkt;
450         int ret;
451
452         init_pkt = &net_dev->channel_init_packet;
453         memset(init_pkt, 0, sizeof(nvsp_msg));
454         init_pkt->hdr.msg_type = nvsp_msg_type_init;
455
456         /*
457          * Specify parameter as the only acceptable protocol version
458          */
459         init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver;
460         init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver;
461
462         /* Send the init request */
463         ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
464             sizeof(nvsp_msg), (uint64_t)init_pkt,
465             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
466             HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
467         if (ret != 0)
468                 return (-1);
469
470         sema_wait(&net_dev->channel_init_sema);
471
472         if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success)
473                 return (EINVAL);
474
475         return (0);
476 }
477
478 /*
479  * Send NDIS version 2 config packet containing MTU.
480  *
481  * Not valid for NDIS version 1.
482  */
483 static int
484 hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu)
485 {
486         netvsc_dev *net_dev;
487         nvsp_msg *init_pkt;
488         int ret;
489
490         net_dev = hv_nv_get_outbound_net_device(device);
491         if (!net_dev)
492                 return (-ENODEV);
493
494         /*
495          * Set up configuration packet, write MTU
496          * Indicate we are capable of handling VLAN tags
497          */
498         init_pkt = &net_dev->channel_init_packet;
499         memset(init_pkt, 0, sizeof(nvsp_msg));
500         init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config;
501         init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu;
502         init_pkt->
503                 msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q
504                 = 1;
505
506         /* Send the configuration packet */
507         ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
508             sizeof(nvsp_msg), (uint64_t)init_pkt,
509             HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
510         if (ret != 0)
511                 return (-EINVAL);
512
513         return (0);
514 }
515
/*
 * Net VSC connect to VSP
 *
 * Negotiates the NVSP protocol version (v2, falling back to v1),
 * optionally sends the NDIS config (MTU, VLAN capability), sends the
 * NDIS version, and then sets up the shared receive and send buffers.
 * Returns 0 or an errno.
 */
static int
hv_nv_connect_to_vsp(struct hv_device *device)
{
	netvsc_dev *net_dev;
	nvsp_msg *init_pkt;
	uint32_t nvsp_vers;
	uint32_t ndis_version;
	int ret = 0;
	device_t dev = device->device;
	hn_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->arpcom.ac_ifp;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (!net_dev) {
		return (ENODEV);
	}

	/*
	 * Negotiate the NVSP version.  Try NVSP v2 first.
	 */
	nvsp_vers = NVSP_PROTOCOL_VERSION_2;
	ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
	if (ret != 0) {
		/* NVSP v2 failed, try NVSP v1 */
		nvsp_vers = NVSP_PROTOCOL_VERSION_1;
		ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
		if (ret != 0) {
			/* NVSP v1 failed, return bad status */
			return (ret);
		}
	}
	net_dev->nvsp_version = nvsp_vers;

	/*
	 * Set the MTU if supported by this NVSP protocol version
	 * This needs to be right after the NVSP init message per Haiyang
	 *
	 * NOTE(review): the return value is assigned but never checked;
	 * a failure here is effectively ignored (ret is overwritten by
	 * the next send below) — presumably best-effort; confirm.
	 */
	if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2)
		ret = hv_nv_send_ndis_config(device, ifp->if_mtu);

	/*
	 * Send the NDIS version
	 */
	init_pkt = &net_dev->channel_init_packet;

	memset(init_pkt, 0, sizeof(nvsp_msg));

	/*
	 * Updated to version 5.1, minimum, for VLAN per Haiyang
	 */
	ndis_version = NDIS_VERSION;

	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers;
	/* NDIS version is packed as major << 16 | minor */
	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers =
	    (ndis_version & 0xFFFF0000) >> 16;
	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers =
	    ndis_version & 0xFFFF;

	/* Send the init request */

	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
	if (ret != 0) {
		goto cleanup;
	}
	/*
	 * TODO:  BUGBUG - We have to wait for the above msg since the netvsp
	 * uses KMCL which acknowledges packet (completion packet) 
	 * since our Vmbus always set the
	 * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag
	 */
	/* sema_wait(&NetVscChannel->channel_init_sema); */

	/* Post the big receive buffer to NetVSP */
	ret = hv_nv_init_rx_buffer_with_net_vsp(device);
	if (ret == 0)
		ret = hv_nv_init_send_buffer_with_net_vsp(device);

cleanup:
	return (ret);
}
601
/*
 * Net VSC disconnect from VSP
 *
 * Revokes and frees both shared buffers (receive first, then send).
 * Each destroy routine checks which state was actually set up, so
 * this is safe after a partially failed connect.
 */
static void
hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
{
	hv_nv_destroy_rx_buffer(net_dev);
	hv_nv_destroy_send_buffer(net_dev);
}
611
/*
 * Net VSC on device add
 * 
 * Callback when the device belonging to this driver is added
 *
 * Allocates the netvsc_dev, pre-allocates the receive packet free
 * list, opens the vmbus channel, and connects to the NetVSP.
 * Returns the new netvsc_dev, or NULL on failure (all resources
 * released).  additional_info is unused here.
 */
netvsc_dev *
hv_nv_on_device_add(struct hv_device *device, void *additional_info)
{
	netvsc_dev *net_dev;
	netvsc_packet *packet;
	netvsc_packet *next_packet;
	int i, ret = 0;

	net_dev = hv_nv_alloc_net_device(device);
	if (!net_dev)
		goto cleanup;

	/* Initialize the NetVSC channel extension */
	net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
	mtx_init(&net_dev->rx_pkt_list_lock, "HV-RPL", NULL,
	    MTX_SPIN | MTX_RECURSE);

	net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;

	/* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
	STAILQ_INIT(&net_dev->myrx_packet_list);

	/* 
	 * malloc a sufficient number of netvsc_packet buffers to hold
	 * a packet list.  Add them to the netvsc device packet queue.
	 *
	 * NOTE(review): an allocation failure here just breaks out of
	 * the loop, leaving a shorter free list — presumably a deliberate
	 * best-effort; the device still comes up with fewer rx packets.
	 */
	for (i=0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
		packet = malloc(sizeof(netvsc_packet) +
		    (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!packet) {
			break;
		}
		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet,
		    mylist_entry);
	}

	/* Counting semaphore used to wait for NVSP init/buffer replies */
	sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");

	/*
	 * Open the channel
	 */
	ret = hv_vmbus_channel_open(device->channel,
	    NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
	    NULL, 0, hv_nv_on_channel_callback, device);
	if (ret != 0)
		goto cleanup;

	/*
	 * Connect with the NetVsp
	 */
	ret = hv_nv_connect_to_vsp(device);
	if (ret != 0)
		goto close;

	return (net_dev);

close:
	/* Now, we can close the channel safely */

	hv_vmbus_channel_close(device->channel);

cleanup:
	/*
	 * Free the packet buffers on the netvsc device packet queue.
	 * Release other resources.
	 */
	if (net_dev) {
		sema_destroy(&net_dev->channel_init_sema);

		packet = STAILQ_FIRST(&net_dev->myrx_packet_list);
		while (packet != NULL) {
			next_packet = STAILQ_NEXT(packet, mylist_entry);
			free(packet, M_DEVBUF);
			packet = next_packet;
		}
		/* Reset the list to initial state */
		STAILQ_INIT(&net_dev->myrx_packet_list);

		mtx_destroy(&net_dev->rx_pkt_list_lock);

		free(net_dev, M_DEVBUF);
	}

	return (NULL);
}
703
704 /*
705  * Net VSC on device remove
706  */
707 int
708 hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
709 {
710         netvsc_packet *net_vsc_pkt;
711         netvsc_packet *next_net_vsc_pkt;
712         hn_softc_t *sc = device_get_softc(device->device);
713         netvsc_dev *net_dev = sc->net_dev;;
714         
715         /* Stop outbound traffic ie sends and receives completions */
716         mtx_lock(&device->channel->inbound_lock);
717         net_dev->destroy = TRUE;
718         mtx_unlock(&device->channel->inbound_lock);
719
720         /* Wait for all send completions */
721         while (net_dev->num_outstanding_sends) {
722                 DELAY(100);
723         }
724
725         hv_nv_disconnect_from_vsp(net_dev);
726
727         /* At this point, no one should be accessing net_dev except in here */
728
729         /* Now, we can close the channel safely */
730
731         if (!destroy_channel) {
732                 device->channel->state =
733                     HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
734         }
735
736         hv_vmbus_channel_close(device->channel);
737
738         /* Release all resources */
739         net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
740         while (net_vsc_pkt != NULL) {
741                 next_net_vsc_pkt = STAILQ_NEXT(net_vsc_pkt, mylist_entry);
742                 free(net_vsc_pkt, M_DEVBUF);
743                 net_vsc_pkt = next_net_vsc_pkt;
744         }
745
746         /* Reset the list to initial state */
747         STAILQ_INIT(&net_dev->myrx_packet_list);
748
749         mtx_destroy(&net_dev->rx_pkt_list_lock);
750         sema_destroy(&net_dev->channel_init_sema);
751         free(net_dev, M_DEVBUF);
752
753         return (0);
754 }
755
/*
 * Net VSC on send completion
 *
 * Dispatches a completion packet from the host.  Init and buffer
 * completions are copied into channel_init_packet and the waiter on
 * channel_init_sema is released; RNDIS packet completions invoke the
 * upper layer's callback and decrement the outstanding-send count.
 */
static void 
hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt)
{
	netvsc_dev *net_dev;
	nvsp_msg *nvsp_msg_pkt;
	netvsc_packet *net_vsc_pkt;

	net_dev = hv_nv_get_inbound_net_device(device);
	if (!net_dev) {
		return;
	}

	/* The nvsp payload follows the descriptor at data_offset8 * 8 */
	nvsp_msg_pkt =
	    (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));

	if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete
		|| nvsp_msg_pkt->hdr.msg_type
			== nvsp_msg_1_type_send_rx_buf_complete
		|| nvsp_msg_pkt->hdr.msg_type
			== nvsp_msg_1_type_send_send_buf_complete) {
		/* Copy the response back */
		memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
		    sizeof(nvsp_msg));                  
		/* Wake the thread blocked in the init/buffer-setup path */
		sema_post(&net_dev->channel_init_sema);
	} else if (nvsp_msg_pkt->hdr.msg_type ==
				   nvsp_msg_1_type_send_rndis_pkt_complete) {
		/*
		 * Get the send context; hv_nv_on_send() passed the
		 * netvsc_packet pointer as the transaction id.
		 */
		net_vsc_pkt =
		    (netvsc_packet *)(unsigned long)pkt->transaction_id;

		/* Notify the layer above us */
		net_vsc_pkt->compl.send.on_send_completion(
		    net_vsc_pkt->compl.send.send_completion_context);

		atomic_subtract_int(&net_dev->num_outstanding_sends, 1);
	}
}
796
797 /*
798  * Net VSC on send
799  * Sends a packet on the specified Hyper-V device.
800  * Returns 0 on success, non-zero on failure.
801  */
802 int
803 hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
804 {
805         netvsc_dev *net_dev;
806         nvsp_msg send_msg;
807         int ret;
808
809         net_dev = hv_nv_get_outbound_net_device(device);
810         if (!net_dev)
811                 return (ENODEV);
812
813         send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
814         if (pkt->is_data_pkt) {
815                 /* 0 is RMC_DATA */
816                 send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0;
817         } else {
818                 /* 1 is RMC_CONTROL */
819                 send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1;
820         }
821
822         /* Not using send buffer section */
823         send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx =
824             0xFFFFFFFF;
825         send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0;
826
827         if (pkt->page_buf_count) {
828                 ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel,
829                     pkt->page_buffers, pkt->page_buf_count,
830                     &send_msg, sizeof(nvsp_msg), (uint64_t)pkt);
831         } else {
832                 ret = hv_vmbus_channel_send_packet(device->channel,
833                     &send_msg, sizeof(nvsp_msg), (uint64_t)pkt,
834                     HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
835                     HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
836         }
837
838         /* Record outstanding send only if send_packet() succeeded */
839         if (ret == 0)
840                 atomic_add_int(&net_dev->num_outstanding_sends, 1);
841
842         return (ret);
843 }
844
845 /*
846  * Net VSC on receive
847  *
848  * In the FreeBSD Hyper-V virtual world, this function deals exclusively
849  * with virtual addresses.
850  */
851 static void 
852 hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt)
853 {
854         netvsc_dev *net_dev;
855         hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
856         nvsp_msg *nvsp_msg_pkt;
857         netvsc_packet *net_vsc_pkt = NULL;
858         unsigned long start;
859         xfer_page_packet *xfer_page_pkt = NULL;
860         STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head =
861             STAILQ_HEAD_INITIALIZER(mylist_head);
862         int count = 0;
863         int i = 0;
864
865         net_dev = hv_nv_get_inbound_net_device(device);
866         if (!net_dev)
867                 return;
868
869         /*
870          * All inbound packets other than send completion should be
871          * xfer page packet.
872          */
873         if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)
874                 return;
875
876         nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt
877                 + (pkt->data_offset8 << 3));
878
879         /* Make sure this is a valid nvsp packet */
880         if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt)
881                 return;
882         
883         vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt;
884
885         if (vm_xfer_page_pkt->transfer_page_set_id
886                 != NETVSC_RECEIVE_BUFFER_ID) {
887                 return;
888         }
889
890         STAILQ_INIT(&mylist_head);
891
892         /*
893          * Grab free packets (range count + 1) to represent this xfer page
894          * packet.  +1 to represent the xfer page packet itself.  We grab it
895          * here so that we know exactly how many we can fulfill.
896          */
897         mtx_lock_spin(&net_dev->rx_pkt_list_lock);
898         while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) {     
899                 net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
900                 STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry);
901
902                 STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry);
903
904                 if (++count == vm_xfer_page_pkt->range_count + 1)
905                         break;
906         }
907
908         mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
909
910         /*
911          * We need at least 2 netvsc pkts (1 to represent the xfer page
912          * and at least 1 for the range) i.e. we can handle some of the
913          * xfer page packet ranges...
914          */
915         if (count < 2) {
916                 /* Return netvsc packet to the freelist */
917                 mtx_lock_spin(&net_dev->rx_pkt_list_lock);
918                 for (i=count; i != 0; i--) {
919                         net_vsc_pkt = STAILQ_FIRST(&mylist_head);
920                         STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
921
922                         STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
923                             net_vsc_pkt, mylist_entry);
924                 }
925                 mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
926
927                 hv_nv_send_receive_completion(device,
928                     vm_xfer_page_pkt->d.transaction_id);
929
930                 return;
931         }
932
933         /* Take the first packet in the list */
934         xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head);
935         STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
936
937         /* This is how many data packets we can supply */
938         xfer_page_pkt->count = count - 1;
939
940         /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
941         for (i=0; i < (count - 1); i++) {
942                 net_vsc_pkt = STAILQ_FIRST(&mylist_head);
943                 STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
944
945                 /*
946                  * Initialize the netvsc packet
947                  */
948                 net_vsc_pkt->xfer_page_pkt = xfer_page_pkt;
949                 net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt;
950                 net_vsc_pkt->device = device;
951                 /* Save this so that we can send it back */
952                 net_vsc_pkt->compl.rx.rx_completion_tid =
953                     vm_xfer_page_pkt->d.transaction_id;
954
955                 net_vsc_pkt->tot_data_buf_len =
956                     vm_xfer_page_pkt->ranges[i].byte_count;
957                 net_vsc_pkt->page_buf_count = 1;
958
959                 net_vsc_pkt->page_buffers[0].length =
960                     vm_xfer_page_pkt->ranges[i].byte_count;
961
962                 /* The virtual address of the packet in the receive buffer */
963                 start = ((unsigned long)net_dev->rx_buf +
964                     vm_xfer_page_pkt->ranges[i].byte_offset);
965                 start = ((unsigned long)start) & ~(PAGE_SIZE - 1);
966
967                 /* Page number of the virtual page containing packet start */
968                 net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
969
970                 /* Calculate the page relative offset */
971                 net_vsc_pkt->page_buffers[0].offset =
972                     vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
973
974                 /*
975                  * In this implementation, we are dealing with virtual
976                  * addresses exclusively.  Since we aren't using physical
977                  * addresses at all, we don't care if a packet crosses a
978                  * page boundary.  For this reason, the original code to
979                  * check for and handle page crossings has been removed.
980                  */
981
982                 /*
983                  * Pass it to the upper layer.  The receive completion call
984                  * has been moved into this function.
985                  */
986                 hv_rf_on_receive(device, net_vsc_pkt);
987
988                 /*
989                  * Moved completion call back here so that all received 
990                  * messages (not just data messages) will trigger a response
991                  * message back to the host.
992                  */
993                 hv_nv_on_receive_completion(net_vsc_pkt);
994         }
995 }
996
997 /*
998  * Net VSC send receive completion
999  */
1000 static void
1001 hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid)
1002 {
1003         nvsp_msg rx_comp_msg;
1004         int retries = 0;
1005         int ret = 0;
1006         
1007         rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
1008
1009         /* Pass in the status */
1010         rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status =
1011             nvsp_status_success;
1012
1013 retry_send_cmplt:
1014         /* Send the completion */
1015         ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg,
1016             sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
1017         if (ret == 0) {
1018                 /* success */
1019                 /* no-op */
1020         } else if (ret == EAGAIN) {
1021                 /* no more room... wait a bit and attempt to retry 3 times */
1022                 retries++;
1023
1024                 if (retries < 4) {
1025                         DELAY(100);
1026                         goto retry_send_cmplt;
1027                 }
1028         }
1029 }
1030
1031 /*
1032  * Net VSC on receive completion
1033  *
1034  * Send a receive completion packet to RNDIS device (ie NetVsp)
1035  */
1036 void
1037 hv_nv_on_receive_completion(void *context)
1038 {
1039         netvsc_packet *packet = (netvsc_packet *)context;
1040         struct hv_device *device = (struct hv_device *)packet->device;
1041         netvsc_dev    *net_dev;
1042         uint64_t       tid = 0;
1043         boolean_t send_rx_completion = FALSE;
1044
1045         /*
1046          * Even though it seems logical to do a hv_nv_get_outbound_net_device()
1047          * here to send out receive completion, we are using
1048          * hv_nv_get_inbound_net_device() since we may have disabled
1049          * outbound traffic already.
1050          */
1051         net_dev = hv_nv_get_inbound_net_device(device);
1052         if (net_dev == NULL)
1053                 return;
1054         
1055         /* Overloading use of the lock. */
1056         mtx_lock_spin(&net_dev->rx_pkt_list_lock);
1057
1058         packet->xfer_page_pkt->count--;
1059
1060         /*
1061          * Last one in the line that represent 1 xfer page packet.
1062          * Return the xfer page packet itself to the free list.
1063          */
1064         if (packet->xfer_page_pkt->count == 0) {
1065                 send_rx_completion = TRUE;
1066                 tid = packet->compl.rx.rx_completion_tid;
1067                 STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
1068                     (netvsc_packet *)(packet->xfer_page_pkt), mylist_entry);
1069         }
1070
1071         /* Put the packet back on the free list */
1072         STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, mylist_entry);
1073         mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
1074
1075         /* Send a receive completion for the xfer page packet */
1076         if (send_rx_completion)
1077                 hv_nv_send_receive_completion(device, tid);
1078 }
1079
1080 /*
1081  * Net VSC on channel callback
1082  */
1083 static void
1084 hv_nv_on_channel_callback(void *context)
1085 {
1086         /* Fixme:  Magic number */
1087         const int net_pkt_size = 2048;
1088         struct hv_device *device = (struct hv_device *)context;
1089         netvsc_dev *net_dev;
1090         uint32_t bytes_rxed;
1091         uint64_t request_id;
1092         uint8_t  *packet;
1093         hv_vm_packet_descriptor *desc;
1094         uint8_t *buffer;
1095         int     bufferlen = net_pkt_size;
1096         int     ret = 0;
1097
1098         packet = malloc(net_pkt_size * sizeof(uint8_t), M_DEVBUF, M_NOWAIT);
1099         if (!packet)
1100                 return;
1101
1102         buffer = packet;
1103
1104         net_dev = hv_nv_get_inbound_net_device(device);
1105         if (net_dev == NULL)
1106                 goto out;
1107
1108         do {
1109                 ret = hv_vmbus_channel_recv_packet_raw(device->channel,
1110                     buffer, bufferlen, &bytes_rxed, &request_id);
1111                 if (ret == 0) {
1112                         if (bytes_rxed > 0) {
1113                                 desc = (hv_vm_packet_descriptor *)buffer;
1114                                 switch (desc->type) {
1115                                 case HV_VMBUS_PACKET_TYPE_COMPLETION:
1116                                         hv_nv_on_send_completion(device, desc);
1117                                         break;
1118                                 case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
1119                                         hv_nv_on_receive(device, desc);
1120                                         break;
1121                                 default:
1122                                         break;
1123                                 }
1124                         } else {
1125                                 break;
1126                         }
1127                 } else if (ret == ENOBUFS) {
1128                         /* Handle large packet */
1129                         free(buffer, M_DEVBUF);
1130                         buffer = malloc(bytes_rxed, M_DEVBUF, M_NOWAIT);
1131                         if (buffer == NULL) {
1132                                 break;
1133                         }
1134                         bufferlen = bytes_rxed;
1135                 }
1136         } while (1);
1137
1138 out:
1139         free(buffer, M_DEVBUF);
1140 }
1141