]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/dev/hyperv/vmbus/hv_channel_mgmt.c
MFC 301017,301018,301019,301020,301021,301022,301106
[FreeBSD/stable/10.git] / sys / dev / hyperv / vmbus / hv_channel_mgmt.c
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/mbuf.h>
36 #include <sys/mutex.h>
37
38 #include <dev/hyperv/vmbus/hv_vmbus_priv.h>
39 #include <dev/hyperv/vmbus/vmbus_reg.h>
40 #include <dev/hyperv/vmbus/vmbus_var.h>
41
42 /*
43  * Internal functions
44  */
45
46 static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr);
47 static void vmbus_channel_on_offer_internal(void* context);
48 static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr);
49 static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr);
50 static void vmbus_channel_on_offer_rescind_internal(void* context);
51 static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr);
52 static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
53 static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
54 static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
55
56 /**
57  * Channel message dispatch table
58  */
59 hv_vmbus_channel_msg_table_entry
60     g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = {
61         { HV_CHANNEL_MESSAGE_INVALID,
62                 NULL },
63         { HV_CHANNEL_MESSAGE_OFFER_CHANNEL,
64                 vmbus_channel_on_offer },
65         { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER,
66                 vmbus_channel_on_offer_rescind },
67         { HV_CHANNEL_MESSAGE_REQUEST_OFFERS,
68                 NULL },
69         { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED,
70                 vmbus_channel_on_offers_delivered },
71         { HV_CHANNEL_MESSAGE_OPEN_CHANNEL,
72                 NULL },
73         { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT,
74                 vmbus_channel_on_open_result },
75         { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL,
76                 NULL },
77         { HV_CHANNEL_MESSAGEL_GPADL_HEADER,
78                 NULL },
79         { HV_CHANNEL_MESSAGE_GPADL_BODY,
80                 NULL },
81         { HV_CHANNEL_MESSAGE_GPADL_CREATED,
82                 vmbus_channel_on_gpadl_created },
83         { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN,
84                 NULL },
85         { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN,
86                 vmbus_channel_on_gpadl_torndown },
87         { HV_CHANNEL_MESSAGE_REL_ID_RELEASED,
88                 NULL },
89         { HV_CHANNEL_MESSAGE_INITIATED_CONTACT,
90                 NULL },
91         { HV_CHANNEL_MESSAGE_VERSION_RESPONSE,
92                 vmbus_channel_on_version_response },
93         { HV_CHANNEL_MESSAGE_UNLOAD,
94                 NULL }
95 };
96
97 typedef struct hv_work_item {
98         struct task     work;
99         void            (*callback)(void *);
100         void*           context;
101 } hv_work_item;
102
103 static struct mtx       vmbus_chwait_lock;
104 MTX_SYSINIT(vmbus_chwait_lk, &vmbus_chwait_lock, "vmbus primarych wait lock",
105     MTX_DEF);
106 static uint32_t         vmbus_chancnt;
107 static uint32_t         vmbus_devcnt;
108
109 #define VMBUS_CHANCNT_DONE      0x80000000
110
111 /**
112  * Implementation of the work abstraction.
113  */
114 static void
115 work_item_callback(void *work, int pending)
116 {
117         struct hv_work_item *w = (struct hv_work_item *)work;
118
119         w->callback(w->context);
120
121         free(w, M_DEVBUF);
122 }
123
124 /**
125  * @brief Create work item
126  */
127 static int
128 hv_queue_work_item(
129         void (*callback)(void *), void *context)
130 {
131         struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
132                                         M_DEVBUF, M_NOWAIT);
133         KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
134         if (w == NULL)
135             return (ENOMEM);
136
137         w->callback = callback;
138         w->context = context;
139
140         TASK_INIT(&w->work, 0, work_item_callback, w);
141
142         return (taskqueue_enqueue(taskqueue_thread, &w->work));
143 }
144
145
146 /**
147  * @brief Allocate and initialize a vmbus channel object
148  */
149 hv_vmbus_channel*
150 hv_vmbus_allocate_channel(void)
151 {
152         hv_vmbus_channel* channel;
153
154         channel = (hv_vmbus_channel*) malloc(
155                                         sizeof(hv_vmbus_channel),
156                                         M_DEVBUF,
157                                         M_WAITOK | M_ZERO);
158
159         mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
160         TAILQ_INIT(&channel->sc_list_anchor);
161
162         return (channel);
163 }
164
165 /**
166  * @brief Release the resources used by the vmbus channel object
167  */
168 void
169 hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
170 {
171         mtx_destroy(&channel->sc_lock);
172         free(channel, M_DEVBUF);
173 }
174
175 /**
176  * @brief Process the offer by creating a channel/device
177  * associated with this offer
178  */
179 static void
180 vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
181 {
182         hv_vmbus_channel*       channel;
183         int                     ret;
184         uint32_t                relid;
185
186         relid = new_channel->offer_msg.child_rel_id;
187         /*
188          * Make sure this is a new offer
189          */
190         mtx_lock(&hv_vmbus_g_connection.channel_lock);
191         if (relid == 0) {
192                 /*
193                  * XXX channel0 will not be processed; skip it.
194                  */
195                 printf("VMBUS: got channel0 offer\n");
196         } else {
197                 hv_vmbus_g_connection.channels[relid] = new_channel;
198         }
199
200         TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
201             list_entry) {
202                 if (memcmp(&channel->offer_msg.offer.interface_type,
203                     &new_channel->offer_msg.offer.interface_type,
204                     sizeof(hv_guid)) == 0 &&
205                     memcmp(&channel->offer_msg.offer.interface_instance,
206                     &new_channel->offer_msg.offer.interface_instance,
207                     sizeof(hv_guid)) == 0)
208                         break;
209         }
210
211         if (channel == NULL) {
212                 /* Install the new primary channel */
213                 TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
214                     new_channel, list_entry);
215         }
216         mtx_unlock(&hv_vmbus_g_connection.channel_lock);
217
218         if (channel != NULL) {
219                 /*
220                  * Check if this is a sub channel.
221                  */
222                 if (new_channel->offer_msg.offer.sub_channel_index != 0) {
223                         /*
224                          * It is a sub channel offer, process it.
225                          */
226                         new_channel->primary_channel = channel;
227                         new_channel->device = channel->device;
228                         mtx_lock(&channel->sc_lock);
229                         TAILQ_INSERT_TAIL(&channel->sc_list_anchor,
230                             new_channel, sc_list_entry);
231                         mtx_unlock(&channel->sc_lock);
232
233                         if (bootverbose) {
234                                 printf("VMBUS get multi-channel offer, "
235                                     "rel=%u, sub=%u\n",
236                                     new_channel->offer_msg.child_rel_id,
237                                     new_channel->offer_msg.offer.sub_channel_index);    
238                         }
239
240                         /* Insert new channel into channel_anchor. */
241                         mtx_lock(&hv_vmbus_g_connection.channel_lock);
242                         TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
243                             new_channel, list_entry);                           
244                         mtx_unlock(&hv_vmbus_g_connection.channel_lock);
245
246                         if(bootverbose)
247                                 printf("VMBUS: new multi-channel offer <%p>, "
248                                     "its primary channel is <%p>.\n",
249                                     new_channel, new_channel->primary_channel);
250
251                         new_channel->state = HV_CHANNEL_OPEN_STATE;
252
253                         /*
254                          * Bump up sub-channel count and notify anyone that is
255                          * interested in this sub-channel, after this sub-channel
256                          * is setup.
257                          */
258                         mtx_lock(&channel->sc_lock);
259                         channel->subchan_cnt++;
260                         mtx_unlock(&channel->sc_lock);
261                         wakeup(channel);
262
263                         return;
264                 }
265
266                 printf("VMBUS: duplicated primary channel%u\n",
267                     new_channel->offer_msg.child_rel_id);
268                 hv_vmbus_free_vmbus_channel(new_channel);
269                 return;
270         }
271
272         new_channel->state = HV_CHANNEL_OPEN_STATE;
273
274         /*
275          * Start the process of binding this offer to the driver
276          * (We need to set the device field before calling
277          * hv_vmbus_child_device_add())
278          */
279         new_channel->device = hv_vmbus_child_device_create(
280             new_channel->offer_msg.offer.interface_type,
281             new_channel->offer_msg.offer.interface_instance, new_channel);
282
283         /*
284          * Add the new device to the bus. This will kick off device-driver
285          * binding which eventually invokes the device driver's AddDevice()
286          * method.
287          */
288         ret = hv_vmbus_child_device_register(new_channel->device);
289         if (ret != 0) {
290                 mtx_lock(&hv_vmbus_g_connection.channel_lock);
291                 TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
292                     new_channel, list_entry);
293                 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
294                 hv_vmbus_free_vmbus_channel(new_channel);
295         }
296
297         mtx_lock(&vmbus_chwait_lock);
298         vmbus_devcnt++;
299         mtx_unlock(&vmbus_chwait_lock);
300         wakeup(&vmbus_devcnt);
301 }
302
303 void
304 vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
305 {
306         KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
307
308         if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
309             hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) {
310                 /* Only cpu0 is supported */
311                 cpu = 0;
312         }
313
314         chan->target_cpu = cpu;
315         chan->target_vcpu = VMBUS_PCPU_GET(vmbus_get_softc(), vcpuid, cpu);
316
317         if (bootverbose) {
318                 printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
319                     chan->offer_msg.child_rel_id,
320                     chan->target_cpu, chan->target_vcpu);
321         }
322 }
323
324 /**
325  * Array of device guids that are performance critical. We try to distribute
326  * the interrupt load for these devices across all online cpus. 
327  */
328 static const hv_guid high_perf_devices[] = {
329         {HV_NIC_GUID, },
330         {HV_IDE_GUID, },
331         {HV_SCSI_GUID, },
332 };
333
/*
 * Indices into the table of performance critical device GUIDs;
 * MAX_PERF_CHN is the number of entries.
 */
enum {
	PERF_CHN_NIC	= 0,
	PERF_CHN_IDE	= 1,
	PERF_CHN_SCSI	= 2,
	MAX_PERF_CHN	= 3,
};
340
341 /*
342  * We use this static number to distribute the channel interrupt load.
343  */
344 static uint32_t next_vcpu;
345
346 /**
347  * Starting with Win8, we can statically distribute the incoming
348  * channel interrupt load by binding a channel to VCPU. We
349  * implement here a simple round robin scheme for distributing
350  * the interrupt load.
351  * We will bind channels that are not performance critical to cpu 0 and
352  * performance critical channels (IDE, SCSI and Network) will be uniformly
353  * distributed across all available CPUs.
354  */
355 static void
356 vmbus_channel_select_defcpu(struct hv_vmbus_channel *channel)
357 {
358         uint32_t current_cpu;
359         int i;
360         boolean_t is_perf_channel = FALSE;
361         const hv_guid *guid = &channel->offer_msg.offer.interface_type;
362
363         for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
364                 if (memcmp(guid->data, high_perf_devices[i].data,
365                     sizeof(hv_guid)) == 0) {
366                         is_perf_channel = TRUE;
367                         break;
368                 }
369         }
370
371         if (!is_perf_channel) {
372                 /* Stick to cpu0 */
373                 vmbus_channel_cpu_set(channel, 0);
374                 return;
375         }
376         /* mp_ncpus should have the number cpus currently online */
377         current_cpu = (++next_vcpu % mp_ncpus);
378         vmbus_channel_cpu_set(channel, current_cpu);
379 }
380
381 /**
382  * @brief Handler for channel offers from Hyper-V/Azure
383  *
384  * Handler for channel offers from vmbus in parent partition. We ignore
385  * all offers except network and storage offers. For each network and storage
386  * offers, we create a channel object and queue a work item to the channel
387  * object to process the offer synchronously
388  */
389 static void
390 vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
391 {
392         hv_vmbus_channel_offer_channel* offer;
393         hv_vmbus_channel_offer_channel* copied;
394
395         offer = (hv_vmbus_channel_offer_channel*) hdr;
396
397         // copy offer data
398         copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
399         if (copied == NULL) {
400                 printf("fail to allocate memory\n");
401                 return;
402         }
403
404         memcpy(copied, hdr, sizeof(*copied));
405         hv_queue_work_item(vmbus_channel_on_offer_internal, copied);
406
407         mtx_lock(&vmbus_chwait_lock);
408         if ((vmbus_chancnt & VMBUS_CHANCNT_DONE) == 0)
409                 vmbus_chancnt++;
410         mtx_unlock(&vmbus_chwait_lock);
411 }
412
413 static void
414 vmbus_channel_on_offer_internal(void* context)
415 {
416         hv_vmbus_channel* new_channel;
417
418         hv_vmbus_channel_offer_channel* offer = (hv_vmbus_channel_offer_channel*)context;
419         /* Allocate the channel object and save this offer */
420         new_channel = hv_vmbus_allocate_channel();
421
422         /*
423          * By default we setup state to enable batched
424          * reading. A specific service can choose to
425          * disable this prior to opening the channel.
426          */
427         new_channel->batched_reading = TRUE;
428
429         new_channel->signal_event_param =
430             (hv_vmbus_input_signal_event *)
431             (HV_ALIGN_UP((unsigned long)
432                 &new_channel->signal_event_buffer,
433                 HV_HYPERCALL_PARAM_ALIGN));
434
435         new_channel->signal_event_param->connection_id.as_uint32_t = 0; 
436         new_channel->signal_event_param->connection_id.u.id =
437             HV_VMBUS_EVENT_CONNECTION_ID;
438         new_channel->signal_event_param->flag_number = 0;
439         new_channel->signal_event_param->rsvd_z = 0;
440
441         if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
442                 new_channel->is_dedicated_interrupt =
443                     (offer->is_dedicated_interrupt != 0);
444                 new_channel->signal_event_param->connection_id.u.id =
445                     offer->connection_id;
446         }
447
448         memcpy(&new_channel->offer_msg, offer,
449             sizeof(hv_vmbus_channel_offer_channel));
450         new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
451         new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
452
453         /* Select default cpu for this channel. */
454         vmbus_channel_select_defcpu(new_channel);
455
456         vmbus_channel_process_offer(new_channel);
457
458         free(offer, M_DEVBUF);
459 }
460
461 /**
462  * @brief Rescind offer handler.
463  *
464  * We queue a work item to process this offer
465  * synchronously
466  */
467 static void
468 vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
469 {
470         hv_vmbus_channel_rescind_offer* rescind;
471         hv_vmbus_channel*               channel;
472
473         rescind = (hv_vmbus_channel_rescind_offer*) hdr;
474
475         channel = hv_vmbus_g_connection.channels[rescind->child_rel_id];
476         if (channel == NULL)
477             return;
478
479         hv_queue_work_item(vmbus_channel_on_offer_rescind_internal, channel);
480         hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL;
481 }
482
483 static void
484 vmbus_channel_on_offer_rescind_internal(void *context)
485 {
486         hv_vmbus_channel*               channel;
487
488         channel = (hv_vmbus_channel*)context;
489         if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
490                 /* Only primary channel owns the hv_device */
491                 hv_vmbus_child_device_unregister(channel->device);
492         }
493 }
494
495 /**
496  *
497  * @brief Invoked when all offers have been delivered.
498  */
499 static void
500 vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
501 {
502
503         mtx_lock(&vmbus_chwait_lock);
504         vmbus_chancnt |= VMBUS_CHANCNT_DONE;
505         mtx_unlock(&vmbus_chwait_lock);
506         wakeup(&vmbus_chancnt);
507 }
508
509 /**
510  * @brief Open result handler.
511  *
512  * This is invoked when we received a response
513  * to our channel open request. Find the matching request, copy the
514  * response and signal the requesting thread.
515  */
516 static void
517 vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
518 {
519         hv_vmbus_channel_open_result*   result;
520         hv_vmbus_channel_msg_info*      msg_info;
521         hv_vmbus_channel_msg_header*    requestHeader;
522         hv_vmbus_channel_open_channel*  openMsg;
523
524         result = (hv_vmbus_channel_open_result*) hdr;
525
526         /*
527          * Find the open msg, copy the result and signal/unblock the wait event
528          */
529         mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
530
531         TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
532             msg_list_entry) {
533             requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
534
535             if (requestHeader->message_type ==
536                     HV_CHANNEL_MESSAGE_OPEN_CHANNEL) {
537                 openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg;
538                 if (openMsg->child_rel_id == result->child_rel_id
539                     && openMsg->open_id == result->open_id) {
540                     memcpy(&msg_info->response.open_result, result,
541                         sizeof(hv_vmbus_channel_open_result));
542                     sema_post(&msg_info->wait_sema);
543                     break;
544                 }
545             }
546         }
547         mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
548
549 }
550
551 /**
552  * @brief GPADL created handler.
553  *
554  * This is invoked when we received a response
555  * to our gpadl create request. Find the matching request, copy the
556  * response and signal the requesting thread.
557  */
558 static void
559 vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
560 {
561         hv_vmbus_channel_gpadl_created*         gpadl_created;
562         hv_vmbus_channel_msg_info*              msg_info;
563         hv_vmbus_channel_msg_header*            request_header;
564         hv_vmbus_channel_gpadl_header*          gpadl_header;
565
566         gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr;
567
568         /* Find the establish msg, copy the result and signal/unblock
569          * the wait event
570          */
571         mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
572         TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
573                 msg_list_entry) {
574             request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;
575             if (request_header->message_type ==
576                     HV_CHANNEL_MESSAGEL_GPADL_HEADER) {
577                 gpadl_header =
578                     (hv_vmbus_channel_gpadl_header*) request_header;
579
580                 if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id)
581                     && (gpadl_created->gpadl == gpadl_header->gpadl)) {
582                     memcpy(&msg_info->response.gpadl_created,
583                         gpadl_created,
584                         sizeof(hv_vmbus_channel_gpadl_created));
585                     sema_post(&msg_info->wait_sema);
586                     break;
587                 }
588             }
589         }
590         mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
591 }
592
593 /**
594  * @brief GPADL torndown handler.
595  *
596  * This is invoked when we received a respons
597  * to our gpadl teardown request. Find the matching request, copy the
598  * response and signal the requesting thread
599  */
600 static void
601 vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
602 {
603         hv_vmbus_channel_gpadl_torndown*        gpadl_torndown;
604         hv_vmbus_channel_msg_info*              msg_info;
605         hv_vmbus_channel_msg_header*            requestHeader;
606         hv_vmbus_channel_gpadl_teardown*        gpadlTeardown;
607
608         gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr;
609
610         /*
611          * Find the open msg, copy the result and signal/unblock the
612          * wait event.
613          */
614
615         mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
616
617         TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
618                 msg_list_entry) {
619             requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
620
621             if (requestHeader->message_type
622                     == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) {
623                 gpadlTeardown =
624                     (hv_vmbus_channel_gpadl_teardown*) requestHeader;
625
626                 if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) {
627                     memcpy(&msg_info->response.gpadl_torndown,
628                         gpadl_torndown,
629                         sizeof(hv_vmbus_channel_gpadl_torndown));
630                     sema_post(&msg_info->wait_sema);
631                     break;
632                 }
633             }
634         }
635     mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
636 }
637
638 /**
639  * @brief Version response handler.
640  *
641  * This is invoked when we received a response
642  * to our initiate contact request. Find the matching request, copy th
643  * response and signal the requesting thread.
644  */
645 static void
646 vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
647 {
648         hv_vmbus_channel_msg_info*              msg_info;
649         hv_vmbus_channel_msg_header*            requestHeader;
650         hv_vmbus_channel_initiate_contact*      initiate;
651         hv_vmbus_channel_version_response*      versionResponse;
652
653         versionResponse = (hv_vmbus_channel_version_response*)hdr;
654
655         mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
656         TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
657             msg_list_entry) {
658             requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
659             if (requestHeader->message_type
660                 == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) {
661                 initiate =
662                     (hv_vmbus_channel_initiate_contact*) requestHeader;
663                 memcpy(&msg_info->response.version_response,
664                     versionResponse,
665                     sizeof(hv_vmbus_channel_version_response));
666                 sema_post(&msg_info->wait_sema);
667             }
668         }
669     mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
670
671 }
672
673 /**
674  *  @brief Send a request to get all our pending offers.
675  */
676 int
677 hv_vmbus_request_channel_offers(void)
678 {
679         int                             ret;
680         hv_vmbus_channel_msg_header*    msg;
681         hv_vmbus_channel_msg_info*      msg_info;
682
683         msg_info = (hv_vmbus_channel_msg_info *)
684             malloc(sizeof(hv_vmbus_channel_msg_info)
685                     + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT);
686
687         if (msg_info == NULL) {
688             if(bootverbose)
689                 printf("Error VMBUS: malloc failed for Request Offers\n");
690             return (ENOMEM);
691         }
692
693         msg = (hv_vmbus_channel_msg_header*) msg_info->msg;
694         msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS;
695
696         ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));
697
698         free(msg_info, M_DEVBUF);
699
700         return (ret);
701 }
702
703 /**
704  * @brief Release channels that are unattached/unconnected (i.e., no drivers associated)
705  */
706 void
707 hv_vmbus_release_unattached_channels(void) 
708 {
709         hv_vmbus_channel *channel;
710
711         mtx_lock(&hv_vmbus_g_connection.channel_lock);
712
713         while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
714             channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
715             TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
716                             channel, list_entry);
717
718             if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
719                 /* Only primary channel owns the hv_device */
720                 hv_vmbus_child_device_unregister(channel->device);
721             }
722             hv_vmbus_free_vmbus_channel(channel);
723         }
724         bzero(hv_vmbus_g_connection.channels,
725             sizeof(hv_vmbus_channel*) * VMBUS_CHAN_MAX);
726         mtx_unlock(&hv_vmbus_g_connection.channel_lock);
727 }
728
729 /**
730  * @brief Select the best outgoing channel
731  * 
732  * The channel whose vcpu binding is closest to the currect vcpu will
733  * be selected.
734  * If no multi-channel, always select primary channel
735  * 
736  * @param primary - primary channel
737  */
738 struct hv_vmbus_channel *
739 vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
740 {
741         hv_vmbus_channel *new_channel = NULL;
742         hv_vmbus_channel *outgoing_channel = primary;
743         int old_cpu_distance = 0;
744         int new_cpu_distance = 0;
745         int cur_vcpu = 0;
746         int smp_pro_id = PCPU_GET(cpuid);
747
748         if (TAILQ_EMPTY(&primary->sc_list_anchor)) {
749                 return outgoing_channel;
750         }
751
752         if (smp_pro_id >= MAXCPU) {
753                 return outgoing_channel;
754         }
755
756         cur_vcpu = VMBUS_PCPU_GET(vmbus_get_softc(), vcpuid, smp_pro_id);
757         
758         TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) {
759                 if (new_channel->state != HV_CHANNEL_OPENED_STATE){
760                         continue;
761                 }
762
763                 if (new_channel->target_vcpu == cur_vcpu){
764                         return new_channel;
765                 }
766
767                 old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
768                     (outgoing_channel->target_vcpu - cur_vcpu) :
769                     (cur_vcpu - outgoing_channel->target_vcpu));
770
771                 new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
772                     (new_channel->target_vcpu - cur_vcpu) :
773                     (cur_vcpu - new_channel->target_vcpu));
774
775                 if (old_cpu_distance < new_cpu_distance) {
776                         continue;
777                 }
778
779                 outgoing_channel = new_channel;
780         }
781
782         return(outgoing_channel);
783 }
784
785 void
786 vmbus_scan(void)
787 {
788         uint32_t chancnt;
789
790         mtx_lock(&vmbus_chwait_lock);
791         while ((vmbus_chancnt & VMBUS_CHANCNT_DONE) == 0)
792                 mtx_sleep(&vmbus_chancnt, &vmbus_chwait_lock, 0, "waitch", 0);
793         chancnt = vmbus_chancnt & ~VMBUS_CHANCNT_DONE;
794
795         while (vmbus_devcnt != chancnt)
796                 mtx_sleep(&vmbus_devcnt, &vmbus_chwait_lock, 0, "waitdev", 0);
797         mtx_unlock(&vmbus_chwait_lock);
798 }
799
800 struct hv_vmbus_channel **
801 vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
802 {
803         struct hv_vmbus_channel **ret, *chan;
804         int i;
805
806         ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
807             M_WAITOK);
808
809         mtx_lock(&pri_chan->sc_lock);
810
811         while (pri_chan->subchan_cnt < subchan_cnt)
812                 mtx_sleep(pri_chan, &pri_chan->sc_lock, 0, "subch", 0);
813
814         i = 0;
815         TAILQ_FOREACH(chan, &pri_chan->sc_list_anchor, sc_list_entry) {
816                 /* TODO: refcnt chan */
817                 ret[i] = chan;
818
819                 ++i;
820                 if (i == subchan_cnt)
821                         break;
822         }
823         KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
824             pri_chan->subchan_cnt, subchan_cnt));
825
826         mtx_unlock(&pri_chan->sc_lock);
827
828         return ret;
829 }
830
831 void
832 vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
833 {
834
835         free(subchan, M_TEMP);
836 }