2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
35 #include "hv_vmbus_priv.h"
/*
 * Forward declarations of the file-local channel message handlers (wired into
 * g_channel_message_table) and of the two *_internal routines that are handed
 * to hv_queue_work_item() as deferred work.
 */
41 static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr);
42 static void vmbus_channel_on_offer_internal(void* context);
43 static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr);
44 static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr);
45 static void vmbus_channel_on_offer_rescind_internal(void* context);
46 static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr);
47 static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
48 static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
49 static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
52 * Channel message dispatch table
/*
 * One entry per channel message type, HV_CHANNEL_MESSAGE_COUNT entries in
 * total. Each entry pairs the message type with its handler function.
 * Handlers visible in this view: offer, rescind offer, all offers delivered,
 * open result, GPADL created, GPADL torndown and version response.
 * NOTE(review): the handler field of the remaining entries is not visible
 * here; confirm what they hold before relying on them.
 */
54 hv_vmbus_channel_msg_table_entry
55 g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = {
56 { HV_CHANNEL_MESSAGE_INVALID,
58 { HV_CHANNEL_MESSAGE_OFFER_CHANNEL,
59 vmbus_channel_on_offer },
60 { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER,
61 vmbus_channel_on_offer_rescind },
62 { HV_CHANNEL_MESSAGE_REQUEST_OFFERS,
64 { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED,
65 vmbus_channel_on_offers_delivered },
66 { HV_CHANNEL_MESSAGE_OPEN_CHANNEL,
68 { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT,
69 vmbus_channel_on_open_result },
70 { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL,
72 { HV_CHANNEL_MESSAGEL_GPADL_HEADER,
74 { HV_CHANNEL_MESSAGE_GPADL_BODY,
76 { HV_CHANNEL_MESSAGE_GPADL_CREATED,
77 vmbus_channel_on_gpadl_created },
78 { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN,
80 { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN,
81 vmbus_channel_on_gpadl_torndown },
82 { HV_CHANNEL_MESSAGE_REL_ID_RELEASED,
84 { HV_CHANNEL_MESSAGE_INITIATED_CONTACT,
86 { HV_CHANNEL_MESSAGE_VERSION_RESPONSE,
87 vmbus_channel_on_version_response },
88 { HV_CHANNEL_MESSAGE_UNLOAD,
/*
 * Deferred work record: bundles a callback with the context argument it is
 * invoked with. Besides `callback`, the rest of this file also uses members
 * named `work` (the task passed to TASK_INIT()) and `context`; their
 * declarations are not visible in this view.
 */
92 typedef struct hv_work_item {
94 void (*callback)(void *);
99 * Implementation of the work abstraction.
/*
 * Task handler installed by TASK_INIT() in hv_queue_work_item().
 *
 * work    - the hv_work_item that was registered as the task argument.
 * pending - not used in the lines visible here.
 *
 * Invokes the stored callback with the stored context.
 * NOTE(review): releasing the work item is not visible in this view; confirm
 * who frees it.
 */
102 work_item_callback(void *work, int pending)
104 struct hv_work_item *w = (struct hv_work_item *)work;
106 w->callback(w->context);
112 * @brief Create work item
116 void (*callback)(void *), void *context)
/*
 * Allocate a work item, record callback/context in it, and enqueue it on
 * taskqueue_thread so that work_item_callback() runs it later.
 * Returns whatever taskqueue_enqueue() returns.
 *
 * NOTE(review): the malloc flags are not visible here, and KASSERT is the
 * only NULL check visible. Confirm an explicit NULL check exists in the lines
 * not shown, since assertions may be compiled out.
 */
118 struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
120 KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
124 w->callback = callback;
125 w->context = context;
127 TASK_INIT(&w->work, 0, work_item_callback, w);
129 return (taskqueue_enqueue(taskqueue_thread, &w->work));
134 * @brief Allocate and initialize a vmbus channel object
/*
 * Allocate an hv_vmbus_channel and initialise the members used for
 * sub-channel tracking: the sc_lock mutex (MTX_DEF) and the sc_list_anchor
 * tail queue. Callers use the result as the new channel pointer.
 *
 * NOTE(review): the malloc flags, any handling of an allocation failure and
 * the return statement are not visible in this view.
 */
137 hv_vmbus_allocate_channel(void)
139 hv_vmbus_channel* channel;
141 channel = (hv_vmbus_channel*) malloc(
142 sizeof(hv_vmbus_channel),
146 mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
147 TAILQ_INIT(&channel->sc_list_anchor);
153 * @brief Release the resources used by the vmbus channel object
/*
 * Tear down a channel object: destroy its sc_lock mutex, then free the
 * channel memory (M_DEVBUF). The channel must not be used afterwards.
 * This routine does not unlink the channel from any list; callers in this
 * file remove it from channel_anchor themselves where needed.
 */
156 hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
158 mtx_destroy(&channel->sc_lock);
159 free(channel, M_DEVBUF);
163 * @brief Process the offer by creating a channel/device
164 * associated with this offer
/*
 * Process a channel offer that has already been copied into new_channel.
 *
 * Visible steps:
 *  - Under channel_lock, record new_channel in channels[] at its
 *    child_rel_id and scan channel_anchor for an existing channel with the
 *    same interface type and interface instance GUIDs.
 *  - If the offer carries a non-zero sub_channel_index, link new_channel to
 *    the scanned channel as a sub-channel, add it to channel_anchor, mark it
 *    HV_CHANNEL_OPEN_STATE and call the primary's sc_creation_callback if
 *    one is set.
 *  - Otherwise mark it HV_CHANNEL_OPEN_STATE, then create and register a
 *    child device for it.
 *
 * NOTE(review): several guarding conditions, loop exits and returns are not
 * visible in this view, so the exact branch structure must be confirmed
 * against the full file.
 */
167 vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
170 hv_vmbus_channel* channel;
176 relid = new_channel->offer_msg.child_rel_id;
178 * Make sure this is a new offer
180 mtx_lock(&hv_vmbus_g_connection.channel_lock);
/*
 * NOTE(review): relid is taken from the offer message and indexes
 * channels[] with no range check visible here; confirm it is bounded
 * (channels[] is cleared as HV_CHANNEL_MAX_COUNT pointers elsewhere).
 */
181 hv_vmbus_g_connection.channels[relid] = new_channel;
183 TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
186 if (memcmp(&channel->offer_msg.offer.interface_type,
187 &new_channel->offer_msg.offer.interface_type,
188 sizeof(hv_guid)) == 0 &&
189 memcmp(&channel->offer_msg.offer.interface_instance,
190 &new_channel->offer_msg.offer.interface_instance,
191 sizeof(hv_guid)) == 0) {
200 &hv_vmbus_g_connection.channel_anchor,
204 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
206 /*XXX add new channel to percpu_list */
210 * Check if this is a sub channel.
212 if (new_channel->offer_msg.offer.sub_channel_index != 0) {
214 * It is a sub channel offer, process it.
216 new_channel->primary_channel = channel;
/* A sub-channel shares the device pointer of the channel it attaches to. */
217 new_channel->device = channel->device;
218 mtx_lock(&channel->sc_lock);
220 &channel->sc_list_anchor,
223 mtx_unlock(&channel->sc_lock);
225 /* Insert new channel into channel_anchor. */
226 printf("VMBUS get multi-channel offer, rel=%u,sub=%u\n",
227 new_channel->offer_msg.child_rel_id,
228 new_channel->offer_msg.offer.sub_channel_index);
229 mtx_lock(&hv_vmbus_g_connection.channel_lock);
230 TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
231 new_channel, list_entry);
232 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
235 printf("VMBUS: new multi-channel offer <%p>, "
236 "its primary channel is <%p>.\n",
237 new_channel, new_channel->primary_channel);
239 /*XXX add it to percpu_list */
241 new_channel->state = HV_CHANNEL_OPEN_STATE;
242 if (channel->sc_creation_callback != NULL) {
243 channel->sc_creation_callback(new_channel);
248 hv_vmbus_free_vmbus_channel(new_channel);
252 new_channel->state = HV_CHANNEL_OPEN_STATE;
255 * Start the process of binding this offer to the driver
256 * (We need to set the device field before calling
257 * hv_vmbus_child_device_add())
259 new_channel->device = hv_vmbus_child_device_create(
260 new_channel->offer_msg.offer.interface_type,
261 new_channel->offer_msg.offer.interface_instance, new_channel);
264 * Add the new device to the bus. This will kick off device-driver
265 * binding which eventually invokes the device driver's AddDevice()
268 ret = hv_vmbus_child_device_register(new_channel->device);
/*
 * Undo path: unlink new_channel from channel_anchor and free it.
 * Presumably taken only when registration failed; the condition on
 * ret is not visible here.
 */
270 mtx_lock(&hv_vmbus_g_connection.channel_lock);
272 &hv_vmbus_g_connection.channel_anchor,
275 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
276 hv_vmbus_free_vmbus_channel(new_channel);
281 * Array of device guids that are performance critical. We try to distribute
282 * the interrupt load for these devices across all online cpus.
/*
 * high_perf_devices: GUID table scanned by vmbus_channel_select_cpu() using
 * indices from PERF_CHN_NIC up to (but excluding) MAX_PERF_CHN. The
 * initializer is not visible in this view.
 *
 * next_vcpu: counter that vmbus_channel_select_cpu() pre-increments for each
 * performance-critical channel it places. No locking around the increment is
 * visible in this file.
 */
284 static const hv_guid high_perf_devices[] = {
298 * We use this static number to distribute the channel interrupt load.
300 static uint32_t next_vcpu;
303 * Starting with Win8, we can statically distribute the incoming
304 * channel interrupt load by binding a channel to VCPU. We
305 * implement here a simple round robin scheme for distributing
306 * the interrupt load.
307 * We will bind channels that are not performance critical to cpu 0 and
308 * performance critical channels (IDE, SCSI and Network) will be uniformly
309 * distributed across all available CPUs.
/*
 * Choose the CPU binding for a channel and store it in channel->target_cpu
 * (guest view) and channel->target_vcpu (host view).
 *
 * channel - channel whose target_cpu/target_vcpu are set.
 * guid    - interface type GUID, compared against high_perf_devices[].
 *
 * Channels whose GUID is not in high_perf_devices[], and all channels when
 * the negotiated protocol is WS2008 or WIN7, are bound to cpu 0 / vcpu 0.
 * Other channels are spread round-robin over mp_ncpus.
 */
312 vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid)
314 uint32_t current_cpu;
316 boolean_t is_perf_channel = FALSE;
318 for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
319 if (memcmp(guid->data, high_perf_devices[i].data,
320 sizeof(hv_guid)) == 0) {
321 is_perf_channel = TRUE;
326 if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
327 (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) ||
328 (!is_perf_channel)) {
329 /* Host's view of guest cpu */
330 channel->target_vcpu = 0;
331 /* Guest's own view of cpu */
332 channel->target_cpu = 0;
335 /* mp_ncpus should have the number cpus currently online */
/*
 * The counter is incremented before the modulo, so the first
 * performance-critical channel lands on cpu (1 % mp_ncpus).
 */
336 current_cpu = (++next_vcpu % mp_ncpus);
337 channel->target_cpu = current_cpu;
338 channel->target_vcpu =
339 hv_vmbus_g_context.hv_vcpu_index[current_cpu];
/*
 * `i` is printed as the perf channel number; this is the matching
 * table index only if the scan above stops at the match (the loop
 * exit is not visible here).
 */
341 printf("VMBUS: Total online cpus %d, assign perf channel %d "
342 "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu,
347 * @brief Handler for channel offers from Hyper-V/Azure
349 * Handler for channel offers from vmbus in parent partition. We ignore
350 * all offers except network and storage offers. For each network and storage
351 * offers, we create a channel object and queue a work item to the channel
352 * object to process the offer synchronously
/*
 * Handler for HV_CHANNEL_MESSAGE_OFFER_CHANNEL.
 *
 * hdr - the received message, interpreted as hv_vmbus_channel_offer_channel.
 *
 * Copies the offer into a buffer allocated with M_NOWAIT and queues
 * vmbus_channel_on_offer_internal() with that copy as its context; the
 * internal routine frees the copy. On allocation failure a message is
 * printed.
 *
 * guidType/guidInstance are assigned here, but no use of them is visible in
 * this view.
 */
355 vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
357 hv_vmbus_channel_offer_channel* offer;
358 hv_vmbus_channel_offer_channel* copied;
360 offer = (hv_vmbus_channel_offer_channel*) hdr;
363 hv_guid *guidInstance;
365 guidType = &offer->offer.interface_type;
366 guidInstance = &offer->offer.interface_instance;
369 copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
370 if (copied == NULL) {
371 printf("fail to allocate memory\n");
375 memcpy(copied, hdr, sizeof(*copied));
/*
 * NOTE(review): the result of hv_queue_work_item() is discarded. If
 * queuing can fail, `copied` would never reach the routine that frees
 * it; confirm.
 */
376 hv_queue_work_item(vmbus_channel_on_offer_internal, copied);
/*
 * Deferred half of offer handling, run from the work queue.
 *
 * context - copy of the offer message made by vmbus_channel_on_offer();
 *           it is freed (M_DEVBUF) at the end of this routine.
 *
 * Allocates a channel, sets its defaults (batched reading, signal event
 * parameters), selects a CPU binding, stores the offer and monitor ids in
 * the channel, then hands it to vmbus_channel_process_offer().
 *
 * NOTE(review): no check of the hv_vmbus_allocate_channel() result is
 * visible in this view; confirm one exists.
 */
380 vmbus_channel_on_offer_internal(void* context)
382 hv_vmbus_channel* new_channel;
384 hv_vmbus_channel_offer_channel* offer = (hv_vmbus_channel_offer_channel*)context;
385 /* Allocate the channel object and save this offer */
386 new_channel = hv_vmbus_allocate_channel();
389 * By default we setup state to enable batched
390 * reading. A specific service can choose to
391 * disable this prior to opening the channel.
393 new_channel->batched_reading = TRUE;
395 new_channel->signal_event_param =
396 (hv_vmbus_input_signal_event *)
397 (HV_ALIGN_UP((unsigned long)
398 &new_channel->signal_event_buffer,
399 HV_HYPERCALL_PARAM_ALIGN));
401 new_channel->signal_event_param->connection_id.as_uint32_t = 0;
402 new_channel->signal_event_param->connection_id.u.id =
403 HV_VMBUS_EVENT_CONNECTION_ID;
404 new_channel->signal_event_param->flag_number = 0;
405 new_channel->signal_event_param->rsvd_z = 0;
407 if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
408 new_channel->is_dedicated_interrupt =
409 (offer->is_dedicated_interrupt != 0);
410 new_channel->signal_event_param->connection_id.u.id =
411 offer->connection_id;
415 * Bind the channel to a chosen cpu.
417 vmbus_channel_select_cpu(new_channel,
418 &offer->offer.interface_type);
420 memcpy(&new_channel->offer_msg, offer,
421 sizeof(hv_vmbus_channel_offer_channel));
/*
 * The (uint8_t) cast binds tighter than / and %, so it applies to
 * monitor_id before the arithmetic, not to the quotient/remainder.
 */
422 new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
423 new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
425 vmbus_channel_process_offer(new_channel);
427 free(offer, M_DEVBUF);
431 * @brief Rescind offer handler.
433 * We queue a work item to process this offer
/*
 * Handler for HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER.
 *
 * hdr - the received message, interpreted as hv_vmbus_channel_rescind_offer.
 *
 * Looks up the channel by child_rel_id in channels[], queues
 * vmbus_channel_on_offer_rescind_internal() for it and clears the table
 * slot.
 *
 * NOTE(review): child_rel_id comes from the message and is used as an array
 * index with no range check visible here. The lines between the lookup and
 * the enqueue are also not visible (possibly a NULL check); confirm both.
 */
437 vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
439 hv_vmbus_channel_rescind_offer* rescind;
440 hv_vmbus_channel* channel;
442 rescind = (hv_vmbus_channel_rescind_offer*) hdr;
444 channel = hv_vmbus_g_connection.channels[rescind->child_rel_id];
448 hv_queue_work_item(vmbus_channel_on_offer_rescind_internal, channel);
449 hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL;
/*
 * Deferred half of rescind handling, run from the work queue.
 *
 * context - the hv_vmbus_channel whose offer was rescinded.
 *
 * For a primary channel, unregisters the associated child device. Nothing
 * else is done for the channel in the lines visible here.
 */
453 vmbus_channel_on_offer_rescind_internal(void *context)
455 hv_vmbus_channel* channel;
457 channel = (hv_vmbus_channel*)context;
458 if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
459 /* Only primary channel owns the hv_device */
460 hv_vmbus_child_device_unregister(channel->device);
466 * @brief Invoked when all offers have been delivered.
/*
 * Handler registered in g_channel_message_table for
 * HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED. Its body is not visible in this
 * view.
 */
469 vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
474 * @brief Open result handler.
476 * This is invoked when we received a response
477 * to our channel open request. Find the matching request, copy the
478 * response and signal the requesting thread.
/*
 * Handler for HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT.
 *
 * hdr - the received message, interpreted as hv_vmbus_channel_open_result.
 *
 * With the channel_msg_lock spin mutex held, walks the pending requests on
 * channel_msg_anchor. For an OPEN_CHANNEL request whose child_rel_id and
 * open_id both match the result, the result is copied into
 * response.open_result and wait_sema is posted.
 * Whether the walk stops after the first match is not visible in this view.
 */
481 vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
483 hv_vmbus_channel_open_result* result;
484 hv_vmbus_channel_msg_info* msg_info;
485 hv_vmbus_channel_msg_header* requestHeader;
486 hv_vmbus_channel_open_channel* openMsg;
488 result = (hv_vmbus_channel_open_result*) hdr;
491 * Find the open msg, copy the result and signal/unblock the wait event
493 mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
495 TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
497 requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
499 if (requestHeader->message_type ==
500 HV_CHANNEL_MESSAGE_OPEN_CHANNEL) {
501 openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg;
502 if (openMsg->child_rel_id == result->child_rel_id
503 && openMsg->open_id == result->open_id) {
504 memcpy(&msg_info->response.open_result, result,
505 sizeof(hv_vmbus_channel_open_result));
506 sema_post(&msg_info->wait_sema);
511 mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
516 * @brief GPADL created handler.
518 * This is invoked when we received a response
519 * to our gpadl create request. Find the matching request, copy the
520 * response and signal the requesting thread.
/*
 * Handler for HV_CHANNEL_MESSAGE_GPADL_CREATED.
 *
 * hdr - the received message, interpreted as hv_vmbus_channel_gpadl_created.
 *
 * With the channel_msg_lock spin mutex held, walks the pending requests on
 * channel_msg_anchor. For a GPADL header request whose child_rel_id and
 * gpadl both match the response, the response is stored in
 * response.gpadl_created and wait_sema is posted.
 * Whether the walk stops after the first match is not visible in this view.
 */
523 vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
525 hv_vmbus_channel_gpadl_created* gpadl_created;
526 hv_vmbus_channel_msg_info* msg_info;
527 hv_vmbus_channel_msg_header* request_header;
528 hv_vmbus_channel_gpadl_header* gpadl_header;
530 gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr;
532 /* Find the establish msg, copy the result and signal/unblock
535 mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
536 TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
538 request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;
539 if (request_header->message_type ==
540 HV_CHANNEL_MESSAGEL_GPADL_HEADER) {
542 (hv_vmbus_channel_gpadl_header*) request_header;
544 if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id)
545 && (gpadl_created->gpadl == gpadl_header->gpadl)) {
546 memcpy(&msg_info->response.gpadl_created,
548 sizeof(hv_vmbus_channel_gpadl_created));
549 sema_post(&msg_info->wait_sema);
554 mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
558 * @brief GPADL torndown handler.
560 * This is invoked when we received a response
561 * to our gpadl teardown request. Find the matching request, copy the
562 * response and signal the requesting thread
/*
 * Handler for HV_CHANNEL_MESSAGE_GPADL_TORNDOWN.
 *
 * hdr - the received message, interpreted as hv_vmbus_channel_gpadl_torndown.
 *
 * With the channel_msg_lock spin mutex held, walks the pending requests on
 * channel_msg_anchor. For a GPADL_TEARDOWN request whose gpadl matches the
 * response, the response is stored in response.gpadl_torndown and wait_sema
 * is posted. Matching is on gpadl only; the request matched is the teardown
 * request, not an open request.
 * Whether the walk stops after the first match is not visible in this view.
 */
565 vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
567 hv_vmbus_channel_gpadl_torndown* gpadl_torndown;
568 hv_vmbus_channel_msg_info* msg_info;
569 hv_vmbus_channel_msg_header* requestHeader;
570 hv_vmbus_channel_gpadl_teardown* gpadlTeardown;
572 gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr;
575 * Find the open msg, copy the result and signal/unblock the
579 mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
581 TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
583 requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
585 if (requestHeader->message_type
586 == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) {
588 (hv_vmbus_channel_gpadl_teardown*) requestHeader;
590 if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) {
591 memcpy(&msg_info->response.gpadl_torndown,
593 sizeof(hv_vmbus_channel_gpadl_torndown));
594 sema_post(&msg_info->wait_sema);
599 mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
603 * @brief Version response handler.
605 * This is invoked when we received a response
606 * to our initiate contact request. Find the matching request, copy the
607 * response and signal the requesting thread.
/*
 * Handler for HV_CHANNEL_MESSAGE_VERSION_RESPONSE.
 *
 * hdr - the received message, interpreted as
 *       hv_vmbus_channel_version_response.
 *
 * With the channel_msg_lock spin mutex held, walks the pending requests on
 * channel_msg_anchor. For a request of type INITIATED_CONTACT, the response
 * is stored in response.version_response and wait_sema is posted. Unlike the
 * other response handlers in this file, no request field is compared
 * against the response before the copy.
 */
610 vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
612 hv_vmbus_channel_msg_info* msg_info;
613 hv_vmbus_channel_msg_header* requestHeader;
614 hv_vmbus_channel_initiate_contact* initiate;
615 hv_vmbus_channel_version_response* versionResponse;
617 versionResponse = (hv_vmbus_channel_version_response*)hdr;
619 mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
620 TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
622 requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
623 if (requestHeader->message_type
624 == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) {
626 (hv_vmbus_channel_initiate_contact*) requestHeader;
627 memcpy(&msg_info->response.version_response,
629 sizeof(hv_vmbus_channel_version_response));
630 sema_post(&msg_info->wait_sema);
633 mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
638 * @brief Send a request to get all our pending offers.
/*
 * Build and post an HV_CHANNEL_MESSAGE_REQUEST_OFFERS message.
 *
 * A msg_info with room for one message header is allocated with M_NOWAIT;
 * an error is printed if that fails. The header's message_type is set, the
 * header is posted with hv_vmbus_post_message(), and msg_info is freed.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably `ret` from hv_vmbus_post_message() is returned. Confirm.
 */
641 hv_vmbus_request_channel_offers(void)
644 hv_vmbus_channel_msg_header* msg;
645 hv_vmbus_channel_msg_info* msg_info;
647 msg_info = (hv_vmbus_channel_msg_info *)
648 malloc(sizeof(hv_vmbus_channel_msg_info)
649 + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT);
651 if (msg_info == NULL) {
653 printf("Error VMBUS: malloc failed for Request Offers\n");
657 msg = (hv_vmbus_channel_msg_header*) msg_info->msg;
658 msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS;
660 ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));
662 free(msg_info, M_DEVBUF);
668 * @brief Release channels that are unattached/unconnected (i.e., no drivers associated)
/*
 * Drain the global channel list.
 *
 * With channel_lock held, repeatedly removes the first channel from
 * channel_anchor, unregisters its child device if it is a primary channel,
 * and frees the channel object. The channels[] lookup table
 * (HV_CHANNEL_MAX_COUNT pointers) is zeroed before the lock is dropped.
 *
 * NOTE(review): the loop's closing brace is not visible, so whether the
 * bzero() runs once after the loop or on every iteration must be confirmed.
 */
671 hv_vmbus_release_unattached_channels(void)
673 hv_vmbus_channel *channel;
675 mtx_lock(&hv_vmbus_g_connection.channel_lock);
677 while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
678 channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
679 TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
680 channel, list_entry);
682 if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
683 /* Only primary channel owns the hv_device */
684 hv_vmbus_child_device_unregister(channel->device);
686 hv_vmbus_free_vmbus_channel(channel);
688 bzero(hv_vmbus_g_connection.channels,
689 sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT);
690 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
694 * @brief Select the best outgoing channel
696 * The channel whose vcpu binding is closest to the current vcpu will
698 * If no multi-channel, always select primary channel
700 * @param primary - primary channel
/*
 * Pick the channel to send on, starting from `primary`.
 *
 * Returns `primary` when it has no sub-channels or when the current cpuid is
 * >= MAXCPU. Otherwise the sub-channel list is walked, comparing each
 * candidate's target_vcpu with the vcpu index of the current CPU by absolute
 * distance.
 *
 * NOTE(review): the statements inside the "not opened", "exact match" and
 * "old distance is smaller" branches are not visible in this view. They are
 * presumably continue / return new_channel / continue; confirm.
 */
702 struct hv_vmbus_channel *
703 vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
705 hv_vmbus_channel *new_channel = NULL;
706 hv_vmbus_channel *outgoing_channel = primary;
707 int old_cpu_distance = 0;
708 int new_cpu_distance = 0;
710 int smp_pro_id = PCPU_GET(cpuid);
712 if (TAILQ_EMPTY(&primary->sc_list_anchor)) {
713 return outgoing_channel;
716 if (smp_pro_id >= MAXCPU) {
717 return outgoing_channel;
720 cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id];
722 TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) {
/*
 * This tests HV_CHANNEL_OPENED_STATE, which is a different constant
 * from the HV_CHANNEL_OPEN_STATE assigned when an offer is processed.
 */
723 if (new_channel->state != HV_CHANNEL_OPENED_STATE){
727 if (new_channel->target_vcpu == cur_vcpu){
/* Absolute vcpu distance of the current best candidate ... */
731 old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
732 (outgoing_channel->target_vcpu - cur_vcpu) :
733 (cur_vcpu - outgoing_channel->target_vcpu));
/* ... and of the sub-channel under consideration. */
735 new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
736 (new_channel->target_vcpu - cur_vcpu) :
737 (cur_vcpu - new_channel->target_vcpu));
739 if (old_cpu_distance < new_cpu_distance) {
743 outgoing_channel = new_channel;
746 return(outgoing_channel);