2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/param.h>
32 #include "hv_vmbus_priv.h"
/*
 * Forward declarations for the channel-message handlers wired into the
 * dispatch table below.  The *_internal variants run from a taskqueue
 * work item (see hv_queue_work_item) rather than directly from the
 * message-dispatch path.
 */
38 static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr);
39 static void vmbus_channel_on_offer_internal(void* context);
40 static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr);
41 static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr);
42 static void vmbus_channel_on_offer_rescind_internal(void* context);
43 static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr);
44 static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
45 static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
46 static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
/*
49 * Channel message dispatch table
 *
 * Indexed by HV_CHANNEL_MESSAGE_* type; each entry pairs a message type
 * with its handler.  Entries shown here without a handler appear to take
 * no guest-side action (request/response types the guest originates) --
 * TODO(review): confirm those entries are NULL in the full source.
 */
51 hv_vmbus_channel_msg_table_entry
52 g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = {
53 { HV_CHANNEL_MESSAGE_INVALID,
55 { HV_CHANNEL_MESSAGE_OFFER_CHANNEL,
56 vmbus_channel_on_offer },
57 { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER,
58 vmbus_channel_on_offer_rescind },
59 { HV_CHANNEL_MESSAGE_REQUEST_OFFERS,
61 { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED,
62 vmbus_channel_on_offers_delivered },
63 { HV_CHANNEL_MESSAGE_OPEN_CHANNEL,
65 { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT,
66 vmbus_channel_on_open_result },
67 { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL,
69 { HV_CHANNEL_MESSAGEL_GPADL_HEADER,
71 { HV_CHANNEL_MESSAGE_GPADL_BODY,
73 { HV_CHANNEL_MESSAGE_GPADL_CREATED,
74 vmbus_channel_on_gpadl_created },
75 { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN,
77 { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN,
78 vmbus_channel_on_gpadl_torndown },
79 { HV_CHANNEL_MESSAGE_REL_ID_RELEASED,
81 { HV_CHANNEL_MESSAGE_INITIATED_CONTACT,
83 { HV_CHANNEL_MESSAGE_VERSION_RESPONSE,
84 vmbus_channel_on_version_response },
85 { HV_CHANNEL_MESSAGE_UNLOAD,
/*
 * Deferred-work wrapper: pairs a callback with its argument so the pair
 * can be run later from the system taskqueue (see work_item_callback).
 * Additional members (task, context) are declared on lines not visible
 * in this view.
 */
89 typedef struct hv_work_item {
91 void (*callback)(void *);
/*
96 * Implementation of the work abstraction.
 *
 * Taskqueue trampoline: recovers the hv_work_item and invokes the saved
 * callback with its saved context.  'pending' is the taskqueue enqueue
 * count and is not consulted here.
 */
99 work_item_callback(void *work, int pending)
101 struct hv_work_item *w = (struct hv_work_item *)work;
103 w->callback(w->context);
/*
109 * @brief Create work item
 *
 * Allocates an hv_work_item wrapping (callback, context) and enqueues it
 * on the system thread taskqueue; the callback then runs asynchronously
 * in taskqueue-thread context.  Returns taskqueue_enqueue()'s result.
 * NOTE(review): on allocation failure only the KASSERT fires; behavior in
 * non-INVARIANTS kernels depends on lines not visible here -- confirm.
 */
113 void (*callback)(void *), void *context)
115 struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
117 KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
121 w->callback = callback;
122 w->context = context;
/* Priority 0; work_item_callback() will receive 'w' as its argument. */
124 TASK_INIT(&w->work, 0, work_item_callback, w);
126 return (taskqueue_enqueue(taskqueue_thread, &w->work));
/*
131 * @brief Allocate and initialize a vmbus channel object
 *
 * Returns a freshly allocated hv_vmbus_channel with the sub-channel
 * lock and list initialized.  Caller owns the object and releases it
 * with hv_vmbus_free_vmbus_channel().
 */
134 hv_vmbus_allocate_channel(void)
136 hv_vmbus_channel* channel;
138 channel = (hv_vmbus_channel*) malloc(
139 sizeof(hv_vmbus_channel),
/* sc_lock protects sc_list_anchor, the list of sub-channels. */
143 mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
144 TAILQ_INIT(&channel->sc_list_anchor);
/*
150 * @brief Release the resources used by the vmbus channel object
 *
 * Counterpart of hv_vmbus_allocate_channel(): destroys the sub-channel
 * lock and frees the channel itself.
 */
153 hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
155 mtx_destroy(&channel->sc_lock);
156 free(channel, M_DEVBUF);
/*
160 * @brief Process the offer by creating a channel/device
161 * associated with this offer
 *
 * Runs from taskqueue context (queued by vmbus_channel_on_offer_internal).
 * Records the channel in the relid lookup table, then either attaches it
 * as a sub-channel of an existing primary (matching interface type and
 * instance GUIDs) or registers it as a new child device on the vmbus.
 */
164 vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
167 hv_vmbus_channel* channel;
/* Relid is the host-assigned index into the channels[] lookup table. */
173 relid = new_channel->offer_msg.child_rel_id;
/*
175 * Make sure this is a new offer
 */
177 mtx_lock(&hv_vmbus_g_connection.channel_lock);
178 hv_vmbus_g_connection.channels[relid] = new_channel;
/*
 * Scan existing channels for one with the same interface type and
 * instance GUIDs; if found, this offer is for a sub-channel of it.
 */
180 TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
183 if (memcmp(&channel->offer_msg.offer.interface_type,
184 &new_channel->offer_msg.offer.interface_type,
185 sizeof(hv_guid)) == 0 &&
186 memcmp(&channel->offer_msg.offer.interface_instance,
187 &new_channel->offer_msg.offer.interface_instance,
188 sizeof(hv_guid)) == 0) {
197 &hv_vmbus_g_connection.channel_anchor,
201 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
203 /*XXX add new channel to percpu_list */
/*
207 * Check if this is a sub channel.
 */
209 if (new_channel->offer_msg.offer.sub_channel_index != 0) {
/*
211 * It is a sub channel offer, process it.
 *
 * NOTE(review): 'channel' here is the primary found by the GUID
 * scan above; depends on the loop having matched -- confirm the
 * elided control flow guarantees that.
 */
213 new_channel->primary_channel = channel;
214 new_channel->device = channel->device;
215 mtx_lock(&channel->sc_lock);
217 &channel->sc_list_anchor,
220 mtx_unlock(&channel->sc_lock);
223 printf("VMBUS get multi-channel offer, "
225 new_channel->offer_msg.child_rel_id,
226 new_channel->offer_msg.offer.sub_channel_index);
229 /* Insert new channel into channel_anchor. */
230 mtx_lock(&hv_vmbus_g_connection.channel_lock);
231 TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
232 new_channel, list_entry);
233 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
236 printf("VMBUS: new multi-channel offer <%p>, "
237 "its primary channel is <%p>.\n",
238 new_channel, new_channel->primary_channel);
240 /*XXX add it to percpu_list */
242 new_channel->state = HV_CHANNEL_OPEN_STATE;
/* Let the primary's owner know a sub-channel arrived, if it asked to. */
243 if (channel->sc_creation_callback != NULL) {
244 channel->sc_creation_callback(new_channel);
249 hv_vmbus_free_vmbus_channel(new_channel);
253 new_channel->state = HV_CHANNEL_OPEN_STATE;
/*
256 * Start the process of binding this offer to the driver
257 * (We need to set the device field before calling
258 * hv_vmbus_child_device_add())
 */
260 new_channel->device = hv_vmbus_child_device_create(
261 new_channel->offer_msg.offer.interface_type,
262 new_channel->offer_msg.offer.interface_instance, new_channel);
/*
265 * Add the new device to the bus. This will kick off device-driver
266 * binding which eventually invokes the device driver's AddDevice()
 */
269 ret = hv_vmbus_child_device_register(new_channel->device);
/* Registration failed: unlink and free the channel. */
271 mtx_lock(&hv_vmbus_g_connection.channel_lock);
273 &hv_vmbus_g_connection.channel_anchor,
276 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
277 hv_vmbus_free_vmbus_channel(new_channel);
/*
 * @brief Bind a channel's interrupt delivery to the given guest cpu.
 *
 * Records both the guest cpu id and the corresponding hypervisor VCPU
 * index on the channel, then logs the assignment.
 */
282 vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
284 KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
/* target_vcpu is the hypervisor's VCPU number for this guest cpu. */
286 chan->target_cpu = cpu;
287 chan->target_vcpu = hv_vmbus_g_context.hv_vcpu_index[cpu];
290 printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
291 chan->offer_msg.child_rel_id,
292 chan->target_cpu, chan->target_vcpu);
/*
297 * Array of device guids that are performance critical. We try to distribute
298 * the interrupt load for these devices across all online cpus.
 */
300 static const hv_guid high_perf_devices[] = {
/*
314 * We use this static number to distribute the channel interrupt load.
 *
 * NOTE(review): incremented without atomics in
 * vmbus_channel_select_defcpu(); presumably offer processing is
 * serialized so this is safe -- confirm.
 */
316 static uint32_t next_vcpu;
/*
319 * Starting with Win8, we can statically distribute the incoming
320 * channel interrupt load by binding a channel to VCPU. We
321 * implement here a simple round robin scheme for distributing
322 * the interrupt load.
323 * We will bind channels that are not performance critical to cpu 0 and
324 * performance critical channels (IDE, SCSI and Network) will be uniformly
325 * distributed across all available CPUs.
 */
328 vmbus_channel_select_defcpu(struct hv_vmbus_channel *channel)
330 uint32_t current_cpu;
332 boolean_t is_perf_channel = FALSE;
333 const hv_guid *guid = &channel->offer_msg.offer.interface_type;
/* Classify the channel by matching its interface GUID against the table. */
335 for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
336 if (memcmp(guid->data, high_perf_devices[i].data,
337 sizeof(hv_guid)) == 0) {
338 is_perf_channel = TRUE;
/*
 * Pre-Win8 hosts (WS2008/WIN7) cannot target per-VCPU interrupts,
 * and non-performance channels do not need spreading: pin to cpu 0.
 */
343 if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
344 (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) ||
345 (!is_perf_channel)) {
347 vmbus_channel_cpu_set(channel, 0);
350 /* mp_ncpus should have the number cpus currently online */
351 current_cpu = (++next_vcpu % mp_ncpus);
352 vmbus_channel_cpu_set(channel, current_cpu);
/*
356 * @brief Handler for channel offers from Hyper-V/Azure
 *
358 * Handler for channel offers from vmbus in parent partition. We ignore
359 * all offers except network and storage offers. For each network and storage
360 * offers, we create a channel object and queue a work item to the channel
361 * object to process the offer synchronously
 *
 * Copies the offer message (the caller's buffer is not ours to keep) and
 * defers the real work to vmbus_channel_on_offer_internal() via the
 * taskqueue; that handler frees the copy.
 */
364 vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
366 hv_vmbus_channel_offer_channel* offer;
367 hv_vmbus_channel_offer_channel* copied;
369 offer = (hv_vmbus_channel_offer_channel*) hdr;
372 hv_guid *guidInstance;
374 guidType = &offer->offer.interface_type;
375 guidInstance = &offer->offer.interface_instance;
/* M_NOWAIT: we may be in a context that must not sleep. */
378 copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
379 if (copied == NULL) {
380 printf("fail to allocate memory\n");
384 memcpy(copied, hdr, sizeof(*copied));
385 hv_queue_work_item(vmbus_channel_on_offer_internal, copied);
/*
 * @brief Deferred half of the offer handler (taskqueue context).
 *
 * Builds an hv_vmbus_channel from the copied offer message: sets up the
 * hypercall signal-event parameter, saves the offer, derives the monitor
 * group/bit, picks a default cpu, and hands the channel to
 * vmbus_channel_process_offer().  Frees the offer copy made by
 * vmbus_channel_on_offer().
 */
389 vmbus_channel_on_offer_internal(void* context)
391 hv_vmbus_channel* new_channel;
393 hv_vmbus_channel_offer_channel* offer = (hv_vmbus_channel_offer_channel*)context;
394 /* Allocate the channel object and save this offer */
395 new_channel = hv_vmbus_allocate_channel();
/*
398 * By default we setup state to enable batched
399 * reading. A specific service can choose to
400 * disable this prior to opening the channel.
 */
402 new_channel->batched_reading = TRUE;
/* Hypercall input must be aligned; point into the embedded buffer. */
404 new_channel->signal_event_param =
405 (hv_vmbus_input_signal_event *)
406 (HV_ALIGN_UP((unsigned long)
407 &new_channel->signal_event_buffer,
408 HV_HYPERCALL_PARAM_ALIGN));
410 new_channel->signal_event_param->connection_id.as_uint32_t = 0;
411 new_channel->signal_event_param->connection_id.u.id =
412 HV_VMBUS_EVENT_CONNECTION_ID;
413 new_channel->signal_event_param->flag_number = 0;
414 new_channel->signal_event_param->rsvd_z = 0;
/* Win8+ hosts may supply a dedicated per-channel connection id. */
416 if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
417 new_channel->is_dedicated_interrupt =
418 (offer->is_dedicated_interrupt != 0);
419 new_channel->signal_event_param->connection_id.u.id =
420 offer->connection_id;
423 memcpy(&new_channel->offer_msg, offer,
424 sizeof(hv_vmbus_channel_offer_channel));
/* 32 monitor bits per group, hence the /32 and %32 split. */
425 new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
426 new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
428 /* Select default cpu for this channel. */
429 vmbus_channel_select_defcpu(new_channel);
431 vmbus_channel_process_offer(new_channel);
/* Release the copy queued by vmbus_channel_on_offer(). */
433 free(offer, M_DEVBUF);
/*
437 * @brief Rescind offer handler.
 *
439 * We queue a work item to process this offer
 *
 * Looks up the channel by the rescinded relid, defers teardown to
 * vmbus_channel_on_offer_rescind_internal(), and clears the relid slot
 * so no further lookups resolve to the dying channel.
 */
443 vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
445 hv_vmbus_channel_rescind_offer* rescind;
446 hv_vmbus_channel* channel;
448 rescind = (hv_vmbus_channel_rescind_offer*) hdr;
449 channel = hv_vmbus_g_connection.channels[rescind->child_rel_id];
/*
 * @brief Deferred rescind teardown (taskqueue context).
 *
 * Unregisters the child device for primary channels; sub-channels do
 * not own the hv_device, so teardown for them is handled elsewhere
 * (on lines not visible here).
 */
459 vmbus_channel_on_offer_rescind_internal(void *context)
461 hv_vmbus_channel* channel;
463 channel = (hv_vmbus_channel*)context;
464 if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
465 /* Only primary channel owns the hv_device */
466 hv_vmbus_child_device_unregister(channel->device);
/*
472 * @brief Invoked when all offers have been delivered.
 *
 * Intentionally takes no action here.
 */
475 vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
/*
480 * @brief Open result handler.
 *
482 * This is invoked when we received a response
483 * to our channel open request. Find the matching request, copy the
484 * response and signal the requesting thread.
 *
 * Matching is on (child_rel_id, open_id) of pending OPEN_CHANNEL
 * requests parked on channel_msg_anchor; the waiter is unblocked via
 * its semaphore.
 */
487 vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
489 hv_vmbus_channel_open_result* result;
490 hv_vmbus_channel_msg_info* msg_info;
491 hv_vmbus_channel_msg_header* requestHeader;
492 hv_vmbus_channel_open_channel* openMsg;
494 result = (hv_vmbus_channel_open_result*) hdr;
/*
497 * Find the open msg, copy the result and signal/unblock the wait event
 */
499 mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
501 TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
503 requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
505 if (requestHeader->message_type ==
506 HV_CHANNEL_MESSAGE_OPEN_CHANNEL) {
507 openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg;
508 if (openMsg->child_rel_id == result->child_rel_id
509 && openMsg->open_id == result->open_id) {
510 memcpy(&msg_info->response.open_result, result,
511 sizeof(hv_vmbus_channel_open_result));
/* Wake the thread blocked in the open request. */
512 sema_post(&msg_info->wait_sema);
517 mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
/*
522 * @brief GPADL created handler.
 *
524 * This is invoked when we received a response
525 * to our gpadl create request. Find the matching request, copy the
526 * response and signal the requesting thread.
 *
 * Matching is on (child_rel_id, gpadl) of pending GPADL_HEADER
 * requests parked on channel_msg_anchor.
 */
529 vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
531 hv_vmbus_channel_gpadl_created* gpadl_created;
532 hv_vmbus_channel_msg_info* msg_info;
533 hv_vmbus_channel_msg_header* request_header;
534 hv_vmbus_channel_gpadl_header* gpadl_header;
536 gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr;
/*
538 * Find the establish msg, copy the result and signal/unblock
 * the waiting thread.
 */
541 mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
542 TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
544 request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;
545 if (request_header->message_type ==
546 HV_CHANNEL_MESSAGEL_GPADL_HEADER) {
548 (hv_vmbus_channel_gpadl_header*) request_header;
550 if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id)
551 && (gpadl_created->gpadl == gpadl_header->gpadl)) {
552 memcpy(&msg_info->response.gpadl_created,
554 sizeof(hv_vmbus_channel_gpadl_created));
/* Wake the thread blocked in the gpadl-create request. */
555 sema_post(&msg_info->wait_sema);
560 mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
/*
564 * @brief GPADL torndown handler.
 *
566 * This is invoked when we received a response
567 * to our gpadl teardown request. Find the matching request, copy the
568 * response and signal the requesting thread
 *
 * Matching is on the gpadl handle of pending GPADL_TEARDOWN requests
 * parked on channel_msg_anchor.
 */
571 vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
573 hv_vmbus_channel_gpadl_torndown* gpadl_torndown;
574 hv_vmbus_channel_msg_info* msg_info;
575 hv_vmbus_channel_msg_header* requestHeader;
576 hv_vmbus_channel_gpadl_teardown* gpadlTeardown;
578 gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr;
/*
581 * Find the open msg, copy the result and signal/unblock the
 * waiting thread.
 */
585 mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
587 TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
589 requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
591 if (requestHeader->message_type
592 == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) {
594 (hv_vmbus_channel_gpadl_teardown*) requestHeader;
596 if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) {
597 memcpy(&msg_info->response.gpadl_torndown,
599 sizeof(hv_vmbus_channel_gpadl_torndown));
/* Wake the thread blocked in the teardown request. */
600 sema_post(&msg_info->wait_sema);
605 mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
/*
609 * @brief Version response handler.
 *
611 * This is invoked when we received a response
612 * to our initiate contact request. Find the matching request, copy the
613 * response and signal the requesting thread.
 *
 * Any pending INITIATED_CONTACT request on channel_msg_anchor matches;
 * no per-request id is compared here.
 */
616 vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
618 hv_vmbus_channel_msg_info* msg_info;
619 hv_vmbus_channel_msg_header* requestHeader;
620 hv_vmbus_channel_initiate_contact* initiate;
621 hv_vmbus_channel_version_response* versionResponse;
623 versionResponse = (hv_vmbus_channel_version_response*)hdr;
625 mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
626 TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
628 requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
629 if (requestHeader->message_type
630 == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) {
632 (hv_vmbus_channel_initiate_contact*) requestHeader;
633 memcpy(&msg_info->response.version_response,
635 sizeof(hv_vmbus_channel_version_response));
/* Wake the thread blocked in hv_vmbus_connect's initiate-contact. */
636 sema_post(&msg_info->wait_sema);
639 mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
/*
644 * @brief Send a request to get all our pending offers.
 *
 * Posts a REQUEST_OFFERS message to the host; the host replies with one
 * OFFER_CHANNEL message per channel, handled by vmbus_channel_on_offer().
 * The message buffer is freed before returning.
 */
647 hv_vmbus_request_channel_offers(void)
650 hv_vmbus_channel_msg_header* msg;
651 hv_vmbus_channel_msg_info* msg_info;
/* msg_info embeds the message payload directly after its header. */
653 msg_info = (hv_vmbus_channel_msg_info *)
654 malloc(sizeof(hv_vmbus_channel_msg_info)
655 + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT);
657 if (msg_info == NULL) {
659 printf("Error VMBUS: malloc failed for Request Offers\n");
663 msg = (hv_vmbus_channel_msg_header*) msg_info->msg;
664 msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS;
666 ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));
668 free(msg_info, M_DEVBUF);
/*
674 * @brief Release channels that are unattached/unconnected (i.e., no drivers associated)
 *
 * Drains channel_anchor: unregisters the child device for each primary
 * channel, frees every channel, and clears the relid lookup table.
 * Called with no channels expected to be in active use (e.g. during
 * vmbus teardown) -- TODO(review): confirm caller context.
 */
677 hv_vmbus_release_unattached_channels(void)
679 hv_vmbus_channel *channel;
681 mtx_lock(&hv_vmbus_g_connection.channel_lock);
683 while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
684 channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
685 TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
686 channel, list_entry);
688 if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
689 /* Only primary channel owns the hv_device */
690 hv_vmbus_child_device_unregister(channel->device);
692 hv_vmbus_free_vmbus_channel(channel);
/* Drop every relid -> channel mapping in one sweep. */
694 bzero(hv_vmbus_g_connection.channels,
695 sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT);
696 mtx_unlock(&hv_vmbus_g_connection.channel_lock);
/*
700 * @brief Select the best outgoing channel
 *
702 * The channel whose vcpu binding is closest to the current vcpu will
 * be selected.
704 * If no multi-channel, always select primary channel
 *
706 * @param primary - primary channel
 *
 * Scans the primary's opened sub-channels and keeps the one whose
 * target_vcpu is numerically closest to the current cpu's vcpu; an
 * exact match is taken as-is.  Falls back to the primary when there
 * are no sub-channels or the cpu id is out of range.
 */
708 struct hv_vmbus_channel *
709 vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
711 hv_vmbus_channel *new_channel = NULL;
712 hv_vmbus_channel *outgoing_channel = primary;
713 int old_cpu_distance = 0;
714 int new_cpu_distance = 0;
716 int smp_pro_id = PCPU_GET(cpuid);
/* No sub-channels: the primary is the only option. */
718 if (TAILQ_EMPTY(&primary->sc_list_anchor)) {
719 return outgoing_channel;
/* Guard the hv_vcpu_index[] lookup below. */
722 if (smp_pro_id >= MAXCPU) {
723 return outgoing_channel;
726 cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id];
728 TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) {
/* Only consider sub-channels that are fully opened. */
729 if (new_channel->state != HV_CHANNEL_OPENED_STATE){
733 if (new_channel->target_vcpu == cur_vcpu){
/* |target_vcpu - cur_vcpu| without relying on signed abs(). */
737 old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
738 (outgoing_channel->target_vcpu - cur_vcpu) :
739 (cur_vcpu - outgoing_channel->target_vcpu));
741 new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
742 (new_channel->target_vcpu - cur_vcpu) :
743 (cur_vcpu - new_channel->target_vcpu));
/* Keep the current pick unless the candidate is strictly closer. */
745 if (old_cpu_distance < new_cpu_distance) {
749 outgoing_channel = new_channel;
752 return(outgoing_channel);