2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * VM Bus Driver Implementation
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
42 #include <sys/sysctl.h>
43 #include <sys/syslog.h>
44 #include <sys/systm.h>
45 #include <sys/rtprio.h>
46 #include <sys/interrupt.h>
48 #include <sys/taskqueue.h>
49 #include <sys/mutex.h>
52 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 #include <machine/intr_machdep.h>
57 #include <machine/md_var.h>
58 #include <machine/segments.h>
60 #include <machine/apicvar.h>
62 #include <dev/hyperv/include/hyperv.h>
63 #include "hv_vmbus_priv.h"
65 #include <contrib/dev/acpica/include/acpi.h>
/* Bus device used as the parent when child devices are added or deleted. */
68 static device_t vmbus_devp;
/* Presumably an "already initialized" flag -- NOTE(review): its users are not visible here; confirm. */
69 static int vmbus_inited;
/*
 * Setup arguments handed to hv_vmbus_synic_init through smp_rendezvous();
 * carries the callback vector and the page_buffers pointer array.
 */
70 static hv_setup_args setup_args; /* only CPU 0 supported at this time */
/* NULL-terminated ID list passed to ACPI_ID_PROBE() in vmbus_probe(). */
72 static char *vmbus_ids[] = { "VMBUS", NULL };
75 * @brief Software interrupt thread routine to handle channel messages from
79 vmbus_msg_swintr(void *arg)
83 hv_vmbus_channel_msg_header *hdr;
84 hv_vmbus_channel_msg_table_entry *entry;
85 hv_vmbus_channel_msg_type msg_type;
86 hv_vmbus_message* msg;
87 hv_vmbus_message* copied;
88 static bool warned = false;
91 KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: "
92 "cpu out of range!"));
94 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
95 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
98 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE)
99 break; /* no message */
101 hdr = (hv_vmbus_channel_msg_header *)msg->u.payload;
102 msg_type = hdr->message_type;
104 if (msg_type >= HV_CHANNEL_MESSAGE_COUNT && !warned) {
106 printf("VMBUS: unknown message type = %d\n", msg_type);
110 entry = &g_channel_message_table[msg_type];
112 if (entry->handler_no_sleep)
113 entry->messageHandler(hdr);
116 copied = malloc(sizeof(hv_vmbus_message),
118 KASSERT(copied != NULL,
119 ("Error VMBUS: malloc failed to allocate"
120 " hv_vmbus_message!"));
124 memcpy(copied, msg, sizeof(hv_vmbus_message));
125 hv_queue_work_item(hv_vmbus_g_connection.work_queue,
126 hv_vmbus_on_channel_message,
130 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
133 * Make sure the write to message_type (ie set to
134 * HV_MESSAGE_TYPE_NONE) happens before we read the
135 * message_pending and EOMing. Otherwise, the EOMing will
136 * not deliver any more messages
137 * since there is no empty slot
141 if (msg->header.message_flags.u.message_pending) {
143 * This will cause message queue rescan to possibly
144 * deliver another msg from the hypervisor
146 wrmsr(HV_X64_MSR_EOM, 0);
152 * @brief Interrupt filter routine for VMBUS.
154 * The purpose of this routine is to determine the type of VMBUS protocol
155 * message to process - an event or a channel message.
158 hv_vmbus_isr(struct trapframe *frame)
161 hv_vmbus_message* msg;
162 hv_vmbus_synic_event_flags* event;
165 cpu = PCPU_GET(cpuid);
168 * The Windows team has advised that we check for events
169 * before checking for messages. This is the way they do it
170 * in Windows when running as a guest in Hyper-V
173 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
174 event = (hv_vmbus_synic_event_flags*)
175 page_addr + HV_VMBUS_MESSAGE_SINT;
177 if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
178 (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
179 /* Since we are a child, we only need to check bit 0 */
180 if (synch_test_and_clear_bit(0, &event->flags32[0])) {
181 swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
185 * On host with Win8 or above, we can directly look at
186 * the event page. If bit n is set, we have an interrupt
187 * on the channel with id n.
188 * Directly schedule the event software interrupt on
191 swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
194 /* Check if there are actual msgs to be processed */
195 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
196 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
198 /* We call the event timer to process the message */
199 if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) {
200 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
203 * Make sure the write to message_type (ie set to
204 * HV_MESSAGE_TYPE_NONE) happens before we read the
205 * message_pending and EOMing. Otherwise, the EOMing will
206 * not deliver any more messages
207 * since there is no empty slot
211 if (msg->header.message_flags.u.message_pending) {
213 * This will cause message queue rescan to possibly
214 * deliver another msg from the hypervisor
216 wrmsr(HV_X64_MSR_EOM, 0);
219 return (FILTER_HANDLED);
222 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
223 swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0);
226 return (FILTER_HANDLED);
/* MAXCPU-sized array of counters; slots are set to 0 by the initialization code. */
229 uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
/*
 * MAXCPU-sized array of pointers to interrupt counters. Each slot is filled
 * in through intrcnt_add() and the pointed-to counter is incremented by
 * hv_vector_handler() for the current CPU.
 */
230 u_long *hv_vmbus_intr_cpu[MAXCPU];
233 hv_vector_handler(struct trapframe *trap_frame)
238 * Disable preemption.
243 * Do a little interrupt counting.
245 cpu = PCPU_GET(cpuid);
246 (*hv_vmbus_intr_cpu[cpu])++;
248 hv_vmbus_isr(trap_frame);
263 struct hv_device *child_dev_ctx = device_get_ivars(child);
267 case HV_VMBUS_IVAR_TYPE:
268 *result = (uintptr_t) &child_dev_ctx->class_id;
270 case HV_VMBUS_IVAR_INSTANCE:
271 *result = (uintptr_t) &child_dev_ctx->device_id;
273 case HV_VMBUS_IVAR_DEVCTX:
274 *result = (uintptr_t) child_dev_ctx;
276 case HV_VMBUS_IVAR_NODE:
277 *result = (uintptr_t) child_dev_ctx->device;
292 case HV_VMBUS_IVAR_TYPE:
293 case HV_VMBUS_IVAR_INSTANCE:
294 case HV_VMBUS_IVAR_DEVCTX:
295 case HV_VMBUS_IVAR_NODE:
303 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
306 struct hv_device *dev_ctx = device_get_ivars(child);
308 strlcat(buf, "classid=", buflen);
309 snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id);
310 strlcat(buf, guidbuf, buflen);
312 strlcat(buf, " deviceid=", buflen);
313 snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id);
314 strlcat(buf, guidbuf, buflen);
320 hv_vmbus_child_device_create(
323 hv_vmbus_channel* channel)
325 hv_device* child_dev;
328 * Allocate the new child device
330 child_dev = malloc(sizeof(hv_device), M_DEVBUF,
332 KASSERT(child_dev != NULL,
333 ("Error VMBUS: malloc failed to allocate hv_device!"));
335 if (child_dev == NULL)
338 child_dev->channel = channel;
339 memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
340 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
346 snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid)
349 const unsigned char *d = guid->data;
351 cnt = snprintf(buf, sz,
352 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
353 d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6],
354 d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
359 hv_vmbus_child_device_register(struct hv_device *child_dev)
366 snprintf_hv_guid(name, sizeof(name), &child_dev->class_id);
367 printf("VMBUS: Class ID: %s\n", name);
370 child = device_add_child(vmbus_devp, NULL, -1);
371 child_dev->device = child;
372 device_set_ivars(child, child_dev);
375 ret = device_probe_and_attach(child);
382 hv_vmbus_child_device_unregister(struct hv_device *child_dev)
386 * XXXKYS: Ensure that this is the opposite of
390 ret = device_delete_child(vmbus_devp, child_dev->device);
396 vmbus_probe(device_t dev) {
397 if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL ||
398 device_get_unit(dev) != 0)
401 device_set_desc(dev, "Vmbus Devices");
403 return (BUS_PROBE_DEFAULT);
407 extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
410 * @brief Find a free IDT slot and setup the interrupt handler.
413 vmbus_vector_alloc(void)
417 struct gate_descriptor *ip;
420 * Search backwards from the highest IDT vector available for use
421 * as vmbus channel callback vector. We install 'hv_vmbus_callback'
422 * handler at that vector and use it to interrupt vcpus.
424 vector = APIC_SPURIOUS_INT;
425 while (--vector >= APIC_IPI_INTS) {
427 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
428 if (func == (uintptr_t)&IDTVEC(rsvd)) {
430 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
431 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
433 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
444 * @brief Restore the IDT slot to rsvd.
447 vmbus_vector_free(int vector)
450 struct gate_descriptor *ip;
455 KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
456 ("invalid vector %d", vector));
459 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
460 KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
461 ("invalid vector %d", vector));
463 setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
469 vmbus_vector_alloc(void)
475 vmbus_vector_free(int vector)
482 * @brief Main vmbus driver initialization routine.
485 * - initialize the vmbus driver context
486 * - setup various driver entry points
487 * - invoke the vmbus hv main init routine
488 * - get the irq resource
489 * - invoke the vmbus to add the vmbus root device
490 * - setup the vmbus root device
491 * - retrieve the channel offers
497 char buf[MAXCOMLEN + 1];
504 ret = hv_vmbus_init();
508 printf("Error VMBUS: Hypervisor Initialization Failed!\n");
513 * Find a free IDT slot for vmbus callback.
515 hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc();
517 if (hv_vmbus_g_context.hv_cb_vector == 0) {
519 printf("Error VMBUS: Cannot find free IDT slot for "
520 "vmbus callback!\n");
525 printf("VMBUS: vmbus callback vector %d\n",
526 hv_vmbus_g_context.hv_cb_vector);
529 * Notify the hypervisor of our vector.
531 setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
534 hv_vmbus_swintr_event_cpu[j] = 0;
535 hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
536 hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
537 hv_vmbus_g_context.event_swintr[j] = NULL;
538 hv_vmbus_g_context.msg_swintr[j] = NULL;
540 snprintf(buf, sizeof(buf), "cpu%d:hyperv", j);
541 intrcnt_add(buf, &hv_vmbus_intr_cpu[j]);
543 for (i = 0; i < 2; i++)
544 setup_args.page_buffers[2 * j + i] = NULL;
552 * Setup software interrupt thread and handler for msg handling.
554 ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j],
555 "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0,
556 &hv_vmbus_g_context.msg_swintr[j]);
559 printf("VMBUS: failed to setup msg swi for "
565 * Bind the swi thread to the cpu.
567 ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
571 printf("VMBUS: failed to bind msg swi thread "
577 * Setup software interrupt thread and handler for
580 ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j],
581 "hv_event", hv_vmbus_on_events, (void *)(long)j,
582 SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]);
585 printf("VMBUS: failed to setup event swi for "
591 * Prepare the per cpu msg and event pages to be called on each cpu.
593 for(i = 0; i < 2; i++) {
594 setup_args.page_buffers[2 * j + i] =
595 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
596 if (setup_args.page_buffers[2 * j + i] == NULL) {
597 KASSERT(setup_args.page_buffers[2 * j + i] != NULL,
598 ("Error VMBUS: malloc failed!"));
605 printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
608 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
611 * Connect to VMBus in the root partition
613 ret = hv_vmbus_connect();
618 hv_vmbus_request_channel_offers();
623 * Free pages alloc'ed
625 for (n = 0; n < 2 * MAXCPU; n++)
626 if (setup_args.page_buffers[n] != NULL)
627 free(setup_args.page_buffers[n], M_DEVBUF);
630 * remove swi and vmbus callback vector;
633 if (hv_vmbus_g_context.msg_swintr[j] != NULL)
634 swi_remove(hv_vmbus_g_context.msg_swintr[j]);
635 if (hv_vmbus_g_context.event_swintr[j] != NULL)
636 swi_remove(hv_vmbus_g_context.event_swintr[j]);
637 hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
638 hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
641 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
650 vmbus_attach(device_t dev)
653 device_printf(dev, "VMBUS: attach dev: %p\n", dev);
657 * If the system has already booted and thread
658 * scheduling is possible, as indicated by the
659 * global cold set to zero, we just call the driver
660 * initialization directly.
671 if (vm_guest != VM_GUEST_HV)
675 * If the system has already booted and thread
676 * scheduling is possible, as indicated by the
677 * global cold set to zero, we just call the driver
678 * initialization directly.
689 hv_vmbus_release_unattached_channels();
690 hv_vmbus_disconnect();
692 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
694 for(i = 0; i < 2 * MAXCPU; i++) {
695 if (setup_args.page_buffers[i] != 0)
696 free(setup_args.page_buffers[i], M_DEVBUF);
703 if (hv_vmbus_g_context.msg_swintr[i] != NULL)
704 swi_remove(hv_vmbus_g_context.msg_swintr[i]);
705 if (hv_vmbus_g_context.event_swintr[i] != NULL)
706 swi_remove(hv_vmbus_g_context.event_swintr[i]);
707 hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;
708 hv_vmbus_g_context.hv_event_intr_event[i] = NULL;
711 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
723 vmbus_detach(device_t dev)
733 printf("VMBUS: load\n");
737 vmbus_mod_unload(void)
740 printf("VMBUS: unload\n");
744 vmbus_modevent(module_t mod, int what, void *arg)
759 static device_method_t vmbus_methods[] = {
760 /** Device interface */
761 DEVMETHOD(device_probe, vmbus_probe),
762 DEVMETHOD(device_attach, vmbus_attach),
763 DEVMETHOD(device_detach, vmbus_detach),
764 DEVMETHOD(device_shutdown, bus_generic_shutdown),
765 DEVMETHOD(device_suspend, bus_generic_suspend),
766 DEVMETHOD(device_resume, bus_generic_resume),
769 DEVMETHOD(bus_add_child, bus_generic_add_child),
770 DEVMETHOD(bus_print_child, bus_generic_print_child),
771 DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
772 DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
773 DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str),
/* Name string referenced by the vmbus_driver declaration below. */
777 static char driver_name[] = "vmbus";
/* Driver declaration: name, method table (vmbus_methods), and a size field of 0. */
778 static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
/* Device class handle passed to DRIVER_MODULE() below. */
781 devclass_t vmbus_devclass;
/*
 * Register the vmbus driver under the acpi bus with vmbus_modevent as the
 * module event handler, declare a dependency on the acpi module, and set
 * the module version to 1.
 */
783 DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
784 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
785 MODULE_VERSION(vmbus, 1);
/* vmbus_init is scheduled at SI_SUB_SMP + 1, i.e. right after the SMP subsystem stage. */
787 /* We want to be started after SMP is initialized */
788 SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);