2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * VM Bus Driver Implementation
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/sysctl.h>
42 #include <sys/syslog.h>
43 #include <sys/systm.h>
44 #include <sys/rtprio.h>
45 #include <sys/interrupt.h>
47 #include <sys/taskqueue.h>
48 #include <sys/mutex.h>
51 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 #include <machine/intr_machdep.h>
56 #include <machine/md_var.h>
57 #include <machine/segments.h>
59 #include <x86/apicvar.h>
61 #include "hv_vmbus_priv.h"
/*
 * File-scope driver state.
 * vmbus_devp is the device that child devices are added to and deleted
 * from in hv_vmbus_child_device_register()/_unregister().
 */
66 static device_t vmbus_devp;
/* Presumably set once bus initialization has run -- TODO confirm; its use is not visible here. */
67 static int vmbus_inited;
/*
 * Arguments handed to hv_vmbus_synic_init() through smp_rendezvous(): the
 * callback vector plus the page_buffers[] array.
 * NOTE(review): the "only CPU 0" remark looks stale, since page_buffers[]
 * is filled at index 2 * j + i and freed over 2 * MAXCPU entries -- confirm.
 */
68 static hv_setup_args setup_args; /* only CPU 0 supported at this time */
71 * @brief Software interrupt thread routine to handle channel messages from
75 vmbus_msg_swintr(void *arg)
79 hv_vmbus_message* msg;
80 hv_vmbus_message* copied;
83 KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: "
84 "cpu out of range!"));
86 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
87 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
90 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
91 break; /* no message */
93 copied = malloc(sizeof(hv_vmbus_message),
95 KASSERT(copied != NULL,
96 ("Error VMBUS: malloc failed to allocate"
97 " hv_vmbus_message!"));
100 memcpy(copied, msg, sizeof(hv_vmbus_message));
101 hv_queue_work_item(hv_vmbus_g_connection.work_queue,
102 hv_vmbus_on_channel_message, copied);
105 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
108 * Make sure the write to message_type (i.e., setting it to
109 * HV_MESSAGE_TYPE_NONE) happens before we read
110 * message_pending and signal EOM. Otherwise, the EOM will
111 * not deliver any more messages,
112 * since there is no empty slot.
116 if (msg->header.message_flags.u.message_pending) {
118 * This will cause message queue rescan to possibly
119 * deliver another msg from the hypervisor
121 wrmsr(HV_X64_MSR_EOM, 0);
127 * @brief Interrupt filter routine for VMBUS.
129 * The purpose of this routine is to determine the type of VMBUS protocol
130 * message to process - an event or a channel message.
133 hv_vmbus_isr(void *unused)
136 hv_vmbus_message* msg;
137 hv_vmbus_synic_event_flags* event;
140 cpu = PCPU_GET(cpuid);
143 * The Windows team has advised that we check for events
144 * before checking for messages. This is the way they do it
145 * in Windows when running as a guest in Hyper-V
148 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
149 event = (hv_vmbus_synic_event_flags*)
150 page_addr + HV_VMBUS_MESSAGE_SINT;
152 if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
153 (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
154 /* Since we are a child, we only need to check bit 0 */
155 if (synch_test_and_clear_bit(0, &event->flags32[0])) {
156 swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
160 * On host with Win8 or above, we can directly look at
161 * the event page. If bit n is set, we have an interrupt
162 * on the channel with id n.
163 * Directly schedule the event software interrupt on
166 swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
169 /* Check if there are actual messages to be processed */
170 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
171 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
173 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
174 swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0);
177 return FILTER_HANDLED;
/* NOTE(review): no update of hv_intr_count is visible here -- confirm where it is maintained. */
181 uint32_t hv_intr_count = 0;
/*
 * Counter arrays sized MAXCPU; both are zeroed in the main driver
 * initialization routine. hv_vmbus_intr_cpu[] is incremented for the
 * current cpu in hv_vector_handler().
 */
183 uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
184 uint32_t hv_vmbus_intr_cpu[MAXCPU];
187 hv_vector_handler(struct trapframe *trap_frame)
194 * Disable preemption.
200 * Do a little interrupt counting.
202 cpu = PCPU_GET(cpuid);
203 hv_vmbus_intr_cpu[cpu]++;
222 struct hv_device *child_dev_ctx = device_get_ivars(child);
226 case HV_VMBUS_IVAR_TYPE:
227 *result = (uintptr_t) &child_dev_ctx->class_id;
229 case HV_VMBUS_IVAR_INSTANCE:
230 *result = (uintptr_t) &child_dev_ctx->device_id;
232 case HV_VMBUS_IVAR_DEVCTX:
233 *result = (uintptr_t) child_dev_ctx;
235 case HV_VMBUS_IVAR_NODE:
236 *result = (uintptr_t) child_dev_ctx->device;
251 case HV_VMBUS_IVAR_TYPE:
252 case HV_VMBUS_IVAR_INSTANCE:
253 case HV_VMBUS_IVAR_DEVCTX:
254 case HV_VMBUS_IVAR_NODE:
262 hv_vmbus_child_device_create(
265 hv_vmbus_channel* channel)
267 hv_device* child_dev;
270 * Allocate the new child device
272 child_dev = malloc(sizeof(hv_device), M_DEVBUF,
274 KASSERT(child_dev != NULL,
275 ("Error VMBUS: malloc failed to allocate hv_device!"));
277 if (child_dev == NULL)
280 child_dev->channel = channel;
281 memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
282 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
288 print_dev_guid(struct hv_device *dev)
291 unsigned char guid_name[100];
292 for (i = 0; i < 32; i += 2)
293 sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
295 printf("VMBUS: Class ID: %s\n", guid_name);
299 hv_vmbus_child_device_register(struct hv_device *child_dev)
304 print_dev_guid(child_dev);
307 child = device_add_child(vmbus_devp, NULL, -1);
308 child_dev->device = child;
309 device_set_ivars(child, child_dev);
312 ret = device_probe_and_attach(child);
319 hv_vmbus_child_device_unregister(struct hv_device *child_dev)
323 * XXXKYS: Ensure that this is the opposite of
327 ret = device_delete_child(vmbus_devp, child_dev->device);
333 vmbus_identify(driver_t *driver, device_t parent)
335 if (!hv_vmbus_query_hypervisor_presence())
338 vm_guest = VM_GUEST_HV;
340 BUS_ADD_CHILD(parent, 0, "vmbus", 0);
344 vmbus_probe(device_t dev) {
346 device_printf(dev, "VMBUS: probe\n");
348 device_set_desc(dev, "Vmbus Devices");
350 return (BUS_PROBE_NOWILDCARD);
354 extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
357 * @brief Find a free IDT slot and setup the interrupt handler.
360 vmbus_vector_alloc(void)
364 struct gate_descriptor *ip;
367 * Search backwards from the highest IDT vector available for use
368 * as vmbus channel callback vector. We install 'hv_vmbus_callback'
369 * handler at that vector and use it to interrupt vcpus.
371 vector = APIC_SPURIOUS_INT;
372 while (--vector >= APIC_IPI_INTS) {
374 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
375 if (func == (uintptr_t)&IDTVEC(rsvd)) {
377 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
378 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
380 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
391 * @brief Restore the IDT slot to rsvd.
394 vmbus_vector_free(int vector)
397 struct gate_descriptor *ip;
402 KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
403 ("invalid vector %d", vector));
406 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
407 KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
408 ("invalid vector %d", vector));
410 setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
416 vmbus_vector_alloc(void)
422 vmbus_vector_free(int vector)
429 * @brief Main vmbus driver initialization routine.
432 * - initialize the vmbus driver context
433 * - setup various driver entry points
434 * - invoke the vmbus hv main init routine
435 * - get the irq resource
436 * - invoke the vmbus to add the vmbus root device
437 * - setup the vmbus root device
438 * - retrieve the channel offers
450 ret = hv_vmbus_init();
454 printf("Error VMBUS: Hypervisor Initialization Failed!\n");
459 * Find a free IDT slot for vmbus callback.
461 hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc();
463 if (hv_vmbus_g_context.hv_cb_vector == 0) {
465 printf("Error VMBUS: Cannot find free IDT slot for "
466 "vmbus callback!\n");
471 printf("VMBUS: vmbus callback vector %d\n",
472 hv_vmbus_g_context.hv_cb_vector);
475 * Notify the hypervisor of our vector.
477 setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
480 hv_vmbus_intr_cpu[j] = 0;
481 hv_vmbus_swintr_event_cpu[j] = 0;
482 hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
483 hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
484 hv_vmbus_g_context.event_swintr[j] = NULL;
485 hv_vmbus_g_context.msg_swintr[j] = NULL;
487 for (i = 0; i < 2; i++)
488 setup_args.page_buffers[2 * j + i] = NULL;
496 * Setup software interrupt thread and handler for msg handling.
498 ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j],
499 "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0,
500 &hv_vmbus_g_context.msg_swintr[j]);
503 printf("VMBUS: failed to setup msg swi for "
509 * Bind the swi thread to the cpu.
511 ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
515 printf("VMBUS: failed to bind msg swi thread "
521 * Setup software interrupt thread and handler for
524 ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j],
525 "hv_event", hv_vmbus_on_events, (void *)(long)j,
526 SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]);
529 printf("VMBUS: failed to setup event swi for "
535 * Prepare the per-cpu msg and event pages to be used on each cpu.
537 for(i = 0; i < 2; i++) {
538 setup_args.page_buffers[2 * j + i] =
539 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
540 if (setup_args.page_buffers[2 * j + i] == NULL) {
541 KASSERT(setup_args.page_buffers[2 * j + i] != NULL,
542 ("Error VMBUS: malloc failed!"));
549 printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
552 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
555 * Connect to VMBus in the root partition
557 ret = hv_vmbus_connect();
562 hv_vmbus_request_channel_offers();
567 * Free pages alloc'ed
569 for (n = 0; n < 2 * MAXCPU; n++)
570 if (setup_args.page_buffers[n] != NULL)
571 free(setup_args.page_buffers[n], M_DEVBUF);
574 * Remove the swi handlers and the vmbus callback vector.
577 if (hv_vmbus_g_context.msg_swintr[j] != NULL)
578 swi_remove(hv_vmbus_g_context.msg_swintr[j]);
579 if (hv_vmbus_g_context.event_swintr[j] != NULL)
580 swi_remove(hv_vmbus_g_context.event_swintr[j]);
581 hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
582 hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
585 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
594 vmbus_attach(device_t dev)
597 device_printf(dev, "VMBUS: attach dev: %p\n", dev);
601 * If the system has already booted and thread
602 * scheduling is possible, as indicated by the global
603 * cold set to zero, we just call the driver
604 * initialization directly.
615 if (vm_guest != VM_GUEST_HV)
619 * If the system has already booted and thread
620 * scheduling is possible, as indicated by the
621 * global cold set to zero, we just call the driver
622 * initialization directly.
633 hv_vmbus_release_unattached_channels();
634 hv_vmbus_disconnect();
636 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
638 for(i = 0; i < 2 * MAXCPU; i++) {
639 if (setup_args.page_buffers[i] != 0)
640 free(setup_args.page_buffers[i], M_DEVBUF);
647 if (hv_vmbus_g_context.msg_swintr[i] != NULL)
648 swi_remove(hv_vmbus_g_context.msg_swintr[i]);
649 if (hv_vmbus_g_context.event_swintr[i] != NULL)
650 swi_remove(hv_vmbus_g_context.event_swintr[i]);
651 hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;
652 hv_vmbus_g_context.hv_event_intr_event[i] = NULL;
655 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
667 vmbus_detach(device_t dev)
677 printf("VMBUS: load\n");
681 vmbus_mod_unload(void)
684 printf("VMBUS: unload\n");
688 vmbus_modevent(module_t mod, int what, void *arg)
703 static device_method_t vmbus_methods[] = {
704 /** Device interface */
705 DEVMETHOD(device_identify, vmbus_identify),
706 DEVMETHOD(device_probe, vmbus_probe),
707 DEVMETHOD(device_attach, vmbus_attach),
708 DEVMETHOD(device_detach, vmbus_detach),
709 DEVMETHOD(device_shutdown, bus_generic_shutdown),
710 DEVMETHOD(device_suspend, bus_generic_suspend),
711 DEVMETHOD(device_resume, bus_generic_resume),
714 DEVMETHOD(bus_add_child, bus_generic_add_child),
715 DEVMETHOD(bus_print_child, bus_generic_print_child),
716 DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
717 DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
/*
 * Driver and module registration. The driver is named "vmbus", dispatches
 * through the vmbus_methods table, and is registered under the nexus bus
 * with vmbus_modevent as its module event handler.
 */
721 static char driver_name[] = "vmbus";
722 static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
725 devclass_t vmbus_devclass;
727 DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
/* Declares version 1 of the "vmbus" module. */
728 MODULE_VERSION(vmbus,1);
730 /* We want to be started after SMP is initialized */
/* Hence vmbus_init is registered at SI_SUB_SMP + 1, SI_ORDER_FIRST. */
731 SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);