2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * VM Bus Driver Implementation
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
42 #include <sys/sysctl.h>
43 #include <sys/syslog.h>
44 #include <sys/systm.h>
45 #include <sys/rtprio.h>
46 #include <sys/interrupt.h>
48 #include <sys/taskqueue.h>
49 #include <sys/mutex.h>
52 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 #include <machine/intr_machdep.h>
57 #include <machine/md_var.h>
58 #include <machine/segments.h>
60 #include <machine/apicvar.h>
62 #include <dev/hyperv/include/hyperv.h>
63 #include <dev/hyperv/vmbus/hv_vmbus_priv.h>
64 #include <dev/hyperv/vmbus/vmbus_var.h>
66 #include <contrib/dev/acpica/include/acpi.h>
/*
 * Softc of the vmbus device; assigned in vmbus_attach() from
 * device_get_softc().
 */
69 struct vmbus_softc *vmbus_sc;
/*
 * Device that child devices are added to and deleted from, and that is
 * handed to bus_generic_attach().  Where it is assigned is not visible in
 * this excerpt.
 */
71 static device_t vmbus_devp;
/*
 * NOTE(review): presumably a one-shot "already initialized" flag; its
 * readers and writers are not visible in this excerpt -- confirm.
 */
72 static int vmbus_inited;
/*
 * Arguments passed to hv_vmbus_synic_init() through smp_rendezvous(): the
 * callback vector plus the page_buffers[] array.
 * NOTE(review): the trailing comment below looks stale -- page_buffers[]
 * is indexed as [2 * j + i] inside the per-CPU setup and 2 * MAXCPU
 * entries are scanned when freeing; confirm and update.
 */
73 static hv_setup_args setup_args; /* only CPU 0 supported at this time */
/* NULL-terminated ID list handed to ACPI_ID_PROBE() in vmbus_probe(). */
75 static char *vmbus_ids[] = { "VMBUS", NULL };
78 vmbus_msg_task(void *arg __unused, int pending __unused)
80 hv_vmbus_message *msg;
82 msg = ((hv_vmbus_message *)hv_vmbus_g_context.syn_ic_msg_page[curcpu]) +
83 HV_VMBUS_MESSAGE_SINT;
85 const hv_vmbus_channel_msg_table_entry *entry;
86 hv_vmbus_channel_msg_header *hdr;
87 hv_vmbus_channel_msg_type msg_type;
89 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE)
90 break; /* no message */
92 hdr = (hv_vmbus_channel_msg_header *)msg->u.payload;
93 msg_type = hdr->message_type;
95 if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) {
96 printf("VMBUS: unknown message type = %d\n", msg_type);
100 entry = &g_channel_message_table[msg_type];
101 if (entry->messageHandler)
102 entry->messageHandler(hdr);
104 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
106 * Make sure the write to message_type (ie set to
107 * HV_MESSAGE_TYPE_NONE) happens before we read the
108 * message_pending and EOMing. Otherwise, the EOMing will
109 * not deliver any more messages
110 * since there is no empty slot
113 * mb() is used here, since atomic_thread_fence_seq_cst()
114 * will become a compiler fence on a UP kernel.
117 if (msg->header.message_flags.u.message_pending) {
119 * This will cause message queue rescan to possibly
120 * deliver another msg from the hypervisor
122 wrmsr(HV_X64_MSR_EOM, 0);
128 * @brief Interrupt filter routine for VMBUS.
130 * The purpose of this routine is to determine the type of VMBUS protocol
131 * message to process - an event or a channel message.
134 hv_vmbus_isr(struct trapframe *frame)
136 struct vmbus_softc *sc = vmbus_get_softc();
138 hv_vmbus_message *msg;
142 * The Windows team has advised that we check for events
143 * before checking for messages. This is the way they do it
144 * in Windows when running as a guest in Hyper-V
146 sc->vmbus_event_proc(sc, cpu);
148 /* Check if there are actual msgs to be processed */
149 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
150 msg = ((hv_vmbus_message *)page_addr) + HV_VMBUS_TIMER_SINT;
152 /* we call eventtimer to process the message */
153 if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) {
154 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
156 /* call interrupt handler of event timer */
160 * Make sure the write to message_type (ie set to
161 * HV_MESSAGE_TYPE_NONE) happens before we read the
162 * message_pending and EOMing. Otherwise, the EOMing will
163 * not deliver any more messages
164 * since there is no empty slot
167 * mb() is used here, since atomic_thread_fence_seq_cst()
168 * will become a compiler fence on a UP kernel.
172 if (msg->header.message_flags.u.message_pending) {
174 * This will cause message queue rescan to possibly
175 * deliver another msg from the hypervisor
177 wrmsr(HV_X64_MSR_EOM, 0);
181 msg = ((hv_vmbus_message *)page_addr) + HV_VMBUS_MESSAGE_SINT;
182 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
183 taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu],
184 &hv_vmbus_g_context.hv_msg_task[cpu]);
187 return (FILTER_HANDLED);
/*
 * Per-CPU interrupt counters: each slot is registered with intrcnt_add()
 * under the name "cpu%d:hyperv" and the pointed-to counter is incremented
 * in hv_vector_handler().
 */
190 u_long *hv_vmbus_intr_cpu[MAXCPU];
193 hv_vector_handler(struct trapframe *trap_frame)
198 * Disable preemption.
203 * Do a little interrupt counting.
205 cpu = PCPU_GET(cpuid);
206 (*hv_vmbus_intr_cpu[cpu])++;
208 hv_vmbus_isr(trap_frame);
223 struct hv_device *child_dev_ctx = device_get_ivars(child);
227 case HV_VMBUS_IVAR_TYPE:
228 *result = (uintptr_t) &child_dev_ctx->class_id;
230 case HV_VMBUS_IVAR_INSTANCE:
231 *result = (uintptr_t) &child_dev_ctx->device_id;
233 case HV_VMBUS_IVAR_DEVCTX:
234 *result = (uintptr_t) child_dev_ctx;
236 case HV_VMBUS_IVAR_NODE:
237 *result = (uintptr_t) child_dev_ctx->device;
252 case HV_VMBUS_IVAR_TYPE:
253 case HV_VMBUS_IVAR_INSTANCE:
254 case HV_VMBUS_IVAR_DEVCTX:
255 case HV_VMBUS_IVAR_NODE:
263 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
266 struct hv_device *dev_ctx = device_get_ivars(child);
271 strlcat(buf, "classid=", buflen);
272 snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id);
273 strlcat(buf, guidbuf, buflen);
275 strlcat(buf, " deviceid=", buflen);
276 snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id);
277 strlcat(buf, guidbuf, buflen);
283 hv_vmbus_child_device_create(
286 hv_vmbus_channel* channel)
288 hv_device* child_dev;
291 * Allocate the new child device
293 child_dev = malloc(sizeof(hv_device), M_DEVBUF,
296 child_dev->channel = channel;
297 memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
298 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
304 snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid)
307 const unsigned char *d = guid->data;
309 cnt = snprintf(buf, sz,
310 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
311 d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6],
312 d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
317 hv_vmbus_child_device_register(struct hv_device *child_dev)
323 snprintf_hv_guid(name, sizeof(name), &child_dev->class_id);
324 printf("VMBUS: Class ID: %s\n", name);
327 child = device_add_child(vmbus_devp, NULL, -1);
328 child_dev->device = child;
329 device_set_ivars(child, child_dev);
335 hv_vmbus_child_device_unregister(struct hv_device *child_dev)
339 * XXXKYS: Ensure that this is the opposite of
343 ret = device_delete_child(vmbus_devp, child_dev->device);
349 vmbus_probe(device_t dev) {
350 if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL ||
351 device_get_unit(dev) != 0)
354 device_set_desc(dev, "Vmbus Devices");
356 return (BUS_PROBE_DEFAULT);
/*
 * IDT entry points: IDTVEC(rsvd) is the handler a free slot points at
 * (vmbus_vector_alloc() searches for it; vmbus_vector_free() restores it),
 * and IDTVEC(hv_vmbus_callback) is the handler installed for the vmbus
 * callback vector.
 */
359 extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
362 * @brief Find a free IDT slot and setup the interrupt handler.
365 vmbus_vector_alloc(void)
369 struct gate_descriptor *ip;
372 * Search backwards from the highest IDT vector available for use
373 * as vmbus channel callback vector. We install 'hv_vmbus_callback'
374 * handler at that vector and use it to interrupt vcpus.
376 vector = APIC_SPURIOUS_INT;
377 while (--vector >= APIC_IPI_INTS) {
379 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
380 if (func == (uintptr_t)&IDTVEC(rsvd)) {
382 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
383 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
385 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
396 * @brief Restore the IDT slot to rsvd.
399 vmbus_vector_free(int vector)
402 struct gate_descriptor *ip;
407 KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
408 ("invalid vector %d", vector));
411 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
412 KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
413 ("invalid vector %d", vector));
415 setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
419 vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
421 cpuset_t *mask = xmask;
424 error = cpuset_setthread(curthread->td_tid, mask);
426 panic("curthread=%ju: can't pin; error=%d",
427 (uintmax_t)curthread->td_tid, error);
432 * @brief Main vmbus driver initialization routine.
435 * - initialize the vmbus driver context
436 * - setup various driver entry points
437 * - invoke the vmbus hv main init routine
438 * - get the irq resource
439 * - invoke the vmbus to add the vmbus root device
440 * - setup the vmbus root device
441 * - retrieve the channel offers
446 struct vmbus_softc *sc;
448 char buf[MAXCOMLEN + 1];
455 sc = vmbus_get_softc();
457 ret = hv_vmbus_init();
461 printf("Error VMBUS: Hypervisor Initialization Failed!\n");
466 * Find a free IDT slot for vmbus callback.
468 hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc();
470 if (hv_vmbus_g_context.hv_cb_vector == 0) {
472 printf("Error VMBUS: Cannot find free IDT slot for "
473 "vmbus callback!\n");
478 printf("VMBUS: vmbus callback vector %d\n",
479 hv_vmbus_g_context.hv_cb_vector);
482 * Notify the hypervisor of our vector.
484 setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
487 snprintf(buf, sizeof(buf), "cpu%d:hyperv", j);
488 intrcnt_add(buf, &hv_vmbus_intr_cpu[j]);
490 for (i = 0; i < 2; i++)
491 setup_args.page_buffers[2 * j + i] = NULL;
498 struct task cpuset_task;
501 * Setup taskqueue to handle events
503 hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK,
504 taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]);
505 taskqueue_start_threads(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET,
508 CPU_SETOF(j, &cpu_mask);
509 TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, &cpu_mask);
510 taskqueue_enqueue(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task);
511 taskqueue_drain(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task);
514 * Setup per-cpu tasks and taskqueues to handle msg.
516 hv_vmbus_g_context.hv_msg_tq[j] = taskqueue_create_fast(
517 "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
518 &hv_vmbus_g_context.hv_msg_tq[j]);
519 taskqueue_start_threads(&hv_vmbus_g_context.hv_msg_tq[j], 1, PI_NET,
521 TASK_INIT(&hv_vmbus_g_context.hv_msg_task[j], 0,
522 vmbus_msg_task, NULL);
524 CPU_SETOF(j, &cpu_mask);
525 TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, &cpu_mask);
526 taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[j], &cpuset_task);
527 taskqueue_drain(hv_vmbus_g_context.hv_msg_tq[j], &cpuset_task);
530 * Prepare the per cpu msg and event pages to be called on each cpu.
532 for(i = 0; i < 2; i++) {
533 setup_args.page_buffers[2 * j + i] =
534 malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
539 printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
542 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
545 * Connect to VMBus in the root partition
547 ret = hv_vmbus_connect();
552 if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
553 hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)
554 sc->vmbus_event_proc = vmbus_event_proc_compat;
556 sc->vmbus_event_proc = vmbus_event_proc;
558 hv_vmbus_request_channel_offers();
561 bus_generic_attach(vmbus_devp);
562 device_printf(vmbus_devp, "device scan, probe and attach done\n");
568 * Free pages alloc'ed
570 for (n = 0; n < 2 * MAXCPU; n++)
571 if (setup_args.page_buffers[n] != NULL)
572 free(setup_args.page_buffers[n], M_DEVBUF);
575 * remove swi and vmbus callback vector;
578 if (hv_vmbus_g_context.hv_event_queue[j] != NULL) {
579 taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]);
580 hv_vmbus_g_context.hv_event_queue[j] = NULL;
584 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
593 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
598 vmbus_attach(device_t dev)
601 device_printf(dev, "VMBUS: attach dev: %p\n", dev);
604 vmbus_sc = device_get_softc(dev);
607 * Event processing logic will be configured:
608 * - After the vmbus protocol version negotiation.
609 * - Before we request channel offers.
611 vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
614 * If the system has already booted and thread
615 * scheduling is possible, as indicated by the global
616 * cold set to zero, we just call the driver
617 * initialization directly.
622 bus_generic_probe(dev);
629 if (vm_guest != VM_GUEST_HV || vmbus_get_softc() == NULL)
633 * If the system has already booted and thread
634 * scheduling is possible, as indicated by the
635 * global cold set to zero, we just call the driver
636 * initialization directly.
643 vmbus_detach(device_t dev)
647 hv_vmbus_release_unattached_channels();
648 hv_vmbus_disconnect();
650 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
652 for(i = 0; i < 2 * MAXCPU; i++) {
653 if (setup_args.page_buffers[i] != NULL)
654 free(setup_args.page_buffers[i], M_DEVBUF);
661 if (hv_vmbus_g_context.hv_event_queue[i] != NULL) {
662 taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]);
663 hv_vmbus_g_context.hv_event_queue[i] = NULL;
667 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
672 static device_method_t vmbus_methods[] = {
673 /** Device interface */
674 DEVMETHOD(device_probe, vmbus_probe),
675 DEVMETHOD(device_attach, vmbus_attach),
676 DEVMETHOD(device_detach, vmbus_detach),
677 DEVMETHOD(device_shutdown, bus_generic_shutdown),
678 DEVMETHOD(device_suspend, bus_generic_suspend),
679 DEVMETHOD(device_resume, bus_generic_resume),
682 DEVMETHOD(bus_add_child, bus_generic_add_child),
683 DEVMETHOD(bus_print_child, bus_generic_print_child),
684 DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
685 DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
686 DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str),
690 static driver_t vmbus_driver = {
693 sizeof(struct vmbus_softc)
/* Device class storage referenced by DRIVER_MODULE() below. */
696 devclass_t vmbus_devclass;
/* Register vmbus_driver on the acpi bus; no module event handler is given. */
698 DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
/* vmbus depends on the acpi module (minimum/preferred/maximum version 1). */
699 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
700 MODULE_VERSION(vmbus, 1);
/*
 * vmbus_init() is scheduled at SI_SUB_SMP + 1 with SI_ORDER_FIRST, i.e. in
 * the subsystem slot immediately following SI_SUB_SMP.
 */
702 /* We want to be started after SMP is initialized */
703 SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);