2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * VM Bus Driver Implementation
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
42 #include <sys/sysctl.h>
43 #include <sys/syslog.h>
44 #include <sys/systm.h>
45 #include <sys/rtprio.h>
46 #include <sys/interrupt.h>
48 #include <sys/taskqueue.h>
49 #include <sys/mutex.h>
52 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 #include <machine/intr_machdep.h>
57 #include <machine/md_var.h>
58 #include <machine/segments.h>
60 #include <machine/apicvar.h>
62 #include <dev/hyperv/include/hyperv.h>
63 #include <dev/hyperv/vmbus/hv_vmbus_priv.h>
64 #include <dev/hyperv/vmbus/vmbus_var.h>
66 #include <contrib/dev/acpica/include/acpi.h>
/* Softc of the vmbus device; assigned in vmbus_attach(). */
69 struct vmbus_softc *vmbus_sc;
/* NOTE(review): no reader or writer of this flag is visible in this listing. */
71 static int vmbus_inited;
/* Holds the per-CPU SynIC page buffers at slots 2 * cpu and 2 * cpu + 1. */
72 static hv_setup_args setup_args; /* only CPU 0 supported at this time */
/* ACPI ID list handed to ACPI_ID_PROBE() by vmbus_probe(). */
74 static char *vmbus_ids[] = { "VMBUS", NULL };
/*
 * Taskqueue handler for VMBus channel messages (installed with TASK_INIT
 * on the per-CPU hv_msg_task).  It examines the current CPU's SynIC
 * message slot for HV_VMBUS_MESSAGE_SINT, dispatches the message through
 * g_channel_message_table, marks the slot HV_MESSAGE_TYPE_NONE and, when
 * the hypervisor flagged message_pending, writes the EOM MSR.  Both
 * arguments are unused.
 * NOTE(review): the loop construct, the handling after the "unknown
 * message type" printf and the memory barrier described below are on
 * lines that are not visible in this listing.
 */
77 vmbus_msg_task(void *arg __unused, int pending __unused)
79 hv_vmbus_message *msg;
/* Message slot of the current CPU that belongs to the message SINT. */
81 msg = hv_vmbus_g_context.syn_ic_msg_page[curcpu] +
82 HV_VMBUS_MESSAGE_SINT;
85 const hv_vmbus_channel_msg_table_entry *entry;
86 hv_vmbus_channel_msg_header *hdr;
87 hv_vmbus_channel_msg_type msg_type;
89 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE)
90 break; /* no message */
/* The payload starts with a channel message header carrying the type. */
92 hdr = (hv_vmbus_channel_msg_header *)msg->u.payload;
93 msg_type = hdr->message_type;
/* Types at or beyond HV_CHANNEL_MESSAGE_COUNT have no table entry. */
95 if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) {
96 printf("VMBUS: unknown message type = %d\n", msg_type);
/* Dispatch to the handler registered for this type, if there is one. */
100 entry = &g_channel_message_table[msg_type];
101 if (entry->messageHandler)
102 entry->messageHandler(hdr);
/* Mark the slot as empty again. */
104 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
106 * Make sure the write to message_type (ie set to
107 * HV_MESSAGE_TYPE_NONE) happens before we read the
108 * message_pending and EOMing. Otherwise, the EOMing will
109 * not deliver any more messages
110 * since there is no empty slot
113 * mb() is used here, since atomic_thread_fence_seq_cst()
114 * will become compiler fence on UP kernel.
117 if (msg->header.message_flags.u.message_pending) {
119 * This will cause message queue rescan to possibly
120 * deliver another msg from the hypervisor
122 wrmsr(HV_X64_MSR_EOM, 0);
128 * @brief Interrupt filter routine for VMBUS.
130 * The purpose of this routine is to determine the type of VMBUS protocol
131 * message to process - an event or a channel message.
/*
 * sc    - vmbus softc; its vmbus_event_proc callback is run first.
 * frame - trap frame of the interrupt.  NOTE(review): its use (presumably
 *         by the event timer handler) is on a line not visible here.
 * cpu   - index used to select the per-CPU SynIC message page and the
 *         per-CPU message taskqueue/task.
 * Returns FILTER_HANDLED.
 */
134 hv_vmbus_isr(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
136 hv_vmbus_message *msg, *msg_base;
139 * The Windows team has advised that we check for events
140 * before checking for messages. This is the way they do it
141 * in Windows when running as a guest in Hyper-V
143 sc->vmbus_event_proc(sc, cpu);
145 /* Check if there are actual msgs to be processed */
146 msg_base = hv_vmbus_g_context.syn_ic_msg_page[cpu];
147 msg = msg_base + HV_VMBUS_TIMER_SINT;
149 /* Let the event timer process the message */
150 if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) {
151 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
153 /* call interrupt handler of event timer */
157 * Make sure the write to message_type (ie set to
158 * HV_MESSAGE_TYPE_NONE) happens before we read the
159 * message_pending and EOMing. Otherwise, the EOMing will
160 * not deliver any more messages
161 * since there is no empty slot
164 * mb() is used here, since atomic_thread_fence_seq_cst()
165 * will become compiler fence on UP kernel.
169 if (msg->header.message_flags.u.message_pending) {
171 * This will cause message queue rescan to possibly
172 * deliver another msg from the hypervisor
174 wrmsr(HV_X64_MSR_EOM, 0);
/*
 * Channel messages are not handled here; a non-empty message slot only
 * schedules the per-CPU message task (vmbus_msg_task).
 */
178 msg = msg_base + HV_VMBUS_MESSAGE_SINT;
179 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
180 taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu],
181 &hv_vmbus_g_context.hv_msg_task[cpu]);
184 return (FILTER_HANDLED);
/*
 * Handler for the VMBus callback vector: increments the per-CPU interrupt
 * counter kept in the softc and passes the trap frame on to hv_vmbus_isr().
 * NOTE(review): the assignment of `cpu` and the preemption-disable call
 * that the comment below refers to are not visible in this listing.
 */
188 hv_vector_handler(struct trapframe *trap_frame)
190 struct vmbus_softc *sc = vmbus_get_softc();
194 * Disable preemption.
199 * Do a little interrupt counting.
201 (*VMBUS_SC_PCPU_GET(sc, intr_cnt, cpu))++;
203 hv_vmbus_isr(sc, trap_frame, cpu);
/*
 * Per-CPU SynIC initialization; passed to smp_rendezvous() by the bus
 * initialization code.
 *
 * arg points to an hv_setup_args whose page_buffers[2 * cpu] and
 * page_buffers[2 * cpu + 1] become this CPU's message and event pages.
 * The routine enables SIMP and SIEFP with the pages' physical page
 * numbers, programs the message and timer SINTs with the vmbus IDT vector
 * (unmasked, auto-EOI), rewrites SCONTROL, flags the SynIC as initialized
 * and records this CPU's Hyper-V VP index.
 * NOTE(review): the declarations of `cpu` and `version` and the statement
 * that sets the enable bit in sctrl are not visible in this listing.
 */
212 vmbus_synic_setup(void *arg)
214 struct vmbus_softc *sc = vmbus_get_softc();
216 uint64_t hv_vcpu_index;
217 hv_vmbus_synic_simp simp;
218 hv_vmbus_synic_siefp siefp;
219 hv_vmbus_synic_scontrol sctrl;
220 hv_vmbus_synic_sint shared_sint;
/* Local pointer with the same name as the file-scope setup_args object. */
222 hv_setup_args* setup_args = (hv_setup_args *)arg;
224 cpu = PCPU_GET(cpuid);
227 * TODO: Check the version
229 version = rdmsr(HV_X64_MSR_SVERSION);
/* Even slot holds the message page, odd slot the event page. */
231 hv_vmbus_g_context.syn_ic_msg_page[cpu] =
232 setup_args->page_buffers[2 * cpu];
233 hv_vmbus_g_context.syn_ic_event_page[cpu] =
234 setup_args->page_buffers[2 * cpu + 1];
237 * Setup the Synic's message page
240 simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP);
241 simp.u.simp_enabled = 1;
242 simp.u.base_simp_gpa = ((hv_get_phys_addr(
243 hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT);
245 wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t);
248 * Setup the Synic's event page
250 siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP);
251 siefp.u.siefp_enabled = 1;
252 siefp.u.base_siefp_gpa = ((hv_get_phys_addr(
253 hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT);
255 wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
257 /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */
258 shared_sint.as_uint64_t = 0;
259 shared_sint.u.vector = sc->vmbus_idtvec;
260 shared_sint.u.masked = FALSE;
261 shared_sint.u.auto_eoi = TRUE;
/* The same SINT value is written for both the message and the timer SINT. */
263 wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
264 shared_sint.as_uint64_t);
266 wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT,
267 shared_sint.as_uint64_t);
269 /* Enable the global synic bit */
270 sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL);
273 wrmsr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t);
/* vmbus_synic_teardown() checks this flag before touching the MSRs. */
275 hv_vmbus_g_context.syn_ic_initialized = TRUE;
278 * Set up the cpuid mapping from Hyper-V to FreeBSD.
279 * The array is indexed using FreeBSD cpuid.
281 hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX);
282 hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index;
/*
 * Per-CPU SynIC shutdown; passed to smp_rendezvous() by vmbus_detach()
 * with a NULL argument.  Sets the masked bit in the message and timer
 * SINT values, then clears the enable bit and page address in SIMP and
 * SIEFP.
 * NOTE(review): the action taken when the SynIC was never initialized and
 * the opening "wrmsr(" of the two SINT writes are on lines not visible in
 * this listing.
 */
286 vmbus_synic_teardown(void *arg)
288 hv_vmbus_synic_sint shared_sint;
289 hv_vmbus_synic_simp simp;
290 hv_vmbus_synic_siefp siefp;
292 if (!hv_vmbus_g_context.syn_ic_initialized)
/* Read-modify-write: keep the other SINT fields, set only the mask bit. */
295 shared_sint.as_uint64_t = rdmsr(
296 HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT);
298 shared_sint.u.masked = 1;
301 * Disable the interrupt 0
304 HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
305 shared_sint.as_uint64_t);
307 shared_sint.as_uint64_t = rdmsr(
308 HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT);
310 shared_sint.u.masked = 1;
313 * Disable the interrupt 1
316 HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT,
317 shared_sint.as_uint64_t);
/* Disable the message page and drop its address. */
318 simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP);
319 simp.u.simp_enabled = 0;
320 simp.u.base_simp_gpa = 0;
322 wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t);
/* Disable the event page and drop its address. */
324 siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP);
325 siefp.u.siefp_enabled = 0;
326 siefp.u.base_siefp_gpa = 0;
328 wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
/*
 * NOTE(review): the signature is not visible in this listing; this is
 * presumably the body of vmbus_read_ivar(), which the method table
 * installs as bus_read_ivar.
 *
 * The child's ivars are its struct hv_device.  Each HV_VMBUS_IVAR_* index
 * is answered through *result: TYPE and INSTANCE yield the addresses of
 * class_id and device_id, DEVCTX yields the hv_device itself, and NODE
 * yields its device member.
 */
338 struct hv_device *child_dev_ctx = device_get_ivars(child);
342 case HV_VMBUS_IVAR_TYPE:
343 *result = (uintptr_t) &child_dev_ctx->class_id;
345 case HV_VMBUS_IVAR_INSTANCE:
346 *result = (uintptr_t) &child_dev_ctx->device_id;
348 case HV_VMBUS_IVAR_DEVCTX:
349 *result = (uintptr_t) child_dev_ctx;
351 case HV_VMBUS_IVAR_NODE:
352 *result = (uintptr_t) child_dev_ctx->device;
/*
 * NOTE(review): presumably the switch of vmbus_write_ivar(), which the
 * method table installs as bus_write_ivar.  Only these case labels are
 * visible in this listing, so the action taken for them cannot be
 * confirmed from here; all four ivars share one fall-through group.
 */
367 case HV_VMBUS_IVAR_TYPE:
368 case HV_VMBUS_IVAR_INSTANCE:
369 case HV_VMBUS_IVAR_DEVCTX:
370 case HV_VMBUS_IVAR_NODE:
/*
 * bus_child_pnpinfo_str method: appends "classid=<guid> deviceid=<guid>"
 * for the given child to buf.  Every append goes through strlcat() with
 * buflen as the bound; the GUIDs are formatted by snprintf_hv_guid() from
 * the child's hv_device ivars.
 * NOTE(review): the declaration of guidbuf and the return statement are
 * not visible in this listing.
 */
378 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
381 struct hv_device *dev_ctx = device_get_ivars(child);
386 strlcat(buf, "classid=", buflen);
387 snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id);
388 strlcat(buf, guidbuf, buflen);
390 strlcat(buf, " deviceid=", buflen);
391 snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id);
392 strlcat(buf, guidbuf, buflen);
/*
 * Allocates an hv_device from M_DEVBUF and fills in its channel pointer
 * and its class/device GUIDs.
 * NOTE(review): the `type` and `instance` parameter declarations, the
 * malloc flags and the return statement are not visible in this listing.
 */
398 hv_vmbus_child_device_create(
401 hv_vmbus_channel* channel)
403 hv_device* child_dev;
406 * Allocate the new child device
408 child_dev = malloc(sizeof(hv_device), M_DEVBUF,
411 child_dev->channel = channel;
/* Copy sizeof(hv_guid) bytes from the type/instance arguments. */
412 memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
413 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
/*
 * Formats the 16 bytes of guid->data into buf (at most sz bytes, via
 * snprintf) as lowercase hex in 8-4-4-4-12 grouping.  Bytes 0-3 are
 * printed in reverse order, bytes 4-5 and 6-7 are each swapped, and bytes
 * 8-15 are printed in storage order.
 * NOTE(review): the declaration of cnt and the return statement are not
 * visible in this listing.
 */
419 snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid)
422 const unsigned char *d = guid->data;
424 cnt = snprintf(buf, sz,
425 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
426 d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6],
427 d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
/*
 * Prints the child's class GUID, adds a new newbus child under the vmbus
 * device (no fixed name, unit -1) and links the two: the hv_device
 * records the device_t and becomes the device's ivars.
 * NOTE(review): the declarations of name and child, any condition around
 * the printf and the return statement are not visible in this listing.
 */
432 hv_vmbus_child_device_register(struct hv_device *child_dev)
438 snprintf_hv_guid(name, sizeof(name), &child_dev->class_id);
439 printf("VMBUS: Class ID: %s\n", name);
442 child = device_add_child(vmbus_get_device(), NULL, -1);
443 child_dev->device = child;
444 device_set_ivars(child, child_dev);
/*
 * Deletes the newbus child recorded in child_dev->device from the vmbus
 * device and stores the result of device_delete_child() in ret.
 * NOTE(review): locking and the return statement, if any, are not
 * visible in this listing.
 */
450 hv_vmbus_child_device_unregister(struct hv_device *child_dev)
454 * XXXKYS: Ensure that this is the opposite of
458 ret = device_delete_child(vmbus_get_device(), child_dev->device);
/*
 * device_probe method.  The condition below is true when the ACPI ID does
 * not match vmbus_ids, the unit is not 0, or the system is not a Hyper-V
 * guest.  Otherwise the description is set and BUS_PROBE_DEFAULT is
 * returned.
 * NOTE(review): the statement taken when the condition is true
 * (presumably an error return) is not visible in this listing.
 */
464 vmbus_probe(device_t dev)
466 if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL ||
467 device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV)
470 device_set_desc(dev, "Hyper-V Vmbus");
472 return (BUS_PROBE_DEFAULT);
/*
 * Interrupt entry points defined outside this file: IDTVEC(rsvd) is what
 * vmbus_vector_alloc() looks for to recognise an unused IDT slot, and
 * IDTVEC(hv_vmbus_callback) is the handler it installs there.
 */
475 extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
478 * @brief Find a free IDT slot and setup the interrupt handler.
/*
 * Scans vectors from APIC_SPURIOUS_INT - 1 down to APIC_IPI_INTS for an
 * IDT entry whose handler is still IDTVEC(rsvd) and installs
 * IDTVEC(hv_vmbus_callback) there.
 * NOTE(review): the assignment of ip, the return statements and the
 * preprocessor conditional that presumably selects one of the two
 * setidt() calls are not visible in this listing.  The caller treats a
 * result of 0 as "no free vector".
 */
481 vmbus_vector_alloc(void)
485 struct gate_descriptor *ip;
488 * Search backwards form the highest IDT vector available for use
489 * as vmbus channel callback vector. We install 'hv_vmbus_callback'
490 * handler at that vector and use it to interrupt vcpus.
492 vector = APIC_SPURIOUS_INT;
493 while (--vector >= APIC_IPI_INTS) {
/* Rebuild the handler address from the two offset fields of the gate. */
495 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
496 if (func == (uintptr_t)&IDTVEC(rsvd)) {
498 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
499 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
501 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
512 * @brief Restore the IDT slot to rsvd.
/*
 * Returns an IDT slot obtained from vmbus_vector_alloc() to
 * IDTVEC(rsvd).  Asserts that the vector lies in
 * [APIC_IPI_INTS, APIC_SPURIOUS_INT) and that the slot currently holds
 * IDTVEC(hv_vmbus_callback).
 * NOTE(review): the assignment of ip and any early return for an
 * unallocated vector are not visible in this listing.
 */
515 vmbus_vector_free(int vector)
518 struct gate_descriptor *ip;
523 KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
524 ("invalid vector %d", vector));
/* Rebuild the handler address from the two offset fields of the gate. */
527 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
528 KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
529 ("invalid vector %d", vector));
531 setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
/*
 * One-shot task that applies the cpuset pointed to by xmask to the thread
 * executing it (curthread) through cpuset_setthread().  The bus
 * initialization code enqueues it on each newly created taskqueue.
 * A failure is fatal: the visible error path panics.
 * NOTE(review): the declaration of error and the condition guarding the
 * panic are not visible in this listing.
 */
535 vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
537 cpuset_t *mask = xmask;
540 error = cpuset_setthread(curthread->td_tid, mask);
542 panic("curthread=%ju: can't pin; error=%d",
543 (uintmax_t)curthread->td_tid, error);
548 * @brief Main vmbus driver initialization routine.
551 * - initialize the vmbus driver context
552 * - setup various driver entry points
553 * - invoke the vmbus hv main init routine
554 * - get the irq resource
555 * - invoke the vmbus to add the vmbus root device
556 * - setup the vmbus root device
557 * - retrieve the channel offers
/*
 * NOTE(review): the function name, return type, several declarations, the
 * per-CPU loop headers, the error checks/gotos and the return statements
 * are not visible in this listing; the summary below covers only the
 * visible statements.
 *
 * Visible sequence:
 *  - allocate an IDT vector for vmbus and store it in the softc;
 *  - register a "cpu%d:hyperv" interrupt counter and clear that CPU's two
 *    setup_args.page_buffers slots;
 *  - create an event taskqueue and a message taskqueue per CPU, each with
 *    one PI_NET thread, and run vmbus_cpuset_setthread_task on each;
 *  - allocate two zeroed pages per CPU into setup_args.page_buffers;
 *  - run vmbus_synic_setup via smp_rendezvous();
 *  - hv_vmbus_connect(), choose the event handler by protocol version,
 *    request channel offers and attach children with bus_generic_attach();
 *  - the trailing cleanup code frees the pages, the event taskqueues and
 *    the IDT vector.
 * NOTE(review): no taskqueue_free() of hv_msg_tq is visible in that
 * cleanup code; confirm the message taskqueues are released on failure.
 */
562 struct vmbus_softc *sc;
564 char buf[MAXCOMLEN + 1];
571 sc = vmbus_get_softc();
574 * Find a free IDT vector for vmbus messages/events.
576 sc->vmbus_idtvec = vmbus_vector_alloc();
/* A vector of 0 is treated as allocation failure. */
577 if (sc->vmbus_idtvec == 0) {
578 device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
583 device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
588 snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
589 intrcnt_add(buf, VMBUS_SC_PCPU_PTR(sc, intr_cnt, cpu));
/* Start with empty page slots so the cleanup code can test for NULL. */
591 for (i = 0; i < 2; i++)
592 setup_args.page_buffers[2 * cpu + i] = NULL;
599 struct task cpuset_task;
602 * Setup taskqueue to handle events
604 hv_vmbus_g_context.hv_event_queue[cpu] =
605 taskqueue_create_fast("hyperv event", M_WAITOK,
606 taskqueue_thread_enqueue,
607 &hv_vmbus_g_context.hv_event_queue[cpu]);
608 taskqueue_start_threads(&hv_vmbus_g_context.hv_event_queue[cpu],
609 1, PI_NET, "hvevent%d", cpu);
/*
 * Run a one-shot task on the new taskqueue thread so that the thread
 * applies the single-CPU mask to itself; draining waits for it to finish
 * before the on-stack task goes out of scope.
 */
611 CPU_SETOF(cpu, &cpu_mask);
612 TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
614 taskqueue_enqueue(hv_vmbus_g_context.hv_event_queue[cpu],
616 taskqueue_drain(hv_vmbus_g_context.hv_event_queue[cpu],
620 * Setup per-cpu tasks and taskqueues to handle msg.
622 hv_vmbus_g_context.hv_msg_tq[cpu] = taskqueue_create_fast(
623 "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
624 &hv_vmbus_g_context.hv_msg_tq[cpu]);
625 taskqueue_start_threads(&hv_vmbus_g_context.hv_msg_tq[cpu], 1,
626 PI_NET, "hvmsg%d", cpu);
627 TASK_INIT(&hv_vmbus_g_context.hv_msg_task[cpu], 0,
628 vmbus_msg_task, NULL);
/* Same one-shot CPU-mask task as for the event taskqueue above. */
630 CPU_SETOF(cpu, &cpu_mask);
631 TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
633 taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu],
635 taskqueue_drain(hv_vmbus_g_context.hv_msg_tq[cpu],
639 * Prepare the per cpu msg and event pages to be called on
642 for(i = 0; i < 2; i++) {
643 setup_args.page_buffers[2 * cpu + i] =
644 malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
649 printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
/* vmbus_synic_setup receives &setup_args as its argument. */
652 smp_rendezvous(NULL, vmbus_synic_setup, NULL, &setup_args);
655 * Connect to VMBus in the root partition
657 ret = hv_vmbus_connect();
/* The WS2008 and WIN7 protocol versions use the compat event handler. */
662 if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
663 hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)
664 sc->vmbus_event_proc = vmbus_event_proc_compat;
666 sc->vmbus_event_proc = vmbus_event_proc;
668 hv_vmbus_request_channel_offers();
671 bus_generic_attach(sc->vmbus_dev);
672 device_printf(sc->vmbus_dev, "device scan, probe and attach done\n");
678 * Free pages alloc'ed
680 for (n = 0; n < 2 * MAXCPU; n++)
681 if (setup_args.page_buffers[n] != NULL)
682 free(setup_args.page_buffers[n], M_DEVBUF);
685 * remove swi and vmbus callback vector;
688 if (hv_vmbus_g_context.hv_event_queue[cpu] != NULL) {
689 taskqueue_free(hv_vmbus_g_context.hv_event_queue[cpu]);
690 hv_vmbus_g_context.hv_event_queue[cpu] = NULL;
694 vmbus_vector_free(sc->vmbus_idtvec);
/*
 * Placeholder event handler that vmbus_attach() installs in the softc
 * until the bus initialization code selects the real one.  Both
 * parameters are marked unused.
 * NOTE(review): the body is not visible in this listing.
 */
701 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
/*
 * device_attach method: publishes the softc through the global vmbus_sc,
 * records the device in it, installs the dummy event handler and probes
 * for children with bus_generic_probe().
 * NOTE(review): the conditional initialization call that the second
 * comment below describes and the return statement are not visible in
 * this listing.
 */
706 vmbus_attach(device_t dev)
708 vmbus_sc = device_get_softc(dev);
709 vmbus_sc->vmbus_dev = dev;
712 * Event processing logic will be configured:
713 * - After the vmbus protocol version negotiation.
714 * - Before we request channel offers.
716 vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
719 * If the system has already booted and thread
720 * scheduling is possible indicated by the global
721 * cold set to zero, we just call the driver
722 * initialization directly.
727 bus_generic_probe(dev);
/*
 * SYSINIT hook (registered at SI_SUB_SMP / SI_ORDER_ANY).  The visible
 * check is true when the system is not a Hyper-V guest or no vmbus softc
 * exists.
 * NOTE(review): the statement under that check (presumably an early
 * return) and the initialization call described by the comment below are
 * not visible in this listing.
 */
732 vmbus_sysinit(void *arg __unused)
734 if (vm_guest != VM_GUEST_HV || vmbus_get_softc() == NULL)
738 * If the system has already booted and thread
739 * scheduling is possible, as indicated by the
740 * global cold set to zero, we just call the driver
741 * initialization directly.
/*
 * device_detach method: releases unattached channels, disconnects from
 * VMBus, tears down the SynIC on all CPUs, then frees the SynIC pages,
 * the event taskqueues and the IDT vector.
 * NOTE(review): the declaration of i, the loop header around the
 * taskqueue cleanup and the return statement are not visible in this
 * listing.
 * NOTE(review): no taskqueue_free() of hv_msg_tq is visible here; confirm
 * the per-CPU message taskqueues are released on detach.
 */
748 vmbus_detach(device_t dev)
750 struct vmbus_softc *sc = device_get_softc(dev);
753 hv_vmbus_release_unattached_channels();
754 hv_vmbus_disconnect();
/* Runs vmbus_synic_teardown with a NULL argument. */
756 smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
/* Two page slots per possible CPU; unused slots are NULL. */
758 for(i = 0; i < 2 * MAXCPU; i++) {
759 if (setup_args.page_buffers[i] != NULL)
760 free(setup_args.page_buffers[i], M_DEVBUF);
765 if (hv_vmbus_g_context.hv_event_queue[i] != NULL) {
766 taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]);
767 hv_vmbus_g_context.hv_event_queue[i] = NULL;
771 vmbus_vector_free(sc->vmbus_idtvec);
/*
 * Newbus method table for the vmbus driver.
 * NOTE(review): the table terminator is not visible in this listing.
 */
776 static device_method_t vmbus_methods[] = {
777 /* Device interface */
778 DEVMETHOD(device_probe, vmbus_probe),
779 DEVMETHOD(device_attach, vmbus_attach),
780 DEVMETHOD(device_detach, vmbus_detach),
781 DEVMETHOD(device_shutdown, bus_generic_shutdown),
782 DEVMETHOD(device_suspend, bus_generic_suspend),
783 DEVMETHOD(device_resume, bus_generic_resume),
/* Bus methods: generic child handling plus the vmbus ivar/pnpinfo hooks. */
786 DEVMETHOD(bus_add_child, bus_generic_add_child),
787 DEVMETHOD(bus_print_child, bus_generic_print_child),
788 DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
789 DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
790 DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str),
/*
 * Driver description handed to DRIVER_MODULE(); the softc size is
 * sizeof(struct vmbus_softc).
 * NOTE(review): the name and method-table initializers are not visible
 * in this listing.
 */
795 static driver_t vmbus_driver = {
798 sizeof(struct vmbus_softc)
/* Device class storage passed to DRIVER_MODULE(). */
801 static devclass_t vmbus_devclass;
/* Register vmbus as a child driver of the acpi bus, with no event handler. */
803 DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
/* Depends on acpi module version 1 (minimum, preferred and maximum). */
804 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
805 MODULE_VERSION(vmbus, 1);
809 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
/* Run vmbus_sysinit(NULL) at SI_SUB_SMP with order SI_ORDER_ANY. */
812 SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);