2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * VM Bus Driver Implementation
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
42 #include <sys/sysctl.h>
43 #include <sys/syslog.h>
44 #include <sys/systm.h>
45 #include <sys/rtprio.h>
46 #include <sys/interrupt.h>
48 #include <sys/taskqueue.h>
49 #include <sys/mutex.h>
52 #include <machine/resource.h>
55 #include <machine/stdarg.h>
56 #include <machine/intr_machdep.h>
57 #include <machine/md_var.h>
58 #include <machine/segments.h>
60 #include <machine/apicvar.h>
62 #include <dev/hyperv/include/hyperv.h>
63 #include <dev/hyperv/vmbus/hv_vmbus_priv.h>
64 #include <dev/hyperv/vmbus/hyperv_reg.h>
65 #include <dev/hyperv/vmbus/hyperv_var.h>
66 #include <dev/hyperv/vmbus/vmbus_var.h>
68 #include <contrib/dev/acpica/include/acpi.h>
/*
 * File-scope state:
 *  - vmbus_sc:  global softc pointer; assigned in vmbus_attach().
 *  - vmbus_ids: NULL-terminated ID list handed to ACPI_ID_PROBE() in
 *               vmbus_probe().
 *  - IDTVEC(rsvd) / IDTVEC(vmbus_isr): IDT entry points referenced by the
 *               vector allocation/free code (rsvd marks an unused slot,
 *               vmbus_isr is the handler installed for vmbus).
 */
71 struct vmbus_softc *vmbus_sc;
73 static char *vmbus_ids[] = { "VMBUS", NULL };
75 extern inthand_t IDTVEC(rsvd), IDTVEC(vmbus_isr);
/*
 * Task handler that processes the VMBUS_SINT_MESSAGE slot of the current
 * CPU's message page (presumably the handler bound to the per-CPU
 * message_task that vmbus_handle_intr1() enqueues -- TODO confirm, the
 * TASK_INIT argument line is not visible in this listing).
 *
 * xsc:     the vmbus softc (struct vmbus_softc *).
 * pending: unused.
 *
 * For a non-empty slot the payload is interpreted as a channel message
 * header.  Types >= HV_CHANNEL_MESSAGE_COUNT are reported with printf();
 * otherwise the entry in g_channel_message_table[] is looked up and its
 * messageHandler, if non-NULL, is called with the header.  The slot is
 * then marked HV_MESSAGE_TYPE_NONE and, when message_pending is set,
 * MSR_HV_EOM is written.
 *
 * NOTE(review): the enclosing loop statement and a few short statements
 * are not shown in this listing; only the visible lines are described.
 */
78 vmbus_msg_task(void *xsc, int pending __unused)
80 struct vmbus_softc *sc = xsc;
81 hv_vmbus_message *msg;
83 msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
85 const hv_vmbus_channel_msg_table_entry *entry;
86 hv_vmbus_channel_msg_header *hdr;
87 hv_vmbus_channel_msg_type msg_type;
89 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE)
90 break; /* no message */
/* The message payload is read as a channel message header. */
92 hdr = (hv_vmbus_channel_msg_header *)msg->u.payload;
93 msg_type = hdr->message_type;
/* Range-check the type before it is used to index g_channel_message_table[]. */
95 if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) {
96 printf("VMBUS: unknown message type = %d\n", msg_type);
100 entry = &g_channel_message_table[msg_type];
101 if (entry->messageHandler)
102 entry->messageHandler(hdr);
104 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
106 * Make sure the write to message_type (ie set to
107 * HV_MESSAGE_TYPE_NONE) happens before we read the
108 * message_pending and EOMing. Otherwise, the EOMing will
109 * not deliver any more messages
110 * since there is no empty slot
113 * mb() is used here, since atomic_thread_fence_seq_cst()
114 * will become compiler fence on UP kernel.
117 if (msg->header.message_flags.u.message_pending) {
119 * This will cause message queue rescan to possibly
120 * deliver another msg from the hypervisor
122 wrmsr(MSR_HV_EOM, 0);
/*
 * Core of the vmbus interrupt handler for one CPU.
 *
 * sc:    vmbus softc.
 * frame: trap frame, forwarded to vmbus_et_intr().
 * cpu:   index used for all per-CPU lookups.
 *
 * Visible steps, in order:
 *  1. Timer slot (VMBUS_SINT_TIMER): if it holds
 *     HV_MESSAGE_TIMER_EXPIRED, the slot is cleared to
 *     HV_MESSAGE_TYPE_NONE, vmbus_et_intr() is called, and MSR_HV_EOM is
 *     written when message_pending is set.
 *  2. Events: sc->vmbus_event_proc(sc, cpu) is invoked.
 *  3. Messages (VMBUS_SINT_MESSAGE): if the slot is not empty, the
 *     per-CPU message_task is enqueued on the per-CPU message_tq; the
 *     message itself is not processed here.
 *
 * Returns FILTER_HANDLED.
 */
128 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
130 hv_vmbus_message *msg, *msg_base;
132 msg_base = VMBUS_PCPU_GET(sc, message, cpu);
137 * TODO: move this to independent IDT vector.
139 msg = msg_base + VMBUS_SINT_TIMER;
140 if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) {
141 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
143 vmbus_et_intr(frame);
146 * Make sure the write to message_type (ie set to
147 * HV_MESSAGE_TYPE_NONE) happens before we read the
148 * message_pending and EOMing. Otherwise, the EOMing will
149 * not deliver any more messages
150 * since there is no empty slot
153 * mb() is used here, since atomic_thread_fence_seq_cst()
154 * will become compiler fence on UP kernel.
158 if (msg->header.message_flags.u.message_pending) {
160 * This will cause message queue rescan to possibly
161 * deliver another msg from the hypervisor
163 wrmsr(MSR_HV_EOM, 0);
168 * Check events. Hot path for network and storage I/O data; high rate.
171 * As recommended by the Windows guest fellows, we check events before
174 sc->vmbus_event_proc(sc, cpu);
177 * Check messages. Mainly management stuffs; ultra low rate.
179 msg = msg_base + VMBUS_SINT_MESSAGE;
180 if (__predict_false(msg->header.message_type != HV_MESSAGE_TYPE_NONE)) {
181 taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
182 VMBUS_PCPU_PTR(sc, message_task, cpu));
185 return (FILTER_HANDLED);
/*
 * Interrupt entry point: fetches the vmbus softc, bumps the per-CPU
 * interrupt counter and hands the trap frame to vmbus_handle_intr1().
 *
 * trap_frame: trap frame of the interrupted context.
 *
 * NOTE(review): the definition of `cpu` and the statements belonging to
 * the "Disable preemption" comment are not visible in this listing.
 */
189 vmbus_handle_intr(struct trapframe *trap_frame)
191 struct vmbus_softc *sc = vmbus_get_softc();
195 * Disable preemption.
200 * Do a little interrupt counting.
202 (*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
204 vmbus_handle_intr1(sc, trap_frame, cpu);
/*
 * Per-CPU SynIC setup; passed to smp_rendezvous() as the action function
 * by the driver init path, with xsc being the vmbus softc.
 *
 * Visible steps, in order:
 *  - record the virtual processor id: the value of MSR_HV_VP_INDEX when
 *    hyperv_features has CPUID_HV_MSR_VP_INDEX, otherwise `cpu`;
 *  - program MSR_HV_SIMP with the page frame of this CPU's message page
 *    (message_dma.hv_paddr) and set MSR_HV_SIMP_ENABLE;
 *  - program MSR_HV_SIEFP with the page frame of this CPU's event flags
 *    page (event_flag_dma.hv_paddr) and set MSR_HV_SIEFP_ENABLE;
 *  - build the SINT values for VMBUS_SINT_MESSAGE and VMBUS_SINT_TIMER
 *    from sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI;
 *  - set MSR_HV_SCTRL_ENABLE in MSR_HV_SCONTROL.
 *
 * In every MSR update the bits selected by the matching *_RSVD_MASK are
 * carried over from the value previously read into `orig`.
 *
 * NOTE(review): the definition of `cpu` and the rdmsr()/wrmsr() calls for
 * the two SINT registers are not visible in this listing.
 */
213 vmbus_synic_setup(void *xsc)
215 struct vmbus_softc *sc = xsc;
220 if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
222 * Save virtual processor id.
224 VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
228 * Virtual processoor id is only used by a pretty broken
229 * channel selection code from storvsc. It's nothing
230 * critical even if CPUID_HV_MSR_VP_INDEX is not set; keep
233 VMBUS_PCPU_GET(sc, vcpuid, cpu) = cpu;
237 * Setup the SynIC message.
239 orig = rdmsr(MSR_HV_SIMP);
240 val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
241 ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
242 MSR_HV_SIMP_PGSHIFT);
243 wrmsr(MSR_HV_SIMP, val);
246 * Setup the SynIC event flags.
248 orig = rdmsr(MSR_HV_SIEFP);
249 val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
250 ((VMBUS_PCPU_GET(sc, event_flag_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
251 MSR_HV_SIEFP_PGSHIFT);
252 wrmsr(MSR_HV_SIEFP, val);
256 * Configure and unmask SINT for message and event flags.
258 sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
260 val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
261 (orig & MSR_HV_SINT_RSVD_MASK);
265 * Configure and unmask SINT for timer.
267 sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
269 val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
270 (orig & MSR_HV_SINT_RSVD_MASK);
274 * All done; enable SynIC.
276 orig = rdmsr(MSR_HV_SCONTROL);
277 val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
278 wrmsr(MSR_HV_SCONTROL, val);
/*
 * Per-CPU SynIC teardown; passed to smp_rendezvous() by vmbus_detach().
 *
 * Visible steps, in order:
 *  - rewrite MSR_HV_SCONTROL keeping only the MSR_HV_SCTRL_RSVD_MASK bits
 *    (i.e. without MSR_HV_SCTRL_ENABLE);
 *  - set MSR_HV_SINT_MASKED on the VMBUS_SINT_MESSAGE and
 *    VMBUS_SINT_TIMER SINT registers;
 *  - rewrite MSR_HV_SIMP and MSR_HV_SIEFP keeping only their reserved
 *    bits.
 *
 * arg: not referenced by any visible line.
 *
 * NOTE(review): the reads of the SINT registers into `orig` are not
 * visible in this listing.
 */
282 vmbus_synic_teardown(void *arg)
290 orig = rdmsr(MSR_HV_SCONTROL);
291 wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
294 * Mask message and event flags SINT.
296 sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
298 wrmsr(sint, orig | MSR_HV_SINT_MASKED);
303 sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
305 wrmsr(sint, orig | MSR_HV_SINT_MASKED);
308 * Teardown SynIC message.
310 orig = rdmsr(MSR_HV_SIMP);
311 wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
314 * Teardown SynIC event flags.
316 orig = rdmsr(MSR_HV_SIEFP);
317 wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
/*
 * Allocates the per-CPU message and event-flag memory.
 *
 * For a given `cpu`, hyperv_dmamem_alloc() is called twice with the DMA
 * tag of sc->vmbus_dev, PAGE_SIZE arguments and
 * BUS_DMA_WAITOK | BUS_DMA_ZERO; the returned pointers are stored in the
 * per-CPU `message` and `event_flag` fields, with the DMA bookkeeping kept
 * in `message_dma` and `event_flag_dma`.
 *
 * NOTE(review): the per-CPU iteration, the handling of a NULL `ptr` and
 * the return statements are not visible in this listing.
 */
321 vmbus_dma_alloc(struct vmbus_softc *sc)
329 * Per-cpu messages and event flags.
331 ptr = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
332 PAGE_SIZE, 0, PAGE_SIZE,
333 VMBUS_PCPU_PTR(sc, message_dma, cpu),
334 BUS_DMA_WAITOK | BUS_DMA_ZERO);
337 VMBUS_PCPU_GET(sc, message, cpu) = ptr;
339 ptr = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
340 PAGE_SIZE, 0, PAGE_SIZE,
341 VMBUS_PCPU_PTR(sc, event_flag_dma, cpu),
342 BUS_DMA_WAITOK | BUS_DMA_ZERO);
345 VMBUS_PCPU_GET(sc, event_flag, cpu) = ptr;
/*
 * Releases the per-CPU memory set up by vmbus_dma_alloc().
 *
 * For each of the per-CPU `message` and `event_flag` pointers that is not
 * NULL, a release call taking the matching *_dma descriptor and the
 * pointer is made, and the pointer is reset to NULL so a second call
 * skips it.
 *
 * NOTE(review): the per-CPU iteration and the name of the release
 * function are not visible in this listing.
 */
351 vmbus_dma_free(struct vmbus_softc *sc)
356 if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
358 VMBUS_PCPU_PTR(sc, message_dma, cpu),
359 VMBUS_PCPU_GET(sc, message, cpu));
360 VMBUS_PCPU_GET(sc, message, cpu) = NULL;
362 if (VMBUS_PCPU_GET(sc, event_flag, cpu) != NULL) {
364 VMBUS_PCPU_PTR(sc, event_flag_dma, cpu),
365 VMBUS_PCPU_GET(sc, event_flag, cpu));
366 VMBUS_PCPU_GET(sc, event_flag, cpu) = NULL;
/*
 * Scans the IDT downwards, starting just below APIC_SPURIOUS_INT and
 * stopping at APIC_IPI_INTS, for a gate whose handler offset equals
 * IDTVEC(rsvd) (an unused slot) and installs IDTVEC(vmbus_isr) there with
 * setidt().
 *
 * The caller (vmbus_intr_setup()) treats a result of 0 as "no free
 * vector".
 *
 * NOTE(review): the existing comment below names the handler
 * 'hv_vmbus_callback', but the handler actually installed is
 * IDTVEC(vmbus_isr); it also says "form" where "from" is meant.
 * NOTE(review): the two setidt() calls (SDT_SYS386IGT vs. SDT_SYSIGT) are
 * presumably selected by an architecture #ifdef that is not visible in
 * this listing -- confirm.  The `ip` assignment and the return statements
 * are not visible either.
 */
372 * @brief Find a free IDT slot and setup the interrupt handler.
375 vmbus_vector_alloc(void)
379 struct gate_descriptor *ip;
382 * Search backwards form the highest IDT vector available for use
383 * as vmbus channel callback vector. We install 'hv_vmbus_callback'
384 * handler at that vector and use it to interrupt vcpus.
386 vector = APIC_SPURIOUS_INT;
387 while (--vector >= APIC_IPI_INTS) {
389 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
390 if (func == (uintptr_t)&IDTVEC(rsvd)) {
392 setidt(vector , IDTVEC(vmbus_isr), SDT_SYS386IGT,
393 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
395 setidt(vector , IDTVEC(vmbus_isr), SDT_SYSIGT,
406 * @brief Restore the IDT slot to rsvd.
409 vmbus_vector_free(int vector)
412 struct gate_descriptor *ip;
417 KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
418 ("invalid vector %d", vector));
421 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
422 KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
423 ("invalid vector %d", vector));
425 setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
/*
 * Task handler that binds the thread it runs on (curthread) to the CPU
 * set pointed to by xmask, using cpuset_setthread().
 *
 * xmask:   cpuset_t * describing the allowed CPUs.
 * pending: unused.
 *
 * Panics, reporting the thread id and the error code, when the binding
 * fails (the guarding condition itself is not visible in this listing).
 */
429 vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
431 cpuset_t *mask = xmask;
434 error = cpuset_setthread(curthread->td_tid, mask);
436 panic("curthread=%ju: can't pin; error=%d",
437 (uintmax_t)curthread->td_tid, error);
/*
 * Prepares everything the vmbus interrupt handler depends on, then
 * allocates the IDT vector.
 *
 * For a given `cpu` the visible lines:
 *  - register an interrupt counter named "cpu<N>:hyperv" backed by the
 *    per-CPU intr_cnt;
 *  - create the per-CPU event taskqueue ("hyperv event") with one thread
 *    named "hvevent<N>" at priority PI_NET;
 *  - create the per-CPU message taskqueue ("hyperv msg") with one thread
 *    named "hvmsg<N>" at priority PI_NET and initialise the per-CPU
 *    message_task;
 *  - for each of the two queues, build a one-CPU mask with CPU_SETOF(),
 *    initialise a temporary task with vmbus_cpuset_setthread_task and
 *    enqueue/drain it on that queue (presumably to pin the queue's thread
 *    to `cpu` -- the task argument lines are not visible here).
 *
 * Finally sc->vmbus_idtvec is set from vmbus_vector_alloc(); a value of 0
 * is reported as "cannot find free IDT vector".
 *
 * NOTE(review): the per-CPU iteration and the return statements are not
 * visible in this listing.
 */
442 vmbus_intr_setup(struct vmbus_softc *sc)
447 struct task cpuset_task;
448 char buf[MAXCOMLEN + 1];
451 /* Allocate an interrupt counter for Hyper-V interrupt */
452 snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
453 intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
456 * Setup taskqueue to handle events. Task will be per-
459 VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
460 "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
461 VMBUS_PCPU_PTR(sc, event_tq, cpu));
462 taskqueue_start_threads(VMBUS_PCPU_PTR(sc, event_tq, cpu),
463 1, PI_NET, "hvevent%d", cpu);
465 CPU_SETOF(cpu, &cpu_mask);
466 TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
468 taskqueue_enqueue(VMBUS_PCPU_GET(sc, event_tq, cpu),
470 taskqueue_drain(VMBUS_PCPU_GET(sc, event_tq, cpu),
474 * Setup tasks and taskqueues to handle messages.
476 VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
477 "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
478 VMBUS_PCPU_PTR(sc, message_tq, cpu));
479 taskqueue_start_threads(VMBUS_PCPU_PTR(sc, message_tq, cpu), 1,
480 PI_NET, "hvmsg%d", cpu);
481 TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
484 CPU_SETOF(cpu, &cpu_mask);
485 TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
487 taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
489 taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
494 * All Hyper-V ISR required resources are setup, now let's find a
495 * free IDT vector for Hyper-V ISR and set it up.
497 sc->vmbus_idtvec = vmbus_vector_alloc();
498 if (sc->vmbus_idtvec == 0) {
499 device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
503 device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
/*
 * Reverses vmbus_intr_setup().
 *
 * The IDT vector is released first with vmbus_vector_free().  Then, for a
 * given `cpu`, a non-NULL event taskqueue is freed, and a non-NULL
 * message taskqueue is drained of the per-CPU message_task before being
 * freed.  Both queue pointers are reset to NULL afterwards, so calling
 * this again skips queues that are already gone.
 *
 * NOTE(review): the per-CPU iteration is not visible in this listing.
 */
510 vmbus_intr_teardown(struct vmbus_softc *sc)
514 vmbus_vector_free(sc->vmbus_idtvec);
517 if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
518 taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
519 VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
521 if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
522 taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
523 VMBUS_PCPU_PTR(sc, message_task, cpu));
524 taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
525 VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
/*
 * bus_read_ivar method: exposes fields of the child's struct hv_device
 * (obtained from the child's ivars) through *result.
 *
 *  HV_VMBUS_IVAR_TYPE      address of class_id
 *  HV_VMBUS_IVAR_INSTANCE  address of device_id
 *  HV_VMBUS_IVAR_DEVCTX    the struct hv_device pointer itself
 *  HV_VMBUS_IVAR_NODE      the `device` member
 *
 * dev:    the bus device (not referenced by any visible line).
 * child:  child device whose ivars are read.
 * index:  presumably the switch selector -- the switch line itself is not
 *         visible in this listing.
 * result: output location.
 *
 * NOTE(review): the return statements, including the one for an unknown
 * index, are not visible in this listing.
 */
531 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
533 struct hv_device *child_dev_ctx = device_get_ivars(child);
536 case HV_VMBUS_IVAR_TYPE:
537 *result = (uintptr_t) &child_dev_ctx->class_id;
539 case HV_VMBUS_IVAR_INSTANCE:
540 *result = (uintptr_t) &child_dev_ctx->device_id;
542 case HV_VMBUS_IVAR_DEVCTX:
543 *result = (uintptr_t) child_dev_ctx;
545 case HV_VMBUS_IVAR_NODE:
546 *result = (uintptr_t) child_dev_ctx->device;
/*
 * bus_write_ivar method.  The four vmbus instance variables (TYPE,
 * INSTANCE, DEVCTX, NODE) share a single case group; no visible line
 * stores `value` anywhere.
 *
 * NOTE(review): the statement executed for this case group and the
 * function's return values are not visible in this listing -- presumably
 * these ivars are rejected as read-only; confirm against the full file.
 */
553 vmbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
556 case HV_VMBUS_IVAR_TYPE:
557 case HV_VMBUS_IVAR_INSTANCE:
558 case HV_VMBUS_IVAR_DEVCTX:
559 case HV_VMBUS_IVAR_NODE:
/*
 * bus_child_pnpinfo_str method: appends
 * "classid=<guid> deviceid=<guid>" to buf, where the GUIDs are the
 * child's class_id and device_id formatted by snprintf_hv_guid().
 *
 * dev:    the bus device (not referenced by any visible line).
 * child:  child device; its ivars supply the struct hv_device.
 * buf:    destination string; text is appended with strlcat(), so it must
 *         already be NUL-terminated.
 * buflen: total size of buf, passed to strlcat() as the bound.
 *
 * NOTE(review): the declaration of guidbuf, any NULL check on dev_ctx and
 * the return statement are not visible in this listing.
 */
567 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
570 struct hv_device *dev_ctx = device_get_ivars(child);
575 strlcat(buf, "classid=", buflen);
576 snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id);
577 strlcat(buf, guidbuf, buflen);
579 strlcat(buf, " deviceid=", buflen);
580 snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id);
581 strlcat(buf, guidbuf, buflen);
/*
 * Allocates a zeroed hv_device (M_DEVBUF, M_WAITOK | M_ZERO) and fills in
 * its channel pointer and its class/instance GUIDs.
 *
 * type:     class GUID, copied into class_id.
 * instance: instance GUID, copied into device_id.
 * channel:  channel to associate with the new device; the pointer is
 *           stored, not copied.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the new hv_device is returned to the caller -- confirm.
 */
587 hv_vmbus_child_device_create(hv_guid type, hv_guid instance,
588 hv_vmbus_channel *channel)
590 hv_device *child_dev;
593 * Allocate the new child device
595 child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_WAITOK | M_ZERO);
597 child_dev->channel = channel;
598 memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
599 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
/*
 * Formats the 16 bytes of guid->data into buf as lower-case hex in the
 * layout xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.
 *
 * The first three groups are emitted byte-reversed (d[3]..d[0], d[5] d[4],
 * d[7] d[6]); the remaining eight bytes are emitted in storage order.
 *
 * buf:  destination buffer.
 * sz:   size of buf, passed to snprintf() as the bound.
 * guid: GUID to format.
 *
 * The snprintf() result is kept in cnt; the return statement is not
 * visible in this listing (presumably cnt is returned -- confirm).
 */
605 snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid)
608 const unsigned char *d = guid->data;
610 cnt = snprintf(buf, sz,
611 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
612 d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6],
613 d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
/*
 * Adds a new-bus child for child_dev under the vmbus device.
 *
 * The class GUID is formatted and printed ("VMBUS: Class ID: ..."); the
 * condition guarding that print, if any, is not visible in this listing.
 * A child is then created with device_add_child(vmbus_get_device(), NULL,
 * -1), remembered in child_dev->device, and child_dev is attached to it
 * as its ivars (which is what vmbus_read_ivar() later reads back).
 *
 * NOTE(review): the result of device_add_child() is stored and passed to
 * device_set_ivars() with no visible NULL check in between -- confirm
 * whether allocation failure needs handling here.
 */
618 hv_vmbus_child_device_register(struct hv_device *child_dev)
624 snprintf_hv_guid(name, sizeof(name), &child_dev->class_id);
625 printf("VMBUS: Class ID: %s\n", name);
628 child = device_add_child(vmbus_get_device(), NULL, -1);
629 child_dev->device = child;
630 device_set_ivars(child, child_dev);
/*
 * Removes the new-bus child recorded in child_dev->device from the vmbus
 * device with device_delete_child(); the result is kept in `ret`.
 *
 * NOTE(review): any locking around the call and the return statement are
 * not visible in this listing.
 */
636 hv_vmbus_child_device_unregister(struct hv_device *child_dev)
640 * XXXKYS: Ensure that this is the opposite of
644 ret = device_delete_child(vmbus_get_device(), child_dev->device);
/*
 * device_probe method.
 *
 * The device is rejected when any of the following holds:
 *  - the ACPI ID does not match vmbus_ids;
 *  - the unit number is not 0;
 *  - the guest is not running on Hyper-V (vm_guest != VM_GUEST_HV);
 *  - hyperv_features lacks CPUID_HV_MSR_SYNIC.
 * Otherwise the description is set to "Hyper-V Vmbus" and
 * BUS_PROBE_DEFAULT is returned.
 *
 * NOTE(review): the value returned on rejection is not visible in this
 * listing.
 */
650 vmbus_probe(device_t dev)
652 if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL ||
653 device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
654 (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
657 device_set_desc(dev, "Hyper-V Vmbus");
659 return (BUS_PROBE_DEFAULT);
/*
 * NOTE(review): the signature line of this routine is not visible in this
 * listing; the description below covers only the visible statements.
 *
 * Visible sequence:
 *  1. VMBUS_FLAG_ATTACHED is tested and then set, so the body is meant to
 *     run once (the early-return statement itself is not visible).
 *  2. vmbus_dma_alloc() and vmbus_intr_setup() are called; their results
 *     go to `ret`.
 *  3. vmbus_synic_setup() is run through smp_rendezvous() and
 *     VMBUS_FLAG_SYNIC is set, which vmbus_detach() later checks before
 *     tearing the SynIC down.
 *  4. hv_vmbus_connect() is called.
 *  5. The event processor is chosen from the negotiated protocol version:
 *     vmbus_event_proc_compat for WS2008/WIN7, vmbus_event_proc otherwise.
 *  6. Channel offers are requested and bus_generic_attach() is run on the
 *     vmbus device.
 * The visible part of the error path calls vmbus_intr_teardown().
 */
663 * @brief Main vmbus driver initialization routine.
666 * - initialize the vmbus driver context
667 * - setup various driver entry points
668 * - invoke the vmbus hv main init routine
669 * - get the irq resource
670 * - invoke the vmbus to add the vmbus root device
671 * - setup the vmbus root device
672 * - retrieve the channel offers
677 struct vmbus_softc *sc = vmbus_get_softc();
680 if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
682 sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
685 * Allocate DMA stuffs.
687 ret = vmbus_dma_alloc(sc);
694 ret = vmbus_intr_setup(sc);
702 device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
703 smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
704 sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
707 * Connect to VMBus in the root partition
709 ret = hv_vmbus_connect();
714 if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
715 hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)
716 sc->vmbus_event_proc = vmbus_event_proc_compat;
718 sc->vmbus_event_proc = vmbus_event_proc;
720 hv_vmbus_request_channel_offers();
723 bus_generic_attach(sc->vmbus_dev);
724 device_printf(sc->vmbus_dev, "device scan, probe and attach done\n");
729 vmbus_intr_teardown(sc);
/*
 * Placeholder event processor: vmbus_attach() installs it as
 * sc->vmbus_event_proc until the real one is selected after protocol
 * negotiation.  Both parameters are marked unused.
 */
736 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
/*
 * device_attach method.
 *
 * Publishes the softc through the global vmbus_sc, records the device in
 * it, installs vmbus_event_proc_dummy as the initial event processor and
 * runs bus_generic_probe() on the device.
 *
 * NOTE(review): the statements belonging to the "already booted" comment
 * (presumably a conditional call into the driver init routine) and the
 * return statement are not visible in this listing.
 */
741 vmbus_attach(device_t dev)
743 vmbus_sc = device_get_softc(dev);
744 vmbus_sc->vmbus_dev = dev;
747 * Event processing logic will be configured:
748 * - After the vmbus protocol version negotiation.
749 * - Before we request channel offers.
751 vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
754 * If the system has already booted and thread
755 * scheduling is possible indicated by the global
756 * cold set to zero, we just call the driver
757 * initialization directly.
762 bus_generic_probe(dev);
/*
 * SYSINIT hook (registered at SI_SUB_SMP / SI_ORDER_ANY at the end of
 * this file).  It does nothing further unless the guest runs on Hyper-V
 * and a vmbus softc exists.
 *
 * arg: unused.
 *
 * NOTE(review): the statements that follow the guard (presumably a
 * conditional call into the driver init routine, per the comment) are not
 * visible in this listing.
 */
767 vmbus_sysinit(void *arg __unused)
769 if (vm_guest != VM_GUEST_HV || vmbus_get_softc() == NULL)
773 * If the system has already booted and thread
774 * scheduling is possible, as indicated by the
775 * global cold set to zero, we just call the driver
776 * initialization directly.
/*
 * device_detach method.
 *
 * Visible sequence: release channels that were never attached, disconnect
 * from vmbus, tear down the SynIC on all CPUs via smp_rendezvous() if
 * VMBUS_FLAG_SYNIC is set (clearing the flag first so it happens once),
 * then undo the interrupt setup with vmbus_intr_teardown().
 *
 * NOTE(review): any further cleanup and the return statement are not
 * visible in this listing.
 */
783 vmbus_detach(device_t dev)
785 struct vmbus_softc *sc = device_get_softc(dev);
787 hv_vmbus_release_unattached_channels();
788 hv_vmbus_disconnect();
790 if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
791 sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
792 smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
795 vmbus_intr_teardown(sc);
/*
 * Method table of the vmbus driver.
 *
 * Device interface: probe/attach/detach are implemented in this file;
 * shutdown/suspend/resume use the bus_generic_* defaults.
 * Bus interface: add_child/print_child use the bus_generic_* defaults;
 * read_ivar, write_ivar and child_pnpinfo_str are implemented in this
 * file.
 *
 * NOTE(review): the table terminator is not visible in this listing.
 */
801 static device_method_t vmbus_methods[] = {
802 /* Device interface */
803 DEVMETHOD(device_probe, vmbus_probe),
804 DEVMETHOD(device_attach, vmbus_attach),
805 DEVMETHOD(device_detach, vmbus_detach),
806 DEVMETHOD(device_shutdown, bus_generic_shutdown),
807 DEVMETHOD(device_suspend, bus_generic_suspend),
808 DEVMETHOD(device_resume, bus_generic_resume),
811 DEVMETHOD(bus_add_child, bus_generic_add_child),
812 DEVMETHOD(bus_print_child, bus_generic_print_child),
813 DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
814 DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
815 DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str),
/*
 * Driver description handed to DRIVER_MODULE(); the softc allocated per
 * device instance has the size of struct vmbus_softc.
 *
 * NOTE(review): the other initializer fields (presumably the driver name
 * and the method table) are not visible in this listing.
 */
820 static driver_t vmbus_driver = {
823 sizeof(struct vmbus_softc)
/*
 * Module glue: registers the vmbus driver under the acpi bus, declares
 * the dependency on the acpi module (version 1), sets the vmbus module
 * version to 1, and schedules vmbus_sysinit() at SI_SUB_SMP with
 * SI_ORDER_ANY.
 */
826 static devclass_t vmbus_devclass;
828 DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
829 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
830 MODULE_VERSION(vmbus, 1);
834 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
837 SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);