2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * VM Bus Driver Implementation
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/sysctl.h>
42 #include <sys/syslog.h>
43 #include <sys/systm.h>
44 #include <sys/rtprio.h>
45 #include <sys/interrupt.h>
47 #include <sys/taskqueue.h>
48 #include <sys/mutex.h>
51 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 #include <machine/intr_machdep.h>
58 #include "hv_vmbus_priv.h"
/*
 * File-scope driver state shared by the interrupt, initialization and
 * teardown code in this file.
 */
/* Interrupt events filled in by swi_add() for the "hv_msg" and "hv_event" software interrupts. */
63 static struct intr_event *hv_msg_intr_event;
64 static struct intr_event *hv_event_intr_event;
/* Handler cookies returned by swi_add(); passed to swi_sched() and swi_remove(). */
65 static void *msg_swintr;
66 static void *event_swintr;
/*
 * Device used for the IRQ resource calls and as the parent in
 * device_add_child(). Where it is assigned is not visible in this extract.
 */
67 static device_t vmbus_devp;
/* Cookie produced by bus_setup_intr() and consumed by bus_teardown_intr(). */
68 static void *vmbus_cookiep;
/*
 * IRQ resource obtained from bus_alloc_resource().
 * NOTE(review): not static, unlike its neighbours - confirm whether another
 * file references it before changing the linkage.
 */
70 struct resource *intr_res;
/* IRQ number requested for the bus; initialized to VMBUS_IRQ. */
71 static int vmbus_irq = VMBUS_IRQ;
/* NOTE(review): presumably an "already initialized" flag; its users are not visible in this extract. */
72 static int vmbus_inited;
/* Interrupt vector and page buffers handed to hv_vmbus_synic_init() through smp_rendezvous(). */
73 static hv_setup_args setup_args; /* only CPU 0 supported at this time */
76 * @brief Software interrupt thread routine to handle channel messages from
/*
 * Software-interrupt handler for VMBus channel messages; it is registered
 * under the name "hv_msg" with swi_add() and scheduled from hv_vmbus_isr().
 *
 * For the current CPU it looks at the HV_VMBUS_MESSAGE_SINT slot of the
 * SynIC message page. A message found there is copied to a freshly
 * allocated buffer and queued to hv_vmbus_on_channel_message on the
 * connection work queue; the slot is then marked free and, if the
 * message_pending flag is set, an end-of-message is signalled via
 * HV_X64_MSR_EOM.
 *
 * The "dummy" argument is not used by any visible line.
 *
 * NOTE(review): this extract omits several lines of the function (the loop
 * header, the malloc flags and whatever follows the KASSERT) - confirm the
 * allocation-failure handling against the full file.
 */
80 vmbus_msg_swintr(void *dummy)
84 hv_vmbus_message* msg;
85 hv_vmbus_message* copied;
87 cpu = PCPU_GET(cpuid);
88 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
/*
 * The cast binds tighter than '+', so this is pointer arithmetic in units
 * of hv_vmbus_message: it selects message slot HV_VMBUS_MESSAGE_SINT.
 */
89 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
92 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
93 break; /* no message */
/* Take a private copy so the shared slot can be released right away. */
95 copied = malloc(sizeof(hv_vmbus_message),
97 KASSERT(copied != NULL,
98 ("Error VMBUS: malloc failed to allocate"
99 " hv_vmbus_message!"));
102 memcpy(copied, msg, sizeof(hv_vmbus_message));
/* The copy is handed to the work item; this function does not free it. */
103 hv_queue_work_item(hv_vmbus_g_connection.work_queue,
104 hv_vmbus_on_channel_message, copied);
/* Mark the slot as empty. */
107 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
110 * Make sure the write to message_type (ie set to
111 * HV_MESSAGE_TYPE_NONE) happens before we read the
112 * message_pending and EOMing. Otherwise, the EOMing will
113 * not deliver any more messages
114 * since there is no empty slot
118 if (msg->header.message_flags.u.message_pending) {
120 * This will cause message queue rescan to possibly
121 * deliver another msg from the hypervisor
123 wrmsr(HV_X64_MSR_EOM, 0);
129 * @brief Interrupt filter routine for VMBUS.
131 * The purpose of this routine is to determine the type of VMBUS protocol
132 * message to process - an event or a channel message.
133 * As this is an interrupt filter routine, the function runs in a very
134 * restricted environment. From the manpage for bus_setup_intr(9)
136 * In this restricted environment, care must be taken to account for all
137 * races. A careful analysis of races should be done as well. It is gener-
138 * ally cheaper to take an extra interrupt, for example, than to protect
139 * variables with spinlocks. Read, modify, write cycles of hardware regis-
140 * ters need to be carefully analyzed if other threads are accessing the
/*
 * Interrupt filter installed with bus_setup_intr(). It does no protocol
 * work itself: it inspects the current CPU's SynIC event and message pages
 * and schedules the matching software interrupt (event_swintr and/or
 * msg_swintr). The visible return statement reports FILTER_HANDLED.
 *
 * The "unused" argument is not read by any visible line.
 */
144 hv_vmbus_isr(void *unused)
147 hv_vmbus_message* msg;
148 hv_vmbus_synic_event_flags* event;
151 cpu = PCPU_GET(cpuid);
152 /* (Temporary limit) */
153 KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
156 * The Windows team has advised that we check for events
157 * before checking for messages. This is the way they do it
158 * in Windows when running as a guest in Hyper-V
161 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
/*
 * Cast first, then add: indexes the event-flags entry belonging to
 * HV_VMBUS_MESSAGE_SINT within the page.
 */
162 event = (hv_vmbus_synic_event_flags*)
163 page_addr + HV_VMBUS_MESSAGE_SINT;
165 /* Since we are a child, we only need to check bit 0 */
166 if (synch_test_and_clear_bit(0, &event->flags32[0])) {
167 swi_sched(event_swintr, 0);
170 /* Check if there are actual msgs to be processed */
171 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
172 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
/* The slot is only tested here; the software interrupt handler consumes it. */
174 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
175 swi_sched(msg_swintr, 0);
178 return FILTER_HANDLED;
/*
 * NOTE(review): the function header is missing from this extract;
 * presumably this is the body of vmbus_read_ivar, the routine the method
 * table installs for bus_read_ivar - confirm against the full file.
 *
 * The visible lines fetch the child's hv_device from its ivars and store
 * one of four values in *result, each cast to uintptr_t:
 *   HV_VMBUS_IVAR_TYPE      address of the class_id field
 *   HV_VMBUS_IVAR_INSTANCE  address of the device_id field
 *   HV_VMBUS_IVAR_DEVCTX    the hv_device pointer itself
 *   HV_VMBUS_IVAR_NODE      the hv_device's "device" member
 */
188 struct hv_device *child_dev_ctx = device_get_ivars(child);
192 case HV_VMBUS_IVAR_TYPE:
193 *result = (uintptr_t) &child_dev_ctx->class_id;
195 case HV_VMBUS_IVAR_INSTANCE:
196 *result = (uintptr_t) &child_dev_ctx->device_id;
198 case HV_VMBUS_IVAR_DEVCTX:
199 *result = (uintptr_t) child_dev_ctx;
201 case HV_VMBUS_IVAR_NODE:
202 *result = (uintptr_t) child_dev_ctx->device;
217 case HV_VMBUS_IVAR_TYPE:
218 case HV_VMBUS_IVAR_INSTANCE:
219 case HV_VMBUS_IVAR_DEVCTX:
220 case HV_VMBUS_IVAR_NODE:
/*
 * Allocates an hv_device from M_DEVBUF and initializes it with the given
 * channel and the two GUIDs named "type" and "instance" (copied into
 * class_id and device_id respectively).
 *
 * NOTE(review): the "type" and "instance" parameter declarations, the
 * remaining malloc flags and both return statements are missing from this
 * extract; presumably NULL is returned when the allocation fails - confirm.
 */
228 hv_vmbus_child_device_create(
231 hv_vmbus_channel* channel)
233 hv_device* child_dev;
236 * Allocate the new child device
238 child_dev = malloc(sizeof(hv_device), M_DEVBUF,
240 KASSERT(child_dev != NULL,
241 ("Error VMBUS: malloc failed to allocate hv_device!"));
/* The allocation result is tested explicitly in addition to the KASSERT. */
243 if (child_dev == NULL)
246 child_dev->channel = channel;
/* sizeof(hv_guid) bytes are copied from the address of each parameter. */
247 memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
248 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
/*
 * Prints the device's class GUID as hexadecimal text.
 *
 * The loop formats class_id.data[0..15] as two lowercase hex digits each,
 * producing 32 characters; every sprintf() also writes a terminating NUL,
 * so the buffer is a valid string once the loop finishes.
 *
 * NOTE(review): guid_name is "unsigned char" while sprintf() takes a
 * "char *"; a line between the loop and the printf is missing from this
 * extract, so the print may be conditional - confirm.
 */
254 print_dev_guid(struct hv_device *dev)
257 unsigned char guid_name[100];
258 for (i = 0; i < 32; i += 2)
259 sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
261 printf("VMBUS: Class ID: %s\n", guid_name);
/*
 * Registers a VMBus child with the device tree: logs its class GUID, adds
 * a new child under vmbus_devp (no driver name, unit -1), links the
 * hv_device and the device_t to each other, then probes and attaches it.
 *
 * NOTE(review): no visible line checks the result of device_add_child()
 * before it is passed to device_set_ivars(); several lines, including the
 * return statement, are missing from this extract - confirm.
 */
265 hv_vmbus_child_device_register(struct hv_device *child_dev)
270 print_dev_guid(child_dev);
273 child = device_add_child(vmbus_devp, NULL, -1);
/* Cross-link: the hv_device records its device_t and becomes that device's ivars. */
274 child_dev->device = child;
275 device_set_ivars(child, child_dev);
278 ret = device_probe_and_attach(child);
/*
 * Removes a previously registered child by deleting child_dev->device from
 * vmbus_devp. The return statement is missing from this extract.
 */
285 hv_vmbus_child_device_unregister(struct hv_device *child_dev)
289 * XXXKYS: Ensure that this is the opposite of
293 ret = device_delete_child(vmbus_devp, child_dev->device);
298 static void vmbus_identify(driver_t *driver, device_t parent) {
299 BUS_ADD_CHILD(parent, 0, "vmbus", 0);
300 if (device_find_child(parent, "vmbus", 0) == NULL) {
301 BUS_ADD_CHILD(parent, 0, "vmbus", 0);
/*
 * Device probe method: logs the probe, checks with
 * hv_vmbus_query_hypervisor_presence() that the hypervisor is there, and
 * sets the device description to "Vmbus Devices".
 *
 * NOTE(review): both return statements are missing from this extract;
 * presumably an error is returned when the hypervisor is absent - confirm.
 */
306 vmbus_probe(device_t dev) {
308 device_printf(dev, "VMBUS: probe\n");
310 if (!hv_vmbus_query_hypervisor_presence())
313 device_set_desc(dev, "Vmbus Devices");
319 * @brief Main vmbus driver initialization routine.
322 * - initialize the vmbus driver context
323 * - setup various driver entry points
324 * - invoke the vmbus hv main init routine
325 * - get the irq resource
326 * - invoke the vmbus to add the vmbus root device
327 * - setup the vmbus root device
328 * - retrieve the channel offers
/*
 * NOTE(review): the function header is missing from this extract;
 * presumably this is the body of the main bus initialization routine
 * (vmbus_bus_init) - confirm against the full file. Error checks, goto
 * targets and the return statements are also not visible.
 *
 * Visible sequence: hypervisor init, two software interrupts (messages and
 * events), IRQ allocation and filter setup bound to CPU 0, vector lookup,
 * page-buffer allocation, SynIC setup on the CPUs, VMBus connect, and a
 * request for channel offers. The trailing calls undo the interrupt setup.
 */
/*
 * Function-local declaration of struct ioapic_intsrc, used further down to
 * read io_vector through a cast of the generic interrupt source.
 * NOTE(review): this only works while the layout matches the definition
 * used by the code that created the interrupt source - confirm.
 */
333 struct ioapic_intsrc {
334 struct intsrc io_intsrc;
340 u_int io_edgetrigger:1;
346 unsigned int vector = 0;
348 struct ioapic_intsrc *intpin;
355 ret = hv_vmbus_init();
359 printf("Error VMBUS: Hypervisor Initialization Failed!\n");
/* Software interrupt that runs vmbus_msg_swintr; its cookie lands in msg_swintr. */
363 ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
364 NULL, SWI_CLOCK, 0, &msg_swintr);
370 * Message SW interrupt handler checks a per-CPU page and
371 * thus the thread needs to be bound to CPU-0 - which is where
372 * all interrupts are processed.
374 ret = intr_event_bind(hv_msg_intr_event, 0);
/* Software interrupt that runs hv_vmbus_on_events; its cookie lands in event_swintr. */
379 ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
380 NULL, SWI_CLOCK, 0, &event_swintr);
/* Request exactly one IRQ: start and end are both vmbus_irq, count is 1. */
385 intr_res = bus_alloc_resource(vmbus_devp,
386 SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
388 if (intr_res == NULL) {
389 ret = ENOMEM; /* XXXKYS: Need a better errno */
394 * Setup interrupt filter handler
/* hv_vmbus_isr is installed as the filter; the handler and argument slots are NULL. */
396 ret = bus_setup_intr(vmbus_devp, intr_res,
397 INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
398 NULL, &vmbus_cookiep);
/* Deliver the interrupt on CPU 0. */
403 ret = bus_bind_intr(vmbus_devp, intr_res, 0);
407 isrc = intr_lookup_source(vmbus_irq);
408 if ((isrc == NULL) || (isrc->is_event == NULL)) {
413 /* vector = isrc->is_event->ie_vector; */
/* Reinterpret the generic source as the local ioapic_intsrc to read its vector. */
414 intpin = (struct ioapic_intsrc *)isrc;
415 vector = intpin->io_vector;
418 printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
421 * Notify the hypervisor of our irq.
423 setup_args.vector = vector;
/* Allocate two zeroed, page-sized buffers for setup_args. */
424 for(i = 0; i < 2; i++) {
425 setup_args.page_buffers[i] =
426 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
427 if (setup_args.page_buffers[i] == NULL) {
428 KASSERT(setup_args.page_buffers[i] != NULL,
429 ("Error VMBUS: malloc failed!"));
431 free(setup_args.page_buffers[0], M_DEVBUF);
436 /* only CPU #0 supported at this time */
/* hv_vmbus_synic_init is the rendezvous action and receives &setup_args. */
437 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
440 * Connect to VMBus in the root partition
442 ret = hv_vmbus_connect();
447 hv_vmbus_request_channel_offers();
453 * remove swi, bus and intr resource
/*
 * Teardown in reverse order of setup: interrupt handler, IRQ resource,
 * event swi, message swi. The labels that select the entry point are not
 * visible in this extract.
 */
455 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
458 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
461 swi_remove(event_swintr);
464 swi_remove(msg_swintr);
/*
 * Device attach method: logs the device pointer being attached.
 *
 * NOTE(review): everything after the log line, including the code the
 * fragment below describes and the return statement, is missing from this
 * extract - confirm against the full file.
 */
473 vmbus_attach(device_t dev)
476 device_printf(dev, "VMBUS: attach dev: %p\n", dev);
480 * If the system has already booted and thread
481 * scheduling is possible indicated by the global
482 * cold set to zero, we just call the driver
483 * initialization directly.
495 * If the system has already booted and thread
496 * scheduling is possible indicated by the global
497 * cold set to zero, we just call the driver
498 * initialization directly.
/*
 * NOTE(review): the function header is missing from this extract;
 * presumably this is the body of the bus teardown routine (vmbus_bus_exit)
 * - confirm against the full file.
 *
 * Visible sequence: release unattached channels, disconnect from VMBus,
 * run hv_vmbus_synic_cleanup through smp_rendezvous(), free the two
 * setup_args page buffers, then remove the interrupt handler, the IRQ
 * resource and both software interrupts.
 */
509 hv_vmbus_release_unattached_channels();
510 hv_vmbus_disconnect();
512 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
/* Only buffers that were actually allocated are freed. */
514 for(i = 0; i < 2; i++) {
515 if (setup_args.page_buffers[i] != 0)
516 free(setup_args.page_buffers[i], M_DEVBUF);
521 /* remove swi, bus and intr resource */
522 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
524 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
526 swi_remove(msg_swintr);
527 swi_remove(event_swintr);
539 vmbus_detach(device_t dev)
549 printf("VMBUS: load\n");
/*
 * Module unload hook: the only visible statement logs "VMBUS: unload".
 * NOTE(review): the return type and any further statements are missing
 * from this extract.
 */
553 vmbus_mod_unload(void)
556 printf("VMBUS: unload\n");
560 vmbus_modevent(module_t mod, int what, void *arg)
/*
 * Method table for the vmbus driver. The device entries map identify,
 * probe, attach and detach to the vmbus_* routines in this file and leave
 * shutdown, suspend and resume to the bus_generic_* implementations. The
 * bus entries use the generic add_child and print_child and route ivar
 * reads and writes to vmbus_read_ivar and vmbus_write_ivar.
 *
 * NOTE(review): the table terminator is missing from this extract.
 */
575 static device_method_t vmbus_methods[] = {
576 /** Device interface */
577 DEVMETHOD(device_identify, vmbus_identify),
578 DEVMETHOD(device_probe, vmbus_probe),
579 DEVMETHOD(device_attach, vmbus_attach),
580 DEVMETHOD(device_detach, vmbus_detach),
581 DEVMETHOD(device_shutdown, bus_generic_shutdown),
582 DEVMETHOD(device_suspend, bus_generic_suspend),
583 DEVMETHOD(device_resume, bus_generic_resume),
/* Bus interface entries follow. */
586 DEVMETHOD(bus_add_child, bus_generic_add_child),
587 DEVMETHOD(bus_print_child, bus_generic_print_child),
588 DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
589 DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
/*
 * Driver and module registration: the driver is named "vmbus", uses
 * vmbus_methods, and is registered on the nexus bus with vmbus_modevent as
 * its module event handler. The module version is 1.
 */
593 static char driver_name[] = "vmbus";
/* The third initializer is 0; presumably the softc size, i.e. no per-device softc - confirm. */
594 static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
597 devclass_t vmbus_devclass;
599 DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
600 MODULE_VERSION(vmbus,1);
602 /* TODO: We want to be earlier than SI_SUB_VFS */
/* Runs vmbus_init with a NULL argument at SI_SUB_VFS, SI_ORDER_MIDDLE. */
603 SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);