2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * VM Bus Driver Implementation
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/sysctl.h>
42 #include <sys/syslog.h>
43 #include <sys/systm.h>
44 #include <sys/rtprio.h>
45 #include <sys/interrupt.h>
47 #include <sys/taskqueue.h>
48 #include <sys/mutex.h>
51 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 #include <machine/intr_machdep.h>
58 #include "hv_vmbus_priv.h"
/*
 * Driver-wide state shared by the interrupt path and the init/teardown
 * code in this file.
 */

/*
 * Interrupt events and handler cookies produced by swi_add() for the
 * "hv_msg" and "hv_event" software interrupts. The cookies are what the
 * interrupt filter passes to swi_sched() and what the teardown paths pass
 * to swi_remove().
 */
63 static struct intr_event *hv_msg_intr_event;
64 static struct intr_event *hv_event_intr_event;
65 static void *msg_swintr;
66 static void *event_swintr;
/*
 * Device handle used as the parent in device_add_child() and as the owner
 * in the bus_alloc_resource()/bus_setup_intr() calls and their teardown
 * counterparts.
 * NOTE(review): the place where it is assigned is not visible here.
 */
67 static device_t vmbus_devp;
/* Cookie written by bus_setup_intr() and handed back to bus_teardown_intr(). */
68 static void *vmbus_cookiep;
/*
 * IRQ resource returned by bus_alloc_resource(). Unlike its neighbours it
 * is not declared static.
 */
70 struct resource *intr_res;
/*
 * IRQ number, initialised to VMBUS_IRQ. It is used as both the start and
 * the end of the range requested from bus_alloc_resource() and as the
 * argument to intr_lookup_source().
 */
71 static int vmbus_irq = VMBUS_IRQ;
/*
 * NOTE(review): not referenced in the visible lines; presumably an
 * "already initialised" flag -- confirm against its users.
 */
72 static int vmbus_inited;
/*
 * Arguments passed to hv_vmbus_synic_init() through smp_rendezvous(): the
 * interrupt vector plus two PAGE_SIZE buffers allocated during bus init.
 */
73 static hv_setup_args setup_args; /* only CPU 0 supported at this time */
76 * @brief Software interrupt thread routine to handle channel messages from the hypervisor.
80 vmbus_msg_swintr(void *dummy)
84 hv_vmbus_message* msg;
85 hv_vmbus_message* copied;
87 cpu = PCPU_GET(cpuid);
88 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
89 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
92 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
93 break; /* no message */
95 copied = malloc(sizeof(hv_vmbus_message),
97 KASSERT(copied != NULL,
98 ("Error VMBUS: malloc failed to allocate"
99 " hv_vmbus_message!"));
102 memcpy(copied, msg, sizeof(hv_vmbus_message));
103 hv_queue_work_item(hv_vmbus_g_connection.work_queue,
104 hv_vmbus_on_channel_message, copied);
107 msg->header.message_type = HV_MESSAGE_TYPE_NONE;
110 * Make sure the write to message_type (ie set to
111 * HV_MESSAGE_TYPE_NONE) happens before we read the
112 * message_pending and EOMing. Otherwise, the EOMing will
113 * not deliver any more messages
114 * since there is no empty slot
118 if (msg->header.message_flags.u.message_pending) {
120 * This will cause message queue rescan to possibly
121 * deliver another msg from the hypervisor
123 wrmsr(HV_X64_MSR_EOM, 0);
129 * @brief Interrupt filter routine for VMBUS.
131 * The purpose of this routine is to determine the type of VMBUS protocol
132 * message to process - an event or a channel message.
133 * As this is an interrupt filter routine, the function runs in a very
133 * restricted environment. From the manpage for bus_setup_intr(9)
136 * In this restricted environment, care must be taken to account for all
137 * races. A careful analysis of races should be done as well. It is gener-
138 * ally cheaper to take an extra interrupt, for example, than to protect
139 * variables with spinlocks. Read, modify, write cycles of hardware regis-
140 * ters need to be carefully analyzed if other threads are accessing the same registers.
144 hv_vmbus_isr(void *unused)
147 hv_vmbus_message* msg;
148 hv_vmbus_synic_event_flags* event;
151 cpu = PCPU_GET(cpuid);
152 /* (Temporary limit) */
153 KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
156 * The Windows team has advised that we check for events
157 * before checking for messages. This is the way they do it
158 * in Windows when running as a guest in Hyper-V
161 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
162 event = (hv_vmbus_synic_event_flags*)
163 page_addr + HV_VMBUS_MESSAGE_SINT;
165 /* Since we are a child, we only need to check bit 0 */
166 if (synch_test_and_clear_bit(0, &event->flags32[0])) {
167 swi_sched(event_swintr, 0);
170 /* Check if there are actual msgs to be processed */
171 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
172 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
174 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
175 swi_sched(msg_swintr, 0);
178 return FILTER_HANDLED;
188 struct hv_device *child_dev_ctx = device_get_ivars(child);
192 case HV_VMBUS_IVAR_TYPE:
193 *result = (uintptr_t) &child_dev_ctx->class_id;
195 case HV_VMBUS_IVAR_INSTANCE:
196 *result = (uintptr_t) &child_dev_ctx->device_id;
198 case HV_VMBUS_IVAR_DEVCTX:
199 *result = (uintptr_t) child_dev_ctx;
201 case HV_VMBUS_IVAR_NODE:
202 *result = (uintptr_t) child_dev_ctx->device;
217 case HV_VMBUS_IVAR_TYPE:
218 case HV_VMBUS_IVAR_INSTANCE:
219 case HV_VMBUS_IVAR_DEVCTX:
220 case HV_VMBUS_IVAR_NODE:
228 hv_vmbus_child_device_create(
231 hv_vmbus_channel* channel)
233 hv_device* child_dev;
236 * Allocate the new child device
238 child_dev = malloc(sizeof(hv_device), M_DEVBUF,
240 KASSERT(child_dev != NULL,
241 ("Error VMBUS: malloc failed to allocate hv_device!"));
243 if (child_dev == NULL)
246 child_dev->channel = channel;
247 memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
248 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
254 print_dev_guid(struct hv_device *dev)
257 unsigned char guid_name[100];
258 for (i = 0; i < 32; i += 2)
259 sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
261 printf("VMBUS: Class ID: %s\n", guid_name);
265 hv_vmbus_child_device_register(struct hv_device *child_dev)
270 print_dev_guid(child_dev);
273 child = device_add_child(vmbus_devp, NULL, -1);
274 child_dev->device = child;
275 device_set_ivars(child, child_dev);
278 ret = device_probe_and_attach(child);
285 hv_vmbus_child_device_unregister(struct hv_device *child_dev)
289 * XXXKYS: Ensure that this is the opposite of
293 ret = device_delete_child(vmbus_devp, child_dev->device);
299 vmbus_identify(driver_t *driver, device_t parent)
301 if (!hv_vmbus_query_hypervisor_presence())
304 vm_guest = VM_GUEST_HV;
306 BUS_ADD_CHILD(parent, 0, "vmbus", 0);
310 vmbus_probe(device_t dev) {
312 device_printf(dev, "VMBUS: probe\n");
314 device_set_desc(dev, "Vmbus Devices");
316 return (BUS_PROBE_NOWILDCARD);
320 * @brief Main vmbus driver initialization routine.
323 * - initialize the vmbus driver context
324 * - setup various driver entry points
325 * - invoke the vmbus hv main init routine
326 * - get the irq resource
327 * - invoke the vmbus to add the vmbus root device
328 * - setup the vmbus root device
329 * - retrieve the channel offers
334 struct ioapic_intsrc {
335 struct intsrc io_intsrc;
341 u_int io_edgetrigger:1;
347 unsigned int vector = 0;
349 struct ioapic_intsrc *intpin;
356 ret = hv_vmbus_init();
360 printf("Error VMBUS: Hypervisor Initialization Failed!\n");
364 ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
365 NULL, SWI_CLOCK, 0, &msg_swintr);
371 * Message SW interrupt handler checks a per-CPU page and
372 * thus the thread needs to be bound to CPU-0 - which is where
373 * all interrupts are processed.
375 ret = intr_event_bind(hv_msg_intr_event, 0);
380 ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
381 NULL, SWI_CLOCK, 0, &event_swintr);
386 intr_res = bus_alloc_resource(vmbus_devp,
387 SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
389 if (intr_res == NULL) {
390 ret = ENOMEM; /* XXXKYS: Need a better errno */
395 * Setup interrupt filter handler
397 ret = bus_setup_intr(vmbus_devp, intr_res,
398 INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
399 NULL, &vmbus_cookiep);
404 ret = bus_bind_intr(vmbus_devp, intr_res, 0);
408 isrc = intr_lookup_source(vmbus_irq);
409 if ((isrc == NULL) || (isrc->is_event == NULL)) {
414 /* vector = isrc->is_event->ie_vector; */
415 intpin = (struct ioapic_intsrc *)isrc;
416 vector = intpin->io_vector;
419 printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
422 * Notify the hypervisor of our irq.
424 setup_args.vector = vector;
425 for(i = 0; i < 2; i++) {
426 setup_args.page_buffers[i] =
427 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
428 if (setup_args.page_buffers[i] == NULL) {
429 KASSERT(setup_args.page_buffers[i] != NULL,
430 ("Error VMBUS: malloc failed!"));
432 free(setup_args.page_buffers[0], M_DEVBUF);
437 /* only CPU #0 supported at this time */
438 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
441 * Connect to VMBus in the root partition
443 ret = hv_vmbus_connect();
448 hv_vmbus_request_channel_offers();
454 * remove swi, bus and intr resource
456 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
459 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
462 swi_remove(event_swintr);
465 swi_remove(msg_swintr);
474 vmbus_attach(device_t dev)
477 device_printf(dev, "VMBUS: attach dev: %p\n", dev);
481 * If the system has already booted and thread
482 * scheduling is possible indicated by the global
483 * cold set to zero, we just call the driver
484 * initialization directly.
495 if (vm_guest != VM_GUEST_HV)
499 * If the system has already booted and thread
500 * scheduling is possible, as indicated by the
501 * global cold set to zero, we just call the driver
502 * initialization directly.
513 hv_vmbus_release_unattached_channels();
514 hv_vmbus_disconnect();
516 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
518 for(i = 0; i < 2; i++) {
519 if (setup_args.page_buffers[i] != 0)
520 free(setup_args.page_buffers[i], M_DEVBUF);
525 /* remove swi, bus and intr resource */
526 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
528 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
530 swi_remove(msg_swintr);
531 swi_remove(event_swintr);
543 vmbus_detach(device_t dev)
553 printf("VMBUS: load\n");
557 vmbus_mod_unload(void)
560 printf("VMBUS: unload\n");
564 vmbus_modevent(module_t mod, int what, void *arg)
579 static device_method_t vmbus_methods[] = {
580 /** Device interface */
581 DEVMETHOD(device_identify, vmbus_identify),
582 DEVMETHOD(device_probe, vmbus_probe),
583 DEVMETHOD(device_attach, vmbus_attach),
584 DEVMETHOD(device_detach, vmbus_detach),
585 DEVMETHOD(device_shutdown, bus_generic_shutdown),
586 DEVMETHOD(device_suspend, bus_generic_suspend),
587 DEVMETHOD(device_resume, bus_generic_resume),
590 DEVMETHOD(bus_add_child, bus_generic_add_child),
591 DEVMETHOD(bus_print_child, bus_generic_print_child),
592 DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
593 DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
/*
 * Driver name. The identify routine adds its child device under the same
 * "vmbus" string.
 */
597 static char driver_name[] = "vmbus";
/*
 * Driver description: the name, the vmbus_methods dispatch table, and a
 * third initializer of 0 (the softc size in driver(9) terms, i.e. no
 * per-device softc is requested).
 */
598 static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
/* Device class handle referenced by DRIVER_MODULE() below; not static. */
601 devclass_t vmbus_devclass;
/*
 * Register the driver as a child of the "nexus" bus, with vmbus_modevent
 * as the module event handler and 0 as its argument, and declare module
 * version 1.
 */
603 DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
604 MODULE_VERSION(vmbus,1);
/*
 * Run vmbus_init with a NULL argument at SI_SUB_VFS / SI_ORDER_MIDDLE
 * during system initialisation.
 */
606 /* TODO: We want to be earlier than SI_SUB_VFS */
607 SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);