2 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * VM Bus Driver Implementation
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
43 #include <sys/sysctl.h>
44 #include <sys/systm.h>
45 #include <sys/taskqueue.h>
47 #include <machine/bus.h>
48 #include <machine/intr_machdep.h>
49 #include <machine/md_var.h>
50 #include <machine/resource.h>
51 #include <x86/include/apicvar.h>
53 #include <contrib/dev/acpica/include/acpi.h>
54 #include <dev/acpica/acpivar.h>
56 #include <dev/hyperv/include/hyperv.h>
57 #include <dev/hyperv/include/vmbus_xact.h>
58 #include <dev/hyperv/vmbus/hyperv_reg.h>
59 #include <dev/hyperv/vmbus/hyperv_var.h>
60 #include <dev/hyperv/vmbus/vmbus_reg.h>
61 #include <dev/hyperv/vmbus/vmbus_var.h>
62 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
68 #define VMBUS_GPADL_START 0xe1e10
/*
 * Per-request state of a post-message hypercall.  mh_xact is the
 * transaction handed to the vmbus_xact_*() calls below; mh_inprm_save
 * is the backup copy of the hypercall input that
 * vmbus_msghc_exec_noresult() copies back before each retry.
 */
71 struct vmbus_xact *mh_xact;
72 struct hypercall_postmsg_in mh_inprm_save;
75 static void vmbus_identify(driver_t *, device_t);
76 static int vmbus_probe(device_t);
77 static int vmbus_attach(device_t);
78 static int vmbus_detach(device_t);
79 static int vmbus_read_ivar(device_t, device_t, int,
81 static int vmbus_child_pnpinfo_str(device_t, device_t,
83 static struct resource *vmbus_alloc_resource(device_t dev,
84 device_t child, int type, int *rid,
85 rman_res_t start, rman_res_t end,
86 rman_res_t count, u_int flags);
87 static int vmbus_alloc_msi(device_t bus, device_t dev,
88 int count, int maxcount, int *irqs);
89 static int vmbus_release_msi(device_t bus, device_t dev,
90 int count, int *irqs);
91 static int vmbus_alloc_msix(device_t bus, device_t dev,
93 static int vmbus_release_msix(device_t bus, device_t dev,
95 static int vmbus_map_msi(device_t bus, device_t dev,
96 int irq, uint64_t *addr, uint32_t *data);
97 static uint32_t vmbus_get_version_method(device_t, device_t);
98 static int vmbus_probe_guid_method(device_t, device_t,
99 const struct hyperv_guid *);
100 static uint32_t vmbus_get_vcpu_id_method(device_t bus,
101 device_t dev, int cpu);
102 static struct taskqueue *vmbus_get_eventtq_method(device_t, device_t,
104 #ifdef EARLY_AP_STARTUP
105 static void vmbus_intrhook(void *);
108 static int vmbus_init(struct vmbus_softc *);
109 static int vmbus_connect(struct vmbus_softc *, uint32_t);
110 static int vmbus_req_channels(struct vmbus_softc *sc);
111 static void vmbus_disconnect(struct vmbus_softc *);
112 static int vmbus_scan(struct vmbus_softc *);
113 static void vmbus_scan_teardown(struct vmbus_softc *);
114 static void vmbus_scan_done(struct vmbus_softc *,
115 const struct vmbus_message *);
116 static void vmbus_chanmsg_handle(struct vmbus_softc *,
117 const struct vmbus_message *);
118 static void vmbus_msg_task(void *, int);
119 static void vmbus_synic_setup(void *);
120 static void vmbus_synic_teardown(void *);
121 static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
122 static int vmbus_dma_alloc(struct vmbus_softc *);
123 static void vmbus_dma_free(struct vmbus_softc *);
124 static int vmbus_intr_setup(struct vmbus_softc *);
125 static void vmbus_intr_teardown(struct vmbus_softc *);
126 static int vmbus_doattach(struct vmbus_softc *);
127 static void vmbus_event_proc_dummy(struct vmbus_softc *,
130 static struct vmbus_softc *vmbus_sc;
132 SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
/*
 * Read-only tunable under the hw.vmbus sysctl node, default 1.  When
 * non-zero, vmbus_intr_setup() starts each per-CPU event taskqueue
 * thread with a cpuset that contains only that CPU.
 */
135 static int vmbus_pin_evttask = 1;
136 SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
137 &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");
139 extern inthand_t IDTVEC(vmbus_isr), IDTVEC(vmbus_isr_pti);
/*
 * Protocol versions proposed to the host.  vmbus_init() walks this
 * array in index order and passes each entry to vmbus_connect().
 */
141 static const uint32_t vmbus_version[] = {
142 VMBUS_VERSION_WIN8_1,
/*
 * Handler table indexed by channel message type.
 * vmbus_chanmsg_handle() looks up the entry for an incoming message
 * here; entries may be NULL, which that function checks for.
 */
148 static const vmbus_chanmsg_proc_t
149 vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
150 VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
151 VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
/*
 * Device method table of the vmbus driver.  Most bus methods use the
 * bus_generic_*() implementations; resource allocation, ivar reads,
 * pnpinfo, the pcib MSI/MSI-X methods and the vmbus interface are
 * served by the vmbus_*() functions defined in this file.
 */
154 static device_method_t vmbus_methods[] = {
155 /* Device interface */
156 DEVMETHOD(device_identify, vmbus_identify),
157 DEVMETHOD(device_probe, vmbus_probe),
158 DEVMETHOD(device_attach, vmbus_attach),
159 DEVMETHOD(device_detach, vmbus_detach),
160 DEVMETHOD(device_shutdown, bus_generic_shutdown),
161 DEVMETHOD(device_suspend, bus_generic_suspend),
162 DEVMETHOD(device_resume, bus_generic_resume),
165 DEVMETHOD(bus_add_child, bus_generic_add_child),
166 DEVMETHOD(bus_print_child, bus_generic_print_child),
167 DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
168 DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str),
169 DEVMETHOD(bus_alloc_resource, vmbus_alloc_resource),
170 DEVMETHOD(bus_release_resource, bus_generic_release_resource),
171 DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
172 DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
173 DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
174 DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
175 #if __FreeBSD_version >= 1100000
176 DEVMETHOD(bus_get_cpus, bus_generic_get_cpus),
180 DEVMETHOD(pcib_alloc_msi, vmbus_alloc_msi),
181 DEVMETHOD(pcib_release_msi, vmbus_release_msi),
182 DEVMETHOD(pcib_alloc_msix, vmbus_alloc_msix),
183 DEVMETHOD(pcib_release_msix, vmbus_release_msix),
184 DEVMETHOD(pcib_map_msi, vmbus_map_msi),
186 /* Vmbus interface */
187 DEVMETHOD(vmbus_get_version, vmbus_get_version_method),
188 DEVMETHOD(vmbus_probe_guid, vmbus_probe_guid_method),
189 DEVMETHOD(vmbus_get_vcpu_id, vmbus_get_vcpu_id_method),
190 DEVMETHOD(vmbus_get_event_taskq, vmbus_get_eventtq_method),
/*
 * Driver description and module glue.  The softc size is
 * sizeof(struct vmbus_softc).  The driver is registered on two parent
 * buses, pcib and acpi_syscontainer, and declares module dependencies
 * on acpi and pci.
 */
195 static driver_t vmbus_driver = {
198 sizeof(struct vmbus_softc)
201 static devclass_t vmbus_devclass;
203 DRIVER_MODULE(vmbus, pcib, vmbus_driver, vmbus_devclass, NULL, NULL);
204 DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, vmbus_devclass,
207 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
208 MODULE_DEPEND(vmbus, pci, 1, 1, 1);
209 MODULE_VERSION(vmbus, 1);
211 static __inline struct vmbus_softc *
212 vmbus_get_softc(void)
/*
 * Re-initialize the hypercall input of mh for a channel message that
 * carries dsize bytes of data.
 *
 * Panics when dsize exceeds HYPERCALL_POSTMSGIN_DSIZE_MAX.  Otherwise
 * the request buffer of mh->mh_xact is zeroed over
 * HYPERCALL_POSTMSGIN_SIZE bytes and its connection id, message type
 * and data size fields are filled in.  The message data itself is left
 * zeroed for the caller to fill through vmbus_msghc_dataptr().
 */
218 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
220 struct hypercall_postmsg_in *inprm;
222 if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
223 panic("invalid data size %zu", dsize);
225 inprm = vmbus_xact_req_data(mh->mh_xact);
226 memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
227 inprm->hc_connid = VMBUS_CONNID_MESSAGE;
228 inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
229 inprm->hc_dsize = dsize;
/*
 * Obtain a message hypercall context able to carry dsize bytes of
 * channel message data.
 *
 * Panics when dsize exceeds HYPERCALL_POSTMSGIN_DSIZE_MAX.  A
 * transaction is taken from sc->vmbus_xc with a request size of dsize
 * plus the offset of hc_data within struct hypercall_postmsg_in; the
 * context lives in the private area of that transaction and is
 * initialized with vmbus_msghc_reset().
 *
 * NOTE(review): callers test the result before use (vmbus_disconnect()
 * prints a message when it cannot get one) -- presumably NULL is
 * returned when no transaction is available; confirm.
 */
233 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
235 struct vmbus_msghc *mh;
236 struct vmbus_xact *xact;
238 if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
239 panic("invalid data size %zu", dsize);
241 xact = vmbus_xact_get(sc->vmbus_xc,
242 dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
246 mh = vmbus_xact_priv(xact, sizeof(*mh));
249 vmbus_msghc_reset(mh, dsize);
/*
 * Give back a context obtained from vmbus_msghc_get() by releasing its
 * transaction with vmbus_xact_put().  sc is unused.
 */
254 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
257 vmbus_xact_put(mh->mh_xact);
/*
 * Return the hc_data area inside the request buffer of mh.  Callers in
 * this file build their channel message (connect, disconnect, channel
 * request) in place through this pointer.
 */
261 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
263 struct hypercall_postmsg_in *inprm;
265 inprm = vmbus_xact_req_data(mh->mh_xact);
266 return (inprm->hc_data);
/*
 * Post the message held in mh to the hypervisor without waiting for
 * any response message.
 *
 * The hypercall input is first copied into mh->mh_inprm_save.  The
 * hypercall is then attempted up to HC_RETRY_MAX times, passing the
 * physical address of the request buffer.  After an attempt that does
 * not report HYPERCALL_STATUS_SUCCESS the thread sleeps in pause_sbt()
 * and the saved input is copied back before the next attempt.
 *
 * NOTE(review): the sleep interval starts at SBT_1MS and is adjusted
 * while it is below two seconds; confirm the growth step, the value
 * returned on success and the error returned once all retries fail.
 * Because this sleeps, it must not be called where sleeping is not
 * allowed.
 */
270 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
272 sbintime_t time = SBT_1MS;
273 struct hypercall_postmsg_in *inprm;
274 bus_addr_t inprm_paddr;
277 inprm = vmbus_xact_req_data(mh->mh_xact);
278 inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
281 * Save the input parameter so that we could restore the input
282 * parameter if the Hypercall failed.
285 * Is this really necessary?! i.e. Will the Hypercall ever
286 * overwrite the input parameter?
288 memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
291 * In order to cope with transient failures, e.g. insufficient
292 * resources on host side, we retry the post message Hypercall
293 * several times. 20 retries seem sufficient.
295 #define HC_RETRY_MAX 20
297 for (i = 0; i < HC_RETRY_MAX; ++i) {
300 status = hypercall_post_message(inprm_paddr);
301 if (status == HYPERCALL_STATUS_SUCCESS)
304 pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
305 if (time < SBT_1S * 2)
308 /* Restore input parameter and try again */
309 memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
/*
 * Post the message in mh for which a response is expected: the
 * transaction is activated first, then the message is posted with
 * vmbus_msghc_exec_noresult().  sc is unused.
 *
 * NOTE(review): vmbus_xact_deactivate() is presumably reached only
 * when posting failed, so that a successful request stays active for
 * vmbus_msghc_wait_result() -- confirm.
 */
318 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
322 vmbus_xact_activate(mh->mh_xact);
323 error = vmbus_msghc_exec_noresult(mh);
325 vmbus_xact_deactivate(mh->mh_xact);
/*
 * Cancel a request started with vmbus_msghc_exec() by deactivating its
 * transaction.  sc is unused.
 */
330 vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
333 vmbus_xact_deactivate(mh->mh_xact);
/*
 * Return the response message that vmbus_xact_wait() yields for the
 * transaction of mh.  The response length reported through resp_len is
 * not part of the interface of this function.  sc is unused.
 */
336 const struct vmbus_message *
337 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
341 return (vmbus_xact_wait(mh->mh_xact, &resp_len));
/*
 * Same as vmbus_msghc_wait_result() but obtains the response through
 * vmbus_xact_poll() instead of vmbus_xact_wait().  sc is unused.
 */
344 const struct vmbus_message *
345 vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
349 return (vmbus_xact_poll(mh->mh_xact, &resp_len));
/*
 * Pass a received message to the transaction context sc->vmbus_xc via
 * vmbus_xact_ctx_wakeup(), with sizeof(*msg) as the response length.
 */
353 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
356 vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
/*
 * Hand out a GPADL id by atomically fetching and incrementing
 * sc->vmbus_gpadl, which vmbus_doattach() seeds with
 * VMBUS_GPADL_START.
 *
 * NOTE(review): confirm how counter wrap-around and any reserved id
 * value are handled.
 */
360 vmbus_gpadl_alloc(struct vmbus_softc *sc)
365 gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
/*
 * Propose one protocol version to the host.
 *
 * A CONNECT channel message is built with the proposed version and the
 * physical addresses of the event flags page and of the mnf1 and mnf2
 * pages, posted with vmbus_msghc_exec(), and the response is waited
 * for.
 *
 * Returns 0 when the chm_done field of the response is set, otherwise
 * EOPNOTSUPP.
 *
 * NOTE(review): confirm the error codes returned when no hypercall
 * context is available or when posting the message fails.
 */
372 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
374 struct vmbus_chanmsg_connect *req;
375 const struct vmbus_message *msg;
376 struct vmbus_msghc *mh;
379 mh = vmbus_msghc_get(sc, sizeof(*req));
383 req = vmbus_msghc_dataptr(mh);
384 req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
385 req->chm_ver = version;
386 req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
387 req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
388 req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
390 error = vmbus_msghc_exec(sc, mh);
392 vmbus_msghc_put(sc, mh);
396 msg = vmbus_msghc_wait_result(sc, mh);
397 done = ((const struct vmbus_chanmsg_connect_resp *)
398 msg->msg_data)->chm_done;
400 vmbus_msghc_put(sc, mh);
402 return (done ? 0 : EOPNOTSUPP);
/*
 * Negotiate the protocol version with the host by offering the entries
 * of vmbus_version[] in index order through vmbus_connect().  The
 * accepted version is stored in sc->vmbus_version and printed as
 * major.minor.
 *
 * NOTE(review): presumably the first accepted version ends the loop
 * with a zero return and an error is returned when none is accepted --
 * confirm.
 */
406 vmbus_init(struct vmbus_softc *sc)
410 for (i = 0; i < nitems(vmbus_version); ++i) {
413 error = vmbus_connect(sc, vmbus_version[i]);
415 sc->vmbus_version = vmbus_version[i];
416 device_printf(sc->vmbus_dev, "version %u.%u\n",
417 VMBUS_VERSION_MAJOR(sc->vmbus_version),
418 VMBUS_VERSION_MINOR(sc->vmbus_version));
/*
 * Tell the host that this guest disconnects from vmbus by posting a
 * DISCONNECT channel message.  No response is waited for.  Failure to
 * obtain a hypercall context or to post the message is only reported
 * with device_printf().
 */
426 vmbus_disconnect(struct vmbus_softc *sc)
428 struct vmbus_chanmsg_disconnect *req;
429 struct vmbus_msghc *mh;
432 mh = vmbus_msghc_get(sc, sizeof(*req));
434 device_printf(sc->vmbus_dev,
435 "can not get msg hypercall for disconnect\n");
439 req = vmbus_msghc_dataptr(mh);
440 req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
442 error = vmbus_msghc_exec_noresult(mh);
443 vmbus_msghc_put(sc, mh);
446 device_printf(sc->vmbus_dev,
447 "disconnect msg hypercall failed\n");
/*
 * Ask the host for channel offers by posting a CHREQUEST channel
 * message.  The message is posted without waiting for a response; the
 * end of the offers is signalled separately through the CHOFFER_DONE
 * handler, vmbus_scan_done().
 *
 * NOTE(review): confirm the error returned when no hypercall context
 * is available.
 */
452 vmbus_req_channels(struct vmbus_softc *sc)
454 struct vmbus_chanmsg_chrequest *req;
455 struct vmbus_msghc *mh;
458 mh = vmbus_msghc_get(sc, sizeof(*req));
462 req = vmbus_msghc_dataptr(mh);
463 req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
465 error = vmbus_msghc_exec_noresult(mh);
466 vmbus_msghc_put(sc, mh);
/*
 * Task queued on sc->vmbus_devtq by vmbus_scan_done().  Marks the
 * initial scan as finished and wakes the thread sleeping on
 * &sc->vmbus_scandone in vmbus_scan().
 *
 * NOTE(review): vmbus_scan() sleeps with Giant as the interlock;
 * confirm that the flag is set under the same lock here.
 */
472 vmbus_scan_done_task(void *xsc, int pending __unused)
474 struct vmbus_softc *sc = xsc;
477 sc->vmbus_scandone = true;
479 wakeup(&sc->vmbus_scandone);
/*
 * Handler of the CHOFFER_DONE channel message.  The message content is
 * unused; the function only queues sc->vmbus_scandone_task on
 * sc->vmbus_devtq.
 */
483 vmbus_scan_done(struct vmbus_softc *sc,
484 const struct vmbus_message *msg __unused)
487 taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
/*
 * Perform the initial device scan.
 *
 * Steps, in order: probe and attach the non-channel children with the
 * generic bus helpers; create the single-threaded taskqueues
 * sc->vmbus_devtq and sc->vmbus_subchtq and initialize the scan-done
 * task; request channel offers with vmbus_req_channels(); then sleep
 * on &sc->vmbus_scandone, with Giant as the interlock, until
 * vmbus_scan_done_task() sets sc->vmbus_scandone.
 *
 * A failing channel request is reported with device_printf().
 * NOTE(review): presumably that error is also returned to the caller
 * without waiting -- confirm.
 */
491 vmbus_scan(struct vmbus_softc *sc)
496 * Identify, probe and attach for non-channel devices.
498 bus_generic_probe(sc->vmbus_dev);
499 bus_generic_attach(sc->vmbus_dev);
502 * This taskqueue serializes vmbus devices' attach and detach
503 * for channel offer and rescind messages.
505 sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
506 taskqueue_thread_enqueue, &sc->vmbus_devtq);
507 taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
508 TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
511 * This taskqueue handles sub-channel detach, so that vmbus
512 * device's detach running in vmbus_devtq can drain its sub-
515 sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
516 taskqueue_thread_enqueue, &sc->vmbus_subchtq);
517 taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
520 * Start vmbus scanning.
522 error = vmbus_req_channels(sc);
524 device_printf(sc->vmbus_dev, "channel request failed: %d\n",
530 * Wait for all vmbus devices from the initial channel offers to be
534 while (!sc->vmbus_scandone)
535 mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);
538 device_printf(sc->vmbus_dev, "device scan, probe and attach "
/*
 * Free the two taskqueues created by vmbus_scan(), sc->vmbus_devtq and
 * sc->vmbus_subchtq, and reset the pointers to NULL.  Each queue is
 * skipped when its pointer is already NULL, so the function can be
 * called when vmbus_scan() never ran or ran only partially.
 */
545 vmbus_scan_teardown(struct vmbus_softc *sc)
549 if (sc->vmbus_devtq != NULL) {
551 taskqueue_free(sc->vmbus_devtq);
553 sc->vmbus_devtq = NULL;
555 if (sc->vmbus_subchtq != NULL) {
557 taskqueue_free(sc->vmbus_subchtq);
559 sc->vmbus_subchtq = NULL;
/*
 * Dispatch one channel message.
 *
 * The message type is read from the channel message header at the
 * start of msg->msg_data.  Types at or above VMBUS_CHANMSG_TYPE_MAX
 * are reported as unknown.  Otherwise the handler registered in
 * vmbus_chanmsg_handlers[], when not NULL, is looked up for the
 * message, and the message is also passed to vmbus_chan_msgproc() for
 * channel specific processing.
 *
 * NOTE(review): presumably an unknown type returns right after the
 * printf, before the table is indexed -- confirm.
 */
564 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
566 vmbus_chanmsg_proc_t msg_proc;
569 msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
570 if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
571 device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
576 msg_proc = vmbus_chanmsg_handlers[msg_type];
577 if (msg_proc != NULL)
580 /* Channel specific processing */
581 vmbus_chan_msgproc(sc, msg);
/*
 * Taskqueue handler that consumes the message found in the
 * VMBUS_SINT_MESSAGE slot of the message page of the current CPU.
 *
 * A slot of type HYPERV_MSGTYPE_CHANNEL is passed to
 * vmbus_chanmsg_handle().  The slot is then released by setting its
 * type to HYPERV_MSGTYPE_NONE and, when the slot has
 * VMBUS_MSGFLAG_PENDING set, zero is written to MSR_HV_EOM.
 *
 * The function indexes per-CPU data with curcpu; vmbus_intr_setup()
 * starts the message taskqueue thread with a single-CPU cpuset.
 *
 * NOTE(review): presumably this repeats until the slot reads
 * HYPERV_MSGTYPE_NONE -- confirm.
 */
585 vmbus_msg_task(void *xsc, int pending __unused)
587 struct vmbus_softc *sc = xsc;
588 volatile struct vmbus_message *msg;
590 msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
592 if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
595 } else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
596 /* Channel message */
597 vmbus_chanmsg_handle(sc,
598 __DEVOLATILE(const struct vmbus_message *, msg));
601 msg->msg_type = HYPERV_MSGTYPE_NONE;
603 * Make sure the write to msg_type (i.e. set to
604 * HYPERV_MSGTYPE_NONE) happens before we read the
605 * msg_flags and EOMing. Otherwise, the EOMing will
606 * not deliver any more messages since there is no
610 * mb() is used here, since atomic_thread_fence_seq_cst()
611 * will become compiler fence on UP kernel.
614 if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
616 * This will cause message queue rescan to possibly
617 * deliver another msg from the hypervisor
619 wrmsr(MSR_HV_EOM, 0);
/*
 * Body of the vmbus interrupt for the given cpu.  Always returns
 * FILTER_HANDLED.
 *
 * Three things are checked in this order:
 *  1. The VMBUS_SINT_TIMER message slot.  On
 *     HYPERV_MSGTYPE_TIMER_EXPIRED the slot is released,
 *     vmbus_et_intr() is called with the trap frame, and zero is
 *     written to MSR_HV_EOM when the slot has VMBUS_MSGFLAG_PENDING
 *     set.
 *  2. Events, through the sc->vmbus_event_proc callback.
 *  3. The VMBUS_SINT_MESSAGE slot.  When it is not
 *     HYPERV_MSGTYPE_NONE, the message task of this cpu is queued on
 *     the message taskqueue of this cpu; the message is not processed
 *     here.
 */
625 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
627 volatile struct vmbus_message *msg;
628 struct vmbus_message *msg_base;
630 msg_base = VMBUS_PCPU_GET(sc, message, cpu);
635 * TODO: move this to independent IDT vector.
637 msg = msg_base + VMBUS_SINT_TIMER;
638 if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
639 msg->msg_type = HYPERV_MSGTYPE_NONE;
641 vmbus_et_intr(frame);
644 * Make sure the write to msg_type (i.e. set to
645 * HYPERV_MSGTYPE_NONE) happens before we read the
646 * msg_flags and EOMing. Otherwise, the EOMing will
647 * not deliver any more messages since there is no
651 * mb() is used here, since atomic_thread_fence_seq_cst()
652 * will become compiler fence on UP kernel.
655 if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
657 * This will cause message queue rescan to possibly
658 * deliver another msg from the hypervisor
660 wrmsr(MSR_HV_EOM, 0);
665 * Check events. Hot path for network and storage I/O data; high rate.
668 * As recommended by the Windows guest fellows, we check events before
671 sc->vmbus_event_proc(sc, cpu);
674 * Check messages. Mainly management stuffs; ultra low rate.
676 msg = msg_base + VMBUS_SINT_MESSAGE;
677 if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
678 taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
679 VMBUS_PCPU_PTR(sc, message_task, cpu));
/*
 * Entry point of the vmbus interrupt.  Looks up the softc with
 * vmbus_get_softc(), increments the per-CPU interrupt counter and
 * hands the trap frame to vmbus_handle_intr1().
 *
 * NOTE(review): cpu is presumably the current CPU, sampled with
 * preemption disabled -- confirm.
 */
686 vmbus_handle_intr(struct trapframe *trap_frame)
688 struct vmbus_softc *sc = vmbus_get_softc();
692 * Disable preemption.
697 * Do a little interrupt counting.
699 (*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
701 vmbus_handle_intr1(sc, trap_frame, cpu);
/*
 * Program the synthetic interrupt controller MSRs.  vmbus_doattach()
 * invokes this through smp_rendezvous() with the softc as argument.
 *
 * Steps, in order:
 *  - record the virtual processor id of the cpu: read from
 *    MSR_HV_VP_INDEX when hyperv_features has CPUID_HV_MSR_VP_INDEX,
 *    otherwise 0;
 *  - enable MSR_HV_SIMP and MSR_HV_SIEFP, pointing them at the page
 *    frame of the per-CPU message and event flags DMA pages;
 *  - build the configuration of the message SINT and of the timer
 *    SINT from sc->vmbus_idtvec with MSR_HV_SINT_AUTOEOI;
 *  - enable the controller through MSR_HV_SCONTROL.
 *
 * Every MSR update keeps the bits selected by the matching
 * *_RSVD_MASK from the value read beforehand.
 *
 * NOTE(review): cpu is presumably the CPU executing the function --
 * confirm.
 */
710 vmbus_synic_setup(void *xsc)
712 struct vmbus_softc *sc = xsc;
717 if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
718 /* Save virtual processor id. */
719 VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
721 /* Set virtual processor id to 0 for compatibility. */
722 VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
726 * Setup the SynIC message.
728 orig = rdmsr(MSR_HV_SIMP);
729 val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
730 ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
731 MSR_HV_SIMP_PGSHIFT);
732 wrmsr(MSR_HV_SIMP, val);
735 * Setup the SynIC event flags.
737 orig = rdmsr(MSR_HV_SIEFP);
738 val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
739 ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
740 >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
741 wrmsr(MSR_HV_SIEFP, val);
745 * Configure and unmask SINT for message and event flags.
747 sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
749 val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
750 (orig & MSR_HV_SINT_RSVD_MASK);
754 * Configure and unmask SINT for timer.
756 sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
758 val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
759 (orig & MSR_HV_SINT_RSVD_MASK);
763 * All done; enable SynIC.
765 orig = rdmsr(MSR_HV_SCONTROL);
766 val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
767 wrmsr(MSR_HV_SCONTROL, val);
/*
 * Undo vmbus_synic_setup().  vmbus_detach() invokes this through
 * smp_rendezvous() with a NULL argument.
 *
 * Steps, in order: write MSR_HV_SCONTROL back with only its reserved
 * bits kept; set MSR_HV_SINT_MASKED on the message SINT and on the
 * timer SINT; write MSR_HV_SIMP and MSR_HV_SIEFP back with only their
 * reserved bits kept, which drops the enable bit and the page address.
 */
771 vmbus_synic_teardown(void *arg)
779 orig = rdmsr(MSR_HV_SCONTROL);
780 wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
783 * Mask message and event flags SINT.
785 sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
787 wrmsr(sint, orig | MSR_HV_SINT_MASKED);
792 sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
794 wrmsr(sint, orig | MSR_HV_SINT_MASKED);
797 * Teardown SynIC message.
799 orig = rdmsr(MSR_HV_SIMP);
800 wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
803 * Teardown SynIC event flags.
805 orig = rdmsr(MSR_HV_SIEFP);
806 wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
/*
 * Allocate the zeroed DMA memory shared with the hypervisor, all of it
 * from the DMA tag of the vmbus device and with BUS_DMA_WAITOK:
 *  - per cpu: one message buffer and one event flags buffer;
 *  - the event flags buffer whose physical address vmbus_connect()
 *    sends to the host; sc->vmbus_rx_evtflags points at its start and
 *    sc->vmbus_tx_evtflags at evtflags + PAGE_SIZE / 2;
 *  - the mnf1 and mnf2 buffers.
 *
 * NOTE(review): mnf1 is requested with PAGE_SIZE as the fourth
 * argument of hyperv_dmamem_alloc() while mnf2 passes
 * sizeof(struct vmbus_mnf) -- confirm the difference is intended.
 * NOTE(review): confirm the error returned when an allocation fails;
 * nothing is freed in this function, vmbus_dma_free() releases
 * whatever was allocated.
 */
810 vmbus_dma_alloc(struct vmbus_softc *sc)
812 bus_dma_tag_t parent_dtag;
816 parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
821 * Per-cpu messages and event flags.
823 ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
824 PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
825 BUS_DMA_WAITOK | BUS_DMA_ZERO);
828 VMBUS_PCPU_GET(sc, message, cpu) = ptr;
830 ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
831 PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
832 BUS_DMA_WAITOK | BUS_DMA_ZERO);
835 VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
838 evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
839 PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
840 if (evtflags == NULL)
842 sc->vmbus_rx_evtflags = (u_long *)evtflags;
843 sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
844 sc->vmbus_evtflags = evtflags;
846 sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
847 PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
848 if (sc->vmbus_mnf1 == NULL)
851 sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
852 sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
853 BUS_DMA_WAITOK | BUS_DMA_ZERO);
854 if (sc->vmbus_mnf2 == NULL)
/*
 * Release the DMA memory set up by vmbus_dma_alloc(): the event flags
 * buffer, mnf1, mnf2 and the per-CPU message and event flags buffers.
 * Each buffer is freed only when its pointer is not NULL and the
 * pointer is reset afterwards, so the function copes with a partially
 * completed allocation and with being called more than once.
 */
861 vmbus_dma_free(struct vmbus_softc *sc)
865 if (sc->vmbus_evtflags != NULL) {
866 hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
867 sc->vmbus_evtflags = NULL;
868 sc->vmbus_rx_evtflags = NULL;
869 sc->vmbus_tx_evtflags = NULL;
871 if (sc->vmbus_mnf1 != NULL) {
872 hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
873 sc->vmbus_mnf1 = NULL;
875 if (sc->vmbus_mnf2 != NULL) {
876 hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
877 sc->vmbus_mnf2 = NULL;
881 if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
883 VMBUS_PCPU_PTR(sc, message_dma, cpu),
884 VMBUS_PCPU_GET(sc, message, cpu));
885 VMBUS_PCPU_GET(sc, message, cpu) = NULL;
887 if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
889 VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
890 VMBUS_PCPU_GET(sc, event_flags, cpu));
891 VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
/*
 * Set up everything the vmbus interrupt handler depends on, then
 * allocate the interrupt vector.
 *
 * Per cpu: an interrupt counter named cpuN:hyperv is registered with
 * intrcnt_add(); an event taskqueue is created whose single thread is
 * started with a one-CPU cpuset only when vmbus_pin_evttask is set; a
 * message taskqueue is created whose single thread is always started
 * with a one-CPU cpuset; and the per-CPU message task is initialized.
 *
 * Afterwards an IDT vector is requested with lapic_ipi_alloc(), using
 * the vmbus_isr_pti entry when pti is set, and stored in
 * sc->vmbus_idtvec.  A negative vector is reported as a failure to
 * find a free IDT vector.
 *
 * NOTE(review): confirm the error code returned on that failure.
 */
897 vmbus_intr_setup(struct vmbus_softc *sc)
902 char buf[MAXCOMLEN + 1];
905 /* Allocate an interrupt counter for Hyper-V interrupt */
906 snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
907 intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
910 * Setup taskqueue to handle events. Task will be per-
913 VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
914 "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
915 VMBUS_PCPU_PTR(sc, event_tq, cpu));
916 if (vmbus_pin_evttask) {
917 CPU_SETOF(cpu, &cpu_mask);
918 taskqueue_start_threads_cpuset(
919 VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
920 &cpu_mask, "hvevent%d", cpu);
922 taskqueue_start_threads(
923 VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
928 * Setup tasks and taskqueues to handle messages.
930 VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
931 "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
932 VMBUS_PCPU_PTR(sc, message_tq, cpu));
933 CPU_SETOF(cpu, &cpu_mask);
934 taskqueue_start_threads_cpuset(
935 VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
937 TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
942 * All Hyper-V ISR required resources are setup, now let's find a
943 * free IDT vector for Hyper-V ISR and set it up.
945 sc->vmbus_idtvec = lapic_ipi_alloc(pti ? IDTVEC(vmbus_isr_pti) :
947 if (sc->vmbus_idtvec < 0) {
948 device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
952 device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
/*
 * Undo vmbus_intr_setup().  The IDT vector is released first, when one
 * was allocated, and sc->vmbus_idtvec is reset to -1.  The per-CPU
 * taskqueues are then freed: the event taskqueue directly, the message
 * taskqueue after draining its message task.  Pointers are reset to
 * NULL, and NULL pointers are skipped, so a partially completed setup
 * is handled.
 */
959 vmbus_intr_teardown(struct vmbus_softc *sc)
963 if (sc->vmbus_idtvec >= 0) {
964 lapic_ipi_free(sc->vmbus_idtvec);
965 sc->vmbus_idtvec = -1;
969 if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
970 taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
971 VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
973 if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
974 taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
975 VMBUS_PCPU_PTR(sc, message_task, cpu));
976 taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
977 VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
983 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
/*
 * bus_child_pnpinfo_str method.  For a child that belongs to a
 * channel, appends two fields to buf, bounded by buflen: classid= with
 * the type GUID of the channel and deviceid= with its instance GUID,
 * both formatted by hyperv_guid2str().
 *
 * The text is appended with strlcat(), so buf must already hold a
 * NUL-terminated string on entry.
 *
 * NOTE(review): a child without a channel (the event timer device) is
 * presumably left with buf untouched -- confirm.
 */
989 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
991 const struct vmbus_channel *chan;
992 char guidbuf[HYPERV_GUID_STRLEN];
994 chan = vmbus_get_channel(child);
996 /* Event timer device, which does not belong to a channel */
1000 strlcat(buf, "classid=", buflen);
1001 hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
1002 strlcat(buf, guidbuf, buflen);
1004 strlcat(buf, " deviceid=", buflen);
1005 hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
1006 strlcat(buf, guidbuf, buflen);
/*
 * Create the device of a channel.  A child with no fixed name or unit
 * is added under the vmbus device and stored in chan->ch_dev, the
 * channel is attached to it as its ivars, and the child is probed and
 * attached.  A failing device_add_child() is reported with
 * device_printf().
 *
 * NOTE(review): the result of device_probe_and_attach() is not used in
 * the visible code; confirm the return value of this function on the
 * success and failure paths.
 */
1012 vmbus_add_child(struct vmbus_channel *chan)
1014 struct vmbus_softc *sc = chan->ch_vmbus;
1015 device_t parent = sc->vmbus_dev;
1019 chan->ch_dev = device_add_child(parent, NULL, -1);
1020 if (chan->ch_dev == NULL) {
1022 device_printf(parent, "device_add_child for chan%u failed\n",
1026 device_set_ivars(chan->ch_dev, chan);
1027 device_probe_and_attach(chan->ch_dev);
/*
 * Delete the device of a channel, when it has one, from the vmbus
 * device and clear chan->ch_dev.
 *
 * NOTE(review): chan->ch_dev is cleared after device_delete_child()
 * regardless of what is visible of its result; confirm that the error
 * is returned to the caller.
 */
1034 vmbus_delete_child(struct vmbus_channel *chan)
1039 if (chan->ch_dev != NULL) {
1040 error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1042 chan->ch_dev = NULL;
/*
 * Sysctl handler of the version node registered by vmbus_doattach().
 * arg1 is the softc.  Formats sc->vmbus_version as major.minor into a
 * local buffer and lets sysctl_handle_string() serve it.
 */
1049 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1051 struct vmbus_softc *sc = arg1;
1054 snprintf(verstr, sizeof(verstr), "%u.%u",
1055 VMBUS_VERSION_MAJOR(sc->vmbus_version),
1056 VMBUS_VERSION_MINOR(sc->vmbus_version));
1057 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1061 * We need the function to make sure the MMIO resource is allocated from the
1062 * ranges found in _CRS.
1064 * For the release function, we can use bus_generic_release_resource().
/*
 * bus_alloc_resource method.  SYS_RES_MEMORY requests are served from
 * sc->vmbus_mmio_res through pcib_host_res_alloc(); the visible code
 * passes requests to the parent of dev with BUS_ALLOC_RESOURCE().
 *
 * NOTE(review): presumably the parent is used only for types other
 * than SYS_RES_MEMORY, or when the MMIO path is compiled out --
 * confirm.
 */
1066 static struct resource *
1067 vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1068 rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1070 device_t parent = device_get_parent(dev);
1071 struct resource *res;
1074 if (type == SYS_RES_MEMORY) {
1075 struct vmbus_softc *sc = device_get_softc(dev);
1077 res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1078 rid, start, end, count, flags);
1082 res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
/* pcib_alloc_msi method: forwards the request to the parent of bus. */
1090 vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1093 return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
/* pcib_release_msi method: forwards the request to the parent of bus. */
1098 vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1101 return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs));
/* pcib_alloc_msix method: forwards the request to the parent of bus. */
1105 vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1108 return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
/* pcib_release_msix method: forwards the request to the parent of bus. */
1112 vmbus_release_msix(device_t bus, device_t dev, int irq)
1115 return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
/* pcib_map_msi method: forwards the request to the parent of bus. */
1119 vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1123 return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
/*
 * vmbus_get_version method: returns the protocol version stored in the
 * softc of bus by vmbus_init().  dev is not used.
 */
1127 vmbus_get_version_method(device_t bus, device_t dev)
1129 struct vmbus_softc *sc = device_get_softc(bus);
1131 return sc->vmbus_version;
/*
 * vmbus_probe_guid method: compares the type GUID of the channel of
 * dev with guid, byte for byte.
 *
 * NOTE(review): presumably 0 is returned on a match and an error
 * otherwise -- confirm.  The channel pointer is dereferenced without a
 * NULL check, while vmbus_child_pnpinfo_str() notes that the event
 * timer device has no channel; confirm this method is never invoked
 * for such a child.
 */
1135 vmbus_probe_guid_method(device_t bus, device_t dev,
1136 const struct hyperv_guid *guid)
1138 const struct vmbus_channel *chan = vmbus_get_channel(dev);
1140 if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
/*
 * vmbus_get_vcpu_id method: returns the virtual processor id that
 * vmbus_synic_setup() recorded for cpu.  dev is not used.  cpu is not
 * range checked here, unlike in vmbus_get_eventtq_method().
 */
1146 vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1148 const struct vmbus_softc *sc = device_get_softc(bus);
1150 return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
/*
 * vmbus_get_event_taskq method: returns the event taskqueue that
 * vmbus_intr_setup() created for cpu.  cpu must be in the range
 * 0 .. mp_ncpus - 1, which is asserted with KASSERT.
 */
1153 static struct taskqueue *
1154 vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
1156 const struct vmbus_softc *sc = device_get_softc(bus);
1158 KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
1159 return (VMBUS_PCPU_GET(sc, event_tq, cpu));
/*
 * Constants and context for walking _CRS.  VTPM_BASE_ADDR is the
 * address parse_crs() keeps decoded ranges away from, FOUR_GB splits
 * the ranges between the two passes, and parse_pass selects which side
 * of that split a walk records.  struct parse_context is the callback
 * argument of parse_crs(), which reads its vmbus_dev and pass members.
 */
1163 #define VTPM_BASE_ADDR 0xfed40000
1164 #define FOUR_GB (1ULL << 32)
1166 enum parse_pass { parse_64, parse_32 };
1168 struct parse_context {
1170 enum parse_pass pass;
/*
 * AcpiWalkResources() callback that records one _CRS entry as a
 * decoded memory range of the vmbus device.
 *
 * The start and end of the range are taken from the minimum and
 * maximum of a 32-bit or 64-bit address resource.  A range that
 * starts below VTPM_BASE_ADDR and reaches it is cut to end just below
 * that address.  The range is added to sc->vmbus_mmio_res only when it
 * belongs to the current pass: start below FOUR_GB for parse_32, at or
 * above FOUR_GB for parse_64.
 *
 * NOTE(review): confirm how other resource types and ranges below
 * 1MB are skipped, and the status returned to the walker.
 */
1174 parse_crs(ACPI_RESOURCE *res, void *ctx)
1176 const struct parse_context *pc = ctx;
1177 device_t vmbus_dev = pc->vmbus_dev;
1179 struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1182 switch (res->Type) {
1183 case ACPI_RESOURCE_TYPE_ADDRESS32:
1184 start = res->Data.Address32.Address.Minimum;
1185 end = res->Data.Address32.Address.Maximum;
1188 case ACPI_RESOURCE_TYPE_ADDRESS64:
1189 start = res->Data.Address64.Address.Minimum;
1190 end = res->Data.Address64.Address.Maximum;
1199 * We don't use <1MB addresses.
1204 /* Don't conflict with vTPM. */
1205 if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1206 end = VTPM_BASE_ADDR - 1;
1208 if ((pc->pass == parse_32 && start < FOUR_GB) ||
1209 (pc->pass == parse_64 && start >= FOUR_GB))
1210 pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
/*
 * Walk the _CRS object of the ACPI handle of dev and record matching
 * ranges for vmbus_dev.  A failing walk is only reported, and only
 * when bootverbose is set.
 *
 * NOTE(review): parse_crs() is presumably the walk callback, with pc
 * as its context -- confirm.
 */
1217 vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1219 struct parse_context pc;
1223 device_printf(dev, "walking _CRS, pass=%d\n", pass);
1225 pc.vmbus_dev = vmbus_dev;
1227 status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1230 if (bootverbose && ACPI_FAILURE(status))
1231 device_printf(dev, "_CRS: not found, pass=%d\n", pass);
/*
 * Run one _CRS pass for the vmbus device dev over the devices that may
 * carry its MMIO ranges.
 *
 * When the grandparent of dev is acpi0, _CRS is walked on every
 * attached child of acpi0 named vmbus_res and on acpi0 itself;
 * otherwise a message says that dev is not a grandchild of acpi.
 * _CRS is also walked on the parent of dev.
 *
 * The child array returned by device_get_children() is released with
 * free(..., M_TEMP).
 */
1235 vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1237 device_t acpi0, parent;
1239 parent = device_get_parent(dev);
1241 acpi0 = device_get_parent(parent);
1242 if (strcmp("acpi0", device_get_nameunit(acpi0)) == 0) {
1247 * Try to locate VMBUS resources and find _CRS on them.
1249 if (device_get_children(acpi0, &children, &count) == 0) {
1252 for (i = 0; i < count; ++i) {
1253 if (!device_is_attached(children[i]))
1256 if (strcmp("vmbus_res",
1257 device_get_name(children[i])) == 0)
1258 vmbus_get_crs(children[i], dev, pass);
1260 free(children, M_TEMP);
1264 * Try to find _CRS on acpi.
1266 vmbus_get_crs(acpi0, dev, pass);
1268 device_printf(dev, "not grandchild of acpi\n");
1272 * Try to find _CRS on parent.
1274 vmbus_get_crs(parent, dev, pass);
/*
 * Initialize sc->vmbus_mmio_res and fill it from _CRS in two passes.
 * The parse_64 pass runs before the parse_32 pass, so ranges at or
 * above 4GB are recorded ahead of ranges below 4GB.
 */
1278 vmbus_get_mmio_res(device_t dev)
1280 struct vmbus_softc *sc = device_get_softc(dev);
1282 * We walk the resources twice to make sure that: in the resource
1283 * list, the 32-bit resources appear behind the 64-bit resources.
1284 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
1285 * iterate through the list to find a range for a 64-bit BAR in
1286 * vmbus_alloc_resource(), we can make sure we try to use >4GB
1289 pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1291 vmbus_get_mmio_res_pass(dev, parse_64);
1292 vmbus_get_mmio_res_pass(dev, parse_32);
/*
 * Release the MMIO range bookkeeping set up by vmbus_get_mmio_res()
 * with pcib_host_res_free().
 */
1296 vmbus_free_mmio_res(device_t dev)
1298 struct vmbus_softc *sc = device_get_softc(dev);
1300 pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1302 #endif /* NEW_PCIB */
/*
 * device_identify method: adds a vmbus child to parent.  The visible
 * condition tests for a parent unit other than 0, a guest that is not
 * running on Hyper-V, or a missing CPUID_HV_MSR_SYNIC feature.
 *
 * NOTE(review): presumably the function returns without adding the
 * child when that condition holds -- confirm.
 */
1305 vmbus_identify(driver_t *driver, device_t parent)
1308 if (device_get_unit(parent) != 0 || vm_guest != VM_GUEST_HV ||
1309 (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1311 device_add_child(parent, "vmbus", -1);
/*
 * device_probe method: sets the device description and returns
 * BUS_PROBE_DEFAULT.  The visible condition tests for a unit other
 * than 0, a guest that is not running on Hyper-V, or a missing
 * CPUID_HV_MSR_SYNIC feature.
 *
 * NOTE(review): presumably the probe is rejected with an error when
 * that condition holds -- confirm the error code.
 */
1315 vmbus_probe(device_t dev)
1318 if (device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1319 (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1322 device_set_desc(dev, "Hyper-V Vmbus");
1323 return (BUS_PROBE_DEFAULT);
1327 * @brief Main vmbus driver initialization routine.
1330 * - initialize the vmbus driver context
1331 * - setup various driver entry points
1332 * - invoke the vmbus hv main init routine
1333 * - get the irq resource
1334 * - invoke the vmbus to add the vmbus root device
1335 * - setup the vmbus root device
1336 * - retrieve the channel offers
/*
 * Do the real attach work.  The function is guarded by
 * VMBUS_FLAG_ATTACHED, which it tests on entry and sets before doing
 * the work.
 *
 * Steps, in order:
 *  - collect the MMIO ranges with vmbus_get_mmio_res();
 *  - initialize the GPADL counter, the two channel locks and lists,
 *    and allocate the channel map;
 *  - create the transaction context for post-message hypercalls;
 *  - allocate DMA memory and set up interrupt handling;
 *  - program the SynIC through smp_rendezvous() and set
 *    VMBUS_FLAG_SYNIC;
 *  - negotiate the protocol version with vmbus_init() and select the
 *    event handler: vmbus_event_proc_compat for the WS2008 and WIN7
 *    versions, vmbus_event_proc otherwise;
 *  - scan for devices with vmbus_scan();
 *  - register the read-only version sysctl.
 *
 * The tail of the function is the failure path; the visible part tears
 * down the scan taskqueues and interrupt handling, destroys the
 * transaction context, frees the channel map and destroys both locks.
 *
 * NOTE(review): the failure path is entered after VMBUS_FLAG_ATTACHED
 * and possibly VMBUS_FLAG_SYNIC were set; confirm those flags and the
 * SynIC state are handled on that path.
 */
1339 vmbus_doattach(struct vmbus_softc *sc)
1341 struct sysctl_oid_list *child;
1342 struct sysctl_ctx_list *ctx;
1345 if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1349 vmbus_get_mmio_res(sc->vmbus_dev);
1352 sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1354 sc->vmbus_gpadl = VMBUS_GPADL_START;
1355 mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1356 TAILQ_INIT(&sc->vmbus_prichans);
1357 mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1358 TAILQ_INIT(&sc->vmbus_chans);
1359 sc->vmbus_chmap = malloc(
1360 sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1364 * Create context for "post message" Hypercalls
1366 sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1367 HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1368 sizeof(struct vmbus_msghc));
1369 if (sc->vmbus_xc == NULL) {
1375 * Allocate DMA stuffs.
1377 ret = vmbus_dma_alloc(sc);
1384 ret = vmbus_intr_setup(sc);
1392 device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1393 smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1394 sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1397 * Initialize vmbus, e.g. connect to Hypervisor.
1399 ret = vmbus_init(sc);
1403 if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1404 sc->vmbus_version == VMBUS_VERSION_WIN7)
1405 sc->vmbus_event_proc = vmbus_event_proc_compat;
1407 sc->vmbus_event_proc = vmbus_event_proc;
1409 ret = vmbus_scan(sc);
1413 ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1414 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1415 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1416 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1417 vmbus_sysctl_version, "A", "vmbus version");
1422 vmbus_scan_teardown(sc);
1423 vmbus_intr_teardown(sc);
1425 if (sc->vmbus_xc != NULL) {
1426 vmbus_xact_ctx_destroy(sc->vmbus_xc);
1427 sc->vmbus_xc = NULL;
1429 free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1430 mtx_destroy(&sc->vmbus_prichan_lock);
1431 mtx_destroy(&sc->vmbus_chan_lock);
/*
 * Placeholder event handler that vmbus_attach() installs in
 * sc->vmbus_event_proc until vmbus_doattach() selects the real one.
 * Both arguments are unused.
 */
1437 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1441 #ifdef EARLY_AP_STARTUP
/*
 * Interrupt config hook established by vmbus_attach() when
 * EARLY_AP_STARTUP is defined.  xsc is the softc.  The hook removes
 * itself with config_intrhook_disestablish().
 *
 * NOTE(review): presumably vmbus_doattach() is called before the hook
 * is removed and the printf is limited to verbose boots -- confirm.
 */
1444 vmbus_intrhook(void *xsc)
1446 struct vmbus_softc *sc = xsc;
1449 device_printf(sc->vmbus_dev, "intrhook\n");
1451 config_intrhook_disestablish(&sc->vmbus_intrhook);
1454 #endif /* EARLY_AP_STARTUP */
/*
 * device_attach method.  Publishes the softc in the file-scope
 * vmbus_sc pointer, records the device, marks the IDT vector as
 * unallocated (-1) and installs the dummy event handler.
 *
 * The real work is done by vmbus_doattach().  With EARLY_AP_STARTUP it
 * is deferred through a config interrupt hook, vmbus_intrhook();
 * without it vmbus_doattach() is called from here.
 *
 * NOTE(review): in the non-EARLY_AP_STARTUP case the direct call is
 * presumably limited to an already booted system, with
 * vmbus_sysinit() covering the boot case -- confirm.  The return value
 * of vmbus_doattach() is not used in the visible code.
 */
1457 vmbus_attach(device_t dev)
1459 vmbus_sc = device_get_softc(dev);
1460 vmbus_sc->vmbus_dev = dev;
1461 vmbus_sc->vmbus_idtvec = -1;
1464 * Event processing logic will be configured:
1465 * - After the vmbus protocol version negotiation.
1466 * - Before we request channel offers.
1468 vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1470 #ifdef EARLY_AP_STARTUP
1472 * Defer the real attach until the pause(9) works as expected.
1474 vmbus_sc->vmbus_intrhook.ich_func = vmbus_intrhook;
1475 vmbus_sc->vmbus_intrhook.ich_arg = vmbus_sc;
1476 config_intrhook_establish(&vmbus_sc->vmbus_intrhook);
1477 #else /* !EARLY_AP_STARTUP */
1479 * If the system has already booted and thread
1480 * scheduling is possible indicated by the global
1481 * cold set to zero, we just call the driver
1482 * initialization directly.
1485 vmbus_doattach(vmbus_sc);
1486 #endif /* EARLY_AP_STARTUP */
/*
 * device_detach method.  Teardown order as visible here: detach the
 * children and destroy all channels; free the scan taskqueues; send
 * the disconnect message; tear down the SynIC through
 * smp_rendezvous() when VMBUS_FLAG_SYNIC is set; tear down interrupt
 * handling; destroy the transaction context; free the channel map and
 * destroy both channel locks; release the MMIO range bookkeeping.
 *
 * NOTE(review): the result of bus_generic_detach() is not used in the
 * visible code, and the channel map and locks are released without a
 * visible check that vmbus_doattach() ever created them -- confirm
 * both.
 */
1492 vmbus_detach(device_t dev)
1494 struct vmbus_softc *sc = device_get_softc(dev);
1496 bus_generic_detach(dev);
1497 vmbus_chan_destroy_all(sc);
1499 vmbus_scan_teardown(sc);
1501 vmbus_disconnect(sc);
1503 if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1504 sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1505 smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1508 vmbus_intr_teardown(sc);
1511 if (sc->vmbus_xc != NULL) {
1512 vmbus_xact_ctx_destroy(sc->vmbus_xc);
1513 sc->vmbus_xc = NULL;
1516 free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1517 mtx_destroy(&sc->vmbus_prichan_lock);
1518 mtx_destroy(&sc->vmbus_chan_lock);
1521 vmbus_free_mmio_res(dev);
1527 #ifndef EARLY_AP_STARTUP
/*
 * SYSINIT hook used when EARLY_AP_STARTUP is not defined.  It looks up
 * the softc with vmbus_get_softc() and tests whether the guest runs on
 * Hyper-V and whether a vmbus device has attached (sc not NULL).
 *
 * NOTE(review): presumably the function returns when that test fails
 * and otherwise calls vmbus_doattach() -- confirm.
 */
1530 vmbus_sysinit(void *arg __unused)
1532 struct vmbus_softc *sc = vmbus_get_softc();
1534 if (vm_guest != VM_GUEST_HV || sc == NULL)
1538 * If the system has already booted and thread
1539 * scheduling is possible, as indicated by the
1540 * global cold set to zero, we just call the driver
1541 * initialization directly.
1548 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1551 SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1553 #endif /* !EARLY_AP_STARTUP */