vmmdev: return EFAULT when trying to read beyond VM system memory max address
sys/amd64/vmm/vmm_dev.c

/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

struct devmem_softc {
        int     segid;
        char    *name;
        struct cdev *cdev;
        struct vmmdev_softc *sc;
        SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
        struct vm       *vm;            /* vm instance cookie */
        struct cdev     *cdev;
        SLIST_ENTRY(vmmdev_softc) link;
        SLIST_HEAD(, devmem_softc) devmem;
        int             flags;
};
#define VSC_LINKED              0x01

static SLIST_HEAD(, vmmdev_softc) head;

static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

static int
vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
{
        int error;

        if (vcpu < 0 || vcpu >= VM_MAXCPU)
                return (EINVAL);

        error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
        return (error);
}

static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
{
        enum vcpu_state state;

        state = vcpu_get_state(sc->vm, vcpu, NULL);
        if (state != VCPU_FROZEN) {
                panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
                    vcpu, state);
        }

        vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
        int error, vcpu;

        for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
                error = vcpu_lock_one(sc, vcpu);
                if (error)
                        break;
        }

        if (error) {
                while (--vcpu >= 0)
                        vcpu_unlock_one(sc, vcpu);
        }

        return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
        int vcpu;

        for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
                vcpu_unlock_one(sc, vcpu);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
        struct vmmdev_softc *sc;

#ifdef notyet   /* XXX kernel is not compiled with invariants */
        mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

        SLIST_FOREACH(sc, &head, link) {
                if (strcmp(name, vm_name(sc->vm)) == 0)
                        break;
        }

        return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

        return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
        int error, off, c, prot;
        vm_paddr_t gpa, maxaddr;
        void *hpa, *cookie;
        struct vmmdev_softc *sc;

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        /*
         * Get a read lock on the guest memory map by freezing any vcpu.
         */
        error = vcpu_lock_one(sc, VM_MAXCPU - 1);
        if (error)
                return (error);

        prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
        maxaddr = vmm_sysmem_maxaddr(sc->vm);
        while (uio->uio_resid > 0 && error == 0) {
                gpa = uio->uio_offset;
                off = gpa & PAGE_MASK;
                c = min(uio->uio_resid, PAGE_SIZE - off);

                /*
                 * The VM has a hole in its physical memory map. If we want to
                 * use 'dd' to inspect memory beyond the hole we need to
                 * provide bogus data for memory that lies in the hole.
                 *
                 * Since this device does not support lseek(2), dd(1) will
                 * read(2) blocks of data to simulate the lseek(2).
                 */
                hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie);
                if (hpa == NULL) {
                        if (uio->uio_rw == UIO_READ && gpa < maxaddr)
                                error = uiomove(__DECONST(void *, zero_region),
                                    c, uio);
                        else
                                error = EFAULT;
                } else {
                        error = uiomove(hpa, c, uio);
                        vm_gpa_release(cookie);
                }
        }
        vcpu_unlock_one(sc, VM_MAXCPU - 1);
        return (error);
}
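
/*
 * Usage sketch (illustrative, not part of this file): because d_read and
 * d_write treat the file offset as a guest physical address, guest memory
 * can be inspected from userland with plain pread(2)/pwrite(2). The VM
 * name and address below are hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	char buf[4096];
 *	int fd = open("/dev/vmm/myvm", O_RDONLY);
 *	// Read one page at guest physical address 1MB. Reads from holes
 *	// below vmm_sysmem_maxaddr() return zeros; reads beyond it (and
 *	// writes to any unbacked address) fail with EFAULT.
 *	ssize_t n = pread(fd, buf, sizeof(buf), 0x100000);
 *	close(fd);
 */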

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
        struct devmem_softc *dsc;
        int error;
        bool sysmem;

        error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
        if (error || mseg->len == 0)
                return (error);

        if (!sysmem) {
                SLIST_FOREACH(dsc, &sc->devmem, link) {
                        if (dsc->segid == mseg->segid)
                                break;
                }
                KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
                    __func__, mseg->segid));
                error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL);
        } else {
                bzero(mseg->name, sizeof(mseg->name));
        }

        return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
        char *name;
        int error;
        bool sysmem;

        error = 0;
        name = NULL;
        sysmem = true;

        if (VM_MEMSEG_NAME(mseg)) {
                sysmem = false;
                name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
                error = copystr(mseg->name, name, SPECNAMELEN + 1, NULL);
                if (error)
                        goto done;
        }

        error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
        if (error)
                goto done;

        if (VM_MEMSEG_NAME(mseg)) {
                error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
                if (error)
                        vm_free_memseg(sc->vm, mseg->segid);
                else
                        name = NULL;    /* freed when 'cdev' is destroyed */
        }
done:
        free(name, M_VMMDEV);
        return (error);
}

static int
vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
        int error, i;

        error = 0;
        for (i = 0; i < count; i++) {
                error = vm_get_register(vm, vcpu, regnum[i], &regval[i]);
                if (error)
                        break;
        }
        return (error);
}

static int
vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
        int error, i;

        error = 0;
        for (i = 0; i < count; i++) {
                error = vm_set_register(vm, vcpu, regnum[i], regval[i]);
                if (error)
                        break;
        }
        return (error);
}
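
/*
 * Usage sketch (illustrative): the helpers above walk parallel
 * 'regnum'/'regval' arrays, which is what lets the VM_GET_REGISTER_SET
 * ioctl fetch several registers in one system call. 'vmfd' is a
 * hypothetical open descriptor for /dev/vmm/<name>.
 *
 *	int regnums[2] = { VM_REG_GUEST_RIP, VM_REG_GUEST_RSP };
 *	uint64_t regvals[2];
 *	struct vm_register_set vmregset = {
 *		.cpuid = 0,
 *		.count = 2,
 *		.regnums = regnums,
 *		.regvals = regvals,
 *	};
 *	if (ioctl(vmfd, VM_GET_REGISTER_SET, &vmregset) == 0)
 *		printf("rip %#lx rsp %#lx\n", regvals[0], regvals[1]);
 */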

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
             struct thread *td)
{
        int error, vcpu, state_changed, size;
        cpuset_t *cpuset;
        struct vmmdev_softc *sc;
        struct vm_register *vmreg;
        struct vm_seg_desc *vmsegdesc;
        struct vm_register_set *vmregset;
        struct vm_run *vmrun;
        struct vm_exception *vmexc;
        struct vm_lapic_irq *vmirq;
        struct vm_lapic_msi *vmmsi;
        struct vm_ioapic_irq *ioapic_irq;
        struct vm_isa_irq *isa_irq;
        struct vm_isa_irq_trigger *isa_irq_trigger;
        struct vm_capability *vmcap;
        struct vm_pptdev *pptdev;
        struct vm_pptdev_mmio *pptmmio;
        struct vm_pptdev_msi *pptmsi;
        struct vm_pptdev_msix *pptmsix;
        struct vm_nmi *vmnmi;
        struct vm_stats *vmstats;
        struct vm_stat_desc *statdesc;
        struct vm_x2apic *x2apic;
        struct vm_gpa_pte *gpapte;
        struct vm_suspend *vmsuspend;
        struct vm_gla2gpa *gg;
        struct vm_activate_cpu *vac;
        struct vm_cpuset *vm_cpuset;
        struct vm_intinfo *vmii;
        struct vm_rtc_time *rtctime;
        struct vm_rtc_data *rtcdata;
        struct vm_memmap *mm;
        struct vm_cpu_topology *topology;
        uint64_t *regvals;
        int *regnums;

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL)
                return (ENXIO);

        error = 0;
        vcpu = -1;
        state_changed = 0;

        /*
         * Some VMM ioctls can operate only on vcpus that are not running.
         */
        switch (cmd) {
        case VM_RUN:
        case VM_GET_REGISTER:
        case VM_SET_REGISTER:
        case VM_GET_SEGMENT_DESCRIPTOR:
        case VM_SET_SEGMENT_DESCRIPTOR:
        case VM_GET_REGISTER_SET:
        case VM_SET_REGISTER_SET:
        case VM_INJECT_EXCEPTION:
        case VM_GET_CAPABILITY:
        case VM_SET_CAPABILITY:
        case VM_PPTDEV_MSI:
        case VM_PPTDEV_MSIX:
        case VM_SET_X2APIC_STATE:
        case VM_GLA2GPA:
        case VM_GLA2GPA_NOFAULT:
        case VM_ACTIVATE_CPU:
        case VM_SET_INTINFO:
        case VM_GET_INTINFO:
        case VM_RESTART_INSTRUCTION:
                /*
                 * XXX fragile, handle with care
                 * Assumes that the first field of the ioctl data is the vcpu.
                 */
                vcpu = *(int *)data;
                error = vcpu_lock_one(sc, vcpu);
                if (error)
                        goto done;
                state_changed = 1;
                break;

        case VM_MAP_PPTDEV_MMIO:
        case VM_BIND_PPTDEV:
        case VM_UNBIND_PPTDEV:
        case VM_ALLOC_MEMSEG:
        case VM_MMAP_MEMSEG:
        case VM_REINIT:
                /*
                 * ioctls that operate on the entire virtual machine must
                 * prevent all vcpus from running.
                 */
                error = vcpu_lock_all(sc);
                if (error)
                        goto done;
                state_changed = 2;
                break;

        case VM_GET_MEMSEG:
        case VM_MMAP_GETNEXT:
                /*
                 * Lock a vcpu to make sure that the memory map cannot be
                 * modified while it is being inspected.
                 */
                vcpu = VM_MAXCPU - 1;
                error = vcpu_lock_one(sc, vcpu);
                if (error)
                        goto done;
                state_changed = 1;
                break;

        default:
                break;
        }

        switch (cmd) {
        case VM_RUN:
                vmrun = (struct vm_run *)data;
                error = vm_run(sc->vm, vmrun);
                break;
        case VM_SUSPEND:
                vmsuspend = (struct vm_suspend *)data;
                error = vm_suspend(sc->vm, vmsuspend->how);
                break;
        case VM_REINIT:
                error = vm_reinit(sc->vm);
                break;
        case VM_STAT_DESC: {
                statdesc = (struct vm_stat_desc *)data;
                error = vmm_stat_desc_copy(statdesc->index,
                                        statdesc->desc, sizeof(statdesc->desc));
                break;
        }
        case VM_STATS: {
                CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
                vmstats = (struct vm_stats *)data;
                getmicrotime(&vmstats->tv);
                error = vmm_stat_copy(sc->vm, vmstats->cpuid,
                                      &vmstats->num_entries, vmstats->statbuf);
                break;
        }
        case VM_PPTDEV_MSI:
                pptmsi = (struct vm_pptdev_msi *)data;
                error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
                                      pptmsi->bus, pptmsi->slot, pptmsi->func,
                                      pptmsi->addr, pptmsi->msg,
                                      pptmsi->numvec);
                break;
        case VM_PPTDEV_MSIX:
                pptmsix = (struct vm_pptdev_msix *)data;
                error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
                                       pptmsix->bus, pptmsix->slot,
                                       pptmsix->func, pptmsix->idx,
                                       pptmsix->addr, pptmsix->msg,
                                       pptmsix->vector_control);
                break;
        case VM_MAP_PPTDEV_MMIO:
                pptmmio = (struct vm_pptdev_mmio *)data;
                error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
                                     pptmmio->func, pptmmio->gpa, pptmmio->len,
                                     pptmmio->hpa);
                break;
        case VM_BIND_PPTDEV:
                pptdev = (struct vm_pptdev *)data;
                error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
                                         pptdev->func);
                break;
        case VM_UNBIND_PPTDEV:
                pptdev = (struct vm_pptdev *)data;
                error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
                                           pptdev->func);
                break;
        case VM_INJECT_EXCEPTION:
                vmexc = (struct vm_exception *)data;
                error = vm_inject_exception(sc->vm, vmexc->cpuid,
                    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
                    vmexc->restart_instruction);
                break;
        case VM_INJECT_NMI:
                vmnmi = (struct vm_nmi *)data;
                error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
                break;
        case VM_LAPIC_IRQ:
                vmirq = (struct vm_lapic_irq *)data;
                error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
                break;
        case VM_LAPIC_LOCAL_IRQ:
                vmirq = (struct vm_lapic_irq *)data;
                error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
                    vmirq->vector);
                break;
        case VM_LAPIC_MSI:
                vmmsi = (struct vm_lapic_msi *)data;
                error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
                break;
        case VM_IOAPIC_ASSERT_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_DEASSERT_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_PULSE_IRQ:
                ioapic_irq = (struct vm_ioapic_irq *)data;
                error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
                break;
        case VM_IOAPIC_PINCOUNT:
                *(int *)data = vioapic_pincount(sc->vm);
                break;
        case VM_ISA_ASSERT_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_assert_irq(sc->vm,
                            isa_irq->ioapic_irq);
                break;
        case VM_ISA_DEASSERT_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_deassert_irq(sc->vm,
                            isa_irq->ioapic_irq);
                break;
        case VM_ISA_PULSE_IRQ:
                isa_irq = (struct vm_isa_irq *)data;
                error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
                if (error == 0 && isa_irq->ioapic_irq != -1)
                        error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
                break;
        case VM_ISA_SET_IRQ_TRIGGER:
                isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
                error = vatpic_set_irq_trigger(sc->vm,
                    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
                break;
        case VM_MMAP_GETNEXT:
                mm = (struct vm_memmap *)data;
                error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
                    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
                break;
        case VM_MMAP_MEMSEG:
                mm = (struct vm_memmap *)data;
                error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
                    mm->len, mm->prot, mm->flags);
                break;
        case VM_ALLOC_MEMSEG:
                error = alloc_memseg(sc, (struct vm_memseg *)data);
                break;
        case VM_GET_MEMSEG:
                error = get_memseg(sc, (struct vm_memseg *)data);
                break;
        case VM_GET_REGISTER:
                vmreg = (struct vm_register *)data;
                error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
                                        &vmreg->regval);
                break;
        case VM_SET_REGISTER:
                vmreg = (struct vm_register *)data;
                error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
                                        vmreg->regval);
                break;
        case VM_SET_SEGMENT_DESCRIPTOR:
                vmsegdesc = (struct vm_seg_desc *)data;
                error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
                                        vmsegdesc->regnum,
                                        &vmsegdesc->desc);
                break;
        case VM_GET_SEGMENT_DESCRIPTOR:
                vmsegdesc = (struct vm_seg_desc *)data;
                error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
                                        vmsegdesc->regnum,
                                        &vmsegdesc->desc);
                break;
        case VM_GET_REGISTER_SET:
                vmregset = (struct vm_register_set *)data;
                if (vmregset->count > VM_REG_LAST) {
                        error = EINVAL;
                        break;
                }
                regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
                    vmregset->count);
                if (error == 0)
                        error = vm_get_register_set(sc->vm, vmregset->cpuid,
                            vmregset->count, regnums, regvals);
                if (error == 0)
                        error = copyout(regvals, vmregset->regvals,
                            sizeof(regvals[0]) * vmregset->count);
                free(regvals, M_VMMDEV);
                free(regnums, M_VMMDEV);
                break;
        case VM_SET_REGISTER_SET:
                vmregset = (struct vm_register_set *)data;
                if (vmregset->count > VM_REG_LAST) {
                        error = EINVAL;
                        break;
                }
                regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
                    M_WAITOK);
                error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
                    vmregset->count);
                if (error == 0)
                        error = copyin(vmregset->regvals, regvals,
                            sizeof(regvals[0]) * vmregset->count);
                if (error == 0)
                        error = vm_set_register_set(sc->vm, vmregset->cpuid,
                            vmregset->count, regnums, regvals);
                free(regvals, M_VMMDEV);
                free(regnums, M_VMMDEV);
                break;
        case VM_GET_CAPABILITY:
                vmcap = (struct vm_capability *)data;
                error = vm_get_capability(sc->vm, vmcap->cpuid,
                                          vmcap->captype,
                                          &vmcap->capval);
                break;
        case VM_SET_CAPABILITY:
                vmcap = (struct vm_capability *)data;
                error = vm_set_capability(sc->vm, vmcap->cpuid,
                                          vmcap->captype,
                                          vmcap->capval);
                break;
        case VM_SET_X2APIC_STATE:
                x2apic = (struct vm_x2apic *)data;
                error = vm_set_x2apic_state(sc->vm,
                                            x2apic->cpuid, x2apic->state);
                break;
        case VM_GET_X2APIC_STATE:
                x2apic = (struct vm_x2apic *)data;
                error = vm_get_x2apic_state(sc->vm,
                                            x2apic->cpuid, &x2apic->state);
                break;
        case VM_GET_GPA_PMAP:
                gpapte = (struct vm_gpa_pte *)data;
                pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
                                 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
                error = 0;
                break;
        case VM_GET_HPET_CAPABILITIES:
                error = vhpet_getcap((struct vm_hpet_cap *)data);
                break;
        case VM_GLA2GPA: {
                CTASSERT(PROT_READ == VM_PROT_READ);
                CTASSERT(PROT_WRITE == VM_PROT_WRITE);
                CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
                gg = (struct vm_gla2gpa *)data;
                error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
                    gg->prot, &gg->gpa, &gg->fault);
                KASSERT(error == 0 || error == EFAULT,
                    ("%s: vm_gla2gpa unknown error %d", __func__, error));
                break;
        }
        case VM_GLA2GPA_NOFAULT:
                gg = (struct vm_gla2gpa *)data;
                error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging,
                    gg->gla, gg->prot, &gg->gpa, &gg->fault);
                KASSERT(error == 0 || error == EFAULT,
                    ("%s: vm_gla2gpa_nofault unknown error %d", __func__,
                    error));
                break;
        case VM_ACTIVATE_CPU:
                vac = (struct vm_activate_cpu *)data;
                error = vm_activate_cpu(sc->vm, vac->vcpuid);
                break;
        case VM_GET_CPUS:
                error = 0;
                vm_cpuset = (struct vm_cpuset *)data;
                size = vm_cpuset->cpusetsize;
                if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
                        error = ERANGE;
                        break;
                }
                cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
                if (vm_cpuset->which == VM_ACTIVE_CPUS)
                        *cpuset = vm_active_cpus(sc->vm);
                else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
                        *cpuset = vm_suspended_cpus(sc->vm);
                else if (vm_cpuset->which == VM_DEBUG_CPUS)
                        *cpuset = vm_debug_cpus(sc->vm);
                else
                        error = EINVAL;
                if (error == 0)
                        error = copyout(cpuset, vm_cpuset->cpus, size);
                free(cpuset, M_TEMP);
                break;
        case VM_SUSPEND_CPU:
                vac = (struct vm_activate_cpu *)data;
                error = vm_suspend_cpu(sc->vm, vac->vcpuid);
                break;
        case VM_RESUME_CPU:
                vac = (struct vm_activate_cpu *)data;
                error = vm_resume_cpu(sc->vm, vac->vcpuid);
                break;
        case VM_SET_INTINFO:
                vmii = (struct vm_intinfo *)data;
                error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
                break;
        case VM_GET_INTINFO:
                vmii = (struct vm_intinfo *)data;
                error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
                    &vmii->info2);
                break;
        case VM_RTC_WRITE:
                rtcdata = (struct vm_rtc_data *)data;
                error = vrtc_nvram_write(sc->vm, rtcdata->offset,
                    rtcdata->value);
                break;
        case VM_RTC_READ:
                rtcdata = (struct vm_rtc_data *)data;
                error = vrtc_nvram_read(sc->vm, rtcdata->offset,
                    &rtcdata->value);
                break;
        case VM_RTC_SETTIME:
                rtctime = (struct vm_rtc_time *)data;
                error = vrtc_set_time(sc->vm, rtctime->secs);
                break;
        case VM_RTC_GETTIME:
                error = 0;
                rtctime = (struct vm_rtc_time *)data;
                rtctime->secs = vrtc_get_time(sc->vm);
                break;
        case VM_RESTART_INSTRUCTION:
                error = vm_restart_instruction(sc->vm, vcpu);
                break;
        case VM_SET_TOPOLOGY:
                topology = (struct vm_cpu_topology *)data;
                error = vm_set_topology(sc->vm, topology->sockets,
                    topology->cores, topology->threads, topology->maxcpus);
                break;
        case VM_GET_TOPOLOGY:
                topology = (struct vm_cpu_topology *)data;
                vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
                    &topology->threads, &topology->maxcpus);
                error = 0;
                break;
        default:
                error = ENOTTY;
                break;
        }

        if (state_changed == 1)
                vcpu_unlock_one(sc, vcpu);
        else if (state_changed == 2)
                vcpu_unlock_all(sc);

done:
        /* Make sure that no handler returns a bogus value like ERESTART */
        KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
        return (error);
}
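
/*
 * Usage sketch (illustrative): the canonical consumer of this handler is
 * bhyve(8), whose per-vcpu loop funnels execution through VM_RUN and then
 * emulates whatever caused the exit. 'vmfd' is a hypothetical descriptor
 * for /dev/vmm/<name>.
 *
 *	struct vm_run vmrun;
 *
 *	memset(&vmrun, 0, sizeof(vmrun));
 *	vmrun.cpuid = 0;
 *	while (ioctl(vmfd, VM_RUN, &vmrun) == 0) {
 *		// inspect the exit state returned in 'vmrun', emulate the
 *		// exit, then loop back into the guest
 *	}
 */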

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
        struct vmmdev_softc *sc;
        vm_paddr_t gpa;
        size_t len;
        vm_ooffset_t segoff, first, last;
        int error, found, segid;
        bool sysmem;

        first = *offset;
        last = first + mapsize;
        if ((nprot & PROT_EXEC) || first < 0 || first >= last)
                return (EINVAL);

        sc = vmmdev_lookup2(cdev);
        if (sc == NULL) {
                /* virtual machine is in the process of being created */
                return (EINVAL);
        }

        /*
         * Get a read lock on the guest memory map by freezing any vcpu.
         */
        error = vcpu_lock_one(sc, VM_MAXCPU - 1);
        if (error)
                return (error);

        gpa = 0;
        found = 0;
        while (!found) {
                error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
                    NULL, NULL);
                if (error)
                        break;

                if (first >= gpa && last <= gpa + len)
                        found = 1;
                else
                        gpa += len;
        }

        if (found) {
                error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
                KASSERT(error == 0 && *objp != NULL,
                    ("%s: invalid memory segment %d", __func__, segid));
                if (sysmem) {
                        vm_object_reference(*objp);
                        *offset = segoff + (first - gpa);
                } else {
                        error = EINVAL;
                }
        }
        vcpu_unlock_one(sc, VM_MAXCPU - 1);
        return (error);
}
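
/*
 * Usage sketch (illustrative): with d_mmap_single wired up, userland can
 * map guest system memory directly; the mmap offset is a guest physical
 * address and the requested range must fall within a single sysmem
 * mapping. PROT_EXEC is rejected above. 'vmfd' and the address are
 * hypothetical.
 *
 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    vmfd, 0x100000);
 */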

static void
vmmdev_destroy(void *arg)
{
        struct vmmdev_softc *sc = arg;
        struct devmem_softc *dsc;
        int error;

        error = vcpu_lock_all(sc);
        KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));

        while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
                KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
                SLIST_REMOVE_HEAD(&sc->devmem, link);
                free(dsc->name, M_VMMDEV);
                free(dsc, M_VMMDEV);
        }

        if (sc->cdev != NULL)
                destroy_dev(sc->cdev);

        if (sc->vm != NULL)
                vm_destroy(sc->vm);

        if ((sc->flags & VSC_LINKED) != 0) {
                mtx_lock(&vmmdev_mtx);
                SLIST_REMOVE(&head, sc, vmmdev_softc, link);
                mtx_unlock(&vmmdev_mtx);
        }

        free(sc, M_VMMDEV);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
        int error;
        char buf[VM_MAX_NAMELEN];
        struct devmem_softc *dsc;
        struct vmmdev_softc *sc;
        struct cdev *cdev;

        strlcpy(buf, "beavis", sizeof(buf));
        error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
        if (error != 0 || req->newptr == NULL)
                return (error);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(buf);
        if (sc == NULL || sc->cdev == NULL) {
                mtx_unlock(&vmmdev_mtx);
                return (EINVAL);
        }

        /*
         * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
         * goes down to 0 so we should not do it again in the callback.
         *
         * Setting 'sc->cdev' to NULL is also used to indicate that the VM
         * is scheduled for destruction.
         */
        cdev = sc->cdev;
        sc->cdev = NULL;
        mtx_unlock(&vmmdev_mtx);

        /*
         * Schedule all cdevs to be destroyed:
         *
         * - any new operations on the 'cdev' will return an error (ENXIO).
         *
         * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
         *   be destroyed and the callback will be invoked in a taskqueue
         *   context.
         *
         * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
         */
        SLIST_FOREACH(dsc, &sc->devmem, link) {
                KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
                destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
        }
        destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
        return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
            NULL, 0, sysctl_vmm_destroy, "A", NULL);

static struct cdevsw vmmdevsw = {
        .d_name         = "vmmdev",
        .d_version      = D_VERSION,
        .d_ioctl        = vmmdev_ioctl,
        .d_mmap_single  = vmmdev_mmap_single,
        .d_read         = vmmdev_rw,
        .d_write        = vmmdev_rw,
};

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
        int error;
        struct vm *vm;
        struct cdev *cdev;
        struct vmmdev_softc *sc, *sc2;
        char buf[VM_MAX_NAMELEN];

        strlcpy(buf, "beavis", sizeof(buf));
        error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
        if (error != 0 || req->newptr == NULL)
                return (error);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(buf);
        mtx_unlock(&vmmdev_mtx);
        if (sc != NULL)
                return (EEXIST);

        error = vm_create(buf, &vm);
        if (error != 0)
                return (error);

        sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
        sc->vm = vm;
        SLIST_INIT(&sc->devmem);

        /*
         * Lookup the name again just in case somebody sneaked in when we
         * dropped the lock.
         */
        mtx_lock(&vmmdev_mtx);
        sc2 = vmmdev_lookup(buf);
        if (sc2 == NULL) {
                SLIST_INSERT_HEAD(&head, sc, link);
                sc->flags |= VSC_LINKED;
        }
        mtx_unlock(&vmmdev_mtx);

        if (sc2 != NULL) {
                vmmdev_destroy(sc);
                return (EEXIST);
        }

        error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
                           UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
        if (error != 0) {
                vmmdev_destroy(sc);
                return (error);
        }

        mtx_lock(&vmmdev_mtx);
        sc->cdev = cdev;
        sc->cdev->si_drv1 = sc;
        mtx_unlock(&vmmdev_mtx);

        return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
            NULL, 0, sysctl_vmm_create, "A", NULL);
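
/*
 * Usage sketch (illustrative): virtual machines are created and destroyed
 * by writing a name to the two sysctl nodes defined above, either from the
 * shell ("sysctl hw.vmm.create=myvm") or programmatically. The VM name is
 * hypothetical.
 *
 *	const char *name = "myvm";
 *	sysctlbyname("hw.vmm.create", NULL, NULL, name, strlen(name) + 1);
 *	// ... use /dev/vmm/myvm ...
 *	sysctlbyname("hw.vmm.destroy", NULL, NULL, name, strlen(name) + 1);
 */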

void
vmmdev_init(void)
{
        mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}

int
vmmdev_cleanup(void)
{
        int error;

        if (SLIST_EMPTY(&head))
                error = 0;
        else
                error = EBUSY;

        return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
        struct devmem_softc *dsc;
        vm_ooffset_t first, last;
        size_t seglen;
        int error;
        bool sysmem;

        dsc = cdev->si_drv1;
        if (dsc == NULL) {
                /* 'cdev' has been created but is not ready for use */
                return (ENXIO);
        }

        first = *offset;
        last = *offset + len;
        if ((nprot & PROT_EXEC) || first < 0 || first >= last)
                return (EINVAL);

        error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1);
        if (error)
                return (error);

        error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
        KASSERT(error == 0 && !sysmem && *objp != NULL,
            ("%s: invalid devmem segment %d", __func__, dsc->segid));

        vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1);

        if (seglen >= last) {
                vm_object_reference(*objp);
                return (0);
        } else {
                return (EINVAL);
        }
}

static struct cdevsw devmemsw = {
        .d_name         = "devmem",
        .d_version      = D_VERSION,
        .d_mmap_single  = devmem_mmap_single,
};

static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
        struct devmem_softc *dsc;
        struct vmmdev_softc *sc;
        struct cdev *cdev;
        int error;

        error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
            UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
        if (error)
                return (error);

        dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

        mtx_lock(&vmmdev_mtx);
        sc = vmmdev_lookup(vmname);
        KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
        if (sc->cdev == NULL) {
                /* virtual machine is being created or destroyed */
                mtx_unlock(&vmmdev_mtx);
                free(dsc, M_VMMDEV);
                destroy_dev_sched_cb(cdev, NULL, NULL);
                return (ENODEV);
        }

        dsc->segid = segid;
        dsc->name = devname;
        dsc->cdev = cdev;
        dsc->sc = sc;
        SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
        mtx_unlock(&vmmdev_mtx);

        /* The 'cdev' is ready for use after 'si_drv1' is initialized */
        cdev->si_drv1 = dsc;
        return (0);
}
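
/*
 * Usage sketch (illustrative): a devmem segment named "bootrom" belonging
 * to VM "myvm" (both names hypothetical) appears as
 * /dev/vmm.io/myvm.bootrom and is mapped at offsets relative to the start
 * of the segment:
 *
 *	int fd = open("/dev/vmm.io/myvm.bootrom", O_RDWR);
 *	void *p = mmap(NULL, seglen, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 */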

static void
devmem_destroy(void *arg)
{
        struct devmem_softc *dsc = arg;

        KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
        dsc->cdev = NULL;
        dsc->sc = NULL;
}