/* FreeBSD sys/amd64/vmm/x86.c — CPUID and MTRR emulation for bhyve vcpus. */
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35 #include <sys/pcpu.h>
36 #include <sys/systm.h>
37 #include <sys/sysctl.h>
38
39 #include <machine/clock.h>
40 #include <machine/cpufunc.h>
41 #include <machine/md_var.h>
42 #include <machine/segments.h>
43 #include <machine/specialreg.h>
44
45 #include <machine/vmm.h>
46
47 #include "vmm_host.h"
48 #include "vmm_ktr.h"
49 #include "vmm_util.h"
50 #include "x86.h"
51
52 SYSCTL_DECL(_hw_vmm);
53 static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
54     NULL);
55
56 #define CPUID_VM_HIGH           0x40000000
57
58 static const char bhyve_id[12] = "bhyve bhyve ";
59
60 static uint64_t bhyve_xcpuids;
61 SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
62     "Number of times an unknown cpuid leaf was accessed");
63
64 #if __FreeBSD_version < 1200060 /* Remove after 11 EOL helps MFCing */
65 extern u_int threads_per_core;
66 SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
67     &threads_per_core, 0, NULL);
68
69 extern u_int cores_per_package;
70 SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
71     &cores_per_package, 0, NULL);
72 #endif
73
74 static int cpuid_leaf_b = 1;
75 SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
76     &cpuid_leaf_b, 0, NULL);
77
78 /*
79  * Round up to the next power of two, if necessary, and then take log2.
80  * Returns -1 if argument is zero.
81  */
82 static __inline int
83 log2(u_int x)
84 {
85
86         return (fls(x << (1 - powerof2(x))) - 1);
87 }
88
89 int
90 x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx,
91     uint64_t *rcx, uint64_t *rdx)
92 {
93         const struct xsave_limits *limits;
94         uint64_t cr4;
95         int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
96             width, x2apic_id;
97         unsigned int func, regs[4], logical_cpus, param;
98         enum x2apic_state x2apic_state;
99         uint16_t cores, maxcpus, sockets, threads;
100
101         /*
102          * The function of CPUID is controlled through the provided value of
103          * %eax (and secondarily %ecx, for certain leaf data).
104          */
105         func = (uint32_t)*rax;
106         param = (uint32_t)*rcx;
107
108         VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", func, param);
109
110         /*
111          * Requests for invalid CPUID levels should map to the highest
112          * available level instead.
113          */
114         if (cpu_exthigh != 0 && func >= 0x80000000) {
115                 if (func > cpu_exthigh)
116                         func = cpu_exthigh;
117         } else if (func >= 0x40000000) {
118                 if (func > CPUID_VM_HIGH)
119                         func = CPUID_VM_HIGH;
120         } else if (func > cpu_high) {
121                 func = cpu_high;
122         }
123
124         /*
125          * In general the approach used for CPU topology is to
126          * advertise a flat topology where all CPUs are packages with
127          * no multi-core or SMT.
128          */
129         switch (func) {
130                 /*
131                  * Pass these through to the guest
132                  */
133                 case CPUID_0000_0000:
134                 case CPUID_0000_0002:
135                 case CPUID_0000_0003:
136                 case CPUID_8000_0000:
137                 case CPUID_8000_0002:
138                 case CPUID_8000_0003:
139                 case CPUID_8000_0004:
140                 case CPUID_8000_0006:
141                         cpuid_count(func, param, regs);
142                         break;
143                 case CPUID_8000_0008:
144                         cpuid_count(func, param, regs);
145                         if (vmm_is_svm()) {
146                                 /*
147                                  * As on Intel (0000_0007:0, EDX), mask out
148                                  * unsupported or unsafe AMD extended features
149                                  * (8000_0008 EBX).
150                                  */
151                                 regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
152                                     AMDFEID_XSAVEERPTR);
153
154                                 vm_get_topology(vm, &sockets, &cores, &threads,
155                                     &maxcpus);
156                                 /*
157                                  * Here, width is ApicIdCoreIdSize, present on
158                                  * at least Family 15h and newer.  It
159                                  * represents the "number of bits in the
160                                  * initial apicid that indicate thread id
161                                  * within a package."
162                                  *
163                                  * Our topo_probe_amd() uses it for
164                                  * pkg_id_shift and other OSes may rely on it.
165                                  */
166                                 width = MIN(0xF, log2(threads * cores));
167                                 if (width < 0x4)
168                                         width = 0;
169                                 logical_cpus = MIN(0xFF, threads * cores - 1);
170                                 regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
171                         }
172                         break;
173
174                 case CPUID_8000_0001:
175                         cpuid_count(func, param, regs);
176
177                         /*
178                          * Hide SVM from guest.
179                          */
180                         regs[2] &= ~AMDID2_SVM;
181
182                         /*
183                          * Don't advertise extended performance counter MSRs
184                          * to the guest.
185                          */
186                         regs[2] &= ~AMDID2_PCXC;
187                         regs[2] &= ~AMDID2_PNXC;
188                         regs[2] &= ~AMDID2_PTSCEL2I;
189
190                         /*
191                          * Don't advertise Instruction Based Sampling feature.
192                          */
193                         regs[2] &= ~AMDID2_IBS;
194
195                         /* NodeID MSR not available */
196                         regs[2] &= ~AMDID2_NODE_ID;
197
198                         /* Don't advertise the OS visible workaround feature */
199                         regs[2] &= ~AMDID2_OSVW;
200
201                         /* Hide mwaitx/monitorx capability from the guest */
202                         regs[2] &= ~AMDID2_MWAITX;
203
204                         /* Advertise RDTSCP if it is enabled. */
205                         error = vm_get_capability(vm, vcpu_id,
206                             VM_CAP_RDTSCP, &enable_rdtscp);
207                         if (error == 0 && enable_rdtscp)
208                                 regs[3] |= AMDID_RDTSCP;
209                         else
210                                 regs[3] &= ~AMDID_RDTSCP;
211                         break;
212
213                 case CPUID_8000_0007:
214                         /*
215                          * AMD uses this leaf to advertise the processor's
216                          * power monitoring and RAS capabilities. These
217                          * features are hardware-specific and exposing
218                          * them to a guest doesn't make a lot of sense.
219                          *
220                          * Intel uses this leaf only to advertise the
221                          * "Invariant TSC" feature with all other bits
222                          * being reserved (set to zero).
223                          */
224                         regs[0] = 0;
225                         regs[1] = 0;
226                         regs[2] = 0;
227                         regs[3] = 0;
228
229                         /*
230                          * "Invariant TSC" can be advertised to the guest if:
231                          * - host TSC frequency is invariant
232                          * - host TSCs are synchronized across physical cpus
233                          *
234                          * XXX This still falls short because the vcpu
235                          * can observe the TSC moving backwards as it
236                          * migrates across physical cpus. But at least
237                          * it should discourage the guest from using the
238                          * TSC to keep track of time.
239                          */
240                         if (tsc_is_invariant && smp_tsc)
241                                 regs[3] |= AMDPM_TSC_INVARIANT;
242                         break;
243
244                 case CPUID_8000_001D:
245                         /* AMD Cache topology, like 0000_0004 for Intel. */
246                         if (!vmm_is_svm())
247                                 goto default_leaf;
248
249                         /*
250                          * Similar to Intel, generate a ficticious cache
251                          * topology for the guest with L3 shared by the
252                          * package, and L1 and L2 local to a core.
253                          */
254                         vm_get_topology(vm, &sockets, &cores, &threads,
255                             &maxcpus);
256                         switch (param) {
257                         case 0:
258                                 logical_cpus = threads;
259                                 level = 1;
260                                 func = 1;       /* data cache */
261                                 break;
262                         case 1:
263                                 logical_cpus = threads;
264                                 level = 2;
265                                 func = 3;       /* unified cache */
266                                 break;
267                         case 2:
268                                 logical_cpus = threads * cores;
269                                 level = 3;
270                                 func = 3;       /* unified cache */
271                                 break;
272                         default:
273                                 logical_cpus = 0;
274                                 level = 0;
275                                 func = 0;
276                                 break;
277                         }
278
279                         logical_cpus = MIN(0xfff, logical_cpus - 1);
280                         regs[0] = (logical_cpus << 14) | (1 << 8) |
281                             (level << 5) | func;
282                         regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
283                         regs[2] = 0;
284                         regs[3] = 0;
285                         break;
286
287                 case CPUID_8000_001E:
288                         /*
289                          * AMD Family 16h+ and Hygon Family 18h additional
290                          * identifiers.
291                          */
292                         if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16)
293                                 goto default_leaf;
294
295                         vm_get_topology(vm, &sockets, &cores, &threads,
296                             &maxcpus);
297                         regs[0] = vcpu_id;
298                         threads = MIN(0xFF, threads - 1);
299                         regs[1] = (threads << 8) |
300                             (vcpu_id >> log2(threads + 1));
301                         /*
302                          * XXX Bhyve topology cannot yet represent >1 node per
303                          * processor.
304                          */
305                         regs[2] = 0;
306                         regs[3] = 0;
307                         break;
308
309                 case CPUID_0000_0001:
310                         do_cpuid(1, regs);
311
312                         error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
313                         if (error) {
314                                 panic("x86_emulate_cpuid: error %d "
315                                       "fetching x2apic state", error);
316                         }
317
318                         /*
319                          * Override the APIC ID only in ebx
320                          */
321                         regs[1] &= ~(CPUID_LOCAL_APIC_ID);
322                         regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
323
324                         /*
325                          * Don't expose VMX, SpeedStep, TME or SMX capability.
326                          * Advertise x2APIC capability and Hypervisor guest.
327                          */
328                         regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
329                         regs[2] &= ~(CPUID2_SMX);
330
331                         regs[2] |= CPUID2_HV;
332
333                         if (x2apic_state != X2APIC_DISABLED)
334                                 regs[2] |= CPUID2_X2APIC;
335                         else
336                                 regs[2] &= ~CPUID2_X2APIC;
337
338                         /*
339                          * Only advertise CPUID2_XSAVE in the guest if
340                          * the host is using XSAVE.
341                          */
342                         if (!(regs[2] & CPUID2_OSXSAVE))
343                                 regs[2] &= ~CPUID2_XSAVE;
344
345                         /*
346                          * If CPUID2_XSAVE is being advertised and the
347                          * guest has set CR4_XSAVE, set
348                          * CPUID2_OSXSAVE.
349                          */
350                         regs[2] &= ~CPUID2_OSXSAVE;
351                         if (regs[2] & CPUID2_XSAVE) {
352                                 error = vm_get_register(vm, vcpu_id,
353                                     VM_REG_GUEST_CR4, &cr4);
354                                 if (error)
355                                         panic("x86_emulate_cpuid: error %d "
356                                               "fetching %%cr4", error);
357                                 if (cr4 & CR4_XSAVE)
358                                         regs[2] |= CPUID2_OSXSAVE;
359                         }
360
361                         /*
362                          * Hide monitor/mwait until we know how to deal with
363                          * these instructions.
364                          */
365                         regs[2] &= ~CPUID2_MON;
366
367                         /*
368                          * Hide the performance and debug features.
369                          */
370                         regs[2] &= ~CPUID2_PDCM;
371
372                         /*
373                          * No TSC deadline support in the APIC yet
374                          */
375                         regs[2] &= ~CPUID2_TSCDLT;
376
377                         /*
378                          * Hide thermal monitoring
379                          */
380                         regs[3] &= ~(CPUID_ACPI | CPUID_TM);
381
382                         /*
383                          * Hide the debug store capability.
384                          */
385                         regs[3] &= ~CPUID_DS;
386
387                         /*
388                          * Advertise the Machine Check and MTRR capability.
389                          *
390                          * Some guest OSes (e.g. Windows) will not boot if
391                          * these features are absent.
392                          */
393                         regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
394
395                         vm_get_topology(vm, &sockets, &cores, &threads,
396                             &maxcpus);
397                         logical_cpus = threads * cores;
398                         regs[1] &= ~CPUID_HTT_CORES;
399                         regs[1] |= (logical_cpus & 0xff) << 16;
400                         regs[3] |= CPUID_HTT;
401                         break;
402
403                 case CPUID_0000_0004:
404                         cpuid_count(func, param, regs);
405
406                         if (regs[0] || regs[1] || regs[2] || regs[3]) {
407                                 vm_get_topology(vm, &sockets, &cores, &threads,
408                                     &maxcpus);
409                                 regs[0] &= 0x3ff;
410                                 regs[0] |= (cores - 1) << 26;
411                                 /*
412                                  * Cache topology:
413                                  * - L1 and L2 are shared only by the logical
414                                  *   processors in a single core.
415                                  * - L3 and above are shared by all logical
416                                  *   processors in the package.
417                                  */
418                                 logical_cpus = threads;
419                                 level = (regs[0] >> 5) & 0x7;
420                                 if (level >= 3)
421                                         logical_cpus *= cores;
422                                 regs[0] |= (logical_cpus - 1) << 14;
423                         }
424                         break;
425
426                 case CPUID_0000_0007:
427                         regs[0] = 0;
428                         regs[1] = 0;
429                         regs[2] = 0;
430                         regs[3] = 0;
431
432                         /* leaf 0 */
433                         if (param == 0) {
434                                 cpuid_count(func, param, regs);
435
436                                 /* Only leaf 0 is supported */
437                                 regs[0] = 0;
438
439                                 /*
440                                  * Expose known-safe features.
441                                  */
442                                 regs[1] &= (CPUID_STDEXT_FSGSBASE |
443                                     CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
444                                     CPUID_STDEXT_AVX2 | CPUID_STDEXT_SMEP |
445                                     CPUID_STDEXT_BMI2 |
446                                     CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
447                                     CPUID_STDEXT_AVX512F |
448                                     CPUID_STDEXT_RDSEED |
449                                     CPUID_STDEXT_SMAP |
450                                     CPUID_STDEXT_AVX512PF |
451                                     CPUID_STDEXT_AVX512ER |
452                                     CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
453                                 regs[2] = 0;
454                                 regs[3] &= CPUID_STDEXT3_MD_CLEAR;
455
456                                 /* Advertise RDPID if it is enabled. */
457                                 error = vm_get_capability(vm, vcpu_id,
458                                     VM_CAP_RDPID, &enable_rdpid);
459                                 if (error == 0 && enable_rdpid)
460                                         regs[2] |= CPUID_STDEXT2_RDPID;
461
462                                 /* Advertise INVPCID if it is enabled. */
463                                 error = vm_get_capability(vm, vcpu_id,
464                                     VM_CAP_ENABLE_INVPCID, &enable_invpcid);
465                                 if (error == 0 && enable_invpcid)
466                                         regs[1] |= CPUID_STDEXT_INVPCID;
467                         }
468                         break;
469
470                 case CPUID_0000_0006:
471                         regs[0] = CPUTPM1_ARAT;
472                         regs[1] = 0;
473                         regs[2] = 0;
474                         regs[3] = 0;
475                         break;
476
477                 case CPUID_0000_000A:
478                         /*
479                          * Handle the access, but report 0 for
480                          * all options
481                          */
482                         regs[0] = 0;
483                         regs[1] = 0;
484                         regs[2] = 0;
485                         regs[3] = 0;
486                         break;
487
488                 case CPUID_0000_000B:
489                         /*
490                          * Intel processor topology enumeration
491                          */
492                         if (vmm_is_intel()) {
493                                 vm_get_topology(vm, &sockets, &cores, &threads,
494                                     &maxcpus);
495                                 if (param == 0) {
496                                         logical_cpus = threads;
497                                         width = log2(logical_cpus);
498                                         level = CPUID_TYPE_SMT;
499                                         x2apic_id = vcpu_id;
500                                 }
501
502                                 if (param == 1) {
503                                         logical_cpus = threads * cores;
504                                         width = log2(logical_cpus);
505                                         level = CPUID_TYPE_CORE;
506                                         x2apic_id = vcpu_id;
507                                 }
508
509                                 if (!cpuid_leaf_b || param >= 2) {
510                                         width = 0;
511                                         logical_cpus = 0;
512                                         level = 0;
513                                         x2apic_id = 0;
514                                 }
515
516                                 regs[0] = width & 0x1f;
517                                 regs[1] = logical_cpus & 0xffff;
518                                 regs[2] = (level << 8) | (param & 0xff);
519                                 regs[3] = x2apic_id;
520                         } else {
521                                 regs[0] = 0;
522                                 regs[1] = 0;
523                                 regs[2] = 0;
524                                 regs[3] = 0;
525                         }
526                         break;
527
528                 case CPUID_0000_000D:
529                         limits = vmm_get_xsave_limits();
530                         if (!limits->xsave_enabled) {
531                                 regs[0] = 0;
532                                 regs[1] = 0;
533                                 regs[2] = 0;
534                                 regs[3] = 0;
535                                 break;
536                         }
537
538                         cpuid_count(func, param, regs);
539                         switch (param) {
540                         case 0:
541                                 /*
542                                  * Only permit the guest to use bits
543                                  * that are active in the host in
544                                  * %xcr0.  Also, claim that the
545                                  * maximum save area size is
546                                  * equivalent to the host's current
547                                  * save area size.  Since this runs
548                                  * "inside" of vmrun(), it runs with
549                                  * the guest's xcr0, so the current
550                                  * save area size is correct as-is.
551                                  */
552                                 regs[0] &= limits->xcr0_allowed;
553                                 regs[2] = limits->xsave_max_size;
554                                 regs[3] &= (limits->xcr0_allowed >> 32);
555                                 break;
556                         case 1:
557                                 /* Only permit XSAVEOPT. */
558                                 regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
559                                 regs[1] = 0;
560                                 regs[2] = 0;
561                                 regs[3] = 0;
562                                 break;
563                         default:
564                                 /*
565                                  * If the leaf is for a permitted feature,
566                                  * pass through as-is, otherwise return
567                                  * all zeroes.
568                                  */
569                                 if (!(limits->xcr0_allowed & (1ul << param))) {
570                                         regs[0] = 0;
571                                         regs[1] = 0;
572                                         regs[2] = 0;
573                                         regs[3] = 0;
574                                 }
575                                 break;
576                         }
577                         break;
578
579                 case CPUID_0000_000F:
580                 case CPUID_0000_0010:
581                         /*
582                          * Do not report any Resource Director Technology
583                          * capabilities.  Exposing control of cache or memory
584                          * controller resource partitioning to the guest is not
585                          * at all sensible.
586                          *
587                          * This is already hidden at a high level by masking of
588                          * leaf 0x7.  Even still, a guest may look here for
589                          * detailed capability information.
590                          */
591                         regs[0] = 0;
592                         regs[1] = 0;
593                         regs[2] = 0;
594                         regs[3] = 0;
595                         break;
596
597                 case CPUID_0000_0015:
598                         /*
599                          * Don't report CPU TSC/Crystal ratio and clock
600                          * values since guests may use these to derive the
601                          * local APIC frequency..
602                          */
603                         regs[0] = 0;
604                         regs[1] = 0;
605                         regs[2] = 0;
606                         regs[3] = 0;
607                         break;
608
609                 case 0x40000000:
610                         regs[0] = CPUID_VM_HIGH;
611                         bcopy(bhyve_id, &regs[1], 4);
612                         bcopy(bhyve_id + 4, &regs[2], 4);
613                         bcopy(bhyve_id + 8, &regs[3], 4);
614                         break;
615
616                 default:
617 default_leaf:
618                         /*
619                          * The leaf value has already been clamped so
620                          * simply pass this through, keeping count of
621                          * how many unhandled leaf values have been seen.
622                          */
623                         atomic_add_long(&bhyve_xcpuids, 1);
624                         cpuid_count(func, param, regs);
625                         break;
626         }
627
628         /*
629          * CPUID clears the upper 32-bits of the long-mode registers.
630          */
631         *rax = regs[0];
632         *rbx = regs[1];
633         *rcx = regs[2];
634         *rdx = regs[3];
635
636         return (1);
637 }
638
639 bool
640 vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
641 {
642         bool rv;
643
644         KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
645             __func__, cap));
646
647         /*
648          * Simply passthrough the capabilities of the host cpu for now.
649          */
650         rv = false;
651         switch (cap) {
652         case VCC_NO_EXECUTE:
653                 if (amd_feature & AMDID_NX)
654                         rv = true;
655                 break;
656         case VCC_FFXSR:
657                 if (amd_feature & AMDID_FFXSR)
658                         rv = true;
659                 break;
660         case VCC_TCE:
661                 if (amd_feature2 & AMDID2_TCE)
662                         rv = true;
663                 break;
664         default:
665                 panic("%s: unknown vm_cpu_capability %d", __func__, cap);
666         }
667         return (rv);
668 }
669
670 int
671 vm_rdmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t *val)
672 {
673         switch (num) {
674         case MSR_MTRRcap:
675                 *val = MTRR_CAP_WC | MTRR_CAP_FIXED | VMM_MTRR_VAR_MAX;
676                 break;
677         case MSR_MTRRdefType:
678                 *val = mtrr->def_type;
679                 break;
680         case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
681                 *val = mtrr->fixed4k[num - MSR_MTRR4kBase];
682                 break;
683         case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
684                 *val = mtrr->fixed16k[num - MSR_MTRR16kBase];
685                 break;
686         case MSR_MTRR64kBase:
687                 *val = mtrr->fixed64k;
688                 break;
689         case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
690                 u_int offset = num - MSR_MTRRVarBase;
691                 if (offset % 2 == 0) {
692                         *val = mtrr->var[offset / 2].base;
693                 } else {
694                         *val = mtrr->var[offset / 2].mask;
695                 }
696                 break;
697         }
698         default:
699                 return (-1);
700         }
701
702         return (0);
703 }
704
705 int
706 vm_wrmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t val)
707 {
708         switch (num) {
709         case MSR_MTRRcap:
710                 /* MTRRCAP is read only */
711                 return (-1);
712         case MSR_MTRRdefType:
713                 if (val & ~VMM_MTRR_DEF_MASK) {
714                         /* generate #GP on writes to reserved fields */
715                         return (-1);
716                 }
717                 mtrr->def_type = val;
718                 break;
719         case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
720                 mtrr->fixed4k[num - MSR_MTRR4kBase] = val;
721                 break;
722         case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
723                 mtrr->fixed16k[num - MSR_MTRR16kBase] = val;
724                 break;
725         case MSR_MTRR64kBase:
726                 mtrr->fixed64k = val;
727                 break;
728         case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
729                 u_int offset = num - MSR_MTRRVarBase;
730                 if (offset % 2 == 0) {
731                         if (val & ~VMM_MTRR_PHYSBASE_MASK) {
732                                 /* generate #GP on writes to reserved fields */
733                                 return (-1);
734                         }
735                         mtrr->var[offset / 2].base = val;
736                 } else {
737                         if (val & ~VMM_MTRR_PHYSMASK_MASK) {
738                                 /* generate #GP on writes to reserved fields */
739                                 return (-1);
740                         }
741                         mtrr->var[offset / 2].mask = val;
742                 }
743                 break;
744         }
745         default:
746                 return (-1);
747         }
748
749         return (0);
750 }