2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2011 NetApp, Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
38 #include <machine/clock.h>
39 #include <machine/cpufunc.h>
40 #include <machine/md_var.h>
41 #include <machine/pcb.h>
42 #include <machine/specialreg.h>
43 #include <machine/vmm.h>
/*
 * Report whether a VMX capability MSR value permits control bit 'bitpos'
 * to be set to 1.
 *
 * The flag for control bit N is read from bit N + 32 of 'msr_val'; the
 * 1-setting is permitted when that flag is set.
 *
 * 'bitpos' must be in the range 0..31.
 */
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{

	/* Shift a 64-bit one so the result does not depend on sizeof(long). */
	return ((msr_val & ((uint64_t)1 << (bitpos + 32))) != 0);
}
/*
 * Report whether a VMX capability MSR value permits control bit 'bitpos'
 * to be set to 0.
 *
 * The flag for control bit N is read from bit N of 'msr_val'; the
 * 0-setting is permitted when that flag is clear.
 *
 * 'bitpos' must be in the range 0..31.
 */
static bool
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{

	/* Use a 64-bit one so the mask width matches 'msr_val'. */
	return ((msr_val & ((uint64_t)1 << bitpos)) == 0);
}
/*
 * Returns the low 32 bits of the value read from MSR_VMX_BASIC.
 *
 * NOTE(review): the signature of the enclosing function is not visible in
 * this excerpt (presumably vmx_revision()) - confirm against the full file.
 */
67 return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
71 * Generate a bitmask to be used for the VMCS execution control fields.
73 * The caller specifies what bits should be set to one in 'ones_mask'
74 * and what bits should be set to zero in 'zeros_mask'. The don't-care
75 * bits are set to the default value. The default values are obtained
76 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
79 * Returns zero on success and non-zero on error.
82 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
83 uint32_t zeros_mask, uint32_t *retval)
86 uint64_t val, trueval;
87 bool true_ctls_avail, one_allowed, zero_allowed;
89 /* We cannot ask the same bit to be set to both '1' and '0' */
90 if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
93 true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;
97 trueval = rdmsr(true_ctl_reg); /* step c */
99 trueval = val; /* step a */
101 for (i = 0; i < 32; i++) {
102 one_allowed = vmx_ctl_allows_one_setting(trueval, i);
103 zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
105 KASSERT(one_allowed || zero_allowed,
106 ("invalid zero/one setting for bit %d of ctl 0x%0x, "
107 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
109 if (zero_allowed && !one_allowed) { /* b(i),c(i) */
110 if (ones_mask & (1 << i))
112 *retval &= ~(1 << i);
113 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */
114 if (zeros_mask & (1 << i))
118 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */
119 *retval &= ~(1 << i);
120 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
122 else if (!true_ctls_avail)
123 *retval &= ~(1 << i); /* b(iii) */
124 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
125 *retval &= ~(1 << i);
126 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
129 panic("vmx_set_ctlreg: unable to determine "
130 "correct value of ctl bit %d for msr "
131 "0x%0x and true msr 0x%0x", i, ctl_reg,
141 msr_bitmap_initialize(char *bitmap)
144 memset(bitmap, 0xff, PAGE_SIZE);
148 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
152 if (msr <= 0x00001FFF)
154 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
155 byte = 1024 + (msr - 0xC0000000) / 8;
161 if (access & MSR_BITMAP_ACCESS_READ)
162 bitmap[byte] &= ~(1 << bit);
164 bitmap[byte] |= 1 << bit;
167 if (access & MSR_BITMAP_ACCESS_WRITE)
168 bitmap[byte] &= ~(1 << bit);
170 bitmap[byte] |= 1 << bit;
175 static uint64_t misc_enable;
176 static uint64_t platform_info;
177 static uint64_t turbo_ratio_limit;
178 static uint64_t host_msrs[GUEST_MSR_NUM];
/*
 * NOTE(review): fragment of a CPU-identification helper, presumably the
 * body of nehalem_cpu(), which is called later in this file. The signature
 * and the comparison against specific family/model values are not visible
 * in this excerpt. The visible statements derive 'family' and 'model' from
 * cpu_id via CPUID_TO_FAMILY() and CPUID_TO_MODEL().
 */
186 * The family:model numbers belonging to the Nehalem microarchitecture
187 * are documented in Section 35.5, Intel SDM dated Feb 2014.
189 family = CPUID_TO_FAMILY(cpu_id);
190 model = CPUID_TO_MODEL(cpu_id);
/*
 * NOTE(review): fragment of a CPU-identification helper, presumably the
 * body of westmere_cpu(), which is called later in this file. The signature
 * and the comparison against specific family/model values are not visible
 * in this excerpt. The visible statements derive 'family' and 'model' from
 * cpu_id via CPUID_TO_FAMILY() and CPUID_TO_MODEL().
 */
211 * The family:model numbers belonging to the Westmere microarchitecture
212 * are documented in Section 35.6, Intel SDM dated Feb 2014.
214 family = CPUID_TO_FAMILY(cpu_id);
215 model = CPUID_TO_MODEL(cpu_id);
/*
 * Check that 'val' is an acceptable value for the PAT MSR.
 *
 * Returns true when every one of the eight byte-wide fields is valid and
 * false as soon as an invalid field is found.
 */
static bool
pat_valid(uint64_t val)
{
	unsigned int pa;
	int i;

	/*
	 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
	 *
	 * Extract PA0 through PA7 and validate that each one encodes a
	 * valid memory type.
	 */
	for (i = 0; i < 8; i++) {
		pa = (val >> (i * 8)) & 0xff;
		/* Encodings 2, 3 and everything from 8 upward are rejected. */
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}
/*
 * NOTE(review): interior of the MSR initialization routine (presumably
 * vmx_msr_init()); its signature and several short lines are not visible
 * in this excerpt, so the summary below covers only the visible statements.
 *
 *  - Caches the host values of MSR_LSTAR, MSR_CSTAR, MSR_STAR and
 *    MSR_SF_MASK in host_msrs[].
 *  - Seeds misc_enable from the host MSR_IA32_MISC_ENABLE, then forces
 *    bits 11 and 12 on and bits 16 and 18 off.
 *  - Chooses bus_freq based on nehalem_cpu() || westmere_cpu(); presumably
 *    133.33 MHz when that is true and 100 MHz otherwise (the 'else' line is
 *    not visible).
 *  - Computes an 8-bit ratio as tsc_freq / bus_freq, stores it in bits 15:8
 *    and 47:40 of platform_info, and shifts it into turbo_ratio_limit eight
 *    times so that every byte of that variable holds the ratio.
 */
250 uint64_t bus_freq, ratio;
254 * It is safe to cache the values of the following MSRs because
255 * they don't change based on curcpu, curproc or curthread.
257 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
258 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
259 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
260 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
263 * Initialize emulated MSRs
265 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
268 * 11: branch trace disabled
269 * 12: PEBS unavailable
270 * Clear unsupported features
271 * 16: SpeedStep enable
272 * 18: enable MONITOR FSM
274 misc_enable |= (1 << 12) | (1 << 11);
275 misc_enable &= ~((1 << 18) | (1 << 16));
277 if (nehalem_cpu() || westmere_cpu())
278 bus_freq = 133330000; /* 133Mhz */
280 bus_freq = 100000000; /* 100Mhz */
284 * The ratio should really be based on the virtual TSC frequency as
285 * opposed to the host TSC.
287 ratio = (tsc_freq / bus_freq) & 0xff;
290 * The register definition is based on the micro-architecture
291 * but the following bits are always the same:
292 * [15:8] Maximum Non-Turbo Ratio
293 * [28] Programmable Ratio Limit for Turbo Mode
294 * [29] Programmable TDC-TDP Limit for Turbo Mode
295 * [47:40] Maximum Efficiency Ratio
297 * The other bits can be safely set to 0 on all
298 * micro-architectures up to Haswell.
300 platform_info = (ratio << 8) | (ratio << 40);
303 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
304 * dependent on the maximum cores per package supported by the micro-
305 * architecture. For e.g., Westmere supports 6 cores per package and
306 * uses the low 48 bits. Sandybridge support 8 cores per package and
307 * uses up all 64 bits.
309 * However, the unused bits are reserved so we pretend that all bits
310 * in this MSR are valid.
312 for (i = 0; i < 8; i++)
313 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
317 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
319 uint64_t *guest_msrs;
321 guest_msrs = vmx->guest_msrs[vcpuid];
324 * The permissions bitmap is shared between all vcpus so initialize it
325 * once when initializing the vBSP.
328 guest_msr_rw(vmx, MSR_LSTAR);
329 guest_msr_rw(vmx, MSR_CSTAR);
330 guest_msr_rw(vmx, MSR_STAR);
331 guest_msr_rw(vmx, MSR_SF_MASK);
332 guest_msr_rw(vmx, MSR_KGSBASE);
336 * Initialize guest IA32_PAT MSR with default value after reset.
338 guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
339 PAT_VALUE(1, PAT_WRITE_THROUGH) |
340 PAT_VALUE(2, PAT_UNCACHED) |
341 PAT_VALUE(3, PAT_UNCACHEABLE) |
342 PAT_VALUE(4, PAT_WRITE_BACK) |
343 PAT_VALUE(5, PAT_WRITE_THROUGH) |
344 PAT_VALUE(6, PAT_UNCACHED) |
345 PAT_VALUE(7, PAT_UNCACHEABLE);
351 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
353 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
355 /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
356 update_pcb_bases(curpcb);
357 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
358 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
359 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
360 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
361 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
365 vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid)
367 uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
368 uint32_t host_aux = cpu_auxmsr();
370 if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux)
371 wrmsr(MSR_TSC_AUX, guest_tsc_aux);
375 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
377 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
379 /* Save guest MSRs */
380 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
381 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
382 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
383 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
384 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
386 /* Restore host MSRs */
387 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
388 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
389 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
390 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
392 /* MSR_KGSBASE will be restored on the way back to userspace */
396 vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid)
398 uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
399 uint32_t host_aux = cpu_auxmsr();
401 if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux)
403 * Note that it is not necessary to save the guest value
404 * here; vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX] always
405 * contains the current value since it is updated whenever
406 * the guest writes to it (which is expected to be very
409 wrmsr(MSR_TSC_AUX, host_aux);
/*
 * Handle a guest read of MSR 'num' on vcpu 'vcpuid', storing the result
 * through 'val'.
 *
 * NOTE(review): several case labels, the default case and the return
 * statement are not visible in this excerpt, so only the visible
 * assignments are described:
 *  - MSR_PLATFORM_INFO yields platform_info;
 *  - MSR_TURBO_RATIO_LIMIT and MSR_TURBO_RATIO_LIMIT1 yield
 *    turbo_ratio_limit;
 *  - one case (label not visible) yields the vcpu's saved IDX_MSR_PAT
 *    value.
 * 'retu' is not referenced in the visible lines.
 */
413 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
415 const uint64_t *guest_msrs;
418 guest_msrs = vmx->guest_msrs[vcpuid];
427 case MSR_MTRRdefType:
428 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
429 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
430 case MSR_MTRR64kBase:
433 case MSR_IA32_MISC_ENABLE:
436 case MSR_PLATFORM_INFO:
437 *val = platform_info;
439 case MSR_TURBO_RATIO_LIMIT:
440 case MSR_TURBO_RATIO_LIMIT1:
441 *val = turbo_ratio_limit;
444 *val = guest_msrs[IDX_MSR_PAT];
454 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
456 uint64_t *guest_msrs;
460 guest_msrs = vmx->guest_msrs[vcpuid];
466 break; /* ignore writes */
468 vm_inject_gp(vmx->vm, vcpuid);
470 case MSR_MTRRdefType:
471 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
472 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
473 case MSR_MTRR64kBase:
474 break; /* Ignore writes */
475 case MSR_IA32_MISC_ENABLE:
476 changed = val ^ misc_enable;
478 * If the host has disabled the NX feature then the guest
479 * also cannot use it. However, a Linux guest will try to
480 * enable the NX feature by writing to the MISC_ENABLE MSR.
482 * This can be safely ignored because the memory management
483 * code looks at CPUID.80000001H:EDX.NX to check if the
484 * functionality is actually enabled.
486 changed &= ~(1UL << 34);
489 * Punt to userspace if any other bits are being modified.
497 guest_msrs[IDX_MSR_PAT] = val;
499 vm_inject_gp(vmx->vm, vcpuid);
502 error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
505 if (vmx_have_msr_tsc_aux(vmx))
507 * vmx_msr_guest_enter_tsc_aux() will apply this
508 * value when it is called immediately before guest
511 guest_msrs[IDX_MSR_TSC_AUX] = val;
513 vm_inject_gp(vmx->vm, vcpuid);