2 * Copyright (c) 2011 NetApp, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <machine/clock.h>
36 #include <machine/cpufunc.h>
37 #include <machine/md_var.h>
38 #include <machine/specialreg.h>
39 #include <machine/vmm.h>
45 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
48 if (msr_val & (1UL << (bitpos + 32)))
55 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
58 if ((msr_val & (1UL << bitpos)) == 0)
68 return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
72 * Generate a bitmask to be used for the VMCS execution control fields.
74 * The caller specifies what bits should be set to one in 'ones_mask'
75 * and what bits should be set to zero in 'zeros_mask'. The don't-care
76 * bits are set to the default value. The default values are obtained
77 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
78 * VMX Capabilities".
80 * Returns zero on success and non-zero on error.
83 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
84 uint32_t zeros_mask, uint32_t *retval)
87 uint64_t val, trueval;
88 boolean_t true_ctls_avail, one_allowed, zero_allowed;
90 /* We cannot ask the same bit to be set to both '1' and '0' */
91 if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
94 if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
95 true_ctls_avail = TRUE;
97 true_ctls_avail = FALSE;
101 trueval = rdmsr(true_ctl_reg); /* step c */
103 trueval = val; /* step a */
105 for (i = 0; i < 32; i++) {
106 one_allowed = vmx_ctl_allows_one_setting(trueval, i);
107 zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
109 KASSERT(one_allowed || zero_allowed,
110 ("invalid zero/one setting for bit %d of ctl 0x%0x, "
111 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
113 if (zero_allowed && !one_allowed) { /* b(i),c(i) */
114 if (ones_mask & (1 << i))
116 *retval &= ~(1 << i);
117 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */
118 if (zeros_mask & (1 << i))
122 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */
123 *retval &= ~(1 << i);
124 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
126 else if (!true_ctls_avail)
127 *retval &= ~(1 << i); /* b(iii) */
128 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
129 *retval &= ~(1 << i);
130 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
133 panic("vmx_set_ctlreg: unable to determine "
134 "correct value of ctl bit %d for msr "
135 "0x%0x and true msr 0x%0x", i, ctl_reg,
145 msr_bitmap_initialize(char *bitmap)
148 memset(bitmap, 0xff, PAGE_SIZE);
152 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
156 if (msr <= 0x00001FFF)
158 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
159 byte = 1024 + (msr - 0xC0000000) / 8;
165 if (access & MSR_BITMAP_ACCESS_READ)
166 bitmap[byte] &= ~(1 << bit);
168 bitmap[byte] |= 1 << bit;
171 if (access & MSR_BITMAP_ACCESS_WRITE)
172 bitmap[byte] &= ~(1 << bit);
174 bitmap[byte] |= 1 << bit;
179 static uint64_t misc_enable;
180 static uint64_t platform_info;
181 static uint64_t turbo_ratio_limit;
182 static uint64_t host_msrs[GUEST_MSR_NUM];
190 * The family:model numbers belonging to the Nehalem microarchitecture
191 * are documented in Section 35.5, Intel SDM dated Feb 2014.
193 family = CPUID_TO_FAMILY(cpu_id);
194 model = CPUID_TO_MODEL(cpu_id);
215 * The family:model numbers belonging to the Westmere microarchitecture
216 * are documented in Section 35.6, Intel SDM dated Feb 2014.
218 family = CPUID_TO_FAMILY(cpu_id);
219 model = CPUID_TO_MODEL(cpu_id);
233 pat_valid(uint64_t val)
238 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
240 * Extract PA0 through PA7 and validate that each one encodes a
241 * valid memory type.
243 for (i = 0; i < 8; i++) {
244 pa = (val >> (i * 8)) & 0xff;
245 if (pa == 2 || pa == 3 || pa >= 8)
254 uint64_t bus_freq, ratio;
258 * It is safe to cache the values of the following MSRs because
259 * they don't change based on curcpu, curproc or curthread.
261 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
262 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
263 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
264 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
267 * Initialize emulated MSRs
269 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
271 * Set mandatory bits
272 * 11: branch trace disabled
273 * 12: PEBS unavailable
274 * Clear unsupported features
275 * 16: SpeedStep enable
276 * 18: enable MONITOR FSM
278 misc_enable |= (1 << 12) | (1 << 11);
279 misc_enable &= ~((1 << 18) | (1 << 16));
281 if (nehalem_cpu() || westmere_cpu())
282 bus_freq = 133330000; /* 133Mhz */
284 bus_freq = 100000000; /* 100Mhz */
288 * The ratio should really be based on the virtual TSC frequency as
289 * opposed to the host TSC.
291 ratio = (tsc_freq / bus_freq) & 0xff;
294 * The register definition is based on the micro-architecture
295 * but the following bits are always the same:
296 * [15:8] Maximum Non-Turbo Ratio
297 * [28] Programmable Ratio Limit for Turbo Mode
298 * [29] Programmable TDC-TDP Limit for Turbo Mode
299 * [47:40] Maximum Efficiency Ratio
301 * The other bits can be safely set to 0 on all
302 * micro-architectures up to Haswell.
304 platform_info = (ratio << 8) | (ratio << 40);
307 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
308 * dependent on the maximum cores per package supported by the micro-
309 * architecture. For example, Westmere supports 6 cores per package and
310 * uses the low 48 bits. Sandy Bridge supports 8 cores per package and
311 * uses up all 64 bits.
313 * However, the unused bits are reserved so we pretend that all bits
314 * in this MSR are valid.
316 for (i = 0; i < 8; i++)
317 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
321 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
323 uint64_t *guest_msrs;
325 guest_msrs = vmx->guest_msrs[vcpuid];
328 * The permissions bitmap is shared between all vcpus so initialize it
329 * once when initializing the vBSP.
332 guest_msr_rw(vmx, MSR_LSTAR);
333 guest_msr_rw(vmx, MSR_CSTAR);
334 guest_msr_rw(vmx, MSR_STAR);
335 guest_msr_rw(vmx, MSR_SF_MASK);
336 guest_msr_rw(vmx, MSR_KGSBASE);
340 * Initialize guest IA32_PAT MSR with default value after reset.
342 guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
343 PAT_VALUE(1, PAT_WRITE_THROUGH) |
344 PAT_VALUE(2, PAT_UNCACHED) |
345 PAT_VALUE(3, PAT_UNCACHEABLE) |
346 PAT_VALUE(4, PAT_WRITE_BACK) |
347 PAT_VALUE(5, PAT_WRITE_THROUGH) |
348 PAT_VALUE(6, PAT_UNCACHED) |
349 PAT_VALUE(7, PAT_UNCACHEABLE);
355 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
357 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
359 /* Save host MSRs (if any) and restore guest MSRs */
360 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
361 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
362 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
363 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
364 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
368 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
370 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
372 /* Save guest MSRs */
373 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
374 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
375 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
376 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
377 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
379 /* Restore host MSRs */
380 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
381 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
382 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
383 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
385 /* MSR_KGSBASE will be restored on the way back to userspace */
389 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
391 const uint64_t *guest_msrs;
394 guest_msrs = vmx->guest_msrs[vcpuid];
403 case MSR_MTRRdefType:
404 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
405 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
406 case MSR_MTRR64kBase:
409 case MSR_IA32_MISC_ENABLE:
412 case MSR_PLATFORM_INFO:
413 *val = platform_info;
415 case MSR_TURBO_RATIO_LIMIT:
416 case MSR_TURBO_RATIO_LIMIT1:
417 *val = turbo_ratio_limit;
420 *val = guest_msrs[IDX_MSR_PAT];
430 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
432 uint64_t *guest_msrs;
436 guest_msrs = vmx->guest_msrs[vcpuid];
442 break; /* ignore writes */
444 vm_inject_gp(vmx->vm, vcpuid);
446 case MSR_MTRRdefType:
447 case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
448 case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
449 case MSR_MTRR64kBase:
450 break; /* Ignore writes */
451 case MSR_IA32_MISC_ENABLE:
452 changed = val ^ misc_enable;
454 * If the host has disabled the NX feature then the guest
455 * also cannot use it. However, a Linux guest will try to
456 * enable the NX feature by writing to the MISC_ENABLE MSR.
458 * This can be safely ignored because the memory management
459 * code looks at CPUID.80000001H:EDX.NX to check if the
460 * functionality is actually enabled.
462 changed &= ~(1UL << 34);
465 * Punt to userspace if any other bits are being modified.
473 guest_msrs[IDX_MSR_PAT] = val;
475 vm_inject_gp(vmx->vm, vcpuid);
478 error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());