2 * Copyright (c) 2011 NetApp, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
41 #include <machine/param.h>
42 #include <machine/cpufunc.h>
43 #include <machine/pmap.h>
44 #include <machine/vmparam.h>
46 #include <machine/vmm.h>
47 #include "vmx_cpufunc.h"
/*
 * Feature bits reported in the MSR_VMX_EPT_VPID_CAP capability MSR.
 * These are tested at initialization below before EPT is used.
 */
52 #define EPT_PWL4(cap) ((cap) & (1UL << 6))
53 #define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14))
54 #define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */
55 #define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */
56 #define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32))
57 #define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20))
/* Every invvpid type must be reported as supported. */
59 #define INVVPID_ALL_TYPES_MASK 0xF0000000000UL
60 #define INVVPID_ALL_TYPES_SUPPORTED(cap) \
61 (((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK)
/* Every invept type must be reported as supported. */
63 #define INVEPT_ALL_TYPES_MASK 0x6000000UL
64 #define INVEPT_ALL_TYPES_SUPPORTED(cap) \
65 (((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)
/* Bits in an EPT page-table entry. */
67 #define EPT_PG_RD (1 << 0)
68 #define EPT_PG_WR (1 << 1)
69 #define EPT_PG_EX (1 << 2)
70 #define EPT_PG_MEMORY_TYPE(x) ((x) << 3)
71 #define EPT_PG_IGNORE_PAT (1 << 6)
72 #define EPT_PG_SUPERPAGE (1 << 7)
/* Extracts the 4KB-aligned physical address from an EPT entry. */
74 #define EPT_ADDR_MASK ((uint64_t)-1 << 12)
/* Malloc type shared with the rest of the vmx code. */
76 MALLOC_DECLARE(M_VMX);
/*
 * Bitmask of mapping sizes usable in the EPT: the 4KB bit is always set
 * at initialization; the 2MB and 1GB bits are added when the processor
 * reports superpage support (see the EPT_PDE_SUPERPAGE /
 * EPT_PDPTE_SUPERPAGE checks below).
 */
78 static uint64_t page_sizes_mask;
/*
 * NOTE(review): body of the EPT initialization routine; its header and
 * several interior lines are elided from this extract — confirm against
 * the full source.
 */
86 cap = rdmsr(MSR_VMX_EPT_VPID_CAP);
/*
 * Verify that the processor supports everything that this module
 * requires:
 */
90 * - page walk length is 4 steps
91 * - extended page tables can be laid out in write-back memory
92 * - invvpid instruction with all possible types is supported
93 * - invept instruction with all possible types is supported
96 !EPT_MEMORY_TYPE_WB(cap) ||
97 !INVVPID_SUPPORTED(cap) ||
98 !INVVPID_ALL_TYPES_SUPPORTED(cap) ||
99 !INVEPT_SUPPORTED(cap) ||
100 !INVEPT_ALL_TYPES_SUPPORTED(cap))
103 /* Set bits in 'page_sizes_mask' for each valid page size */
104 page_shift = PAGE_SHIFT;
105 page_sizes_mask = 1UL << page_shift; /* 4KB page */
/*
 * NOTE(review): 'page_shift' is presumably advanced (by 9 per level) on
 * lines elided between these statements — verify against full source.
 */
108 if (EPT_PDE_SUPERPAGE(cap))
109 page_sizes_mask |= 1UL << page_shift; /* 2MB superpage */
112 if (EPT_PDPTE_SUPERPAGE(cap))
113 page_sizes_mask |= 1UL << page_shift; /* 1GB superpage */
/*
 * Debug helper: print the 512 entries of the EPT page table page 'ptp'
 * and recurse into lower-level table pages referenced by non-superpage
 * entries.  NOTE(review): the return type, locals, indentation logic
 * ('tabs') and nlevels adjustment are on lines elided from this extract.
 */
120 ept_dump(uint64_t *ptp, int nlevels)
123 uint64_t *ptpnext, ptpval;
129 for (t = 0; t < tabs; t++)
131 printf("PTP = %p\n", ptp);
133 for (i = 0; i < 512; i++) {
139 for (t = 0; t < tabs; t++)
141 printf("%3d 0x%016lx\n", i, ptpval);
/* Recurse only for non-leaf entries (not a superpage, not level 0). */
143 if (nlevels != 0 && (ptpval & EPT_PG_SUPERPAGE) == 0) {
144 ptpnext = (uint64_t *)
145 PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
146 ept_dump(ptpnext, nlevels);
/*
 * Install one leaf mapping in the EPT rooted at 'ptp' translating guest
 * physical 'gpa' to host physical 'hpa' with memory attribute 'attr'
 * and protection 'prot'.  Intermediate page table pages are allocated
 * on demand.  Returns the size of the mapping created (1UL << ptpshift).
 * NOTE(review): several interior lines (locals, loop advance, 'break',
 * superpage/leaf else-branches) are elided from this extract.
 */
153 ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
154 vm_memattr_t attr, vm_prot_t prot, boolean_t spok)
156 int spshift, ptpshift, ptpindex, nlevels;
159 * Compute the size of the mapping that we can accommodate.
161 * This is based on three factors:
162 * - super page sizes supported by the processor
163 * - alignment of the region starting at 'gpa' and 'hpa'
164 * - length of the region 'len'
166 spshift = PAGE_SHIFT;
168 spshift += (EPT_PWLEVELS - 1) * 9;
/* Shrink the candidate mapping size until alignment/length permit it. */
169 while (spshift >= PAGE_SHIFT) {
170 uint64_t spsize = 1UL << spshift;
171 if ((page_sizes_mask & spsize) != 0 &&
172 (gpa & (spsize - 1)) == 0 &&
173 (hpa & (spsize - 1)) == 0 &&
/*
 * NOTE(review): the length test, loop exit and 'spshift -= 9' step are
 * on lines elided from this extract.
 */
180 if (spshift < PAGE_SHIFT) {
181 panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, "
182 "length 0x%016lx, page_sizes_mask 0x%016lx",
183 gpa, hpa, length, page_sizes_mask);
/* Walk from the root (PML4) level down toward the leaf level. */
186 nlevels = EPT_PWLEVELS;
187 while (--nlevels >= 0) {
188 ptpshift = PAGE_SHIFT + nlevels * 9;
189 ptpindex = (gpa >> ptpshift) & 0x1FF;
191 /* We have reached the leaf mapping */
192 if (spshift >= ptpshift)
196 * We are working on a non-leaf page table page.
198 * Create the next level page table page if necessary and point
199 * to it from the current page table.
201 if (ptp[ptpindex] == 0) {
202 void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO);
203 ptp[ptpindex] = vtophys(nlp);
/* Intermediate entries allow RWX; the leaf enforces 'prot'. */
204 ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX;
207 /* Work our way down to the next level page table page */
208 ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK);
/* The leaf must be naturally aligned for the chosen mapping size. */
211 if ((gpa & ((1UL << ptpshift) - 1)) != 0) {
212 panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d "
213 "mismatch\n", gpa, ptpshift);
216 if (prot != VM_PROT_NONE) {
220 /* Apply the access controls */
221 if (prot & VM_PROT_READ)
222 ptp[ptpindex] |= EPT_PG_RD;
223 if (prot & VM_PROT_WRITE)
224 ptp[ptpindex] |= EPT_PG_WR;
225 if (prot & VM_PROT_EXECUTE)
226 ptp[ptpindex] |= EPT_PG_EX;
229 * XXX should we enforce this memory type by setting the
230 * ignore PAT bit to 1.
232 ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr);
/* NOTE(review): the superpage test guarding this line is elided. */
235 ptp[ptpindex] |= EPT_PG_SUPERPAGE;
237 /* Remove the mapping */
241 return (1UL << ptpshift);
/*
 * Walk the EPT rooted at 'ptp' and translate guest physical address
 * 'gpa' to a host physical address.  Returns (vm_paddr_t)-1 if no
 * mapping exists.  NOTE(review): the return type and some brace lines
 * are elided from this extract.
 */
245 ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa)
247 int nlevels, ptpshift, ptpindex;
248 uint64_t ptpval, hpabase, pgmask;
250 nlevels = EPT_PWLEVELS;
251 while (--nlevels >= 0) {
252 ptpshift = PAGE_SHIFT + nlevels * 9;
253 ptpindex = (gpa >> ptpshift) & 0x1FF;
255 ptpval = ptp[ptpindex];
257 /* Cannot make progress beyond this point */
258 if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0)
/* A leaf entry: combine the page frame with the in-page offset. */
261 if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) {
262 pgmask = (1UL << ptpshift) - 1;
263 hpabase = ptpval & ~pgmask;
264 return (hpabase | (gpa & pgmask));
267 /* Work our way down to the next level page table page */
268 ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
/* No translation found. */
271 return ((vm_paddr_t)-1);
/*
 * Release a leaf (4KB-level) EPT entry.  A superpage bit is invalid at
 * this level.  NOTE(review): the return type and remaining body are
 * elided from this extract.
 */
275 ept_free_pt_entry(pt_entry_t pte)
281 if ((pte & EPT_PG_SUPERPAGE) != 0)
282 panic("ept_free_pt_entry: pte cannot have superpage bit");
/*
 * Release a page-directory-level EPT entry.  A non-superpage entry
 * references a page table page: free each of its entries, then the page
 * itself.  NOTE(review): the return type, locals and any validity check
 * on 'pde' are elided from this extract.
 */
288 ept_free_pd_entry(pd_entry_t pde)
296 if ((pde & EPT_PG_SUPERPAGE) == 0) {
297 pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK);
298 for (i = 0; i < NPTEPG; i++)
299 ept_free_pt_entry(pt[i]);
300 free(pt, M_VMX); /* free the page table page */
/*
 * Release a page-directory-pointer-level EPT entry, recursing into the
 * page directory page it references unless the entry is a 1GB
 * superpage.  NOTE(review): the return type, locals and any validity
 * check on 'pdpe' are elided from this extract.
 */
305 ept_free_pdp_entry(pdp_entry_t pdpe)
313 if ((pdpe & EPT_PG_SUPERPAGE) == 0) {
314 pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK);
315 for (i = 0; i < NPDEPG; i++)
316 ept_free_pd_entry(pd[i]);
317 free(pd, M_VMX); /* free the page directory page */
/*
 * Release a PML4-level EPT entry, recursing into the page directory
 * pointer page it references.  NOTE(review): the return type, locals
 * and any validity check on 'pml4e' are elided from this extract.
 */
322 ept_free_pml4_entry(pml4_entry_t pml4e)
330 if ((pml4e & EPT_PG_SUPERPAGE) == 0) {
331 pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK);
332 for (i = 0; i < NPDPEPG; i++)
333 ept_free_pdp_entry(pdp[i]);
334 free(pdp, M_VMX); /* free the page directory ptr page */
/*
 * Tear down the entire EPT hierarchy of a VM by freeing every PML4
 * entry (and, recursively, everything beneath it).
 */
339 ept_vmcleanup(struct vmx *vmx)
343 for (i = 0; i < NPML4EPG; i++)
344 ept_free_pml4_entry(vmx->pml4ept[i]);
/*
 * vmm callback: map [gpa, gpa + len) to 'hpa' in this VM's EPT by
 * calling ept_create_mapping().  NOTE(review): the loop that advances
 * gpa/hpa/len by the returned mapping size 'n' is elided from this
 * extract — confirm against full source.
 */
348 ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len,
349 vm_memattr_t attr, int prot, boolean_t spok)
352 struct vmx *vmx = arg;
355 n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr,
/*
 * vmm callback: translate guest physical 'gpa' via this VM's EPT.
 * NOTE(review): locals ('vmx', 'hpa') and the return are on lines
 * elided from this extract.
 */
366 ept_vmmmap_get(void *arg, vm_paddr_t gpa)
372 hpa = ept_lookup_mapping(vmx->pml4ept, gpa);
/*
 * smp_rendezvous action routine: execute a single-context invept on the
 * local CPU using the descriptor passed in 'arg'.
 */
377 invept_single_context(void *arg)
379 struct invept_desc desc = *(struct invept_desc *)arg;
381 invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
/*
 * Invalidate cached EPT translations for the given PML4 root on every
 * CPU by rendezvousing them through invept_single_context().
 */
385 ept_invalidate_mappings(u_long pml4ept)
387 struct invept_desc invept_desc = { 0 };
389 invept_desc.eptp = EPTP(pml4ept);
391 smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);