CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/amd64/vmm/vmm_mem.c
First cut to port bhyve, vmmctl, and libvmmapi to HEAD.
[FreeBSD/FreeBSD.git] / sys / amd64 / vmm / vmm_mem.c
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/lock.h>
34 #include <sys/mutex.h>
35 #include <sys/linker.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/kernel.h>
39
40 #include <vm/vm.h>
41 #include <vm/pmap.h>
42
43 #include <machine/md_var.h>
44 #include <machine/metadata.h>
45 #include <machine/pc/bios.h>
46 #include <machine/vmparam.h>
47 #include <machine/pmap.h>
48
49 #include "vmm_util.h"
50 #include "vmm_mem.h"
51
52 static MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory");
53
54 #define MB              (1024 * 1024)
55 #define GB              (1024 * MB)
56
57 #define VMM_MEM_MAXSEGS 64
58
59 /* protected by vmm_mem_mtx */
60 static struct {
61         vm_paddr_t      base;
62         vm_size_t       length;
63 } vmm_mem_avail[VMM_MEM_MAXSEGS];
64
65 static int vmm_mem_nsegs;
66
67 static vm_paddr_t maxaddr;
68
69 static struct mtx vmm_mem_mtx;
70
71 /*
72  * Steal any memory that was deliberately hidden from FreeBSD either by
73  * the use of MAXMEM kernel config option or the hw.physmem loader tunable.
74  */
75 static int
76 vmm_mem_steal_memory(void)
77 {
78         int nsegs;
79         caddr_t kmdp;
80         uint32_t smapsize;
81         uint64_t base, length;
82         struct bios_smap *smapbase, *smap, *smapend;
83
84         /*
85          * Borrowed from hammer_time() and getmemsize() in machdep.c
86          */
87         kmdp = preload_search_by_type("elf kernel");
88         if (kmdp == NULL)
89                 kmdp = preload_search_by_type("elf64 kernel");
90
91         smapbase = (struct bios_smap *)preload_search_info(kmdp,
92                 MODINFO_METADATA | MODINFOMD_SMAP);
93         if (smapbase == NULL)
94                 panic("No BIOS smap info from loader!");
95
96         smapsize = *((uint32_t *)smapbase - 1);
97         smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
98
99         nsegs = 0;
100         for (smap = smapbase; smap < smapend; smap++) {
101                 /*
102                  * XXX
103                  * Assuming non-overlapping, monotonically increasing
104                  * memory segments.
105                  */
106                 if (smap->type != SMAP_TYPE_MEMORY)
107                         continue;
108                 if (smap->length == 0)
109                         break;
110
111                 base = roundup(smap->base, NBPDR);
112                 length = rounddown(smap->length, NBPDR);
113
114                 /* Skip this segment if FreeBSD is using all of it. */
115                 if (base + length <= ptoa(Maxmem))
116                         continue;
117
118                 /*
119                  * If FreeBSD is using part of this segment then adjust
120                  * 'base' and 'length' accordingly.
121                  */
122                 if (base < ptoa(Maxmem)) {
123                         uint64_t used;
124                         used = roundup(ptoa(Maxmem), NBPDR) - base;
125                         base += used;
126                         length -= used;
127                 }
128
129                 if (length == 0)
130                         continue;
131
132                 vmm_mem_avail[nsegs].base = base;
133                 vmm_mem_avail[nsegs].length = length;
134
135                 if (base + length > maxaddr)
136                         maxaddr = base + length;
137
138                 if (0 && bootverbose) {
139                         printf("vmm_mem_populate: index %d, base 0x%0lx, "
140                                "length %ld\n",
141                                nsegs, vmm_mem_avail[nsegs].base,
142                                vmm_mem_avail[nsegs].length);
143                 }
144
145                 nsegs++;
146                 if (nsegs >= VMM_MEM_MAXSEGS) {
147                         printf("vmm_mem_populate: maximum number of vmm memory "
148                                "segments reached!\n");
149                         return (ENOSPC);
150                 }
151         }
152
153         vmm_mem_nsegs = nsegs;
154
155         return (0);
156 }
157
158 static void
159 vmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end)
160 {
161         vm_paddr_t addr, remaining;
162         int pdpi, pdi, superpage_size;
163         pml4_entry_t *pml4p;
164         pdp_entry_t *pdp;
165         pd_entry_t *pd;
166         uint64_t page_attr_bits;
167
168         if (end >= NBPML4)
169                 panic("Cannot map memory beyond %ldGB", NBPML4 / GB);
170
171         if (vmm_supports_1G_pages())
172                 superpage_size = NBPDP;
173         else
174                 superpage_size = NBPDR;
175
176         /*
177          * Get the page directory pointer page that contains the direct
178          * map address mappings.
179          */
180         pml4p = kernel_pmap->pm_pml4;
181         pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK);
182
183         page_attr_bits = PG_RW | PG_V | PG_PS | PG_G;
184         addr = start;
185         while (addr < end) {
186                 remaining = end - addr;
187                 pdpi = addr / NBPDP;
188                 if (superpage_size == NBPDP &&
189                     remaining >= NBPDP &&
190                     addr % NBPDP == 0) {
191                         /*
192                          * If there isn't a mapping for this address then
193                          * create one but if there is one already make sure
194                          * it matches what we expect it to be.
195                          */
196                         if (pdp[pdpi] == 0) {
197                                 pdp[pdpi] = addr | page_attr_bits;
198                                 if (0 && bootverbose) {
199                                         printf("vmm_mem_populate: mapping "
200                                                "0x%lx with 1GB page at "
201                                                "pdpi %d\n", addr, pdpi);
202                                 }
203                         } else {
204                                 pdp_entry_t pdpe = pdp[pdpi];
205                                 if ((pdpe & ~PAGE_MASK) != addr ||
206                                     (pdpe & page_attr_bits) != page_attr_bits) {
207                                         panic("An invalid mapping 0x%016lx "
208                                               "already exists for 0x%016lx\n",
209                                               pdpe, addr);
210                                 }
211                         }
212                         addr += NBPDP;
213                 } else {
214                         if (remaining < NBPDR) {
215                                 panic("vmm_mem_populate: remaining (%ld) must "
216                                       "be greater than NBPDR (%d)\n",
217                                       remaining, NBPDR);
218                         }
219                         if (pdp[pdpi] == 0) {
220                                 /*
221                                  * XXX we lose this memory forever because
222                                  * we do not keep track of the virtual address
223                                  * that would be required to free this page.
224                                  */
225                                 pd = malloc(PAGE_SIZE, M_VMM_MEM,
226                                             M_WAITOK | M_ZERO);
227                                 if ((uintptr_t)pd & PAGE_MASK) {
228                                         panic("vmm_mem_populate: page directory"
229                                               "page not aligned on %d "
230                                               "boundary\n", PAGE_SIZE);
231                                 }
232                                 pdp[pdpi] = vtophys(pd);
233                                 pdp[pdpi] |= PG_RW | PG_V | PG_U;
234                                 if (0 && bootverbose) {
235                                         printf("Creating page directory "
236                                                "at pdp index %d for 0x%016lx\n",
237                                                pdpi, addr);
238                                 }
239                         }
240                         pdi = (addr % NBPDP) / NBPDR;
241                         pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK);
242
243                         /*
244                          * Create a new mapping if one doesn't already exist
245                          * or validate it if it does.
246                          */
247                         if (pd[pdi] == 0) {
248                                 pd[pdi] = addr | page_attr_bits;
249                                 if (0 && bootverbose) {
250                                         printf("vmm_mem_populate: mapping "
251                                                "0x%lx with 2MB page at "
252                                                "pdpi %d, pdi %d\n",
253                                                addr, pdpi, pdi);
254                                 }
255                         } else {
256                                 pd_entry_t pde = pd[pdi];
257                                 if ((pde & ~PAGE_MASK) != addr ||
258                                     (pde & page_attr_bits) != page_attr_bits) {
259                                         panic("An invalid mapping 0x%016lx "
260                                               "already exists for 0x%016lx\n",
261                                               pde, addr);
262                                 }
263                         }
264                         addr += NBPDR;
265                 }
266         }
267 }
268
269 static int
270 vmm_mem_populate(void)
271 {
272         int seg, error;
273         vm_paddr_t start, end;
274
275         /* populate the vmm_mem_avail[] array */
276         error = vmm_mem_steal_memory();
277         if (error)
278                 return (error);
279         
280         /*
281          * Now map the memory that was hidden from FreeBSD in
282          * the direct map VA space.
283          */
284         for (seg = 0; seg < vmm_mem_nsegs; seg++) {
285                 start = vmm_mem_avail[seg].base;
286                 end = start + vmm_mem_avail[seg].length;
287                 if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) {
288                         panic("start (0x%016lx) and end (0x%016lx) must be "
289                               "aligned on a %dMB boundary\n",
290                               start, end, NBPDR / MB);
291                 }
292                 vmm_mem_direct_map(start, end);
293         }
294
295         return (0);
296 }
297
298 int
299 vmm_mem_init(void)
300 {
301         int error;
302
303         mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF);
304
305         error = vmm_mem_populate();
306         if (error)
307                 return (error);
308
309         return (0);
310 }
311
312 vm_paddr_t
313 vmm_mem_alloc(size_t size)
314 {
315         int i;
316         vm_paddr_t addr;
317
318         if ((size & PDRMASK) != 0) {
319                 panic("vmm_mem_alloc: size 0x%0lx must be "
320                       "aligned on a 0x%0x boundary\n", size, NBPDR);
321         }
322
323         addr = 0;
324
325         mtx_lock(&vmm_mem_mtx);
326         for (i = 0; i < vmm_mem_nsegs; i++) {
327                 if (vmm_mem_avail[i].length >= size) {
328                         addr = vmm_mem_avail[i].base;
329                         vmm_mem_avail[i].base += size;
330                         vmm_mem_avail[i].length -= size;
331                         /* remove a zero length segment */
332                         if (vmm_mem_avail[i].length == 0) {
333                                 memmove(&vmm_mem_avail[i],
334                                         &vmm_mem_avail[i + 1],
335                                         (vmm_mem_nsegs - (i + 1)) *
336                                          sizeof(vmm_mem_avail[0]));
337                                 vmm_mem_nsegs--;
338                         }
339                         break;
340                 }
341         }
342         mtx_unlock(&vmm_mem_mtx);
343
344         return (addr);
345 }
346
347 void
348 vmm_mem_free(vm_paddr_t base, size_t length)
349 {
350         int i;
351
352         if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) {
353                 panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be "
354                       "aligned on a 0x%0x boundary\n", base, length, NBPDR);
355         }
356
357         mtx_lock(&vmm_mem_mtx);
358
359         for (i = 0; i < vmm_mem_nsegs; i++) {
360                 if (vmm_mem_avail[i].base > base)
361                         break;
362         }
363
364         if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS)
365                 panic("vmm_mem_free: cannot free any more segments");
366
367         /* Create a new segment at index 'i' */
368         memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i],
369                 (vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0]));
370
371         vmm_mem_avail[i].base = base;
372         vmm_mem_avail[i].length = length;
373
374         vmm_mem_nsegs++;
375
376 coalesce_some_more:
377         for (i = 0; i < vmm_mem_nsegs - 1; i++) {
378                 if (vmm_mem_avail[i].base + vmm_mem_avail[i].length ==
379                     vmm_mem_avail[i + 1].base) {
380                         vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length;
381                         memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2],
382                           (vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0]));
383                         vmm_mem_nsegs--;
384                         goto coalesce_some_more;
385                 }
386         }
387
388         mtx_unlock(&vmm_mem_mtx);
389 }
390
391 vm_paddr_t
392 vmm_mem_maxaddr(void)
393 {
394
395         return (maxaddr);
396 }
397
398 void
399 vmm_mem_dump(void)
400 {
401         int i;
402         vm_paddr_t base;
403         vm_size_t length;
404
405         mtx_lock(&vmm_mem_mtx);
406         for (i = 0; i < vmm_mem_nsegs; i++) {
407                 base = vmm_mem_avail[i].base;
408                 length = vmm_mem_avail[i].length;
409                 printf("%-4d0x%016lx    0x%016lx\n", i, base, base + length);
410         }
411         mtx_unlock(&vmm_mem_mtx);
412 }