/*-
 * Copyright (c) 2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <machine/cpufunc.h>
#include <machine/hypervisorvar.h>
#include <machine/smp.h>
#include <machine/mmu.h>
#include <machine/tte.h>
#include <machine/vmparam.h>
#include <machine/tlb.h>
#include <machine/tte_hash.h>
#define	HASH_SIZE	(1 << HASH_ENTRY_SHIFT)
#define	HASH_MASK(th)	((1 << (th->th_shift + PAGE_SHIFT - THE_SHIFT)) - 1)

#define	MAGIC_VALUE	0xcafebabe
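
/*
 * Worked example for HASH_MASK (a sketch; THE_SHIFT is taken to be
 * log2(sizeof(struct tte_hash_entry)), i.e. 6 for 64-byte entries):
 * with 8K pages (PAGE_SHIFT == 13), a hash with th_shift == 0 occupies
 * one page holding 2^(13 - 6) == 128 entries, so HASH_MASK(th) == 0x7f;
 * each increment of th_shift doubles both the pages and the entries.
 */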
struct tte_hash_entry;

#define	MAX_FRAGMENT_ENTRIES ((PAGE_SIZE / sizeof(struct tte_hash_entry)) - 1)
typedef struct tte_hash_field_ {
	uint64_t tag;		/* context and va of the mapping */
	uint64_t data;		/* tte data; VTD_LOCK bit doubles as bucket lock */
} tte_hash_field, *tte_hash_field_t;

/*
 * Overflow bookkeeping for a hash entry; member layout is assumed,
 * sized to 16 bytes so that a tte_hash_entry stays a power of two.
 */
struct of_field {
	int16_t		count;	/* fields in use in this entry */
	uint8_t		lock;
	uint8_t		pad;
	uint32_t	flags;	/* MAGIC_VALUE when next is valid */
	struct tte_hash_entry *next;	/* overflow chain */
};
typedef struct tte_hash_entry {
	tte_hash_field the_fields[HASH_ENTRIES];
	struct of_field of;
} *tte_hash_entry_t;
struct fragment_header {
	struct tte_hash_fragment *fh_next;
	uint8_t fh_count;
	uint8_t fh_free_head;
	uint8_t pad[sizeof(struct tte_hash_entry) - 10];
};

CTASSERT(sizeof(struct fragment_header) == sizeof(struct tte_hash_entry));
SLIST_HEAD(tte_hash_list, tte_hash);

struct tte_hash_list hash_free_list[PAGE_SHIFT];
struct tte_hash {
	uint16_t th_shift;		/* log2 of size in pages */
	uint16_t th_context;		/* TLB context */
	uint32_t th_entries;		/* # TTEs held */
	tte_hash_entry_t th_hashtable;	/* hash of TTEs */
	struct tte_hash_fragment *th_fhhead;	/* head of fragment page list */
	struct tte_hash_fragment *th_fhtail;	/* fragment page being filled */
	SLIST_ENTRY(tte_hash) th_next;
};
struct tte_hash_fragment {
	struct fragment_header thf_head;
	struct tte_hash_entry thf_entries[MAX_FRAGMENT_ENTRIES];
};

CTASSERT(sizeof(struct tte_hash_fragment) == PAGE_SIZE);
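
/*
 * Layout sketch, assuming 64-byte entries on an 8K page: one
 * fragment_header (padded to entry size) followed by
 * MAX_FRAGMENT_ENTRIES == 127 overflow entries; fh_free_head indexes
 * the next unused entry and fh_next chains full pages together.
 */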
static struct tte_hash kernel_tte_hash;

/*
 * Data for the tte_hash allocation mechanism
 */
static uma_zone_t thzone;
static struct vm_object thzone_obj;
static int tte_hash_count = 0, tte_hash_max = 0;
extern uint64_t hash_bucket_lock(tte_hash_field_t fields);
extern void hash_bucket_unlock(tte_hash_field_t fields, uint64_t s);
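
/*
 * Bucket locking sketch: the primitives above live in assembly;
 * hash_bucket_lock() presumably spins on the VTD_LOCK bit of the
 * bucket's fields and returns a saved-state token, which must be
 * handed back to hash_bucket_unlock().
 */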
static tte_hash_t
get_tte_hash(void)
{
	tte_hash_t th;

	th = uma_zalloc(thzone, M_NOWAIT);

	KASSERT(th != NULL, ("tte_hash allocation failed"));
	tte_hash_count++;

	return (th);
}
static void
free_tte_hash(tte_hash_t th)
{

	tte_hash_count--;
	uma_zfree(thzone, th);
}
static tte_hash_t
tte_hash_cached_get(int shift)
{
	tte_hash_t th;
	struct tte_hash_list *head;

	th = NULL;
	head = &hash_free_list[shift];
	if (!SLIST_EMPTY(head)) {
		th = SLIST_FIRST(head);
		SLIST_REMOVE_HEAD(head, th_next);
	}
	return (th);
}
static void
tte_hash_cached_free(tte_hash_t th)
{

	/* Mark the cached hash unused with an out-of-range context. */
	th->th_context = 0xffff;
	SLIST_INSERT_HEAD(&hash_free_list[th->th_shift - HASH_ENTRY_SHIFT], th, th_next);
}
void
tte_hash_init(void)
{
	int i;

	thzone = uma_zcreate("TTE_HASH", sizeof(struct tte_hash), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
	tte_hash_max = maxproc;	/* one hash per process */
	uma_zone_set_obj(thzone, &thzone_obj, tte_hash_max);
	for (i = 0; i < PAGE_SHIFT; i++)
		SLIST_INIT(&hash_free_list[i]);
}
tte_hash_t
tte_hash_kernel_create(vm_offset_t va, uint16_t shift, vm_paddr_t fragment_page)
{
	tte_hash_t th;

	th = &kernel_tte_hash;
	th->th_shift = shift;
	th->th_entries = 0;
	th->th_context = 0;	/* kernel TLB context */
	th->th_hashtable = (tte_hash_entry_t)va;
	th->th_fhtail = th->th_fhhead = (void *)TLB_PHYS_TO_DIRECT(fragment_page);

	return (th);
}
static void *
alloc_zeroed_page(void)
{
	vm_page_t m;
	static int color;
	void *ptr;

	m = NULL;

	while (m == NULL) {
		m = vm_page_alloc(NULL, color++,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);

		if (m == NULL)
			VM_WAIT;
	}

	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	ptr = (void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
	return (ptr);
}
static void
free_fragment_pages(void *ptr)
{
	struct tte_hash_fragment *fh, *fhnext;
	vm_page_t m;

	for (fh = ptr; fh != NULL; fh = fhnext) {
		/* Fetch the link before the page is freed. */
		fhnext = fh->thf_head.fh_next;
		m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)fh));
		m->wire_count--;
		atomic_subtract_int(&cnt.v_wire_count, 1);
		vm_page_free(m);
	}
}
static inline tte_hash_t
_tte_hash_create(uint64_t context, uint64_t *scratchval, uint16_t shift)
{
	tte_hash_t th;

	th = get_tte_hash();
	th->th_shift = shift;
	th->th_entries = 0;
	th->th_context = (uint16_t)context;

	th->th_hashtable = pmap_alloc_zeroed_contig_pages((1 << shift), PAGE_SIZE);
	th->th_fhtail = th->th_fhhead = alloc_zeroed_page();
	KASSERT(th->th_fhtail != NULL, ("th->th_fhtail == NULL"));

	if (scratchval != NULL)
		*scratchval = (uint64_t)((vm_offset_t)th->th_hashtable) |
		    ((vm_offset_t)(1 << shift));

	return (th);
}
tte_hash_t
tte_hash_create(uint64_t context, uint64_t *scratchval)
{

	return (_tte_hash_create(context, scratchval, HASH_ENTRY_SHIFT));
}
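
/*
 * Usage sketch (hypothetical pmap caller, not part of this file):
 *
 *	uint64_t scratch;
 *	tte_hash_t th;
 *
 *	th = tte_hash_create(pm->pm_context, &scratch);
 *	set_hash_user_scratchpad(scratch);
 *
 * The scratch value packs the table's base address with its size in
 * pages; tte_hash_set_scratchpad_user() below uses the same encoding.
 */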
void
tte_hash_destroy(tte_hash_t th)
{

	tte_hash_cached_free(th);
}
static void
_tte_hash_reset(tte_hash_t th)
{

	free_fragment_pages(th->th_fhhead->thf_head.fh_next);

	th->th_fhtail = th->th_fhhead;
	hwblkclr(th->th_fhhead, PAGE_SIZE);

	if (th->th_entries != 0)
		hwblkclr(th->th_hashtable, (1 << (th->th_shift + PAGE_SHIFT)));
	th->th_entries = 0;
}
void
tte_hash_reset(tte_hash_t th, uint64_t *scratchval)
{
	tte_hash_t newth;

	/*
	 * If this hash was grown, hand back a cached minimum-size hash
	 * rather than clearing the large table.
	 */
	if (th->th_shift != HASH_ENTRY_SHIFT && (newth = tte_hash_cached_get(0)) != NULL) {
		newth->th_context = th->th_context;
		tte_hash_cached_free(th);
		*scratchval = (uint64_t)((vm_offset_t)newth->th_hashtable) |
		    ((vm_offset_t)HASH_SIZE);
	} else
		newth = th;

	_tte_hash_reset(newth);
}
static __inline void
tte_hash_set_field(tte_hash_field_t field, uint64_t tag, tte_t tte)
{

	field->tag = tag;
	field->data = tte | (field->data & VTD_LOCK);
}
static __inline tte_hash_entry_t
find_entry(tte_hash_t th, vm_offset_t va, int page_shift)
{
	uint64_t hash_index;

	hash_index = (va >> page_shift) & HASH_MASK(th);
	return (&th->th_hashtable[hash_index]);
}
static __inline tte_hash_entry_t
tte_hash_lookup_last_entry(tte_hash_entry_t entry)
{

	while (entry->of.next)
		entry = entry->of.next;

	return (entry);
}
static tte_hash_entry_t
tte_hash_allocate_fragment_entry(tte_hash_t th)
{
	struct tte_hash_fragment *fh;
	tte_hash_entry_t newentry;

	fh = th->th_fhtail;
	if (fh->thf_head.fh_free_head == MAX_FRAGMENT_ENTRIES) {
		/* Current fragment page is full - chain a new one. */
		fh = th->th_fhtail = fh->thf_head.fh_next = alloc_zeroed_page();
		fh->thf_head.fh_free_head = 1;
#ifdef NOISY_DEBUG
		printf("new fh=%p\n", fh);
#endif
	}
	newentry = &fh->thf_entries[fh->thf_head.fh_free_head];

	fh->thf_head.fh_free_head++;
	fh->thf_head.fh_count++;

	return (newentry);
}
/*
 * If a match for va is found, the tte value is returned and,
 * if field is non-null, field will point at that entry.
 */
static __inline tte_t
_tte_hash_lookup(tte_hash_entry_t entry, tte_t tte_tag, tte_hash_field_t *field)
{
	int i;
	tte_t tte_data;
	tte_hash_field_t fields;

	tte_data = 0;
	do {
		fields = entry->the_fields;
		for (i = 0; i < entry->of.count; i++) {
			if (fields[i].tag == tte_tag) {
				tte_data = (fields[i].data & ~VTD_LOCK);
				*field = &fields[i];
				goto done;
			}
		}
		if (entry->of.next && entry->of.flags != MAGIC_VALUE)
			panic("overflow pointer not null without flags set entry=%p next=%p flags=0x%x count=%d",
			    entry, entry->of.next, entry->of.flags, entry->of.count);
		entry = entry->of.next;
	} while (entry != NULL);
done:
	return (tte_data);
}
static __inline void
_tte_hash_lookup_last(tte_hash_entry_t entry, tte_hash_field_t *field)
{
	tte_hash_field_t fields;

	fields = entry->the_fields;

	/* Walk to the last entry in the chain that will stay occupied. */
	while (entry->of.next && (entry->of.next->of.count > 1))
		entry = entry->of.next;

	if (entry->of.next && entry->of.next->of.count == 1) {
		/* Unlink a trailing overflow entry holding a single field. */
		*field = &entry->of.next->the_fields[0];
		entry->of.next = NULL;
		return;
	}

	/*
	 * An empty bucket hands back its first (cleared) field; the
	 * caller recognizes this case by seeing tag == 0.
	 */
	if (entry->of.count == 0)
		*field = &entry->the_fields[0];
	else
		*field = &entry->the_fields[--entry->of.count];
}
tte_t
tte_hash_clear_bits(tte_hash_t th, vm_offset_t va, uint64_t flags)
{
	uint64_t s;
	tte_hash_entry_t entry;
	tte_t otte_data, tte_tag;
	tte_hash_field_t field = NULL;

	/* XXX - only handle 8K pages for now */
	entry = find_entry(th, va, PAGE_SHIFT);

	tte_tag = (((uint64_t)th->th_context << TTARGET_CTX_SHIFT) |
	    (va >> TTARGET_VA_SHIFT));

	s = hash_bucket_lock(entry->the_fields);
	if ((otte_data = _tte_hash_lookup(entry, tte_tag, &field)) != 0)
		tte_hash_set_field(field, field->tag, field->data & ~flags);
	hash_bucket_unlock(entry->the_fields, s);

	return (otte_data);
}
tte_t
tte_hash_delete(tte_hash_t th, vm_offset_t va)
{
	uint64_t s;
	tte_hash_entry_t entry;
	tte_t tte_data, tte_tag;
	tte_hash_field_t lookup_field = NULL;
	tte_hash_field_t last_field = NULL;

	/* XXX - only handle 8K pages for now */
	entry = find_entry(th, va, PAGE_SHIFT);

	tte_tag = (((uint64_t)th->th_context << TTARGET_CTX_SHIFT) |
	    (va >> TTARGET_VA_SHIFT));

	s = hash_bucket_lock(entry->the_fields);

	if ((tte_data = _tte_hash_lookup(entry, tte_tag, &lookup_field)) == 0)
		goto done;

	_tte_hash_lookup_last(entry, &last_field);

	if (last_field->tag == 0) {
		hash_bucket_unlock(entry->the_fields, s);
		panic("lookup_last failed for va=0x%lx\n", va);
	}

	/* move the last field's values in to the field we are deleting */
	if (lookup_field != last_field)
		tte_hash_set_field(lookup_field, last_field->tag, last_field->data);

	tte_hash_set_field(last_field, 0, 0);
done:
	hash_bucket_unlock(entry->the_fields, s);

	if (tte_data != 0)
		th->th_entries--;

	return (tte_data);
}
static __inline int
tte_hash_insert_locked(tte_hash_t th, tte_hash_entry_t entry, uint64_t tte_tag, tte_t tte_data)
{
	tte_hash_entry_t lentry;

	lentry = tte_hash_lookup_last_entry(entry);

	if (lentry->of.count == HASH_ENTRIES)
		return (-1);
	tte_hash_set_field(&lentry->the_fields[lentry->of.count++],
	    tte_tag, tte_data);
	th->th_entries++;
	return (0);
}
static __inline void
tte_hash_extend_locked(tte_hash_t th, tte_hash_entry_t entry, tte_hash_entry_t newentry, uint64_t tte_tag, tte_t tte_data)
{
	tte_hash_entry_t lentry;

	lentry = tte_hash_lookup_last_entry(entry);
	lentry->of.flags = MAGIC_VALUE;
	lentry->of.next = newentry;
	tte_hash_set_field(&newentry->the_fields[newentry->of.count++], tte_tag, tte_data);
	th->th_entries++;
}
void
tte_hash_insert(tte_hash_t th, vm_offset_t va, tte_t tte_data)
{
	tte_hash_entry_t entry, newentry;
	tte_t tte_tag;
	uint64_t s;
	int retval;

#ifdef DEBUG
	if (tte_hash_lookup(th, va) != 0)
		panic("mapping for va=0x%lx already exists", va);
#endif
	entry = find_entry(th, va, PAGE_SHIFT); /* should actually be a function of tte_data */
	tte_tag = (((uint64_t)th->th_context << TTARGET_CTX_SHIFT) |
	    (va >> TTARGET_VA_SHIFT));

	s = hash_bucket_lock(entry->the_fields);
	retval = tte_hash_insert_locked(th, entry, tte_tag, tte_data);
	hash_bucket_unlock(entry->the_fields, s);

	if (retval == -1) {
		/* Bucket full - chain a fragment entry on to it. */
		newentry = tte_hash_allocate_fragment_entry(th);
		s = hash_bucket_lock(entry->the_fields);
		tte_hash_extend_locked(th, entry, newentry, tte_tag, tte_data);
		hash_bucket_unlock(entry->the_fields, s);
	}
#ifdef DEBUG
	if (tte_hash_lookup(th, va) == 0)
		panic("insert for va=0x%lx failed", va);
#endif
}
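
/*
 * Usage sketch for the modification API (a sketch, hypothetical values):
 *
 *	tte_hash_insert(th, va, tte_data);	  no prior mapping may exist
 *	otte = tte_hash_update(th, va, tte_data); insert-or-replace
 *	otte = tte_hash_delete(th, va);		  returns deleted tte, or 0
 *
 * Each of these takes and drops the per-bucket lock internally.
 */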
/*
 * Look up the tte for va, returning its data field, or 0 if
 * there is no mapping for va.
 */
tte_t
tte_hash_lookup(tte_hash_t th, vm_offset_t va)
{
	uint64_t s;
	tte_hash_entry_t entry;
	tte_t tte_data, tte_tag;
	tte_hash_field_t field = NULL;

	/* XXX - only handle 8K pages for now */
	entry = find_entry(th, va, PAGE_SHIFT);

	tte_tag = (((uint64_t)th->th_context << TTARGET_CTX_SHIFT) |
	    (va >> TTARGET_VA_SHIFT));

	s = hash_bucket_lock(entry->the_fields);
	tte_data = _tte_hash_lookup(entry, tte_tag, &field);
	hash_bucket_unlock(entry->the_fields, s);

	return (tte_data);
}
uint64_t
tte_hash_set_scratchpad_kernel(tte_hash_t th)
{
	uint64_t hash_scratch;

	/* This breaks if a hash table grows above 32MB */
	hash_scratch = ((vm_offset_t)th->th_hashtable) | ((vm_offset_t)(1 << th->th_shift));
	set_hash_kernel_scratchpad(hash_scratch);

	return (hash_scratch);
}
uint64_t
tte_hash_set_scratchpad_user(tte_hash_t th, uint64_t context)
{
	uint64_t hash_scratch;

	/* This breaks if a hash table grows above 32MB */
	th->th_context = (uint16_t)context;
	hash_scratch = ((vm_offset_t)th->th_hashtable) | ((vm_offset_t)(1 << th->th_shift));
	set_hash_user_scratchpad(hash_scratch);

	return (hash_scratch);
}
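
/*
 * Why 32MB: the hash table is page aligned, so the low PAGE_SHIFT bits
 * of th_hashtable are zero and the size in pages, (1 << th_shift), can
 * be OR'ed into them losslessly only while th_shift < PAGE_SHIFT. With
 * 8K pages that caps the table at 2^12 pages * 8KB == 32MB.
 */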
tte_t
tte_hash_update(tte_hash_t th, vm_offset_t va, tte_t tte_data)
{
	uint64_t s;
	tte_hash_entry_t entry;
	tte_t otte_data, tte_tag;
	tte_hash_field_t field = NULL;

	entry = find_entry(th, va, PAGE_SHIFT); /* should actually be a function of tte_data */

	tte_tag = (((uint64_t)th->th_context << TTARGET_CTX_SHIFT) |
	    (va >> TTARGET_VA_SHIFT));
	s = hash_bucket_lock(entry->the_fields);
	otte_data = _tte_hash_lookup(entry, tte_tag, &field);

	if (otte_data == 0) {
		hash_bucket_unlock(entry->the_fields, s);
		tte_hash_insert(th, va, tte_data);
	} else {
		tte_hash_set_field(field, tte_tag, tte_data);
		hash_bucket_unlock(entry->the_fields, s);
	}

	return (otte_data);
}
/*
 * Resize when the average entry has a full fragment entry.
 */
int
tte_hash_needs_resize(tte_hash_t th)
{

	return ((th->th_entries > (1 << (th->th_shift + PAGE_SHIFT - TTE_SHIFT + 1)))
	    && (th != &kernel_tte_hash));
}
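
/*
 * Threshold sketch: 2^(th_shift + PAGE_SHIFT - TTE_SHIFT + 1) is twice
 * the number of TTE-sized slots the base table can hold, so (assuming
 * TTE_SHIFT is the log2 of a field slot) the resize triggers once the
 * table is, on average, spilling about as much into fragment entries
 * as it holds directly.
 */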
tte_hash_t
tte_hash_resize(tte_hash_t th)
{
	int i, j, nentries;
	tte_hash_t newth;
	tte_hash_entry_t src_entry, dst_entry, newentry;

	KASSERT(th != &kernel_tte_hash, ("tte_hash_resize not supported for this pmap"));
	if ((newth = tte_hash_cached_get((th->th_shift - HASH_ENTRY_SHIFT) + 1)) != NULL) {
		newth->th_context = th->th_context;
		_tte_hash_reset(newth);
	} else
		newth = _tte_hash_create(th->th_context, NULL, (th->th_shift + 1));

	nentries = (1 << (th->th_shift + PAGE_SHIFT - THE_SHIFT));
	for (i = 0; i < nentries; i++) {
		tte_hash_field_t fields;

		src_entry = (&th->th_hashtable[i]);
		do {
			fields = src_entry->the_fields;
			for (j = 0; j < src_entry->of.count; j++) {
				/*
				 * (tag << shift) | (i & ((1 << shift) - 1))
				 * reconstructs va >> PAGE_SHIFT before the
				 * wider mask of the new table is applied.
				 */
				int shift = TTARGET_VA_SHIFT - PAGE_SHIFT;
				uint64_t index = ((fields[j].tag << shift) |
				    (uint64_t)(i & ((1 << shift) - 1))) & HASH_MASK(newth);

				dst_entry = &(newth->th_hashtable[index]);
				if (tte_hash_insert_locked(newth, dst_entry,
				    fields[j].tag, fields[j].data) == -1) {
					newentry = tte_hash_allocate_fragment_entry(newth);
					tte_hash_extend_locked(newth, dst_entry, newentry,
					    fields[j].tag, fields[j].data);
				}
			}
			src_entry = src_entry->of.next;
		} while (src_entry != NULL);
	}

	KASSERT(th->th_entries == newth->th_entries,
	    ("not all entries copied old=%d new=%d", th->th_entries, newth->th_entries));

	return (newth);
}