/*-
 * Copyright (c) 2016 Akshay Jaggi <jaggi@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * gntdev.c
 *
 * Interface to /dev/xen/gntdev.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/selinfo.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/rman.h>
#include <sys/tree.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/bitset.h>
#include <sys/queue.h>
#include <sys/mman.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>

#include <machine/md_var.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/error.h>
#include <xen/xen_intr.h>
#include <xen/gnttab.h>
#include <xen/gntdev.h>

MALLOC_DEFINE(M_GNTDEV, "gntdev", "Xen grant-table user-space device");
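
/*
 * The device's file offsets are handed out to userspace in units of
 * PAGE_SIZE; this is the number of page-sized slots that fit in a 64-bit
 * offset space, i.e. the size of the initial free-offset range.
 */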
#define MAX_OFFSET_COUNT ((0xffffffffffffffffull >> PAGE_SHIFT) + 1)

static d_open_t gntdev_open;
static d_ioctl_t gntdev_ioctl;
static d_mmap_single_t gntdev_mmap_single;

static struct cdevsw gntdev_devsw = {
	.d_version = D_VERSION,
	.d_open = gntdev_open,
	.d_ioctl = gntdev_ioctl,
	.d_mmap_single = gntdev_mmap_single,
	.d_name = "gntdev",
};

static device_t gntdev_dev = NULL;

STAILQ_HEAD(gref_list_head, gntdev_gref);
STAILQ_HEAD(gmap_list_head, gntdev_gmap);
RB_HEAD(gref_tree_head, gntdev_gref);
RB_HEAD(gmap_tree_head, gntdev_gmap);

struct file_offset_struct {
	RB_ENTRY(file_offset_struct)	next;
	uint64_t			file_offset;
	uint64_t			count;
};

static int
offset_cmp(struct file_offset_struct *f1, struct file_offset_struct *f2)
{
	return (f1->file_offset - f2->file_offset);
}

RB_HEAD(file_offset_head, file_offset_struct);
RB_GENERATE_STATIC(file_offset_head, file_offset_struct, next, offset_cmp);

struct per_user_data {
	struct mtx		user_data_lock;
	struct gref_tree_head	gref_tree;
	struct gmap_tree_head	gmap_tree;
	struct file_offset_head	file_offset;
};

/*
 * Get offset into the file which will be used while mmapping the
 * appropriate pages by the userspace program.
 */
static int
get_file_offset(struct per_user_data *priv_user, uint32_t count,
    uint64_t *file_offset)
{
	struct file_offset_struct *offset, *offset_tmp;

	if (count == 0)
		return (EINVAL);
	mtx_lock(&priv_user->user_data_lock);
	RB_FOREACH_SAFE(offset, file_offset_head, &priv_user->file_offset,
	    offset_tmp) {
		if (offset->count >= count) {
			offset->count -= count;
			*file_offset = offset->file_offset + offset->count *
			    PAGE_SIZE;
			if (offset->count == 0) {
				RB_REMOVE(file_offset_head,
				    &priv_user->file_offset, offset);
				free(offset, M_GNTDEV);
			}
			mtx_unlock(&priv_user->user_data_lock);
			return (0);
		}
	}
	mtx_unlock(&priv_user->user_data_lock);

	return (ENOSPC);
}
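
/*
 * Return a range of file offsets to the free pool, coalescing it with any
 * adjacent free range so that large contiguous allocations remain possible.
 */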
static void
put_file_offset(struct per_user_data *priv_user, uint32_t count,
    uint64_t file_offset)
{
	struct file_offset_struct *offset, *offset_nxt, *offset_prv;

	offset = malloc(sizeof(*offset), M_GNTDEV, M_WAITOK | M_ZERO);
	offset->file_offset = file_offset;
	offset->count = count;

	mtx_lock(&priv_user->user_data_lock);
	RB_INSERT(file_offset_head, &priv_user->file_offset, offset);
	offset_nxt = RB_NEXT(file_offset_head, &priv_user->file_offset, offset);
	offset_prv = RB_PREV(file_offset_head, &priv_user->file_offset, offset);
	if (offset_nxt != NULL &&
	    offset_nxt->file_offset == offset->file_offset + offset->count *
	    PAGE_SIZE) {
		offset->count += offset_nxt->count;
		RB_REMOVE(file_offset_head, &priv_user->file_offset,
		    offset_nxt);
		free(offset_nxt, M_GNTDEV);
	}
	if (offset_prv != NULL &&
	    offset->file_offset == offset_prv->file_offset + offset_prv->count *
	    PAGE_SIZE) {
		offset_prv->count += offset->count;
		RB_REMOVE(file_offset_head, &priv_user->file_offset, offset);
		free(offset, M_GNTDEV);
	}
	mtx_unlock(&priv_user->user_data_lock);
}

static int gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size,
    vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color);
static void gntdev_gmap_pg_dtor(void *handle);
static int gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres);

static struct cdev_pager_ops gntdev_gmap_pg_ops = {
	.cdev_pg_fault = gntdev_gmap_pg_fault,
	.cdev_pg_ctor = gntdev_gmap_pg_ctor,
	.cdev_pg_dtor = gntdev_gmap_pg_dtor,
};

struct cleanup_data_struct {
	struct mtx to_kill_grefs_mtx;
	struct mtx to_kill_gmaps_mtx;
	struct gref_list_head to_kill_grefs;
	struct gmap_list_head to_kill_gmaps;
};

static struct cleanup_data_struct cleanup_data = {
	.to_kill_grefs = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_grefs),
	.to_kill_gmaps = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_gmaps),
};
MTX_SYSINIT(to_kill_grefs_mtx, &cleanup_data.to_kill_grefs_mtx,
    "gntdev to_kill_grefs mutex", MTX_DEF);
MTX_SYSINIT(to_kill_gmaps_mtx, &cleanup_data.to_kill_gmaps_mtx,
    "gntdev to_kill_gmaps mutex", MTX_DEF);
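
/*
 * Cleanup of dying grants and mappings is deferred to a task on the system
 * taskqueue: request handlers only move entries onto the to_kill lists
 * above and enqueue cleanup_task.
 */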
static void cleanup_function(void *arg, __unused int pending);
static struct task cleanup_task = TASK_INITIALIZER(0, cleanup_function,
    &cleanup_data);

struct notify_data {
	uint64_t		index;
	uint32_t		action;
	uint32_t		event_channel_port;
	xen_intr_handle_t	notify_evtchn_handle;
};

static void notify(struct notify_data *notify, vm_page_t page);

/*-------------------- Grant Allocation Methods -----------------------------*/

union gref_next_union {
	STAILQ_ENTRY(gntdev_gref)		list;
	RB_ENTRY(gntdev_gref)			tree;
};

struct gntdev_gref {
	union gref_next_union	gref_next;
	uint64_t		file_index;
	grant_ref_t		gref_id;
	vm_page_t		page;
	struct notify_data	*notify;
};

static int
gref_cmp(struct gntdev_gref *g1, struct gntdev_gref *g2)
{
	return (g1->file_index - g2->file_index);
}

RB_GENERATE_STATIC(gref_tree_head, gntdev_gref, gref_next.tree, gref_cmp);

/*
 * Traverse over the device-list of to-be-deleted grants allocated, and
 * if all accesses, both local mmaps and foreign maps, to them have ended,
 * destroy them.
 */
static void
gref_list_dtor(struct cleanup_data_struct *cleanup_data)
{
	struct gref_list_head tmp_grefs;
	struct gntdev_gref *gref, *gref_tmp, *gref_previous;

	STAILQ_INIT(&tmp_grefs);
	mtx_lock(&cleanup_data->to_kill_grefs_mtx);
	STAILQ_SWAP(&cleanup_data->to_kill_grefs, &tmp_grefs, gntdev_gref);
	mtx_unlock(&cleanup_data->to_kill_grefs_mtx);

	gref_previous = NULL;
	STAILQ_FOREACH_SAFE(gref, &tmp_grefs, gref_next.list, gref_tmp) {
		if (gref->page && gref->page->object == NULL) {
			if (gref->notify)
				notify(gref->notify, gref->page);
			if (gref->gref_id != GRANT_REF_INVALID) {
				if (gnttab_query_foreign_access(gref->gref_id))
					continue;
				if (gnttab_end_foreign_access_ref(gref->gref_id)
				    == 0)
					continue;
				gnttab_free_grant_reference(gref->gref_id);
			}
			vm_page_unwire(gref->page, PQ_NONE);
			vm_page_free(gref->page);
			gref->page = NULL;
		}
		if (gref->page == NULL) {
			if (gref_previous == NULL)
				STAILQ_REMOVE_HEAD(&tmp_grefs, gref_next.list);
			else
				STAILQ_REMOVE_AFTER(&tmp_grefs, gref_previous,
				    gref_next.list);
			free(gref->notify, M_GNTDEV);
			free(gref, M_GNTDEV);
		} else
			gref_previous = gref;
	}

	if (!STAILQ_EMPTY(&tmp_grefs)) {
		mtx_lock(&cleanup_data->to_kill_grefs_mtx);
		STAILQ_CONCAT(&cleanup_data->to_kill_grefs, &tmp_grefs);
		mtx_unlock(&cleanup_data->to_kill_grefs_mtx);
	}
}

/*
 * Find count number of contiguous allocated grants for a given userspace
 * program by file-offset (index).
 */
static struct gntdev_gref*
gntdev_find_grefs(struct per_user_data *priv_user,
    uint64_t index, uint32_t count)
{
	struct gntdev_gref find_gref, *gref, *gref_start = NULL;

	find_gref.file_index = index;

	mtx_lock(&priv_user->user_data_lock);
	gref_start = RB_FIND(gref_tree_head, &priv_user->gref_tree, &find_gref);
	for (gref = gref_start; gref != NULL && count > 0; gref =
	    RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) {
		if (index != gref->file_index)
			break;
		index += PAGE_SIZE;
		count--;
	}
	mtx_unlock(&priv_user->user_data_lock);

	if (count)
		return (NULL);
	return (gref_start);
}

/*
 * IOCTL_GNTDEV_ALLOC_GREF
 * Allocate required number of wired pages for the request, grant foreign
 * access to the physical frames for these pages, and add details about
 * this allocation to the per user private data, so that these pages can
 * be mmapped by the userspace program.
 */
static int
gntdev_alloc_gref(struct ioctl_gntdev_alloc_gref *arg)
{
	uint32_t i;
	int error, readonly;
	uint64_t file_offset;
	struct gntdev_gref *grefs;
	struct per_user_data *priv_user;

	readonly = !(arg->flags & GNTDEV_ALLOC_FLAG_WRITABLE);

	error = devfs_get_cdevpriv((void**) &priv_user);
	if (error != 0)
		return (EINVAL);

	/* Cleanup grefs and free pages. */
	taskqueue_enqueue(taskqueue_thread, &cleanup_task);

	/* Get file offset for this request. */
	error = get_file_offset(priv_user, arg->count, &file_offset);
	if (error != 0)
		return (error);

	/* Allocate grefs. */
	grefs = malloc(sizeof(*grefs) * arg->count, M_GNTDEV, M_WAITOK);

	for (i = 0; i < arg->count; i++) {
		grefs[i].file_index = file_offset + i * PAGE_SIZE;
		grefs[i].gref_id = GRANT_REF_INVALID;
		grefs[i].notify = NULL;
		grefs[i].page = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL
			| VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
		if (grefs[i].page == NULL) {
			log(LOG_ERR, "Page allocation failed.");
			error = ENOMEM;
			break;
		}
		if ((grefs[i].page->flags & PG_ZERO) == 0) {
			/*
			 * Zero the allocated page, as we don't want to
			 * leak our memory to other domains.
			 */
			pmap_zero_page(grefs[i].page);
		}
		grefs[i].page->valid = VM_PAGE_BITS_ALL;

		error = gnttab_grant_foreign_access(arg->domid,
			(VM_PAGE_TO_PHYS(grefs[i].page) >> PAGE_SHIFT),
			readonly, &grefs[i].gref_id);
		if (error != 0) {
			log(LOG_ERR, "Grant Table Hypercall failed.");
			break;
		}
	}

	if (error != 0) {
		/*
		 * If target domain maps the gref (by guessing the gref-id),
		 * then we can't clean it up yet and we have to leave the
		 * page in place so as to not leak our memory to that domain.
		 * Add it to a global list to be cleaned up later.
		 */
		mtx_lock(&cleanup_data.to_kill_grefs_mtx);
		for (i = 0; i < arg->count; i++)
			STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs,
			    &grefs[i], gref_next.list);
		mtx_unlock(&cleanup_data.to_kill_grefs_mtx);

		taskqueue_enqueue(taskqueue_thread, &cleanup_task);

		return (error);
	}

	/* Copy the output values. */
	arg->index = file_offset;
	for (i = 0; i < arg->count; i++)
		arg->gref_ids[i] = grefs[i].gref_id;

	/* Modify the per user private data. */
	mtx_lock(&priv_user->user_data_lock);
	for (i = 0; i < arg->count; i++)
		RB_INSERT(gref_tree_head, &priv_user->gref_tree, &grefs[i]);
	mtx_unlock(&priv_user->user_data_lock);

	return (error);
}
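
/*
 * Illustrative userspace sketch (not part of this driver): allocate one
 * writable grant for a peer domain and map it locally.  Assumes the usual
 * <fcntl.h>, <sys/ioctl.h> and <sys/mman.h> includes; remote_domid is a
 * placeholder, and the gref_ids member must be set up as <xen/gntdev.h>
 * requires, so treat this as a hedged example rather than a reference.
 *
 *	int fd = open("/dev/xen/gntdev", O_RDWR);
 *	struct ioctl_gntdev_alloc_gref req;
 *
 *	memset(&req, 0, sizeof(req));
 *	req.domid = remote_domid;		// hypothetical peer domain
 *	req.flags = GNTDEV_ALLOC_FLAG_WRITABLE;
 *	req.count = 1;
 *	if (ioctl(fd, IOCTL_GNTDEV_ALLOC_GREF, &req) == 0) {
 *		// req.index is the offset to pass to mmap(2); the grant
 *		// reference(s) to advertise to the peer come back through
 *		// the gref_ids member.
 *		void *shared = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, fd, req.index);
 *	}
 */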

/*
 * IOCTL_GNTDEV_DEALLOC_GREF
 * Remove grant allocation information from the per user private data, so
 * that it can't be mmapped anymore by the userspace program, and add it
 * to the to-be-deleted grants global device-list.
 */
static int
gntdev_dealloc_gref(struct ioctl_gntdev_dealloc_gref *arg)
{
	int error;
	uint32_t count;
	struct gntdev_gref *gref, *gref_tmp;
	struct per_user_data *priv_user;

	error = devfs_get_cdevpriv((void**) &priv_user);
	if (error != 0)
		return (EINVAL);

	gref = gntdev_find_grefs(priv_user, arg->index, arg->count);
	if (gref == NULL) {
		log(LOG_ERR, "Can't find requested grant-refs.");
		return (EINVAL);
	}

	/* Remove the grefs from user private data. */
	count = arg->count;
	mtx_lock(&priv_user->user_data_lock);
	mtx_lock(&cleanup_data.to_kill_grefs_mtx);
	for (; gref != NULL && count > 0; gref = gref_tmp) {
		gref_tmp = RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref);
		RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref);
		STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref,
		    gref_next.list);
		count--;
	}
	mtx_unlock(&cleanup_data.to_kill_grefs_mtx);
	mtx_unlock(&priv_user->user_data_lock);

	taskqueue_enqueue(taskqueue_thread, &cleanup_task);
	put_file_offset(priv_user, arg->count, arg->index);

	return (0);
}
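
/*
 * Illustrative continuation of the sketch above (still an assumption-laden
 * example, not driver code): once the region is unmapped, the grant can be
 * returned through the dealloc ioctl.
 *
 *	munmap(shared, PAGE_SIZE);
 *
 *	struct ioctl_gntdev_dealloc_gref drq;
 *	drq.index = req.index;
 *	drq.count = 1;
 *	ioctl(fd, IOCTL_GNTDEV_DEALLOC_GREF, &drq);
 */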

/*-------------------- Grant Mapping Methods --------------------------------*/

struct gntdev_gmap_map {
	vm_object_t	mem;
	struct resource	*pseudo_phys_res;
	int		pseudo_phys_res_id;
	vm_paddr_t	phys_base_addr;
};

union gmap_next_union {
	STAILQ_ENTRY(gntdev_gmap)		list;
	RB_ENTRY(gntdev_gmap)			tree;
};

struct gntdev_gmap {
	union gmap_next_union		gmap_next;
	uint64_t			file_index;
	uint32_t			count;
	struct gnttab_map_grant_ref	*grant_map_ops;
	struct gntdev_gmap_map		*map;
	struct notify_data		*notify;
};

static int
gmap_cmp(struct gntdev_gmap *g1, struct gntdev_gmap *g2)
{
	return (g1->file_index - g2->file_index);
}

RB_GENERATE_STATIC(gmap_tree_head, gntdev_gmap, gmap_next.tree, gmap_cmp);

/*
 * Traverse over the device-list of to-be-deleted grant mappings, and if
 * the region is no longer mmapped by anyone, free the memory used to
 * store information about the mapping.
 */
static void
gmap_list_dtor(struct cleanup_data_struct *cleanup_data)
{
	struct gmap_list_head tmp_gmaps;
	struct gntdev_gmap *gmap, *gmap_tmp, *gmap_previous;

	STAILQ_INIT(&tmp_gmaps);
	mtx_lock(&cleanup_data->to_kill_gmaps_mtx);
	STAILQ_SWAP(&cleanup_data->to_kill_gmaps, &tmp_gmaps, gntdev_gmap);
	mtx_unlock(&cleanup_data->to_kill_gmaps_mtx);

	gmap_previous = NULL;
	STAILQ_FOREACH_SAFE(gmap, &tmp_gmaps, gmap_next.list, gmap_tmp) {
		if (gmap->map == NULL) {
			if (gmap_previous == NULL)
				STAILQ_REMOVE_HEAD(&tmp_gmaps, gmap_next.list);
			else
				STAILQ_REMOVE_AFTER(&tmp_gmaps, gmap_previous,
				    gmap_next.list);
			free(gmap->notify, M_GNTDEV);
			free(gmap->grant_map_ops, M_GNTDEV);
			free(gmap, M_GNTDEV);
		} else
			gmap_previous = gmap;
	}

	if (!STAILQ_EMPTY(&tmp_gmaps)) {
		mtx_lock(&cleanup_data->to_kill_gmaps_mtx);
		STAILQ_CONCAT(&cleanup_data->to_kill_gmaps, &tmp_gmaps);
		mtx_unlock(&cleanup_data->to_kill_gmaps_mtx);
	}
}

/*
 * Find mapped grants for a given userspace program, by file-offset (index)
 * and count, as supplied during the map-ioctl.
 */
static struct gntdev_gmap*
gntdev_find_gmap(struct per_user_data *priv_user,
    uint64_t index, uint32_t count)
{
	struct gntdev_gmap find_gmap, *gmap;

	find_gmap.file_index = index;

	mtx_lock(&priv_user->user_data_lock);
	gmap = RB_FIND(gmap_tree_head, &priv_user->gmap_tree, &find_gmap);
	mtx_unlock(&priv_user->user_data_lock);

	if (gmap != NULL && gmap->count == count)
		return (gmap);
	return (NULL);
}

/*
 * Remove the pages from the mgtdevice pager, call the unmap hypercall,
 * free the xenmem resource. This function is called during the
 * destruction of the mgtdevice pager, which happens when all mmaps to
 * it have been removed, and the unmap-ioctl has been performed.
 */
static int
notify_unmap_cleanup(struct gntdev_gmap *gmap)
{
	uint32_t i;
	int error, count;
	vm_page_t m;
	struct gnttab_unmap_grant_ref *unmap_ops;

	unmap_ops = malloc(sizeof(struct gnttab_unmap_grant_ref) * gmap->count,
	    M_GNTDEV, M_WAITOK);

	/* Enumerate freeable maps. */
	count = 0;
	for (i = 0; i < gmap->count; i++) {
		if (gmap->grant_map_ops[i].handle != -1) {
			unmap_ops[count].handle = gmap->grant_map_ops[i].handle;
			unmap_ops[count].host_addr =
				gmap->grant_map_ops[i].host_addr;
			unmap_ops[count].dev_bus_addr = 0;
			count++;
		}
	}

	/* Perform notification. */
	if (count > 0 && gmap->notify) {
		vm_page_t page;
		uint64_t page_offset;

		page_offset = gmap->notify->index - gmap->file_index;
		page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + page_offset);
		notify(gmap->notify, page);
	}

	/* Free the pages. */
	VM_OBJECT_WLOCK(gmap->map->mem);
retry:
	for (i = 0; i < gmap->count; i++) {
		m = vm_page_lookup(gmap->map->mem, i);
		if (m == NULL)
			continue;
		if (vm_page_sleep_if_busy(m, "pcmdum"))
			goto retry;
		cdev_pager_free_page(gmap->map->mem, m);
	}
	VM_OBJECT_WUNLOCK(gmap->map->mem);

	/* Perform unmap hypercall. */
	error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
	    unmap_ops, count);

	for (i = 0; i < gmap->count; i++) {
		gmap->grant_map_ops[i].handle = -1;
		gmap->grant_map_ops[i].host_addr = 0;
	}

	/* Free the pseudo-physical resource backing the mapping. */
	error = xenmem_free(gntdev_dev, gmap->map->pseudo_phys_res_id,
	    gmap->map->pseudo_phys_res);
	KASSERT(error == 0,
	    ("Unable to release memory resource: %d", error));

	free(gmap->map, M_GNTDEV);
	gmap->map = NULL;

	free(unmap_ops, M_GNTDEV);

	return (error);
}

/*
 * IOCTL_GNTDEV_MAP_GRANT_REF
 * Populate structures for mapping the grant reference in the per user
 * private data. Actual resource allocation and map hypercall is performed
 * during the mmap.
 */
static int
gntdev_map_grant_ref(struct ioctl_gntdev_map_grant_ref *arg)
{
	uint32_t i;
	int error;
	struct gntdev_gmap *gmap;
	struct per_user_data *priv_user;

	error = devfs_get_cdevpriv((void**) &priv_user);
	if (error != 0)
		return (EINVAL);

	gmap = malloc(sizeof(*gmap), M_GNTDEV, M_WAITOK | M_ZERO);
	gmap->count = arg->count;
	gmap->grant_map_ops =
	    malloc(sizeof(struct gnttab_map_grant_ref) * arg->count,
	    M_GNTDEV, M_WAITOK | M_ZERO);

	error = get_file_offset(priv_user, arg->count, &gmap->file_index);
	if (error != 0) {
		free(gmap->grant_map_ops, M_GNTDEV);
		free(gmap, M_GNTDEV);
		return (error);
	}

	for (i = 0; i < arg->count; i++) {
		gmap->grant_map_ops[i].dom = arg->refs[i].domid;
		gmap->grant_map_ops[i].ref = arg->refs[i].ref;
		gmap->grant_map_ops[i].handle = -1;
		gmap->grant_map_ops[i].flags = GNTMAP_host_map;
	}

	mtx_lock(&priv_user->user_data_lock);
	RB_INSERT(gmap_tree_head, &priv_user->gmap_tree, gmap);
	mtx_unlock(&priv_user->user_data_lock);

	arg->index = gmap->file_index;

	return (error);
}
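
/*
 * Illustrative userspace sketch for the mapping path (hedged example, not
 * driver code): map a grant reference advertised by a peer domain, then
 * mmap(2) the returned index, which is what triggers the actual map
 * hypercall in mmap_gmap() below.  remote_domid and remote_gref are
 * placeholders; the shape of the refs array follows <xen/gntdev.h>.
 *
 *	struct ioctl_gntdev_map_grant_ref mreq;
 *
 *	memset(&mreq, 0, sizeof(mreq));
 *	mreq.count = 1;
 *	mreq.refs[0].domid = remote_domid;
 *	mreq.refs[0].ref = remote_gref;
 *	if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &mreq) == 0) {
 *		void *m = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, fd, mreq.index);
 *	}
 */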

/*
 * IOCTL_GNTDEV_UNMAP_GRANT_REF
 * Remove the map information from the per user private data and add it
 * to the global device-list of mappings to be deleted. A reference to
 * the mgtdevice pager is also decreased, the reason for which is
 * explained in mmap_gmap().
 */
static int
gntdev_unmap_grant_ref(struct ioctl_gntdev_unmap_grant_ref *arg)
{
	int error;
	struct gntdev_gmap *gmap;
	struct per_user_data *priv_user;

	error = devfs_get_cdevpriv((void**) &priv_user);
	if (error != 0)
		return (EINVAL);

	gmap = gntdev_find_gmap(priv_user, arg->index, arg->count);
	if (gmap == NULL) {
		log(LOG_ERR, "Can't find requested grant-map.");
		return (EINVAL);
	}

	mtx_lock(&priv_user->user_data_lock);
	mtx_lock(&cleanup_data.to_kill_gmaps_mtx);
	RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap);
	STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap, gmap_next.list);
	mtx_unlock(&cleanup_data.to_kill_gmaps_mtx);
	mtx_unlock(&priv_user->user_data_lock);

	if (gmap->map)
		vm_object_deallocate(gmap->map->mem);

	taskqueue_enqueue(taskqueue_thread, &cleanup_task);
	put_file_offset(priv_user, arg->count, arg->index);

	return (0);
}

/*
 * IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR
 * Get file-offset and count for a given mapping, from the virtual address
 * where the mapping is mmapped.
 * Please note, this only works for grants mapped by this domain, and not
 * grants allocated. Count doesn't make much sense in reference to grants
 * allocated. Also, because this function is present in the Linux gntdev
 * device, but not in the Linux gntalloc one, most userspace code only
 * uses it for mapped grants.
 */
static int
gntdev_get_offset_for_vaddr(struct ioctl_gntdev_get_offset_for_vaddr *arg,
    struct thread *td)
{
	int error;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t mem;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	struct gntdev_gmap *gmap;

	map = &td->td_proc->p_vmspace->vm_map;
	error = vm_map_lookup(&map, arg->vaddr, VM_PROT_NONE, &entry,
	    &mem, &pindex, &prot, &wired);
	if (error != KERN_SUCCESS)
		return (EINVAL);

	if ((mem->type != OBJT_MGTDEVICE) ||
	    (mem->un_pager.devp.ops != &gntdev_gmap_pg_ops)) {
		error = EINVAL;
		goto out;
	}

	gmap = mem->handle;
	if (gmap == NULL || gmap->map == NULL ||
	    (entry->end - entry->start) != (gmap->count * PAGE_SIZE)) {
		error = EINVAL;
		goto out;
	}

	arg->count = gmap->count;
	arg->offset = gmap->file_index;
	error = 0;

out:
	vm_map_lookup_done(map, entry);
	return (error);
}
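
/*
 * Illustrative userspace sketch (assumptions as in the earlier examples):
 * recover index and count for an mmapped mapping from its virtual address,
 * then tear the mapping down.
 *
 *	struct ioctl_gntdev_get_offset_for_vaddr voff;
 *	struct ioctl_gntdev_unmap_grant_ref urq;
 *
 *	voff.vaddr = (uint64_t)(uintptr_t)m;	// address returned by mmap(2)
 *	if (ioctl(fd, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR, &voff) == 0) {
 *		munmap(m, voff.count * PAGE_SIZE);
 *		urq.index = voff.offset;
 *		urq.count = voff.count;
 *		ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &urq);
 *	}
 */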

/*-------------------- Grant Mapping Pager ----------------------------------*/

static int
gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{

	return (0);
}

static void
gntdev_gmap_pg_dtor(void *handle)
{

	notify_unmap_cleanup((struct gntdev_gmap *)handle);
}

static int
gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
	struct gntdev_gmap *gmap = object->handle;
	vm_pindex_t pidx, ridx;
	vm_page_t page, oldm;
	vm_ooffset_t relative_offset;

	if (gmap->map == NULL)
		return (VM_PAGER_FAIL);

	relative_offset = offset - gmap->file_index;

	pidx = UOFF_TO_IDX(offset);
	ridx = UOFF_TO_IDX(relative_offset);
	if (ridx >= gmap->count ||
	    gmap->grant_map_ops[ridx].status != GNTST_okay)
		return (VM_PAGER_FAIL);

	page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + relative_offset);
	if (page == NULL)
		return (VM_PAGER_FAIL);

	KASSERT((page->flags & PG_FICTITIOUS) != 0,
	    ("not fictitious %p", page));
	KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page));
	KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page));

	if (*mres != NULL) {
		oldm = *mres;
		vm_page_lock(oldm);
		vm_page_free(oldm);
		vm_page_unlock(oldm);
		*mres = NULL;
	}

	vm_page_insert(page, object, pidx);
	page->valid = VM_PAGE_BITS_ALL;
	vm_page_xbusy(page);
	*mres = page;
	return (VM_PAGER_OK);
}

/*------------------ Grant Table Methods ------------------------------------*/
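
/*
 * Carry out the notification recorded for a dying grant or mapping: clear
 * the byte at the recorded offset within the page (UNMAP_NOTIFY_CLEAR_BYTE)
 * and/or signal the recorded event channel (UNMAP_NOTIFY_SEND_EVENT),
 * dropping the event-channel reference afterwards.
 */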
static void
notify(struct notify_data *notify, vm_page_t page)
{
	if (notify->action & UNMAP_NOTIFY_CLEAR_BYTE) {
		uint8_t *mem;
		uint64_t offset;

		offset = notify->index & PAGE_MASK;
		mem = (uint8_t *)pmap_quick_enter_page(page);
		mem[offset] = 0;
		pmap_quick_remove_page((vm_offset_t)mem);
	}
	if (notify->action & UNMAP_NOTIFY_SEND_EVENT) {
		xen_intr_signal(notify->notify_evtchn_handle);
		xen_intr_unbind(&notify->notify_evtchn_handle);
	}
	notify->action = 0;
}

/*
 * Helper to copy new arguments from the notify ioctl into
 * the existing notify data.
 */
static int
copy_notify_helper(struct notify_data *destination,
    struct ioctl_gntdev_unmap_notify *source)
{
	xen_intr_handle_t handlep = NULL;

	/*
	 * "Get" before "Put"ting previous reference, as we might be
	 * holding the last reference to the event channel port.
	 */
	if (source->action & UNMAP_NOTIFY_SEND_EVENT)
		if (xen_intr_get_evtchn_from_port(source->event_channel_port,
		    &handlep) != 0)
			return (EINVAL);

	if (destination->action & UNMAP_NOTIFY_SEND_EVENT)
		xen_intr_unbind(&destination->notify_evtchn_handle);

	destination->action = source->action;
	destination->event_channel_port = source->event_channel_port;
	destination->index = source->index;
	destination->notify_evtchn_handle = handlep;

	return (0);
}

/*
 * IOCTL_GNTDEV_SET_UNMAP_NOTIFY
 * Set unmap notification inside the appropriate grant. It sends a
 * notification when the grant is completely munmapped by this domain
 * and ready for destruction.
 */
static int
gntdev_set_unmap_notify(struct ioctl_gntdev_unmap_notify *arg)
{
	int error;
	uint64_t index;
	struct per_user_data *priv_user;
	struct gntdev_gref *gref = NULL;
	struct gntdev_gmap *gmap;

	error = devfs_get_cdevpriv((void**) &priv_user);
	if (error != 0)
		return (EINVAL);

	if (arg->action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
		return (EINVAL);

	index = arg->index & ~PAGE_MASK;
	gref = gntdev_find_grefs(priv_user, index, 1);
	if (gref) {
		if (gref->notify == NULL)
			gref->notify = malloc(sizeof(*arg), M_GNTDEV,
			    M_WAITOK | M_ZERO);
		return (copy_notify_helper(gref->notify, arg));
	}

	error = EINVAL;
	mtx_lock(&priv_user->user_data_lock);
	RB_FOREACH(gmap, gmap_tree_head, &priv_user->gmap_tree) {
		if (arg->index >= gmap->file_index &&
		    arg->index < gmap->file_index + gmap->count * PAGE_SIZE) {
			if (gmap->notify == NULL)
				gmap->notify = malloc(sizeof(*arg), M_GNTDEV,
				    M_WAITOK | M_ZERO);
			error = copy_notify_helper(gmap->notify, arg);
			break;
		}
	}
	mtx_unlock(&priv_user->user_data_lock);

	return (error);
}
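
/*
 * Illustrative userspace sketch (hedged, as above): request that the last
 * byte of the mapped page be cleared and an event channel signalled when
 * the mapping is finally torn down.  local_port is a placeholder for an
 * event channel port already bound by the caller.
 *
 *	struct ioctl_gntdev_unmap_notify nrq;
 *
 *	nrq.index = mreq.index + PAGE_SIZE - 1;
 *	nrq.action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT;
 *	nrq.event_channel_port = local_port;
 *	ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &nrq);
 */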

/*------------------ Gntdev Char Device Methods -----------------------------*/

static void
cleanup_function(void *arg, __unused int pending)
{

	gref_list_dtor((struct cleanup_data_struct *) arg);
	gmap_list_dtor((struct cleanup_data_struct *) arg);
}

static void
per_user_data_dtor(void *arg)
{
	struct gntdev_gref *gref, *gref_tmp;
	struct gntdev_gmap *gmap, *gmap_tmp;
	struct file_offset_struct *offset, *offset_tmp;
	struct per_user_data *priv_user;

	priv_user = (struct per_user_data *) arg;

	mtx_lock(&priv_user->user_data_lock);

	mtx_lock(&cleanup_data.to_kill_grefs_mtx);
	RB_FOREACH_SAFE(gref, gref_tree_head, &priv_user->gref_tree, gref_tmp) {
		RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref);
		STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref,
		    gref_next.list);
	}
	mtx_unlock(&cleanup_data.to_kill_grefs_mtx);

	mtx_lock(&cleanup_data.to_kill_gmaps_mtx);
	RB_FOREACH_SAFE(gmap, gmap_tree_head, &priv_user->gmap_tree, gmap_tmp) {
		RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap);
		STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap,
		    gmap_next.list);
		if (gmap->map)
			vm_object_deallocate(gmap->map->mem);
	}
	mtx_unlock(&cleanup_data.to_kill_gmaps_mtx);

	RB_FOREACH_SAFE(offset, file_offset_head, &priv_user->file_offset,
	    offset_tmp) {
		RB_REMOVE(file_offset_head, &priv_user->file_offset, offset);
		free(offset, M_GNTDEV);
	}

	mtx_unlock(&priv_user->user_data_lock);

	taskqueue_enqueue(taskqueue_thread, &cleanup_task);

	mtx_destroy(&priv_user->user_data_lock);
	free(priv_user, M_GNTDEV);
}

static int
gntdev_open(struct cdev *dev, int flag, int otyp, struct thread *td)
{
	int error;
	struct per_user_data *priv_user;
	struct file_offset_struct *offset;

	priv_user = malloc(sizeof(*priv_user), M_GNTDEV, M_WAITOK | M_ZERO);
	RB_INIT(&priv_user->gref_tree);
	RB_INIT(&priv_user->gmap_tree);
	RB_INIT(&priv_user->file_offset);
	offset = malloc(sizeof(*offset), M_GNTDEV, M_WAITOK | M_ZERO);
	offset->file_offset = 0;
	offset->count = MAX_OFFSET_COUNT;
	RB_INSERT(file_offset_head, &priv_user->file_offset, offset);
	mtx_init(&priv_user->user_data_lock,
	    "per user data mutex", NULL, MTX_DEF);

	error = devfs_set_cdevpriv(priv_user, per_user_data_dtor);
	if (error != 0)
		per_user_data_dtor(priv_user);

	return (error);
}

static int
gntdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int error;

	switch (cmd) {
	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
		error = gntdev_set_unmap_notify(
		    (struct ioctl_gntdev_unmap_notify*) data);
		break;
	case IOCTL_GNTDEV_ALLOC_GREF:
		error = gntdev_alloc_gref(
		    (struct ioctl_gntdev_alloc_gref*) data);
		break;
	case IOCTL_GNTDEV_DEALLOC_GREF:
		error = gntdev_dealloc_gref(
		    (struct ioctl_gntdev_dealloc_gref*) data);
		break;
	case IOCTL_GNTDEV_MAP_GRANT_REF:
		error = gntdev_map_grant_ref(
		    (struct ioctl_gntdev_map_grant_ref*) data);
		break;
	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
		error = gntdev_unmap_grant_ref(
		    (struct ioctl_gntdev_unmap_grant_ref*) data);
		break;
	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
		error = gntdev_get_offset_for_vaddr(
		    (struct ioctl_gntdev_get_offset_for_vaddr*) data, td);
		break;
	default:
		error = ENOSYS;
		break;
	}

	return (error);
}

/*
 * MMAP an allocated grant into user memory.
 * Please note, that the grants must not already be mmapped, otherwise
 * this function will fail.
 */
static int
mmap_gref(struct per_user_data *priv_user, struct gntdev_gref *gref_start,
    uint32_t count, vm_size_t size, struct vm_object **object)
{
	vm_object_t mem_obj;
	struct gntdev_gref *gref;

	mem_obj = vm_object_allocate(OBJT_PHYS, size);
	if (mem_obj == NULL)
		return (ENOMEM);

	mtx_lock(&priv_user->user_data_lock);
	VM_OBJECT_WLOCK(mem_obj);
	for (gref = gref_start; gref != NULL && count > 0; gref =
	    RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) {
		if (gref->page->object)
			break;

		vm_page_insert(gref->page, mem_obj,
		    UOFF_TO_IDX(gref->file_index));

		count--;
	}
	VM_OBJECT_WUNLOCK(mem_obj);
	mtx_unlock(&priv_user->user_data_lock);

	if (count) {
		vm_object_deallocate(mem_obj);
		return (EINVAL);
	}

	*object = mem_obj;

	return (0);
}

/*
 * MMAP a mapped grant into user memory.
 */
static int
mmap_gmap(struct per_user_data *priv_user, struct gntdev_gmap *gmap_start,
    vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot)
{
	uint32_t i;
	int error;

	/*
	 * The grant map hypercall might already be done.
	 * If that is the case, increase a reference to the
	 * vm object and return the already allocated object.
	 */
	if (gmap_start->map) {
		vm_object_reference(gmap_start->map->mem);
		*object = gmap_start->map->mem;
		return (0);
	}

	gmap_start->map = malloc(sizeof(*(gmap_start->map)), M_GNTDEV,
	    M_WAITOK | M_ZERO);

	/* Allocate the xen pseudo physical memory resource. */
	gmap_start->map->pseudo_phys_res_id = 0;
	gmap_start->map->pseudo_phys_res = xenmem_alloc(gntdev_dev,
	    &gmap_start->map->pseudo_phys_res_id, size);
	if (gmap_start->map->pseudo_phys_res == NULL) {
		free(gmap_start->map, M_GNTDEV);
		gmap_start->map = NULL;
		return (ENOMEM);
	}
	gmap_start->map->phys_base_addr =
	    rman_get_start(gmap_start->map->pseudo_phys_res);

	/* Allocate the mgtdevice pager. */
	gmap_start->map->mem = cdev_pager_allocate(gmap_start, OBJT_MGTDEVICE,
	    &gntdev_gmap_pg_ops, size, nprot, *offset, NULL);
	if (gmap_start->map->mem == NULL) {
		xenmem_free(gntdev_dev, gmap_start->map->pseudo_phys_res_id,
		    gmap_start->map->pseudo_phys_res);
		free(gmap_start->map, M_GNTDEV);
		gmap_start->map = NULL;
		return (ENOMEM);
	}

	for (i = 0; i < gmap_start->count; i++) {
		gmap_start->grant_map_ops[i].host_addr =
		    gmap_start->map->phys_base_addr + i * PAGE_SIZE;

		if ((nprot & PROT_WRITE) == 0)
			gmap_start->grant_map_ops[i].flags |= GNTMAP_readonly;
	}
	/* Make the MAP hypercall. */
	error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    gmap_start->grant_map_ops, gmap_start->count);
	if (error != 0) {
		/*
		 * Deallocate pager.
		 * Pager deallocation will automatically take care of
		 * xenmem deallocation, etc.
		 */
		vm_object_deallocate(gmap_start->map->mem);

		return (xen_translate_error(error));
	}

	/* Retry EAGAIN maps. */
	for (i = 0; i < gmap_start->count; i++) {
		int delay = 1;
		while (delay < 256 &&
		    gmap_start->grant_map_ops[i].status == GNTST_eagain) {
			HYPERVISOR_grant_table_op( GNTTABOP_map_grant_ref,
			    &gmap_start->grant_map_ops[i], 1);
			pause(("gntmap"), delay * SBT_1MS);
			delay++;
		}
		if (gmap_start->grant_map_ops[i].status == GNTST_eagain)
			gmap_start->grant_map_ops[i].status = GNTST_bad_page;

		if (gmap_start->grant_map_ops[i].status != GNTST_okay) {
			/*
			 * Deallocate pager.
			 * Pager deallocation will automatically take care of
			 * xenmem deallocation, notification, unmap hypercall,
			 * etc.
			 */
			vm_object_deallocate(gmap_start->map->mem);

			return (EINVAL);
		}
	}

	/*
	 * Add a reference to the vm object. We do not want
	 * the vm object to be deleted when all the mmaps are
	 * unmapped, because it may be re-mmapped. Instead,
	 * we want the object to be deleted, when along with
	 * munmaps, we have also processed the unmap-ioctl.
	 */
	vm_object_reference(gmap_start->map->mem);

	*object = gmap_start->map->mem;

	return (0);
}
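
/*
 * Dispatch an mmap(2) on /dev/xen/gntdev: offsets that belong to allocated
 * grants are handled by mmap_gref(), offsets that belong to mapped grants
 * by mmap_gmap(); anything else is rejected.
 */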
static int
gntdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
    struct vm_object **object, int nprot)
{
	int error;
	uint32_t count;
	struct gntdev_gref *gref_start;
	struct gntdev_gmap *gmap_start;
	struct per_user_data *priv_user;

	error = devfs_get_cdevpriv((void**) &priv_user);
	if (error != 0)
		return (EINVAL);

	count = UOFF_TO_IDX(size);

	gref_start = gntdev_find_grefs(priv_user, *offset, count);
	if (gref_start) {
		error = mmap_gref(priv_user, gref_start, count, size, object);
		return (error);
	}

	gmap_start = gntdev_find_gmap(priv_user, *offset, count);
	if (gmap_start) {
		error = mmap_gmap(priv_user, gmap_start, offset, size, object,
		    nprot);
		return (error);
	}

	return (EINVAL);
}

/*------------------ Private Device Attachment Functions --------------------*/
static void
gntdev_identify(driver_t *driver, device_t parent)
{

	KASSERT((xen_domain()),
	    ("Trying to attach gntdev device on non Xen domain"));

	if (BUS_ADD_CHILD(parent, 0, "gntdev", 0) == NULL)
		panic("unable to attach gntdev user-space device");
}

static int
gntdev_probe(device_t dev)
{

	gntdev_dev = dev;
	device_set_desc(dev, "Xen grant-table user-space device");
	return (BUS_PROBE_NOWILDCARD);
}

static int
gntdev_attach(device_t dev)
{

	make_dev_credf(MAKEDEV_ETERNAL, &gntdev_devsw, 0, NULL, UID_ROOT,
	    GID_WHEEL, 0600, "xen/gntdev");
	return (0);
}

/*-------------------- Private Device Attachment Data -----------------------*/
static device_method_t gntdev_methods[] = {
	DEVMETHOD(device_identify, gntdev_identify),
	DEVMETHOD(device_probe, gntdev_probe),
	DEVMETHOD(device_attach, gntdev_attach),
	DEVMETHOD_END
};

static driver_t gntdev_driver = {
	"gntdev",
	gntdev_methods,
	0,
};

devclass_t gntdev_devclass;

DRIVER_MODULE(gntdev, xenpv, gntdev_driver, gntdev_devclass, 0, 0);
MODULE_DEPEND(gntdev, xenpv, 1, 1, 1);