sys/dev/cxgb/ulp/tom/cxgb_vm.c

   1 /**************************************************************************
   2
   3 Copyright (c) 2007-2008, Chelsio Inc.
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions are met:
   8
   9  1. Redistributions of source code must retain the above copyright notice,
  10     this list of conditions and the following disclaimer.
  11
  12  2. Neither the name of the Chelsio Corporation nor the names of its
  13     contributors may be used to endorse or promote products derived from
  14     this software without specific prior written permission.
  15
  16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  26 POSSIBILITY OF SUCH DAMAGE.
  27
  28 ***************************************************************************/
  29
  30 #include <sys/cdefs.h>
  31 __FBSDID("$FreeBSD$");
  32
  33 #include <sys/param.h>
  34 #include <sys/systm.h>
  35 #include <sys/types.h>
  36 #include <sys/fcntl.h>
  37 #include <sys/kernel.h>
  38 #include <sys/limits.h>
  39 #include <sys/lock.h>
  40 #include <sys/mbuf.h>
  41 #include <sys/condvar.h>
  42 #include <sys/mutex.h>
  43 #include <sys/proc.h>
  44 #include <sys/syslog.h>
  45
  46 #include <vm/vm.h>
  47 #include <vm/vm_page.h>
  48 #include <vm/vm_map.h>
  49 #include <vm/vm_extern.h>
  50 #include <vm/pmap.h>
  51 #include <ulp/tom/cxgb_vm.h>
  52
  53 /*
  54  * This routine takes a user's map, array of pages, number of pages, and flags
  55  * and then does the following:
  56  *  - validate that the user has access to those pages (flags indicates read
  57  *      or write) - if not fail
  58  *  - validate that count is enough to hold range number of pages - if not fail
  59  *  - fault in any non-resident pages
  60  *  - if the user is doing a read force a write fault for any COWed pages
  61  *  - if the user is doing a read mark all pages as dirty
  62  *  - hold all pages
  63  */
  64 int
  65 vm_fault_hold_user_pages(vm_map_t map, vm_offset_t addr, vm_page_t *mp,
  66     int count, vm_prot_t prot)
  67 {
  68         vm_offset_t end, va;
  69         int faults, rv;
  70         pmap_t pmap;
  71         vm_page_t m, *pages;
  72
  73         pmap = vm_map_pmap(map);
  74         pages = mp;
  75         addr &= ~PAGE_MASK;
  76         /*
  77          * Check that virtual address range is legal
  78          * This check is somewhat bogus as on some architectures kernel
  79          * and user do not share VA - however, it appears that all FreeBSD
  80          * architectures define it
  81          */
  82         end = addr + (count * PAGE_SIZE);
  83         if (end > VM_MAXUSER_ADDRESS) {
  84                 log(LOG_WARNING, "bad address passed to vm_fault_hold_user_pages");
  85                 return (EFAULT);
  86         }
  87
  88         /*
  89          * First optimistically assume that all pages are resident
  90          * (and R/W if for write) if so just mark pages as held (and
  91          * dirty if for write) and return
  92          */
  93         vm_page_lock_queues();
  94         for (pages = mp, faults = 0, va = addr; va < end;
  95              va += PAGE_SIZE, pages++) {
  96                 /*
  97                  * page queue mutex is recursable so this is OK
  98                  * it would be really nice if we had an unlocked
  99                  * version of this so we were only acquiring the
 100                  * pmap lock 1 time as opposed to potentially
 101                  * many dozens of times
 102                  */
 103                 *pages = m = pmap_extract_and_hold(pmap, va, prot);
 104                 if (m == NULL) {
 105                         faults++;
 106                         continue;
 107                 }
 108                 /*
 109                  * Preemptively mark dirty - the pages
 110                  * will never have the modified bit set if
 111                  * they are only changed via DMA
 112                  */
 113                 if (prot & VM_PROT_WRITE)
 114                         vm_page_dirty(m);
 115
 116         }
 117         vm_page_unlock_queues();
 118
 119         if (faults == 0)
 120                 return (0);
 121
 122         /*
 123          * Pages either have insufficient permissions or are not present
 124          * trigger a fault where neccessary
 125          *
 126          */
 127         rv = 0;
 128         for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++) {
 129                 /*
 130                  * Account for a very narrow race where the page may be
 131                  * taken away from us before it is held
 132                  */
 133                 while (*pages == NULL) {
 134                         rv = vm_fault(map, va, prot,
 135                             (prot & VM_PROT_WRITE) ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
 136                         if (rv)
 137                                 goto error;
 138                         *pages = pmap_extract_and_hold(pmap, va, prot);
 139                 }
 140         }
 141         return (0);
 142 error:
 143         log(LOG_WARNING,
 144             "vm_fault bad return rv=%d va=0x%zx\n", rv, va);
 145         vm_page_lock_queues();
 146         for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++)
 147                 if (*pages) {
 148                         vm_page_unhold(*pages);
 149                         *pages = NULL;
 150                 }
 151         vm_page_unlock_queues();
 152         return (EFAULT);
 153 }
 154
 155 void
 156 vm_fault_unhold_pages(vm_page_t *mp, int count)
 157 {
 158
 159         KASSERT(count >= 0, ("negative count %d", count));
 160         vm_page_lock_queues();
 161         while (count--) {
 162                 vm_page_unhold(*mp);
 163                 mp++;
 164         }
 165         vm_page_unlock_queues();
 166 }