/*
 * Source: CyberLeo.Net Repos - FreeBSD/FreeBSD.git
 * Path: sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c
 * Commit subject: "zfs: fix assert in FreeBSD-specific dmu_read_pages"
 */
1 /*
2  * Copyright (c) 2020 iXsystems, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/dmu.h>
34 #include <sys/dmu_impl.h>
35 #include <sys/dmu_tx.h>
36 #include <sys/dbuf.h>
37 #include <sys/dnode.h>
38 #include <sys/zfs_context.h>
39 #include <sys/dmu_objset.h>
40 #include <sys/dmu_traverse.h>
41 #include <sys/dsl_dataset.h>
42 #include <sys/dsl_dir.h>
43 #include <sys/dsl_pool.h>
44 #include <sys/dsl_synctask.h>
45 #include <sys/dsl_prop.h>
46 #include <sys/dmu_zfetch.h>
47 #include <sys/zfs_ioctl.h>
48 #include <sys/zap.h>
49 #include <sys/zio_checksum.h>
50 #include <sys/zio_compress.h>
51 #include <sys/sa.h>
52 #include <sys/zfeature.h>
53 #include <sys/abd.h>
54 #include <sys/zfs_rlock.h>
55 #include <sys/racct.h>
56 #include <sys/vm.h>
57 #include <sys/zfs_znode.h>
58 #include <sys/zfs_vnops.h>
59
60 #include <sys/ccompat.h>
61
/*
 * Convert a page index into a byte offset by shifting it left by PAGE_SHIFT.
 * Only defined here when no earlier header has provided it.
 */
#ifndef IDX_TO_OFF
#define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy-state allocation flags passed to vm_page_grab_unlocked() for the
 * read-behind and read-ahead pages.  The set of flags depends on the
 * FreeBSD version.  The multi-flag form is parenthesized so that the macro
 * stays a single operand wherever it is used in an expression.
 */
#if  __FreeBSD_version < 1300051
#define VM_ALLOC_BUSY_FLAGS VM_ALLOC_NOBUSY
#else
#define VM_ALLOC_BUSY_FLAGS  (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif


/*
 * Per-page lock wrappers: before __FreeBSD_version 1300072 they map to
 * vm_page_lock()/vm_page_unlock(); on newer versions they expand to nothing.
 */
#if __FreeBSD_version < 1300072
#define dmu_page_lock(m)        vm_page_lock(m)
#define dmu_page_unlock(m)      vm_page_unlock(m)
#else
#define dmu_page_lock(m)
#define dmu_page_unlock(m)
#endif
80
81 static int
82 dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
83     uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
84 {
85         dnode_t *dn;
86         int err;
87
88         err = dnode_hold(os, object, FTAG, &dn);
89         if (err)
90                 return (err);
91
92         err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
93             numbufsp, dbpp, DMU_READ_PREFETCH);
94
95         dnode_rele(dn, FTAG);
96
97         return (err);
98 }
99
100 int
101 dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
102     vm_page_t *ma, dmu_tx_t *tx)
103 {
104         dmu_buf_t **dbp;
105         struct sf_buf *sf;
106         int numbufs, i;
107         int err;
108
109         if (size == 0)
110                 return (0);
111
112         err = dmu_buf_hold_array(os, object, offset, size,
113             FALSE, FTAG, &numbufs, &dbp);
114         if (err)
115                 return (err);
116
117         for (i = 0; i < numbufs; i++) {
118                 int tocpy, copied, thiscpy;
119                 int bufoff;
120                 dmu_buf_t *db = dbp[i];
121                 caddr_t va;
122
123                 ASSERT(size > 0);
124                 ASSERT3U(db->db_size, >=, PAGESIZE);
125
126                 bufoff = offset - db->db_offset;
127                 tocpy = (int)MIN(db->db_size - bufoff, size);
128
129                 ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
130
131                 if (tocpy == db->db_size)
132                         dmu_buf_will_fill(db, tx);
133                 else
134                         dmu_buf_will_dirty(db, tx);
135
136                 for (copied = 0; copied < tocpy; copied += PAGESIZE) {
137                         ASSERT3U(ptoa((*ma)->pindex), ==,
138                             db->db_offset + bufoff);
139                         thiscpy = MIN(PAGESIZE, tocpy - copied);
140                         va = zfs_map_page(*ma, &sf);
141                         bcopy(va, (char *)db->db_data + bufoff, thiscpy);
142                         zfs_unmap_page(sf);
143                         ma += 1;
144                         bufoff += PAGESIZE;
145                 }
146
147                 if (tocpy == db->db_size)
148                         dmu_buf_fill_done(db, tx);
149
150                 offset += tocpy;
151                 size -= tocpy;
152         }
153         dmu_buf_rele_array(dbp, numbufs, FTAG);
154         return (err);
155 }
156
157 int
158 dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
159     int *rbehind, int *rahead, int last_size)
160 {
161         struct sf_buf *sf;
162         vm_object_t vmobj;
163         vm_page_t m;
164         dmu_buf_t **dbp;
165         dmu_buf_t *db;
166         caddr_t va;
167         int numbufs, i;
168         int bufoff, pgoff, tocpy;
169         int mi, di;
170         int err;
171
172         ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
173         ASSERT(last_size <= PAGE_SIZE);
174
175         err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
176             IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
177         if (err != 0)
178                 return (err);
179
180 #ifdef ZFS_DEBUG
181         IMPLY(last_size < PAGE_SIZE, *rahead == 0);
182         if (dbp[0]->db_offset != 0 || numbufs > 1) {
183                 for (i = 0; i < numbufs; i++) {
184                         ASSERT(ISP2(dbp[i]->db_size));
185                         ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0);
186                         ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
187                 }
188         }
189 #endif
190
191         vmobj = ma[0]->object;
192         zfs_vmobject_wlock_12(vmobj);
193
194         db = dbp[0];
195         for (i = 0; i < *rbehind; i++) {
196                 m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
197                     VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
198                 if (m == NULL)
199                         break;
200                 if (!vm_page_none_valid(m)) {
201                         ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
202                         vm_page_do_sunbusy(m);
203                         break;
204                 }
205                 ASSERT(m->dirty == 0);
206                 ASSERT(!pmap_page_is_write_mapped(m));
207
208                 ASSERT(db->db_size > PAGE_SIZE);
209                 bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
210                 va = zfs_map_page(m, &sf);
211                 bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
212                 zfs_unmap_page(sf);
213                 vm_page_valid(m);
214                 dmu_page_lock(m);
215                 if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
216                         vm_page_activate(m);
217                 else
218                         vm_page_deactivate(m);
219                 dmu_page_unlock(m);
220                 vm_page_do_sunbusy(m);
221         }
222         *rbehind = i;
223
224         bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
225         pgoff = 0;
226         for (mi = 0, di = 0; mi < count && di < numbufs; ) {
227                 if (pgoff == 0) {
228                         m = ma[mi];
229                         if (m != bogus_page) {
230                                 vm_page_assert_xbusied(m);
231                                 ASSERT(vm_page_none_valid(m));
232                                 ASSERT(m->dirty == 0);
233                                 ASSERT(!pmap_page_is_write_mapped(m));
234                                 va = zfs_map_page(m, &sf);
235                         }
236                 }
237                 if (bufoff == 0)
238                         db = dbp[di];
239
240                 if (m != bogus_page) {
241                         ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
242                             db->db_offset + bufoff);
243                 }
244
245                 /*
246                  * We do not need to clamp the copy size by the file
247                  * size as the last block is zero-filled beyond the
248                  * end of file anyway.
249                  */
250                 tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
251                 if (m != bogus_page)
252                         bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy);
253
254                 pgoff += tocpy;
255                 ASSERT(pgoff <= PAGESIZE);
256                 if (pgoff == PAGESIZE) {
257                         if (m != bogus_page) {
258                                 zfs_unmap_page(sf);
259                                 vm_page_valid(m);
260                         }
261                         ASSERT(mi < count);
262                         mi++;
263                         pgoff = 0;
264                 }
265
266                 bufoff += tocpy;
267                 ASSERT(bufoff <= db->db_size);
268                 if (bufoff == db->db_size) {
269                         ASSERT(di < numbufs);
270                         di++;
271                         bufoff = 0;
272                 }
273         }
274
275 #ifdef ZFS_DEBUG
276         /*
277          * Three possibilities:
278          * - last requested page ends at a buffer boundary and , thus,
279          *   all pages and buffers have been iterated;
280          * - all requested pages are filled, but the last buffer
281          *   has not been exhausted;
282          *   the read-ahead is possible only in this case;
283          * - all buffers have been read, but the last page has not been
284          *   fully filled;
285          *   this is only possible if the file has only a single buffer
286          *   with a size that is not a multiple of the page size.
287          */
288         if (mi == count) {
289                 ASSERT(di >= numbufs - 1);
290                 IMPLY(*rahead != 0, di == numbufs - 1);
291                 IMPLY(*rahead != 0, bufoff != 0);
292                 ASSERT(pgoff == 0);
293         }
294         if (di == numbufs) {
295                 ASSERT(mi >= count - 1);
296                 ASSERT(*rahead == 0);
297                 IMPLY(pgoff == 0, mi == count);
298                 if (pgoff != 0) {
299                         ASSERT(mi == count - 1);
300                         ASSERT((dbp[0]->db_size & PAGE_MASK) != 0);
301                 }
302         }
303 #endif
304         if (pgoff != 0) {
305                 ASSERT(m != bogus_page);
306                 bzero(va + pgoff, PAGESIZE - pgoff);
307                 zfs_unmap_page(sf);
308                 vm_page_valid(m);
309         }
310
311         for (i = 0; i < *rahead; i++) {
312                 m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
313                     VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
314                 if (m == NULL)
315                         break;
316                 if (!vm_page_none_valid(m)) {
317                         ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
318                         vm_page_do_sunbusy(m);
319                         break;
320                 }
321                 ASSERT(m->dirty == 0);
322                 ASSERT(!pmap_page_is_write_mapped(m));
323
324                 ASSERT(db->db_size > PAGE_SIZE);
325                 bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
326                 tocpy = MIN(db->db_size - bufoff, PAGESIZE);
327                 va = zfs_map_page(m, &sf);
328                 bcopy((char *)db->db_data + bufoff, va, tocpy);
329                 if (tocpy < PAGESIZE) {
330                         ASSERT(i == *rahead - 1);
331                         ASSERT((db->db_size & PAGE_MASK) != 0);
332                         bzero(va + tocpy, PAGESIZE - tocpy);
333                 }
334                 zfs_unmap_page(sf);
335                 vm_page_valid(m);
336                 dmu_page_lock(m);
337                 if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
338                         vm_page_activate(m);
339                 else
340                         vm_page_deactivate(m);
341                 dmu_page_unlock(m);
342                 vm_page_do_sunbusy(m);
343         }
344         *rahead = i;
345         zfs_vmobject_wunlock_12(vmobj);
346
347         dmu_buf_rele_array(dbp, numbufs, FTAG);
348         return (0);
349 }