module/zfs/dmu_zfetch.c — ZFS DMU prefetch (zfetch) implementation
(incorporates Illumos issues 5960, 5925)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25
26 /*
27  * Copyright (c) 2013 by Delphix. All rights reserved.
28  */
29
30 #include <sys/zfs_context.h>
31 #include <sys/dnode.h>
32 #include <sys/dmu_objset.h>
33 #include <sys/dmu_zfetch.h>
34 #include <sys/dmu.h>
35 #include <sys/dbuf.h>
36 #include <sys/kstat.h>
37
38 /*
39  * I'm against tune-ables, but these should probably exist as tweakable globals
40  * until we can get this working the way we want it to.
41  */
42
/* nonzero completely disables issuing of prefetch I/O */
int zfs_prefetch_disable = 0;

/* max # of streams per zfetch */
unsigned int	zfetch_max_streams = 8;
/* min time (seconds) before an idle stream may be reclaimed */
unsigned int	zfetch_min_sec_reap = 2;
/* max number of blocks to fetch at a time */
unsigned int	zfetch_block_cap = 256;
/* number of bytes in an array_read at which we stop prefetching (1 MB) */
unsigned long	zfetch_array_rd_sz = 1024 * 1024;
53
54 /* forward decls for static routines */
55 static boolean_t        dmu_zfetch_colinear(zfetch_t *, zstream_t *);
56 static void             dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
57 static uint64_t         dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
58 static uint64_t         dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
59 static boolean_t        dmu_zfetch_find(zfetch_t *, zstream_t *, int);
60 static int              dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
61 static zstream_t        *dmu_zfetch_stream_reclaim(zfetch_t *);
62 static void             dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
63 static int              dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
64
/*
 * Counters describing prefetch behavior, exported to userland through
 * the "zfetchstats" kstat.  Field order must match the name/type table
 * in the zfetch_stats initializer below.
 */
typedef struct zfetch_stats {
	kstat_named_t zfetchstat_hits;
	kstat_named_t zfetchstat_misses;
	kstat_named_t zfetchstat_colinear_hits;
	kstat_named_t zfetchstat_colinear_misses;
	kstat_named_t zfetchstat_stride_hits;
	kstat_named_t zfetchstat_stride_misses;
	kstat_named_t zfetchstat_reclaim_successes;
	kstat_named_t zfetchstat_reclaim_failures;
	kstat_named_t zfetchstat_stream_resets;
	kstat_named_t zfetchstat_stream_noresets;
	kstat_named_t zfetchstat_bogus_streams;
} zfetch_stats_t;

/* kstat name table; one entry per zfetch_stats_t field, in order */
static zfetch_stats_t zfetch_stats = {
	{ "hits",			KSTAT_DATA_UINT64 },
	{ "misses",			KSTAT_DATA_UINT64 },
	{ "colinear_hits",		KSTAT_DATA_UINT64 },
	{ "colinear_misses",		KSTAT_DATA_UINT64 },
	{ "stride_hits",		KSTAT_DATA_UINT64 },
	{ "stride_misses",		KSTAT_DATA_UINT64 },
	{ "reclaim_successes",		KSTAT_DATA_UINT64 },
	{ "reclaim_failures",		KSTAT_DATA_UINT64 },
	{ "streams_resets",		KSTAT_DATA_UINT64 },
	{ "streams_noresets",		KSTAT_DATA_UINT64 },
	{ "bogus_streams",		KSTAT_DATA_UINT64 },
};
92
/*
 * Atomically add "val" to the named prefetch kstat counter.
 *
 * No trailing semicolon in the expansions: the original definitions ended
 * in ";", which produced a stray empty statement at every call site and
 * would break an unbraced if/else around ZFETCHSTAT_BUMP().  Call sites
 * supply their own semicolon, so dropping it is backward-compatible.
 */
#define	ZFETCHSTAT_INCR(stat, val) \
	atomic_add_64(&zfetch_stats.stat.value.ui64, (val))

/* Increment the named prefetch kstat counter by one. */
#define	ZFETCHSTAT_BUMP(stat)		ZFETCHSTAT_INCR(stat, 1)
97
98 kstat_t         *zfetch_ksp;
99
/*
 * Given a zfetch structure and a zstream structure, determine whether the
 * blocks to be read are part of a co-linear pair of existing prefetch
 * streams.  If a set is found, coalesce the streams, removing one, and
 * configure the prefetch so it looks for a strided access pattern.
 *
 * In other words: if we find two sequential access streams that are
 * the same length and distance N apart, and this read is N from the
 * last stream, then we are probably in a strided access pattern.  So
 * combine the two sequential streams into a single strided stream.
 *
 * Returns whether co-linear streams were found (and a prefetch issued).
 */
static boolean_t
dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
{
	zstream_t	*z_walk;
	zstream_t	*z_comp;

	/*
	 * Best effort only: if the stream list's writer lock is contended,
	 * skip the O(n^2) scan rather than stall the read path.
	 */
	if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
		return (0);

	if (zh == NULL) {
		rw_exit(&zf->zf_rwlock);
		return (0);
	}

	/* examine every ordered pair (z_walk, z_comp) of distinct streams */
	for (z_walk = list_head(&zf->zf_stream); z_walk;
	    z_walk = list_next(&zf->zf_stream, z_walk)) {
		for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
		    z_comp = list_next(&zf->zf_stream, z_comp)) {
			int64_t		diff;

			/*
			 * Only plain sequential streams (stride == len)
			 * are candidates for coalescing.
			 */
			if (z_walk->zst_len != z_walk->zst_stride ||
			    z_comp->zst_len != z_comp->zst_stride) {
				continue;
			}

			/*
			 * Case 1: z_walk -> z_comp -> zh are evenly spaced.
			 * Fold z_comp into z_walk as a single strided
			 * stream, free z_comp, and prefetch the predicted
			 * next chunk.
			 */
			diff = z_comp->zst_offset - z_walk->zst_offset;
			if (z_comp->zst_offset + diff == zh->zst_offset) {
				z_walk->zst_offset = zh->zst_offset;
				z_walk->zst_direction = diff < 0 ?
				    ZFETCH_BACKWARD : ZFETCH_FORWARD;
				z_walk->zst_stride =
				    diff * z_walk->zst_direction;
				z_walk->zst_ph_offset =
				    zh->zst_offset + z_walk->zst_stride;
				dmu_zfetch_stream_remove(zf, z_comp);
				mutex_destroy(&z_comp->zst_lock);
				kmem_free(z_comp, sizeof (zstream_t));

				dmu_zfetch_dofetch(zf, z_walk);

				rw_exit(&zf->zf_rwlock);
				return (1);
			}

			/*
			 * Case 2: same pattern with the pair reversed:
			 * z_comp -> z_walk -> zh are evenly spaced.
			 */
			diff = z_walk->zst_offset - z_comp->zst_offset;
			if (z_walk->zst_offset + diff == zh->zst_offset) {
				z_walk->zst_offset = zh->zst_offset;
				z_walk->zst_direction = diff < 0 ?
				    ZFETCH_BACKWARD : ZFETCH_FORWARD;
				z_walk->zst_stride =
				    diff * z_walk->zst_direction;
				z_walk->zst_ph_offset =
				    zh->zst_offset + z_walk->zst_stride;
				dmu_zfetch_stream_remove(zf, z_comp);
				mutex_destroy(&z_comp->zst_lock);
				kmem_free(z_comp, sizeof (zstream_t));

				dmu_zfetch_dofetch(zf, z_walk);

				rw_exit(&zf->zf_rwlock);
				return (1);
			}
		}
	}

	rw_exit(&zf->zf_rwlock);
	return (0);
}
181
/*
 * Given a zstream_t, determine the bounds of the prefetch.  Then call the
 * routine that actually prefetches the individual blocks.
 *
 * Advances zst_ph_offset (the prefetch head) and refreshes zst_last (the
 * last-use timestamp used by stream reclaim) as a side effect.
 */
static void
dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
{
	uint64_t	prefetch_tail;
	uint64_t	prefetch_limit;
	uint64_t	prefetch_ofst;
	uint64_t	prefetch_len;
	uint64_t	blocks_fetched;

	/* a sequential stream (stride < len) behaves as stride == len */
	zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
	/* ramp the fetch-ahead cap up exponentially, bounded by the tunable */
	zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);

	/* resume from the previous prefetch head, never behind this read */
	prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
	    (int64_t)(zs->zst_offset + zs->zst_stride));
	/*
	 * XXX: use a faster division method?
	 */
	prefetch_limit = zs->zst_offset + zs->zst_len +
	    (zs->zst_cap * zs->zst_stride) / zs->zst_len;

	while (prefetch_tail < prefetch_limit) {
		/*
		 * prefetch_tail always advances; for a backward stream,
		 * zst_direction acts as a +/-1 multiplier that mirrors the
		 * distance to the other side of zst_offset.
		 */
		prefetch_ofst = zs->zst_offset + zs->zst_direction *
		    (prefetch_tail - zs->zst_offset);

		prefetch_len = zs->zst_len;

		/*
		 * Don't prefetch beyond the end of the file, if working
		 * backwards.
		 */
		if ((zs->zst_direction == ZFETCH_BACKWARD) &&
		    (prefetch_ofst > prefetch_tail)) {
			/*
			 * prefetch_ofst wrapped below zero (unsigned);
			 * adding it back shrinks the length so the fetch
			 * is clamped to start at block 0.
			 */
			prefetch_len += prefetch_ofst;
			prefetch_ofst = 0;
		}

		/* don't prefetch more than we're supposed to */
		if (prefetch_len > zs->zst_len)
			break;

		blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
		    prefetch_ofst, zs->zst_len);

		prefetch_tail += zs->zst_stride;
		/* stop if we've run out of stuff to prefetch */
		if (blocks_fetched < zs->zst_len)
			break;
	}
	zs->zst_ph_offset = prefetch_tail;
	zs->zst_last = ddi_get_lbolt();
}
237
238 void
239 zfetch_init(void)
240 {
241
242         zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
243             KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
244             KSTAT_FLAG_VIRTUAL);
245
246         if (zfetch_ksp != NULL) {
247                 zfetch_ksp->ks_data = &zfetch_stats;
248                 kstat_install(zfetch_ksp);
249         }
250 }
251
252 void
253 zfetch_fini(void)
254 {
255         if (zfetch_ksp != NULL) {
256                 kstat_delete(zfetch_ksp);
257                 zfetch_ksp = NULL;
258         }
259 }
260
261 /*
262  * This takes a pointer to a zfetch structure and a dnode.  It performs the
263  * necessary setup for the zfetch structure, grokking data from the
264  * associated dnode.
265  */
266 void
267 dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
268 {
269         if (zf == NULL) {
270                 return;
271         }
272
273         zf->zf_dnode = dno;
274         zf->zf_stream_cnt = 0;
275         zf->zf_alloc_fail = 0;
276
277         list_create(&zf->zf_stream, sizeof (zstream_t),
278             offsetof(zstream_t, zst_node));
279
280         rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
281 }
282
283 /*
284  * This function computes the actual size, in blocks, that can be prefetched,
285  * and fetches it.
286  */
287 static uint64_t
288 dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
289 {
290         uint64_t        fetchsz;
291         uint64_t        i;
292
293         fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
294
295         for (i = 0; i < fetchsz; i++) {
296                 dbuf_prefetch(dn, 0, blkid + i, ZIO_PRIORITY_ASYNC_READ,
297                     ARC_FLAG_PREFETCH);
298         }
299
300         return (fetchsz);
301 }
302
303 /*
304  * this function returns the number of blocks that would be prefetched, based
305  * upon the supplied dnode, blockid, and nblks.  This is used so that we can
306  * update streams in place, and then prefetch with their old value after the
307  * fact.  This way, we can delay the prefetch, but subsequent accesses to the
308  * stream won't result in the same data being prefetched multiple times.
309  */
310 static uint64_t
311 dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
312 {
313         uint64_t        fetchsz;
314
315         if (blkid > dn->dn_maxblkid) {
316                 return (0);
317         }
318
319         /* compute fetch size */
320         if (blkid + nblks + 1 > dn->dn_maxblkid) {
321                 fetchsz = (dn->dn_maxblkid - blkid) + 1;
322                 ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
323         } else {
324                 fetchsz = nblks;
325         }
326
327
328         return (fetchsz);
329 }
330
/*
 * given a zfetch and a zstream structure, see if there is an associated zstream
 * for this block read.  If so, it starts a prefetch for the stream it
 * located and returns true, otherwise it returns false
 *
 * "zh" describes the incoming read in block units (zst_offset/zst_len);
 * "prefetched" is nonzero if the caller believes the data was already
 * prefetched.  A nonzero return means the access counted as a hit.
 */
static boolean_t
dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
{
	zstream_t	*zs;
	int64_t		diff;
	int		reset = !prefetched;	/* discard stream at the end? */
	int		rc = 0;

	if (zh == NULL)
		return (0);

	/*
	 * XXX: This locking strategy is a bit coarse; however, its impact has
	 * yet to be tested.  If this turns out to be an issue, it can be
	 * modified in a number of different ways.
	 */

	rw_enter(&zf->zf_rwlock, RW_READER);
top:

	for (zs = list_head(&zf->zf_stream); zs;
	    zs = list_next(&zf->zf_stream, zs)) {

		/*
		 * XXX - should this be an assert?
		 */
		if (zs->zst_len == 0) {
			/* bogus stream */
			ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
			continue;
		}

		/*
		 * We hit this case when we are in a strided prefetch stream:
		 * we will read "len" blocks before "striding".
		 */
		if (zh->zst_offset >= zs->zst_offset &&
		    zh->zst_offset < zs->zst_offset + zs->zst_len) {
			if (prefetched) {
				/* already fetched */
				ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
				rc = 1;
				goto out;
			} else {
				ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
			}
		}

		/*
		 * This is the forward sequential read case: we increment
		 * len by one each time we hit here, so we will enter this
		 * case on every read.
		 */
		if (zh->zst_offset == zs->zst_offset + zs->zst_len) {

			reset = !prefetched && zs->zst_len > 1;

			mutex_enter(&zs->zst_lock);

			/*
			 * The stream may have been changed by another thread
			 * between the unlocked test above and acquiring
			 * zst_lock; if so, rescan the list from the top.
			 */
			if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}
			zs->zst_len += zh->zst_len;
			diff = zs->zst_len - zfetch_block_cap;
			/* cap stream length by sliding the window forward */
			if (diff > 0) {
				zs->zst_offset += diff;
				zs->zst_len = zs->zst_len > diff ?
				    zs->zst_len - diff : 0;
			}
			zs->zst_direction = ZFETCH_FORWARD;

			/* leave the loop with zs set and zst_lock HELD */
			break;

		/*
		 * Same as above, but reading backwards through the file.
		 */
		} else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
			/* backwards sequential access */

			reset = !prefetched && zs->zst_len > 1;

			mutex_enter(&zs->zst_lock);

			/* re-check under zst_lock (see forward case) */
			if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			/* grow the stream downward, clamping at block 0 */
			zs->zst_offset = zs->zst_offset > zh->zst_len ?
			    zs->zst_offset - zh->zst_len : 0;
			zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
			    zs->zst_ph_offset - zh->zst_len : 0;
			zs->zst_len += zh->zst_len;

			diff = zs->zst_len - zfetch_block_cap;
			if (diff > 0) {
				zs->zst_ph_offset = zs->zst_ph_offset > diff ?
				    zs->zst_ph_offset - diff : 0;
				zs->zst_len = zs->zst_len > diff ?
				    zs->zst_len - diff : zs->zst_len;
			}
			zs->zst_direction = ZFETCH_BACKWARD;

			break;

		} else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
		    zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
			/* strided forward access */

			mutex_enter(&zs->zst_lock);

			/* re-check under zst_lock (see forward case) */
			if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
			    zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			zs->zst_offset += zs->zst_stride;
			zs->zst_direction = ZFETCH_FORWARD;

			break;

		} else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
		    zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
			/* strided reverse access */

			mutex_enter(&zs->zst_lock);

			/* re-check under zst_lock (see forward case) */
			if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
			    zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
				mutex_exit(&zs->zst_lock);
				goto top;
			}

			zs->zst_offset = zs->zst_offset > zs->zst_stride ?
			    zs->zst_offset - zs->zst_stride : 0;
			zs->zst_ph_offset = (zs->zst_ph_offset >
			    (2 * zs->zst_stride)) ?
			    (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
			zs->zst_direction = ZFETCH_BACKWARD;

			break;
		}
	}

	if (zs) {
		/* a matching stream was found; zst_lock is held here */
		if (reset) {
			zstream_t *remove = zs;

			ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
			rc = 0;
			mutex_exit(&zs->zst_lock);
			rw_exit(&zf->zf_rwlock);
			rw_enter(&zf->zf_rwlock, RW_WRITER);
			/*
			 * Relocate the stream, in case someone removes
			 * it while we were acquiring the WRITER lock.
			 */
			for (zs = list_head(&zf->zf_stream); zs;
			    zs = list_next(&zf->zf_stream, zs)) {
				if (zs == remove) {
					dmu_zfetch_stream_remove(zf, zs);
					mutex_destroy(&zs->zst_lock);
					kmem_free(zs, sizeof (zstream_t));
					break;
				}
			}
		} else {
			ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
			rc = 1;
			dmu_zfetch_dofetch(zf, zs);
			mutex_exit(&zs->zst_lock);
		}
	}
out:
	/* drops zf_rwlock as READER, or as WRITER after the reset path */
	rw_exit(&zf->zf_rwlock);
	return (rc);
}
515
516 /*
517  * Clean-up state associated with a zfetch structure.  This frees allocated
518  * structure members, empties the zf_stream tree, and generally makes things
519  * nice.  This doesn't free the zfetch_t itself, that's left to the caller.
520  */
521 void
522 dmu_zfetch_rele(zfetch_t *zf)
523 {
524         zstream_t       *zs;
525         zstream_t       *zs_next;
526
527         ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
528
529         for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
530                 zs_next = list_next(&zf->zf_stream, zs);
531
532                 list_remove(&zf->zf_stream, zs);
533                 mutex_destroy(&zs->zst_lock);
534                 kmem_free(zs, sizeof (zstream_t));
535         }
536         list_destroy(&zf->zf_stream);
537         rw_destroy(&zf->zf_rwlock);
538
539         zf->zf_dnode = NULL;
540 }
541
542 /*
543  * Given a zfetch and zstream structure, insert the zstream structure into the
544  * AVL tree contained within the zfetch structure.  Peform the appropriate
545  * book-keeping.  It is possible that another thread has inserted a stream which
546  * matches one that we are about to insert, so we must be sure to check for this
547  * case.  If one is found, return failure, and let the caller cleanup the
548  * duplicates.
549  */
550 static int
551 dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
552 {
553         zstream_t       *zs_walk;
554         zstream_t       *zs_next;
555
556         ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
557
558         for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
559                 zs_next = list_next(&zf->zf_stream, zs_walk);
560
561                 if (dmu_zfetch_streams_equal(zs_walk, zs)) {
562                         return (0);
563                 }
564         }
565
566         list_insert_head(&zf->zf_stream, zs);
567         zf->zf_stream_cnt++;
568         return (1);
569 }
570
571
572 /*
573  * Walk the list of zstreams in the given zfetch, find an old one (by time), and
574  * reclaim it for use by the caller.
575  */
576 static zstream_t *
577 dmu_zfetch_stream_reclaim(zfetch_t *zf)
578 {
579         zstream_t       *zs;
580
581         if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
582                 return (0);
583
584         for (zs = list_head(&zf->zf_stream); zs;
585             zs = list_next(&zf->zf_stream, zs)) {
586
587                 if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
588                         break;
589         }
590
591         if (zs) {
592                 dmu_zfetch_stream_remove(zf, zs);
593                 mutex_destroy(&zs->zst_lock);
594                 bzero(zs, sizeof (zstream_t));
595         } else {
596                 zf->zf_alloc_fail++;
597         }
598         rw_exit(&zf->zf_rwlock);
599
600         return (zs);
601 }
602
/*
 * Given a zfetch and zstream structure, remove the zstream structure from its
 * container in the zfetch structure.  Perform the appropriate book-keeping.
 * The caller remains responsible for destroying/freeing the stream itself.
 */
static void
dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
{
	/* writer lock required: list and count must change together */
	ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));

	list_remove(&zf->zf_stream, zs);
	zf->zf_stream_cnt--;
}
615
616 static int
617 dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
618 {
619         if (zs1->zst_offset != zs2->zst_offset)
620                 return (0);
621
622         if (zs1->zst_len != zs2->zst_len)
623                 return (0);
624
625         if (zs1->zst_stride != zs2->zst_stride)
626                 return (0);
627
628         if (zs1->zst_ph_offset != zs2->zst_ph_offset)
629                 return (0);
630
631         if (zs1->zst_cap != zs2->zst_cap)
632                 return (0);
633
634         if (zs1->zst_direction != zs2->zst_direction)
635                 return (0);
636
637         return (1);
638 }
639
/*
 * This is the prefetch entry point.  It calls all of the other dmu_zfetch
 * routines to create, delete, find, or operate upon prefetch streams.
 *
 * "offset" and "size" describe the read in bytes; "prefetched" is nonzero
 * if the caller found the data already cached.
 */
void
dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
{
	zstream_t	zst;
	zstream_t	*newstream;
	boolean_t	fetched;
	int		inserted;
	unsigned int	blkshft;
	uint64_t	blksz;

	if (zfs_prefetch_disable)
		return;

	/* files that aren't ln2 blocksz are only one block -- nothing to do */
	if (!zf->zf_dnode->dn_datablkshift)
		return;

	/* convert offset and size, into blockid and nblocks */
	blkshft = zf->zf_dnode->dn_datablkshift;
	blksz = (1 << blkshft);

	/* build an on-stack stream descriptor covering the touched blocks */
	bzero(&zst, sizeof (zstream_t));
	zst.zst_offset = offset >> blkshft;
	zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
	    P2ALIGN(offset, blksz)) >> blkshft;

	/* first, try to match (and extend) an existing stream */
	fetched = dmu_zfetch_find(zf, &zst, prefetched);
	if (fetched) {
		ZFETCHSTAT_BUMP(zfetchstat_hits);
	} else {
		ZFETCHSTAT_BUMP(zfetchstat_misses);
		/* no direct match; look for a co-linear pair of streams */
		if ((fetched = dmu_zfetch_colinear(zf, &zst))) {
			ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
		} else {
			ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
		}
	}

	if (!fetched) {
		/* start a new stream, preferably recycling an idle one */
		newstream = dmu_zfetch_stream_reclaim(zf);

		/*
		 * we still couldn't find a stream, drop the lock, and allocate
		 * one if possible.  Otherwise, give up and go home.
		 */
		if (newstream) {
			ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
		} else {
			uint64_t	maxblocks;
			uint32_t	max_streams;
			uint32_t	cur_streams;

			ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
			cur_streams = zf->zf_stream_cnt;
			maxblocks = zf->zf_dnode->dn_maxblkid;

			/*
			 * Small objects get proportionally fewer streams,
			 * but always allow at least one.
			 */
			max_streams = MIN(zfetch_max_streams,
			    (maxblocks / zfetch_block_cap));
			if (max_streams == 0) {
				max_streams++;
			}

			if (cur_streams >= max_streams) {
				return;
			}
			newstream =
			    kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
		}

		/* seed the new stream as forward-sequential from this read */
		newstream->zst_offset = zst.zst_offset;
		newstream->zst_len = zst.zst_len;
		newstream->zst_stride = zst.zst_len;
		newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
		newstream->zst_cap = zst.zst_len;
		newstream->zst_direction = ZFETCH_FORWARD;
		newstream->zst_last = ddi_get_lbolt();

		mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);

		rw_enter(&zf->zf_rwlock, RW_WRITER);
		inserted = dmu_zfetch_stream_insert(zf, newstream);
		rw_exit(&zf->zf_rwlock);

		/* lost a race with a duplicate insertion; discard our copy */
		if (!inserted) {
			mutex_destroy(&newstream->zst_lock);
			kmem_free(newstream, sizeof (zstream_t));
		}
	}
}
733
#if defined(_KERNEL) && defined(HAVE_SPL)
/* expose the prefetch tunables as writable (0644) Linux module parameters */
module_param(zfs_prefetch_disable, int, 0644);
MODULE_PARM_DESC(zfs_prefetch_disable, "Disable all ZFS prefetching");

module_param(zfetch_max_streams, uint, 0644);
MODULE_PARM_DESC(zfetch_max_streams, "Max number of streams per zfetch");

module_param(zfetch_min_sec_reap, uint, 0644);
MODULE_PARM_DESC(zfetch_min_sec_reap, "Min time before stream reclaim");

module_param(zfetch_block_cap, uint, 0644);
MODULE_PARM_DESC(zfetch_block_cap, "Max number of blocks to fetch at a time");

module_param(zfetch_array_rd_sz, ulong, 0644);
MODULE_PARM_DESC(zfetch_array_rd_sz, "Number of bytes in a array_read");
#endif