2 * Copyright (c) 2009-2010 The FreeBSD Foundation
5 * This software was developed by Pawel Jakub Dawidek under sponsorship from
6 * the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h> /* powerof2() */
34 #include <sys/queue.h>
37 #include <bitstring.h>
44 #include <activemap.h>
46 #define ACTIVEMAP_MAGIC 0xac71e4
48 int am_magic; /* Magic value. */
49 off_t am_mediasize; /* Media size in bytes. */
50 uint32_t am_extentsize; /* Extent size in bytes,
51 must be power of 2. */
52 uint8_t am_extentshift;/* 2 ^ extentbits == extentsize */
53 int am_nextents; /* Number of extents. */
54 size_t am_mapsize; /* Bitmap size in bytes. */
55 uint16_t *am_memtab; /* An array that holds number of pending
57 bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */
58 bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */
59 size_t am_diskmapsize; /* Map size rounded up to sector size. */
60 uint64_t am_ndirty; /* Number of dirty regions. */
61 bitstr_t *am_syncmap; /* Bitmap of extents to sync. */
62 off_t am_syncoff; /* Next synchronization offset. */
63 TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that
64 we keep dirty to reduce bitmap
66 int am_nkeepdirty; /* Number of am_keepdirty elements. */
67 int am_nkeepdirty_limit; /* Maximum number of am_keepdirty
73 TAILQ_ENTRY(keepdirty) kd_next;
77 * Helper function taken from sys/systm.h to calculate extentshift.
80 bitcount32(uint32_t x)
83 x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1);
84 x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2);
85 x = (x + (x >> 4)) & 0x0f0f0f0f;
87 x = (x + (x >> 16)) & 0x000000ff;
92 off2ext(const struct activemap *amp, off_t offset)
96 assert(offset >= 0 && offset < amp->am_mediasize);
97 extent = (offset >> amp->am_extentshift);
98 assert(extent >= 0 && extent < amp->am_nextents);
102 static __inline off_t
103 ext2off(const struct activemap *amp, int extent)
107 assert(extent >= 0 && extent < amp->am_nextents);
108 offset = ((off_t)extent << amp->am_extentshift);
109 assert(offset >= 0 && offset < amp->am_mediasize);
114 * Function calculates number of requests needed to synchronize the given
118 ext2reqs(const struct activemap *amp, int ext)
122 if (ext < amp->am_nextents - 1)
123 return (((amp->am_extentsize - 1) / MAXPHYS) + 1);
125 assert(ext == amp->am_nextents - 1);
126 left = amp->am_mediasize % amp->am_extentsize;
128 left = amp->am_extentsize;
129 return (((left - 1) / MAXPHYS) + 1);
133 * Initialize activemap structure and allocate memory for internal needs.
134 * Function returns 0 on success and -1 if any of the allocations failed.
137 activemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize,
138 uint32_t sectorsize, uint32_t keepdirty)
140 struct activemap *amp;
142 assert(ampp != NULL);
143 assert(mediasize > 0);
144 assert(extentsize > 0);
145 assert(powerof2(extentsize));
146 assert(sectorsize > 0);
147 assert(powerof2(sectorsize));
148 assert(keepdirty > 0);
150 amp = malloc(sizeof(*amp));
154 amp->am_mediasize = mediasize;
155 amp->am_nkeepdirty_limit = keepdirty;
156 amp->am_extentsize = extentsize;
157 amp->am_extentshift = bitcount32(extentsize - 1);
158 amp->am_nextents = ((mediasize - 1) / extentsize) + 1;
159 amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents);
160 amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize);
162 amp->am_syncoff = -2;
163 TAILQ_INIT(&->am_keepdirty);
164 amp->am_nkeepdirty = 0;
166 amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0]));
167 amp->am_diskmap = calloc(1, amp->am_diskmapsize);
168 amp->am_memmap = bit_alloc(amp->am_nextents);
169 amp->am_syncmap = bit_alloc(amp->am_nextents);
172 * Check to see if any of the allocations above failed.
174 if (amp->am_memtab == NULL || amp->am_diskmap == NULL ||
175 amp->am_memmap == NULL || amp->am_syncmap == NULL) {
176 if (amp->am_memtab != NULL)
177 free(amp->am_memtab);
178 if (amp->am_diskmap != NULL)
179 free(amp->am_diskmap);
180 if (amp->am_memmap != NULL)
181 free(amp->am_memmap);
182 if (amp->am_syncmap != NULL)
183 free(amp->am_syncmap);
190 amp->am_magic = ACTIVEMAP_MAGIC;
196 static struct keepdirty *
197 keepdirty_find(struct activemap *amp, int extent)
199 struct keepdirty *kd;
201 TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) {
202 if (kd->kd_extent == extent)
209 keepdirty_add(struct activemap *amp, int extent)
211 struct keepdirty *kd;
213 kd = keepdirty_find(amp, extent);
216 * Only move element at the begining.
218 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next);
219 TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next);
223 * Add new element, but first remove the most unused one if
226 if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) {
227 kd = TAILQ_LAST(&->am_keepdirty, skeepdirty);
229 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next);
230 amp->am_nkeepdirty--;
231 assert(amp->am_nkeepdirty > 0);
234 kd = malloc(sizeof(*kd));
235 /* We can ignore allocation failure. */
237 kd->kd_extent = extent;
238 amp->am_nkeepdirty++;
239 TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next);
244 keepdirty_fill(struct activemap *amp)
246 struct keepdirty *kd;
248 TAILQ_FOREACH(kd, &->am_keepdirty, kd_next)
249 bit_set(amp->am_diskmap, kd->kd_extent);
253 keepdirty_free(struct activemap *amp)
255 struct keepdirty *kd;
257 while ((kd = TAILQ_FIRST(&->am_keepdirty)) != NULL) {
258 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next);
259 amp->am_nkeepdirty--;
262 assert(amp->am_nkeepdirty == 0);
266 * Function frees resources allocated by activemap_init() function.
269 activemap_free(struct activemap *amp)
272 assert(amp->am_magic == ACTIVEMAP_MAGIC);
277 free(amp->am_memtab);
278 free(amp->am_diskmap);
279 free(amp->am_memmap);
280 free(amp->am_syncmap);
284 * Function should be called before we handle write requests. It updates
285 * internal structures and returns true if on-disk metadata should be updated.
288 activemap_write_start(struct activemap *amp, off_t offset, off_t length)
294 assert(amp->am_magic == ACTIVEMAP_MAGIC);
298 end = offset + length - 1;
300 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) {
302 * If the number of pending writes is increased from 0,
303 * we have to mark the extent as dirty also in on-disk bitmap.
304 * By returning true we inform the caller that on-disk bitmap
305 * was modified and has to be flushed to disk.
307 if (amp->am_memtab[ext]++ == 0) {
308 assert(!bit_test(amp->am_memmap, ext));
309 bit_set(amp->am_memmap, ext);
313 keepdirty_add(amp, ext);
320 * Function should be called after receiving write confirmation. It updates
321 * internal structures and returns true if on-disk metadata should be updated.
324 activemap_write_complete(struct activemap *amp, off_t offset, off_t length)
330 assert(amp->am_magic == ACTIVEMAP_MAGIC);
334 end = offset + length - 1;
336 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) {
338 * If the number of pending writes goes down to 0, we have to
339 * mark the extent as clean also in on-disk bitmap.
340 * By returning true we inform the caller that on-disk bitmap
341 * was modified and has to be flushed to disk.
343 assert(amp->am_memtab[ext] > 0);
344 assert(bit_test(amp->am_memmap, ext));
345 if (--amp->am_memtab[ext] == 0) {
346 bit_clear(amp->am_memmap, ext);
356 * Function should be called after finishing synchronization of one extent.
357 * It returns true if on-disk metadata should be updated.
360 activemap_extent_complete(struct activemap *amp, int extent)
365 assert(amp->am_magic == ACTIVEMAP_MAGIC);
366 assert(extent >= 0 && extent < amp->am_nextents);
370 reqs = ext2reqs(amp, extent);
371 assert(amp->am_memtab[extent] >= reqs);
372 amp->am_memtab[extent] -= reqs;
373 assert(bit_test(amp->am_memmap, extent));
374 if (amp->am_memtab[extent] == 0) {
375 bit_clear(amp->am_memmap, extent);
384 * Function returns number of dirty regions.
387 activemap_ndirty(const struct activemap *amp)
390 assert(amp->am_magic == ACTIVEMAP_MAGIC);
392 return (amp->am_ndirty);
396 * Function compare on-disk bitmap and in-memory bitmap and returns true if
397 * they differ and should be flushed to the disk.
400 activemap_differ(const struct activemap *amp)
403 assert(amp->am_magic == ACTIVEMAP_MAGIC);
405 return (memcmp(amp->am_diskmap, amp->am_memmap,
406 amp->am_mapsize) != 0);
410 * Function returns number of bytes used by bitmap.
413 activemap_size(const struct activemap *amp)
416 assert(amp->am_magic == ACTIVEMAP_MAGIC);
418 return (amp->am_mapsize);
422 * Function returns number of bytes needed for storing on-disk bitmap.
423 * This is the same as activemap_size(), but rounded up to sector size.
426 activemap_ondisk_size(const struct activemap *amp)
429 assert(amp->am_magic == ACTIVEMAP_MAGIC);
431 return (amp->am_diskmapsize);
435 * Function copies the given buffer read from disk to the internal bitmap.
438 activemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size)
442 assert(amp->am_magic == ACTIVEMAP_MAGIC);
443 assert(size >= amp->am_mapsize);
445 memcpy(amp->am_diskmap, buf, amp->am_mapsize);
446 memcpy(amp->am_memmap, buf, amp->am_mapsize);
447 memcpy(amp->am_syncmap, buf, amp->am_mapsize);
449 bit_ffs(amp->am_memmap, amp->am_nextents, &ext);
451 /* There are no dirty extents, so we can leave now. */
455 * Set synchronization offset to the first dirty extent.
457 activemap_sync_rewind(amp);
459 * We have dirty extents and we want them to stay that way until
460 * we synchronize, so we set number of pending writes to number
461 * of requests needed to synchronize one extent.
464 for (; ext < amp->am_nextents; ext++) {
465 if (bit_test(amp->am_memmap, ext)) {
466 amp->am_memtab[ext] = ext2reqs(amp, ext);
473 * Function merges the given bitmap with existng one.
476 activemap_merge(struct activemap *amp, const unsigned char *buf, size_t size)
478 bitstr_t *remmap = __DECONST(bitstr_t *, buf);
481 assert(amp->am_magic == ACTIVEMAP_MAGIC);
482 assert(size >= amp->am_mapsize);
484 bit_ffs(remmap, amp->am_nextents, &ext);
486 /* There are no dirty extents, so we can leave now. */
490 * We have dirty extents and we want them to stay that way until
491 * we synchronize, so we set number of pending writes to number
492 * of requests needed to synchronize one extent.
494 for (; ext < amp->am_nextents; ext++) {
495 /* Local extent already dirty. */
496 if (bit_test(amp->am_syncmap, ext))
498 /* Remote extent isn't dirty. */
499 if (!bit_test(remmap, ext))
501 bit_set(amp->am_syncmap, ext);
502 bit_set(amp->am_memmap, ext);
503 bit_set(amp->am_diskmap, ext);
504 if (amp->am_memtab[ext] == 0)
506 amp->am_memtab[ext] = ext2reqs(amp, ext);
509 * Set synchronization offset to the first dirty extent.
511 activemap_sync_rewind(amp);
515 * Function returns pointer to internal bitmap that should be written to disk.
517 const unsigned char *
518 activemap_bitmap(struct activemap *amp, size_t *sizep)
521 assert(amp->am_magic == ACTIVEMAP_MAGIC);
524 *sizep = amp->am_diskmapsize;
525 memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize);
527 return ((const unsigned char *)amp->am_diskmap);
531 * Function calculates size needed to store bitmap on disk.
534 activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize,
537 uint64_t nextents, mapsize;
539 assert(mediasize > 0);
540 assert(extentsize > 0);
541 assert(powerof2(extentsize));
542 assert(sectorsize > 0);
543 assert(powerof2(sectorsize));
545 nextents = ((mediasize - 1) / extentsize) + 1;
546 mapsize = sizeof(bitstr_t) * bitstr_size(nextents);
547 return (roundup2(mapsize, sectorsize));
551 * Set synchronization offset to the first dirty extent.
554 activemap_sync_rewind(struct activemap *amp)
558 assert(amp->am_magic == ACTIVEMAP_MAGIC);
560 bit_ffs(amp->am_syncmap, amp->am_nextents, &ext);
562 /* There are no extents to synchronize. */
563 amp->am_syncoff = -2;
567 * Mark that we want to start synchronization from the begining.
569 amp->am_syncoff = -1;
573 * Return next offset of where we should synchronize.
576 activemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp)
581 assert(amp->am_magic == ACTIVEMAP_MAGIC);
582 assert(lengthp != NULL);
583 assert(syncextp != NULL);
587 if (amp->am_syncoff == -2)
590 if (amp->am_syncoff >= 0 &&
591 (amp->am_syncoff + MAXPHYS >= amp->am_mediasize ||
592 off2ext(amp, amp->am_syncoff) !=
593 off2ext(amp, amp->am_syncoff + MAXPHYS))) {
595 * We are about to change extent, so mark previous one as clean.
597 ext = off2ext(amp, amp->am_syncoff);
598 bit_clear(amp->am_syncmap, ext);
600 amp->am_syncoff = -1;
603 if (amp->am_syncoff == -1) {
605 * Let's find first extent to synchronize.
607 bit_ffs(amp->am_syncmap, amp->am_nextents, &ext);
609 amp->am_syncoff = -2;
612 amp->am_syncoff = ext2off(amp, ext);
615 * We don't change extent, so just increase offset.
617 amp->am_syncoff += MAXPHYS;
618 if (amp->am_syncoff >= amp->am_mediasize) {
619 amp->am_syncoff = -2;
624 syncoff = amp->am_syncoff;
625 left = ext2off(amp, off2ext(amp, syncoff)) +
626 amp->am_extentsize - syncoff;
627 if (syncoff + left > amp->am_mediasize)
628 left = amp->am_mediasize - syncoff;
632 assert(left >= 0 && left <= MAXPHYS);
633 assert(syncoff >= 0 && syncoff < amp->am_mediasize);
634 assert(syncoff + left >= 0 && syncoff + left <= amp->am_mediasize);
641 * Mark extent(s) containing the given region for synchronization.
642 * Most likely one of the components is unavailable.
645 activemap_need_sync(struct activemap *amp, off_t offset, off_t length)
651 assert(amp->am_magic == ACTIVEMAP_MAGIC);
654 end = offset + length - 1;
656 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) {
657 if (bit_test(amp->am_syncmap, ext)) {
658 /* Already marked for synchronization. */
659 assert(bit_test(amp->am_memmap, ext));
662 bit_set(amp->am_syncmap, ext);
663 if (!bit_test(amp->am_memmap, ext)) {
664 bit_set(amp->am_memmap, ext);
667 amp->am_memtab[ext] += ext2reqs(amp, ext);
675 activemap_dump(const struct activemap *amp)
680 for (bit = 0; bit < amp->am_nextents; bit++)
681 printf("%d", bit_test(amp->am_memmap, bit) ? 1 : 0);
684 for (bit = 0; bit < amp->am_nextents; bit++)
685 printf("%d", bit_test(amp->am_diskmap, bit) ? 1 : 0);
688 for (bit = 0; bit < amp->am_nextents; bit++)
689 printf("%d", bit_test(amp->am_syncmap, bit) ? 1 : 0);