2 * Copyright (c) 2014 Juniper Networks, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
/*
 * Members of the chunk record (tag chunk, as named in the TAILQ_ENTRY
 * below), followed by the codes stored in its ch_type member.
 * NOTE(review): the struct header, the ch_type member and the ch_u union
 * wrapper that the functions in this file reference are not visible here.
 */
61 TAILQ_ENTRY(chunk) ch_list;
62 size_t ch_size; /* Size of chunk in bytes. */
63 lba_t ch_block; /* Block address in image. */
66 off_t ofs; /* Offset in backing file. */
67 int fd; /* FD of backing file. */
70 void *ptr; /* Pointer to data in memory */
74 #define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */
75 #define CH_TYPE_FILE 1 /* File-backed chunk. */
76 #define CH_TYPE_MEMORY 2 /* Memory-backed chunk */
/*
 * File-scope state shared by the functions in this file.
 *
 * image_chunks     tail queue of chunk records.
 * image_nchunks    presumably the number of chunks; no use is visible here.
 * image_swap_file  path of the temporary swap file, built from a
 *                  mkimg-XXXXXX template and handed to mkstemp(3).
 * image_swap_fd    descriptor returned by mkstemp(3); -1 until then.
 * image_swap_pgsz  value returned by getpagesize(3).
 * image_swap_size  current length of the swap file; image_swap_alloc()
 *                  extends it.
 * image_size       lba_t value that image_copyout() passes as the size
 *                  argument of image_copyout_region().
 */
79 static TAILQ_HEAD(chunk_head, chunk) image_chunks;
80 static u_int image_nchunks;
82 static char image_swap_file[PATH_MAX];
83 static int image_swap_fd = -1;
84 static u_int image_swap_pgsz;
85 static off_t image_swap_size;
87 static lba_t image_size;
/*
 * Examine one sector-sized buffer in uint64_t-sized steps.
 *
 * The visible lines assert that the working pointer p has its two low
 * address bits clear (4-byte alignment) and then loop over
 * secsz / sizeof(uint64_t) words.
 * NOTE(review): the loop body and the return statements are not visible;
 * callers use the result as a truth value meaning the sector holds no
 * data -- confirm against the full source.
 */
90 is_empty_sector(void *buf)
95 assert(((uintptr_t)p & 3) == 0);
97 max = secsz / sizeof(uint64_t);
98 for (n = 0; n < max; n++) {
/*
 * Reserve size bytes at the end of the swap file.
 *
 * The request is rounded up to a multiple of unit, the larger of secsz
 * and image_swap_pgsz; the mask arithmetic requires unit to be a power of
 * two, which is asserted. The previous end of the swap file is kept in
 * ofs, image_swap_size is advanced, and the file is extended with
 * ftruncate(2). If ftruncate(2) fails, image_swap_size is rolled back.
 * NOTE(review): the return statements are not visible; callers treat the
 * result as the offset of the reserved region -- confirm.
 */
110 image_swap_alloc(size_t size)
115 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
116 assert((unit & (unit - 1)) == 0);
118 size = (size + unit - 1) & ~(unit - 1);
120 ofs = image_swap_size;
121 image_swap_size += size;
122 if (ftruncate(image_swap_fd, image_swap_size) == -1) {
123 image_swap_size = ofs;
130 * Image chunk handling.
/*
 * Locate the chunk that contains block blk.
 *
 * A function-local static pointer, last, is consulted first: when it is
 * set and its chunk starts at or before blk the walk begins there,
 * otherwise it begins at the head of image_chunks. A chunk matches when
 * blk lies in [ch_block, ch_block + ch_size / secsz). The walk advances
 * with TAILQ_NEXT.
 * NOTE(review): the loop header, the update of last and the return
 * statements are not visible in this excerpt.
 */
133 static struct chunk *
134 image_chunk_find(lba_t blk)
136 static struct chunk *last = NULL;
139 ch = (last != NULL && last->ch_block <= blk)
140 ? last : TAILQ_FIRST(&image_chunks);
142 if (ch->ch_block <= blk &&
143 (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) {
147 ch = TAILQ_NEXT(ch, ch_list);
/*
 * Extend chunk ch by sz bytes without letting ch_size wrap around.
 *
 * newsz > ch_size holds exactly when the size_t addition did not wrap.
 * On wrap-around the chunk is pinned at SIZE_MAX and dsz is the number of
 * bytes that still fitted (SIZE_MAX - old ch_size).
 * NOTE(review): the body of the non-overflow branch and the return
 * statements are not visible; callers assign the result back to their
 * remaining-size variable, so it is presumably the number of bytes that
 * did not fit -- confirm.
 */
153 image_chunk_grow(struct chunk *ch, size_t sz)
157 newsz = ch->ch_size + sz;
158 if (newsz > ch->ch_size) {
162 /* We would overflow -- create new chunk for remainder. */
163 dsz = SIZE_MAX - ch->ch_size;
165 ch->ch_size = SIZE_MAX;
/*
 * Turn the single sector at block blk inside chunk ch into a
 * memory-backed chunk.
 *
 * One zero-filled sector is allocated with calloc(3). If ch starts before
 * blk, a copy of ch is inserted after it and ch is trimmed to the blocks
 * in front of blk, with the copy carrying the remaining bytes. If the
 * chunk then still spans more than one sector, another copy that is one
 * sector shorter is inserted after it. Finally the chunk is marked
 * CH_TYPE_MEMORY and receives the allocated sector.
 * NOTE(review): allocation-failure handling, the ch_block adjustments of
 * the copies, the re-pointing of ch to the copy and the return statement
 * are not visible in this excerpt.
 */
169 static struct chunk *
170 image_chunk_memory(struct chunk *ch, lba_t blk)
175 ptr = calloc(1, secsz);
179 if (ch->ch_block < blk) {
180 new = malloc(sizeof(*new));
185 memcpy(new, ch, sizeof(*new));
186 ch->ch_size = (blk - ch->ch_block) * secsz;
188 new->ch_size -= ch->ch_size;
189 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
194 if (ch->ch_size > secsz) {
195 new = malloc(sizeof(*new));
200 memcpy(new, ch, sizeof(*new));
203 new->ch_size -= secsz;
204 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
208 ch->ch_type = CH_TYPE_MEMORY;
209 ch->ch_u.mem.ptr = ptr;
/*
 * Extend the chunk list with a gap so that it reaches block to.
 *
 * from is the first block past the last chunk, or 0 when the list is
 * empty. The block distance is checked against SIZE_MAX / secsz before it
 * is converted to a byte count. A trailing CH_TYPE_ZEROES chunk is
 * extended with image_chunk_grow() and from is recomputed; a new
 * zero-filled chunk record of type CH_TYPE_ZEROES is appended to
 * image_chunks.
 * NOTE(review): the early-exit tests, the error returns and the exact
 * conditions linking the grow and allocate steps are not visible here.
 */
214 image_chunk_skipto(lba_t to)
220 ch = TAILQ_LAST(&image_chunks, chunk_head);
221 from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
228 /* Avoid bugs due to overflows. */
229 if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
231 sz = (to - from) * secsz;
232 if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) {
233 sz = image_chunk_grow(ch, sz);
236 from = ch->ch_block + (ch->ch_size / secsz);
238 ch = malloc(sizeof(*ch));
241 memset(ch, 0, sizeof(*ch));
244 ch->ch_type = CH_TYPE_ZEROES;
245 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
/*
 * Append sz bytes of file-backed data (descriptor fd, file offset ofs) at
 * block blk to the chunk list.
 *
 * When the last chunk is file-backed, uses the same descriptor, and ends
 * exactly at blk and at ofs, it is extended with image_chunk_grow() and
 * blk and ofs are moved to its new end. A new zero-filled chunk record of
 * type CH_TYPE_FILE carrying ofs and fd is appended to image_chunks.
 * NOTE(review): the early return after a complete grow, allocation-failure
 * handling and the ch_block / ch_size assignments of the new record are
 * not visible in this excerpt.
 */
251 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
255 ch = TAILQ_LAST(&image_chunks, chunk_head);
256 if (ch != NULL && ch->ch_type == CH_TYPE_FILE) {
257 if (fd == ch->ch_u.file.fd &&
258 blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
259 ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) {
260 sz = image_chunk_grow(ch, sz);
263 blk = ch->ch_block + (ch->ch_size / secsz);
264 ofs = ch->ch_u.file.ofs + ch->ch_size;
267 ch = malloc(sizeof(*ch));
270 memset(ch, 0, sizeof(*ch));
273 ch->ch_type = CH_TYPE_FILE;
274 ch->ch_u.file.ofs = ofs;
275 ch->ch_u.file.fd = fd;
276 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
/*
 * Register the sz bytes at buf, sector by sector, starting at block blk.
 *
 * sz is first rounded up to a multiple of secsz (the mask form assumes
 * secsz is a power of two). While no error has occurred and bytes remain,
 * a sector for which is_empty_sector() is true is recorded as a gap via
 * image_chunk_skipto(blk + 1); the other visible call records one sector
 * as file-backed data via image_chunk_append().
 * NOTE(review): the else keyword and the per-iteration updates of blk, p,
 * sz and ofs are not visible in this excerpt.
 */
282 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
288 sz = (sz + secsz - 1) & ~(secsz - 1);
289 while (!error && sz > 0) {
290 if (is_empty_sector(p))
291 error = image_chunk_skipto(blk + 1);
293 error = image_chunk_append(blk, secsz, ofs, fd);
303 * File mapping support.
/*
 * Map sz bytes of descriptor fd starting at offset ofs with mmap(2).
 *
 * x is the distance of ofs from the previous page boundary. The length is
 * rounded up to a multiple of unit, the larger of secsz and
 * image_swap_pgsz (asserted to be a power of two). The mapping is shared
 * and readable; write access is added only when fd is the swap file.
 * Returns NULL when mmap(2) fails, otherwise the mapped address.
 * NOTE(review): the adjustment applied for a non-zero x and the store
 * through iofp are not visible; callers index the returned buffer with
 * the value received through iofp -- confirm.
 */
307 image_file_map(int fd, off_t ofs, size_t sz, off_t *iofp)
314 /* On Linux anyway ofs must also be page aligned */
315 if ((x = (ofs % image_swap_pgsz)) != 0) {
321 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
322 assert((unit & (unit - 1)) == 0);
324 flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
325 /* Allow writing to our swap file only. */
326 prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
327 sz = (sz + unit - 1) & ~(unit - 1);
328 ptr = mmap(NULL, sz, prot, flags, fd, ofs);
329 return ((ptr == MAP_FAILED) ? NULL : ptr);
/*
 * Release a buffer obtained from image_file_map().
 *
 * sz is rounded up with the same unit rule as in image_file_map() (the
 * larger of secsz and image_swap_pgsz), and the range is passed to
 * madvise(2) with MADV_DONTNEED.
 * NOTE(review): the handling of a madvise(2) failure and any following
 * unmap call are not visible in this excerpt.
 */
333 image_file_unmap(void *buffer, size_t sz)
337 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
338 sz = (sz + unit - 1) & ~(unit - 1);
339 if (madvise(buffer, sz, MADV_DONTNEED) != 0)
346 * Input/source file handling.
/*
 * Copy a non-seekable input (descriptor fd) into the image at block blk
 * by staging it in the swap file.
 *
 * Each step reserves iosz = secsz * image_swap_pgsz bytes of swap space
 * with image_swap_alloc(), maps that region, reads from fd into it,
 * registers the bytes read through image_chunk_copyin() using the swap
 * offset, releases the mapping, and advances blk by the number of sectors
 * read, rounded up.
 * NOTE(review): the loop construct, the error and end-of-input handling
 * and the store through sizep are not visible in this excerpt.
 */
350 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
361 * This makes sure we're doing I/O in multiples of the page
362 * size as well as of the sector size. 2MB is the minimum
363 * by virtue of secsz at least 512 bytes and the page size
366 iosz = secsz * image_swap_pgsz;
370 swofs = image_swap_alloc(iosz);
373 buffer = image_file_map(image_swap_fd, swofs, iosz, &iof);
376 rdsz = read(fd, &buffer[iof], iosz);
378 error = image_chunk_copyin(blk, &buffer[iof], rdsz, swofs,
384 image_file_unmap(buffer, iosz);
385 /* XXX should we relinquish unused swap space? */
390 blk += (rdsz + secsz - 1) / secsz;
/*
 * Copy a seekable input file (descriptor fd) into the image at block blk,
 * using SEEK_HOLE and SEEK_DATA to tell gaps from data.
 *
 * The file length is taken from lseek(2) with SEEK_END; the visible
 * fallback is image_copyin_stream(). While no error has occurred and cur
 * is before end, the next hole and data offsets are queried. A hole at
 * cur is turned into a gap with image_chunk_skipto(), ending at the data
 * offset rounded down to a sector boundary. Data at cur is mapped in
 * pieces of at most iosz = secsz * image_swap_pgsz bytes, up to the hole
 * offset rounded up to a sector boundary, and registered through
 * image_chunk_copyin().
 * NOTE(review): the dup(2) call, the error checks after each lseek(2),
 * the updates of cur and the store through sizep are not visible here.
 */
399 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
401 off_t cur, data, end, hole, pos, iof;
409 * We'd like to know the size of the file and we must
410 * be able to seek in order to mmap(2). If this isn't
411 * possible, then treat the file as a stream/pipe.
413 end = lseek(fd, 0L, SEEK_END);
415 return (image_copyin_stream(blk, fd, sizep));
418 * We need the file opened for the duration and our
419 * caller is going to close the file. Make a dup(2)
420 * so that control the faith of the descriptor.
/* NOTE(review): faith in the line above reads as a typo for fate. */
426 iosz = secsz * image_swap_pgsz;
431 while (!error && cur < end) {
432 hole = lseek(fd, cur, SEEK_HOLE);
435 data = lseek(fd, cur, SEEK_DATA);
440 * Treat the entire file as data if sparse files
441 * are not supported by the underlying file system.
443 if (hole == end && data == end)
446 if (cur == hole && data > hole) {
448 pos = data & ~((uint64_t)secsz - 1);
450 blk += (pos - hole) / secsz;
451 error = image_chunk_skipto(blk);
453 bytesize += pos - hole;
455 } else if (cur == data && hole > data) {
457 pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1);
460 sz = (pos - data > (off_t)iosz)
461 ? iosz : (size_t)(pos - data);
463 buf = mp = image_file_map(fd, data, sz, &iof);
466 error = image_chunk_copyin(blk, buf,
468 image_file_unmap(mp, sz);
479 * I don't know what this means or whether it
480 * can happen at all...
487 if (!error && sizep != NULL)
/*
 * Copy the contents of descriptor fd into the image starting at block blk.
 *
 * The chunk list is first extended to blk with image_chunk_skipto(). When
 * fstat(2) fails or fd is not a regular file the input is handled by
 * image_copyin_stream(); the other visible call is image_copyin_mapped().
 * NOTE(review): the guard on the skipto result, the else keyword and the
 * return statement are not visible in this excerpt.
 */
493 image_copyin(lba_t blk, int fd, uint64_t *sizep)
498 error = image_chunk_skipto(blk);
500 if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
501 error = image_copyin_stream(blk, fd, sizep);
503 error = image_copyin_mapped(blk, fd, sizep);
509 * Output/sink file handling.
/*
 * Write the image to descriptor fd: the region from block 0 with size
 * image_size goes through image_copyout_region(), followed by
 * image_copyout_done().
 * NOTE(review): the condition guarding the second call and the return
 * statement are not visible in this excerpt.
 */
513 image_copyout(int fd)
517 error = image_copyout_region(fd, 0, image_size);
519 error = image_copyout_done(fd);
/*
 * Finish the output on descriptor fd by setting the file length to the
 * current file position.
 *
 * The position is read with lseek(2) and SEEK_CUR and passed to
 * ftruncate(2); error becomes errno when ftruncate(2) fails and 0
 * otherwise.
 * NOTE(review): the handling of an lseek(2) failure and the return
 * statement are not visible in this excerpt.
 */
524 image_copyout_done(int fd)
529 ofs = lseek(fd, 0L, SEEK_CUR);
532 error = (ftruncate(fd, ofs) == -1) ? errno : 0;
/*
 * Write size bytes from ptr to descriptor fd with a single write(2) call
 * and test the result against -1.
 * NOTE(review): the return statements are not visible; no handling of a
 * short write appears in the visible lines.
 */
537 image_copyout_memory(int fd, size_t size, void *ptr)
540 if (write(fd, ptr, size) == -1)
/*
 * Emit count zero bytes on descriptor fd.
 *
 * The first attempt is to move the file position forward by count with
 * lseek(2) instead of writing. For the write path a sector-sized zero
 * buffer is allocated once with calloc(3) and kept in a function-local
 * static; it is written through image_copyout_memory() in pieces of at
 * most secsz bytes.
 * NOTE(review): the return after a successful seek, allocation-failure
 * handling and the loop that consumes count are not visible here.
 */
546 image_copyout_zeroes(int fd, size_t count)
548 static uint8_t *zeroes = NULL;
552 if (lseek(fd, (off_t)count, SEEK_CUR) != -1)
556 * If we can't seek, we must write.
559 if (zeroes == NULL) {
560 zeroes = calloc(1, secsz);
566 sz = (count > secsz) ? secsz : count;
567 error = image_copyout_memory(fd, sz, zeroes);
/*
 * Copy size bytes from descriptor ifd at offset iofs to descriptor fd.
 *
 * Data moves in pieces of at most iosz = secsz * image_swap_pgsz bytes:
 * each piece is mapped with image_file_map(), written with
 * image_copyout_memory() and released with image_file_unmap().
 * NOTE(review): the loop construct, the mapping-failure check, the use of
 * iof and the updates of size and iofs are not visible in this excerpt.
 */
576 image_copyout_file(int fd, size_t size, int ifd, off_t iofs)
584 iosz = secsz * image_swap_pgsz;
587 sz = (size > iosz) ? iosz : size;
588 buf = mp = image_file_map(ifd, iofs, sz, &iof);
592 error = image_copyout_memory(fd, sz, buf);
593 image_file_unmap(mp, sz);
/*
 * Write part of the image, starting at block blk, to descriptor fd.
 *
 * While no error has occurred and size is positive, the chunk holding blk
 * is looked up with image_chunk_find(). ofs is the byte offset of blk
 * inside that chunk and sz is the rest of the chunk, capped at size. The
 * chunk type selects image_copyout_zeroes(), image_copyout_file() or
 * image_copyout_memory().
 * NOTE(review): any scaling of size before the loop, the missing-chunk
 * handling, the case labels and the updates of blk and size are not
 * visible. The memory case passes the chunk pointer without adding ofs
 * in the visible line -- confirm this is intended.
 */
603 image_copyout_region(int fd, lba_t blk, lba_t size)
612 while (!error && size > 0) {
613 ch = image_chunk_find(blk);
618 ofs = (blk - ch->ch_block) * secsz;
619 sz = ch->ch_size - ofs;
620 sz = ((lba_t)sz < size) ? sz : (size_t)size;
621 switch (ch->ch_type) {
623 error = image_copyout_zeroes(fd, sz);
626 error = image_copyout_file(fd, sz, ch->ch_u.file.fd,
627 ch->ch_u.file.ofs + ofs);
630 error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr);
/*
 * Inspect the chunks covering size blocks starting at block blk.
 *
 * The chunk holding blk is looked up; the visible tests check whether its
 * type differs from CH_TYPE_ZEROES and whether its end block lim reaches
 * blk + size.
 * NOTE(review): the loop construct and every return statement are not
 * visible; presumably the result reports whether the range holds any
 * non-gap data -- confirm.
 */
642 image_data(lba_t blk, lba_t size)
648 ch = image_chunk_find(blk);
651 if (ch->ch_type != CH_TYPE_ZEROES)
653 lim = ch->ch_block + (ch->ch_size / secsz);
654 if (lim >= blk + size)
/*
 * Extend the chunk list to block blk via image_chunk_skipto().
 * NOTE(review): the remainder of the body is not visible; presumably
 * image_size is updated on success -- confirm.
 */
670 image_set_size(lba_t blk)
674 error = image_chunk_skipto(blk);
/*
 * Store the sectors at buf into the image starting at block blk.
 *
 * A sector for which is_empty_sector() is true is not stored. Otherwise
 * the chunk holding blk is looked up; a file-backed chunk is singled out
 * by the first type test, and a gap chunk is converted with
 * image_chunk_memory() so that the sector has memory backing. The sector
 * is then copied into the chunk buffer and buf advances by secsz.
 * NOTE(review): the loop over len, the missing-chunk check, the outcome
 * for file-backed chunks and the return values are not visible here.
 */
681 image_write(lba_t blk, void *buf, ssize_t len)
686 if (!is_empty_sector(buf)) {
687 ch = image_chunk_find(blk);
690 /* We may not be able to write to files. */
691 if (ch->ch_type == CH_TYPE_FILE)
693 if (ch->ch_type == CH_TYPE_ZEROES) {
694 ch = image_chunk_memory(ch, blk);
698 assert(ch->ch_type == CH_TYPE_MEMORY);
699 memcpy(ch->ch_u.mem.ptr, buf, secsz);
702 buf = (char *)buf + secsz;
/*
 * Tear down the chunk list and the swap file.
 * NOTE(review): the function header is not visible; presumably this is
 * the body of image_cleanup, the handler registered with atexit(3)
 * elsewhere in this file -- confirm.
 *
 * Chunks are taken from the head of image_chunks until the queue is
 * empty. Depending on the chunk type, the backing descriptor is closed
 * when it is not -1, or the memory buffer is freed. Afterwards the swap
 * descriptor is closed when open and the swap file is unlinked.
 * NOTE(review): the case labels and the release of the chunk record
 * itself are not visible in this excerpt.
 */
713 while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) {
714 switch (ch->ch_type) {
716 /* We may be closing the same file multiple times. */
717 if (ch->ch_u.file.fd != -1)
718 close(ch->ch_u.file.fd);
721 free(ch->ch_u.mem.ptr);
726 TAILQ_REMOVE(&image_chunks, ch, ch_list);
729 if (image_swap_fd != -1)
730 close(image_swap_fd);
731 unlink(image_swap_file);
739 TAILQ_INIT(&image_chunks);
743 image_swap_pgsz = getpagesize();
745 if (atexit(image_cleanup) == -1)
747 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
749 snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
751 image_swap_fd = mkstemp(image_swap_file);
752 if (image_swap_fd == -1)