2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/types.h>
33 #include <sys/endian.h>
34 #include <sys/param.h>
35 #include <sys/sysctl.h>
38 #include <netinet/in.h>
52 #include "mkuz_cloop.h"
53 #include "mkuz_blockcache.h"
54 #include "mkuz_lzma.h"
55 #include "mkuz_zlib.h"
56 #include "mkuz_zstd.h"
59 #include "mkuz_conveyor.h"
60 #include "mkuz_format.h"
61 #include "mkuz_fqueue.h"
62 #include "mkuz_time.h"
63 #include "mkuz_insize.h"
65 #define DEFAULT_CLSTSIZE 16384
/*
 * Table of supported compression formats. Each visible entry supplies the
 * cloop magic string, the default output-file suffix and three callbacks:
 * worst-case compressed size (f_compress_bound), compressor initialisation
 * (f_init) and compression (f_compress). main() indexes this table with an
 * enum UZ_ALGORITHM value and compares each entry's .option with optarg.
 * NOTE(review): three entries (zlib, lzma, zstd) are visible here, but their
 * index designators, .option initialisers and braces are not part of this
 * view -- confirm against the complete file.
 */
74 static const struct mkuz_format uzip_fmts[] = {
77 .magic = CLOOP_MAGIC_ZLIB,
78 .default_sufx = DEFAULT_SUFX_ZLIB,
79 .f_compress_bound = mkuz_zlib_cbound,
80 .f_init = mkuz_zlib_init,
81 .f_compress = mkuz_zlib_compress,
85 .magic = CLOOP_MAGIC_LZMA,
86 .default_sufx = DEFAULT_SUFX_LZMA,
87 .f_compress_bound = mkuz_lzma_cbound,
88 .f_init = mkuz_lzma_init,
89 .f_compress = mkuz_lzma_compress,
93 .magic = CLOOP_MAGIC_ZSTD,
94 .default_sufx = DEFAULT_SUFX_ZSTD,
95 .f_compress_bound = mkuz_zstd_cbound,
96 .f_init = mkuz_zstd_init,
97 .f_compress = mkuz_zstd_compress,
101 static struct mkuz_blk *readblock(int, u_int32_t);
102 static void usage(void);
103 static void cleanup(void);
105 static char *cleanfile = NULL;
/*
 * Predicate handed to mkuz_fqueue_deq_when() by main(): yields non-zero when
 * the block's info.blkno equals the number that ap points at.
 * NOTE(review): the declaration of ap and how it is derived from p are not
 * visible in this view -- presumably ap is p converted to an integer
 * pointer; confirm its type against the caller, which passes the address of
 * an int-like counter.
 */
108 cmp_blkno(const struct mkuz_blk *bp, void *p)
114 return (bp->info.blkno == *ap);
/*
 * mkuzip entry point. From the statements visible here it: parses the
 * command line, selects a compression handler, opens the input image and
 * the output file, enqueues input blocks on the conveyor's work queue,
 * dequeues results in block order and writes them while filling the table
 * of contents (toc), and finally writes the cloop header and toc into the
 * space reserved at the start of the output file.
 * NOTE(review): many lines of this function are absent from this view, so
 * the comments below describe only statements that are actually visible.
 */
117 int main(int argc, char **argv)
128 uint64_t offset, last_offset;
129 struct cloop_header hdr;
130 struct mkuz_conveyor *cvp;
132 struct mkuz_blk_info *chit;
133 size_t ncpusz, ncpu, magiclen;
135 enum UZ_ALGORITHM comp_alg;
/*
 * Ask the kernel for the CPU count and clamp it to MAX_WORKERS_AUTO.
 * NOTE(review): ncpu is a size_t and ncpusz is sizeof(size_t), whereas
 * sysctl(3) documents hw.ncpu as an integer; if fewer than sizeof(size_t)
 * bytes are stored, the remaining bytes of ncpu stay uninitialised --
 * confirm, and consider reading into an int.
 */
140 ncpusz = sizeof(size_t);
141 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
143 } else if (ncpu > MAX_WORKERS_AUTO) {
144 ncpu = MAX_WORKERS_AUTO;
/* Start from an all-zero header and the default cluster size. */
147 memset(&hdr, 0, sizeof(hdr));
148 cfs.blksz = DEFAULT_CLSTSIZE;
156 comp_level = USE_DEFAULT_LEVEL;
158 struct mkuz_blk *iblk, *oblk;
/* Option parsing; the accepted letters are those in the getopt() string. */
160 while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
/* Look optarg up among the .option names of uzip_fmts[]. */
163 for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
164 if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
167 if (tmp == UZ_INVALID)
168 errx(1, "invalid algorithm specified: %s",
/*
 * NOTE(review): atoi() gives no indication of malformed input; strtol()
 * with end-pointer checking would allow the level to be validated.
 */
174 comp_level = atoi(optarg);
183 errx(1, "invalid cluster size specified: %s",
214 errx(1, "invalid number of compression threads"
215 " specified: %s", optarg);
/*
 * Select the format handler and copy its magic string into the header; the
 * assert checks that strlcpy() did not truncate it.
 */
234 cfs.handler = &uzip_fmts[comp_alg];
236 magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
237 assert(magiclen < sizeof(hdr.magic));
/*
 * With dedup enabled, a version-2 magic is bumped to version 3 and the
 * compression byte of the magic is lower-cased.
 */
239 if (cfs.en_dedup != 0) {
241 * Dedupe requires a version 3 format. Don't downgrade newer
244 if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
245 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
246 hdr.magic[CLOOP_OFS_COMPR] =
247 tolower(hdr.magic[CLOOP_OFS_COMPR]);
/* The cluster size has to be a multiple of DEV_BSIZE. */
250 if (cfs.blksz % DEV_BSIZE != 0)
251 errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
/* The worst-case compressed size of one cluster must not exceed MAXPHYS. */
253 cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
254 if (cfs.cbound_blksz > MAXPHYS)
255 errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
256 cfs.cbound_blksz, (size_t)MAXPHYS);
/*
 * Initialise the compressor. f_init receives a pointer to comp_level, and
 * the value it leaves there is what gets stored in cfs.
 */
258 c_ctx = cfs.handler->f_init(&comp_level);
259 cfs.comp_level = comp_level;
/*
 * Build an output name from the input name plus the handler's default
 * suffix.
 * NOTE(review): the condition under which this default is used, and the
 * check that leads to the err() below, are not visible in this view.
 */
263 asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx);
265 err(1, "can't allocate memory");
/*
 * Make these signals call exit().
 * NOTE(review): presumably this is so an exit-time cleanup handler runs; its
 * registration is not visible here. exit() is not async-signal-safe.
 */
270 signal(SIGHUP, exit);
271 signal(SIGINT, exit);
272 signal(SIGTERM, exit);
273 signal(SIGXCPU, exit);
274 signal(SIGXFSZ, exit);
/* Open the input image read-only. */
277 cfs.fdr = open(cfs.iname, O_RDONLY);
279 err(1, "open(%s)", cfs.iname);
/* Determine the input size; failure to do so is fatal. */
282 cfs.isize = mkuz_get_insize(&cfs);
284 errx(1, "can't determine input image size");
/*
 * Number of whole clusters in the input; a trailing partial cluster is
 * reported when verbose.
 * NOTE(review): presumably nblocks is also incremented for the partial
 * cluster -- that statement is not visible in this view.
 */
287 hdr.nblocks = cfs.isize / cfs.blksz;
288 if ((cfs.isize % cfs.blksz) != 0) {
289 if (cfs.verbose != 0)
290 fprintf(stderr, "file size is not multiple "
291 "of %d, padding data\n", cfs.blksz);
/* Table of contents: one entry per cluster plus one trailing entry. */
294 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
297 * Initialize last+1 entry with non-heap trash. If final padding is
298 * added later, it may or may not be overwritten with an offset
299 * representing the length of the final compressed block. If not,
300 * initialize to a defined value.
302 toc[hdr.nblocks] = 0;
/*
 * Create/truncate the output file; it is opened read-write only when dedup
 * is enabled, write-only otherwise.
 * NOTE(review): the mode grants execute permission (rwxr-xr-x), which is
 * unusual for a data file -- confirm this is intended.
 */
304 cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
305 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
307 err(1, "open(%s)", oname);
312 /* Prepare header that we will write later when we have index ready. */
313 iov[0].iov_base = (char *)&hdr;
314 iov[0].iov_len = sizeof(hdr);
315 iov[1].iov_base = (char *)toc;
316 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
/* Compressed data starts right after the header and the toc. */
317 offset = iov[0].iov_len + iov[1].iov_len;
/* NOTE(review): the result of the lseek() below is not checked. */
319 /* Reserve space for header */
320 lseek(cfs.fdw, offset, SEEK_SET);
322 if (cfs.verbose != 0) {
323 fprintf(stderr, "data size %ju bytes, number of clusters "
324 "%u, index length %zu bytes\n", cfs.isize,
325 hdr.nblocks, iov[1].iov_len);
/* Construct the conveyor from the settings collected in cfs. */
328 cvp = mkuz_conveyor_ctor(&cfs);
/*
 * Main loop: i counts blocks read from the input and queued for work; io is
 * the number of the output block currently being written.
 */
332 for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
333 iblk = readblock(cfs.fdr, cfs.blksz);
334 mkuz_fqueue_enq(cvp->wrk_queue, iblk);
335 if (iblk != MKUZ_BLK_EOF &&
336 (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
/* Take the result whose block number equals io, keeping output in order. */
340 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
341 assert(oblk->info.blkno == (unsigned)io);
342 oblk->info.offset = offset;
/*
 * With dedup on, register each non-empty block with the block cache.
 * NOTE(review): presumably a non-NULL chit describes an earlier identical
 * block -- confirm against mkuz_blkcache_regblock().
 */
344 if (cfs.en_dedup != 0 && oblk->info.len > 0) {
345 chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
347 * There should be at least one non-empty block
348 * between us and the backref'ed offset, otherwise
349 * we won't be able to parse that sequence correctly
350 * as it would be indistinguishible from another
353 if (chit != NULL && chit->offset == last_offset) {
/* The toc entry refers to the cached block's offset (stored big-endian). */
358 toc[io] = htobe64(chit->offset);
/*
 * Write the block data.
 * NOTE(review): only a negative write() result is treated as an error; a
 * short write would go unnoticed.
 */
361 if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
362 oblk->info.len) < 0) {
363 err(1, "write(%s)", oname);
/* Record this block's offset big-endian and advance the output offset. */
366 toc[io] = htobe64(offset);
367 last_offset = offset;
368 offset += oblk->info.len;
370 if (cfs.verbose != 0) {
371 fprintf(stderr, "cluster #%d, in %u bytes, "
372 "out len=%lu offset=%lu", io, cfs.blksz,
373 (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
375 fprintf(stderr, " (backref'ed to #%d)",
378 fprintf(stderr, "\n");
/* Input exhausted. */
382 if (iblk == MKUZ_BLK_EOF) {
/*
 * When the output offset is not a DEV_BSIZE multiple, a block sized to
 * reach the next multiple is constructed and queued on the results queue.
 */
385 /* Last block, see if we need to add some padding */
386 if ((offset % DEV_BSIZE) == 0)
388 oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
389 oblk->info.blkno = io;
390 oblk->info.len = oblk->alen;
391 if (cfs.verbose != 0) {
392 fprintf(stderr, "padding data with %lu bytes "
393 "so that file size is multiple of %d\n",
394 (u_long)oblk->alen, DEV_BSIZE);
396 mkuz_fqueue_enq(cvp->results, oblk);
/* Report compression statistics when verbose or when a summary is enabled. */
403 if (cfs.verbose != 0 || summary.en != 0) {
405 fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
406 "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
407 (long long)(cfs.isize - offset),
408 100.0 * (long long)(cfs.isize - offset) /
409 (float)cfs.isize, (float)cfs.isize / (et - st));
412 /* Convert to big endian */
413 hdr.blksz = htonl(cfs.blksz);
414 hdr.nblocks = htonl(hdr.nblocks);
/*
 * NOTE(review): the lseek() result is not checked, and writev() is checked
 * only for a negative return, so a partial write would go unnoticed.
 */
415 /* Write headers into pre-allocated space */
416 lseek(cfs.fdw, 0, SEEK_SET);
417 if (writev(cfs.fdw, iov, 2) < 0) {
418 err(1, "writev(%s)", oname);
/*
 * Read one cluster from fd into a newly constructed block of clstsize bytes.
 * The block is stamped with the current value of blockcnt as its number, the
 * file position reported by lseek() before the read as its offset, and the
 * byte count returned by read() as its length. The err() calls indicate
 * that lseek()/read() failures terminate the program.
 * NOTE(review): the conditions guarding the err() calls, the handling of
 * end of input and short reads, and the update of blockcnt are not visible
 * in this view. main() compares the result against MKUZ_BLK_EOF, so
 * presumably that sentinel is returned at end of input -- confirm.
 */
427 static struct mkuz_blk *
428 readblock(int fd, u_int32_t clstsize)
431 struct mkuz_blk *rval;
435 rval = mkuz_blk_ctor(clstsize);
437 rval->info.blkno = blockcnt;
439 cpos = lseek(fd, 0, SEEK_CUR);
441 err(1, "readblock: lseek() failed");
444 rval->info.offset = cpos;
446 numread = read(fd, rval->data, clstsize);
448 err(1, "readblock: read() failed");
455 rval->info.len = numread;
/*
 * Print the command-line synopsis to stderr.
 * NOTE(review): the getopt() string in main() also accepts -A (compared
 * against the algorithm names) and -C (parsed as the compression level), but
 * neither appears in this synopsis -- consider adding them. The enclosing
 * function header and whatever follows the fprintf() are not visible in
 * this view.
 */
463 fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
464 "[-j ncompr] infile\n");
/*
 * malloc() wrapper that never returns NULL: when the allocation fails the
 * program is terminated through err(1, ...).
 *
 * A zero-byte request is rounded up to one byte. malloc(0) is allowed to
 * return NULL even though nothing went wrong, and that result would
 * otherwise be misreported as a fatal out-of-memory condition.
 *
 * The caller owns the returned memory and releases it with free().
 */
void *
mkuz_safe_malloc(size_t size)
{
	void *retval;

	retval = malloc(size != 0 ? size : 1);
	if (retval == NULL) {
		err(1, "can't allocate memory");
		/* Not reached */
	}
	return (retval);
}
/*
 * Allocate size bytes through mkuz_safe_malloc(), which terminates the
 * program instead of returning NULL.
 * NOTE(review): the name suggests the memory is zero-filled before being
 * returned, but the zeroing step and the return statement are not visible
 * in this view -- confirm against the complete file.
 */
482 mkuz_safe_zmalloc(size_t size)
486 retval = mkuz_safe_malloc(size);
/*
 * Acts only when the file-scope cleanfile pointer has been set.
 * NOTE(review): presumably this line belongs to cleanup() and guards removal
 * of a partially written output file; the function header and the guarded
 * statement are not visible in this view -- confirm.
 */
495 if (cleanfile != NULL)
/*
 * Check whether each of the `size` bytes starting at `memory` equals `val`.
 *
 * Returns non-zero when all bytes match and 0 otherwise. An empty range
 * (size == 0) is reported as matching without touching `memory`; without
 * this guard the function would read one byte outside the range and pass
 * size - 1, which wraps to SIZE_MAX, to memcmp().
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
	const unsigned char *mm;

	if (size == 0)
		return (1);

	mm = (const unsigned char *)memory;
	/*
	 * Once the first byte is known to equal val, comparing the buffer with
	 * itself shifted by one byte shows that every later byte equals its
	 * predecessor, and therefore val.
	 */
	return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;
}