]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/mkuzip/mkuzip.c
geom_uzip(4), mkuzip(8): Add Zstd image mode
[FreeBSD/FreeBSD.git] / usr.bin / mkuzip / mkuzip.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/types.h>
33 #include <sys/endian.h>
34 #include <sys/param.h>
35 #include <sys/sysctl.h>
36 #include <sys/stat.h>
37 #include <sys/uio.h>
38 #include <netinet/in.h>
39 #include <assert.h>
40 #include <ctype.h>
41 #include <err.h>
42 #include <fcntl.h>
43 #include <pthread.h>
44 #include <signal.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50
51 #include "mkuzip.h"
52 #include "mkuz_cloop.h"
53 #include "mkuz_blockcache.h"
54 #include "mkuz_lzma.h"
55 #include "mkuz_zlib.h"
56 #include "mkuz_zstd.h"
57 #include "mkuz_blk.h"
58 #include "mkuz_cfg.h"
59 #include "mkuz_conveyor.h"
60 #include "mkuz_format.h"
61 #include "mkuz_fqueue.h"
62 #include "mkuz_time.h"
63 #include "mkuz_insize.h"
64
/* Cluster size, in bytes, used when -s is not given on the command line. */
#define DEFAULT_CLSTSIZE        (16 * 1024)

/*
 * Supported compression algorithms.  The values double as indices into
 * uzip_fmts[]; UZ_INVALID is the one-past-the-end sentinel used when
 * scanning that table.
 */
enum UZ_ALGORITHM {
        UZ_ZLIB    = 0,
        UZ_LZMA    = 1,
        UZ_ZSTD    = 2,
        UZ_INVALID = 3
};
73
74 static const struct mkuz_format uzip_fmts[] = {
75         [UZ_ZLIB] = {
76                 .option = "zlib",
77                 .magic = CLOOP_MAGIC_ZLIB,
78                 .default_sufx = DEFAULT_SUFX_ZLIB,
79                 .f_compress_bound = mkuz_zlib_cbound,
80                 .f_init = mkuz_zlib_init,
81                 .f_compress = mkuz_zlib_compress,
82         },
83         [UZ_LZMA] = {
84                 .option = "lzma",
85                 .magic = CLOOP_MAGIC_LZMA,
86                 .default_sufx = DEFAULT_SUFX_LZMA,
87                 .f_compress_bound = mkuz_lzma_cbound,
88                 .f_init = mkuz_lzma_init,
89                 .f_compress = mkuz_lzma_compress,
90         },
91         [UZ_ZSTD] = {
92                 .option = "zstd",
93                 .magic = CLOOP_MAGIC_ZSTD,
94                 .default_sufx = DEFAULT_SUFX_ZSTD,
95                 .f_compress_bound = mkuz_zstd_cbound,
96                 .f_init = mkuz_zstd_init,
97                 .f_compress = mkuz_zstd_compress,
98         },
99 };
100
/* File-local helpers defined further down in this file. */
static struct mkuz_blk *readblock(int fd, u_int32_t clstsize);
static void usage(void);
static void cleanup(void);

/*
 * Path that cleanup() unlinks at exit, or NULL when there is nothing to
 * remove.  Static storage, so it starts out as NULL.
 */
static char *cleanfile;
106
107 static int
108 cmp_blkno(const struct mkuz_blk *bp, void *p)
109 {
110         uint32_t *ap;
111
112         ap = (uint32_t *)p;
113
114         return (bp->info.blkno == *ap);
115 }
116
117 int main(int argc, char **argv)
118 {
119         struct mkuz_cfg cfs;
120         char *oname;
121         uint64_t *toc;
122         int i, io, opt, tmp;
123         struct {
124                 int en;
125                 FILE *f;
126         } summary;
127         struct iovec iov[2];
128         uint64_t offset, last_offset;
129         struct cloop_header hdr;
130         struct mkuz_conveyor *cvp;
131         void *c_ctx;
132         struct mkuz_blk_info *chit;
133         size_t ncpusz, ncpu, magiclen;
134         double st, et;
135         enum UZ_ALGORITHM comp_alg;
136         int comp_level;
137
138         st = getdtime();
139
140         ncpusz = sizeof(size_t);
141         if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
142                 ncpu = 1;
143         } else if (ncpu > MAX_WORKERS_AUTO) {
144                 ncpu = MAX_WORKERS_AUTO;
145         }
146
147         memset(&hdr, 0, sizeof(hdr));
148         cfs.blksz = DEFAULT_CLSTSIZE;
149         oname = NULL;
150         cfs.verbose = 0;
151         cfs.no_zcomp = 0;
152         cfs.en_dedup = 0;
153         summary.en = 0;
154         summary.f = stderr;
155         comp_alg = UZ_ZLIB;
156         comp_level = USE_DEFAULT_LEVEL;
157         cfs.nworkers = ncpu;
158         struct mkuz_blk *iblk, *oblk;
159
160         while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
161                 switch(opt) {
162                 case 'A':
163                         for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
164                                 if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
165                                         break;
166                         }
167                         if (tmp == UZ_INVALID)
168                                 errx(1, "invalid algorithm specified: %s",
169                                     optarg);
170                                 /* Not reached */
171                         comp_alg = tmp;
172                         break;
173                 case 'C':
174                         comp_level = atoi(optarg);
175                         break;
176                 case 'o':
177                         oname = optarg;
178                         break;
179
180                 case 's':
181                         tmp = atoi(optarg);
182                         if (tmp <= 0) {
183                                 errx(1, "invalid cluster size specified: %s",
184                                     optarg);
185                                 /* Not reached */
186                         }
187                         cfs.blksz = tmp;
188                         break;
189
190                 case 'v':
191                         cfs.verbose = 1;
192                         break;
193
194                 case 'Z':
195                         cfs.no_zcomp = 1;
196                         break;
197
198                 case 'd':
199                         cfs.en_dedup = 1;
200                         break;
201
202                 case 'L':
203                         comp_alg = UZ_LZMA;
204                         break;
205
206                 case 'S':
207                         summary.en = 1;
208                         summary.f = stdout;
209                         break;
210
211                 case 'j':
212                         tmp = atoi(optarg);
213                         if (tmp <= 0) {
214                                 errx(1, "invalid number of compression threads"
215                                     " specified: %s", optarg);
216                                 /* Not reached */
217                         }
218                         cfs.nworkers = tmp;
219                         break;
220
221                 default:
222                         usage();
223                         /* Not reached */
224                 }
225         }
226         argc -= optind;
227         argv += optind;
228
229         if (argc != 1) {
230                 usage();
231                 /* Not reached */
232         }
233
234         cfs.handler = &uzip_fmts[comp_alg];
235
236         magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
237         assert(magiclen < sizeof(hdr.magic));
238
239         if (cfs.en_dedup != 0) {
240                 /*
241                  * Dedupe requires a version 3 format.  Don't downgrade newer
242                  * formats.
243                  */
244                 if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
245                         hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
246                 hdr.magic[CLOOP_OFS_COMPR] =
247                     tolower(hdr.magic[CLOOP_OFS_COMPR]);
248         }
249
250         if (cfs.blksz % DEV_BSIZE != 0)
251                 errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
252
253         cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
254         if (cfs.cbound_blksz > MAXPHYS)
255                 errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
256                     cfs.cbound_blksz, (size_t)MAXPHYS);
257
258         c_ctx = cfs.handler->f_init(&comp_level);
259         cfs.comp_level = comp_level;
260
261         cfs.iname = argv[0];
262         if (oname == NULL) {
263                 asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx);
264                 if (oname == NULL) {
265                         err(1, "can't allocate memory");
266                         /* Not reached */
267                 }
268         }
269
270         signal(SIGHUP, exit);
271         signal(SIGINT, exit);
272         signal(SIGTERM, exit);
273         signal(SIGXCPU, exit);
274         signal(SIGXFSZ, exit);
275         atexit(cleanup);
276
277         cfs.fdr = open(cfs.iname, O_RDONLY);
278         if (cfs.fdr < 0) {
279                 err(1, "open(%s)", cfs.iname);
280                 /* Not reached */
281         }
282         cfs.isize = mkuz_get_insize(&cfs);
283         if (cfs.isize < 0) {
284                 errx(1, "can't determine input image size");
285                 /* Not reached */
286         }
287         hdr.nblocks = cfs.isize / cfs.blksz;
288         if ((cfs.isize % cfs.blksz) != 0) {
289                 if (cfs.verbose != 0)
290                         fprintf(stderr, "file size is not multiple "
291                         "of %d, padding data\n", cfs.blksz);
292                 hdr.nblocks++;
293         }
294         toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
295
296         /*
297          * Initialize last+1 entry with non-heap trash.  If final padding is
298          * added later, it may or may not be overwritten with an offset
299          * representing the length of the final compressed block.  If not,
300          * initialize to a defined value.
301          */
302         toc[hdr.nblocks] = 0;
303
304         cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
305                    S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
306         if (cfs.fdw < 0) {
307                 err(1, "open(%s)", oname);
308                 /* Not reached */
309         }
310         cleanfile = oname;
311
312         /* Prepare header that we will write later when we have index ready. */
313         iov[0].iov_base = (char *)&hdr;
314         iov[0].iov_len = sizeof(hdr);
315         iov[1].iov_base = (char *)toc;
316         iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
317         offset = iov[0].iov_len + iov[1].iov_len;
318
319         /* Reserve space for header */
320         lseek(cfs.fdw, offset, SEEK_SET);
321
322         if (cfs.verbose != 0) {
323                 fprintf(stderr, "data size %ju bytes, number of clusters "
324                     "%u, index length %zu bytes\n", cfs.isize,
325                     hdr.nblocks, iov[1].iov_len);
326         }
327
328         cvp = mkuz_conveyor_ctor(&cfs);
329
330         last_offset = 0;
331         iblk = oblk = NULL;
332         for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
333                 iblk = readblock(cfs.fdr, cfs.blksz);
334                 mkuz_fqueue_enq(cvp->wrk_queue, iblk);
335                 if (iblk != MKUZ_BLK_EOF &&
336                     (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
337                         continue;
338                 }
339 drain:
340                 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
341                 assert(oblk->info.blkno == (unsigned)io);
342                 oblk->info.offset = offset;
343                 chit = NULL;
344                 if (cfs.en_dedup != 0 && oblk->info.len > 0) {
345                         chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
346                         /*
347                          * There should be at least one non-empty block
348                          * between us and the backref'ed offset, otherwise
349                          * we won't be able to parse that sequence correctly
350                          * as it would be indistinguishible from another
351                          * empty block.
352                          */
353                         if (chit != NULL && chit->offset == last_offset) {
354                                 chit = NULL;
355                         }
356                 }
357                 if (chit != NULL) {
358                         toc[io] = htobe64(chit->offset);
359                         oblk->info.len = 0;
360                 } else {
361                         if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
362                             oblk->info.len) < 0) {
363                                 err(1, "write(%s)", oname);
364                                 /* Not reached */
365                         }
366                         toc[io] = htobe64(offset);
367                         last_offset = offset;
368                         offset += oblk->info.len;
369                 }
370                 if (cfs.verbose != 0) {
371                         fprintf(stderr, "cluster #%d, in %u bytes, "
372                             "out len=%lu offset=%lu", io, cfs.blksz,
373                             (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
374                         if (chit != NULL) {
375                                 fprintf(stderr, " (backref'ed to #%d)",
376                                     chit->blkno);
377                         }
378                         fprintf(stderr, "\n");
379                 }
380                 free(oblk);
381                 io += 1;
382                 if (iblk == MKUZ_BLK_EOF) {
383                         if (io < i)
384                                 goto drain;
385                         /* Last block, see if we need to add some padding */
386                         if ((offset % DEV_BSIZE) == 0)
387                                 continue;
388                         oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
389                         oblk->info.blkno = io;
390                         oblk->info.len = oblk->alen;
391                         if (cfs.verbose != 0) {
392                                 fprintf(stderr, "padding data with %lu bytes "
393                                     "so that file size is multiple of %d\n",
394                                     (u_long)oblk->alen, DEV_BSIZE);
395                         }
396                         mkuz_fqueue_enq(cvp->results, oblk);
397                         goto drain;
398                 }
399         }
400
401         close(cfs.fdr);
402
403         if (cfs.verbose != 0 || summary.en != 0) {
404                 et = getdtime();
405                 fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
406                     "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
407                     (long long)(cfs.isize - offset),
408                     100.0 * (long long)(cfs.isize - offset) /
409                     (float)cfs.isize, (float)cfs.isize / (et - st));
410         }
411
412         /* Convert to big endian */
413         hdr.blksz = htonl(cfs.blksz);
414         hdr.nblocks = htonl(hdr.nblocks);
415         /* Write headers into pre-allocated space */
416         lseek(cfs.fdw, 0, SEEK_SET);
417         if (writev(cfs.fdw, iov, 2) < 0) {
418                 err(1, "writev(%s)", oname);
419                 /* Not reached */
420         }
421         cleanfile = NULL;
422         close(cfs.fdw);
423
424         exit(0);
425 }
426
427 static struct mkuz_blk *
428 readblock(int fd, u_int32_t clstsize)
429 {
430         int numread;
431         struct mkuz_blk *rval;
432         static int blockcnt;
433         off_t cpos;
434
435         rval = mkuz_blk_ctor(clstsize);
436
437         rval->info.blkno = blockcnt;
438         blockcnt += 1;
439         cpos = lseek(fd, 0, SEEK_CUR);
440         if (cpos < 0) {
441                 err(1, "readblock: lseek() failed");
442                 /* Not reached */
443         }
444         rval->info.offset = cpos;
445
446         numread = read(fd, rval->data, clstsize);
447         if (numread < 0) {
448                 err(1, "readblock: read() failed");
449                 /* Not reached */
450         }
451         if (numread == 0) {
452                 free(rval);
453                 return MKUZ_BLK_EOF;
454         }
455         rval->info.len = numread;
456         return rval;
457 }
458
/*
 * Print the command-line synopsis to stderr and exit with status 1.
 * The synopsis lists every option accepted by the getopt() string in
 * main(), including -A and -C.
 */
static void
usage(void)
{

        fprintf(stderr, "usage: mkuzip [-vZdLS] [-A algorithm] [-C level] "
            "[-o outfile] [-s cluster_size] [-j ncompr] infile\n");
        exit(1);
}
467
/*
 * malloc() wrapper that never returns NULL: if the allocation fails the
 * program is terminated through err() with exit status 1.
 */
void *
mkuz_safe_malloc(size_t size)
{
        void *mem = malloc(size);

        if (mem == NULL)
                err(1, "can't allocate memory");
        return (mem);
}
480
/*
 * Like mkuz_safe_malloc(), but the returned memory is zero-filled.
 */
void *
mkuz_safe_zmalloc(size_t size)
{
        void *mem = mkuz_safe_malloc(size);

        memset(mem, 0, size);
        return (mem);
}
490
491 static void
492 cleanup(void)
493 {
494
495         if (cleanfile != NULL)
496                 unlink(cleanfile);
497 }
498
/*
 * Test whether all `size' bytes starting at `memory' equal `val'.
 *
 * Returns 1 when every byte matches and 0 otherwise.  An empty range
 * (size == 0) contains no mismatching byte and is reported as matching;
 * this guard also keeps the code from reading memory[0] and from passing
 * the wrapped-around length (size_t)-1 to memcmp().
 *
 * The check works by verifying the first byte and then comparing the buffer
 * against itself shifted by one byte, which holds only if all bytes are
 * equal to the first.
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
        const unsigned char *mm;

        if (size == 0)
                return (1);

        mm = (const unsigned char *)memory;
        return (mm[0] == val && memcmp(mm, mm + 1, size - 1) == 0);
}