]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/mkuzip/mkuzip.c
Import tzdata 2018d
[FreeBSD/FreeBSD.git] / usr.bin / mkuzip / mkuzip.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/types.h>
33 #include <sys/endian.h>
34 #include <sys/param.h>
35 #include <sys/sysctl.h>
36 #include <sys/stat.h>
37 #include <sys/uio.h>
38 #include <netinet/in.h>
39 #include <assert.h>
40 #include <ctype.h>
41 #include <err.h>
42 #include <fcntl.h>
43 #include <pthread.h>
44 #include <signal.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50
51 #include "mkuzip.h"
52 #include "mkuz_cloop.h"
53 #include "mkuz_blockcache.h"
54 #include "mkuz_zlib.h"
55 #include "mkuz_lzma.h"
56 #include "mkuz_blk.h"
57 #include "mkuz_cfg.h"
58 #include "mkuz_conveyor.h"
59 #include "mkuz_format.h"
60 #include "mkuz_fqueue.h"
61 #include "mkuz_time.h"
62 #include "mkuz_insize.h"
63
64 #define DEFAULT_CLSTSIZE        16384
65
66 static struct mkuz_format uzip_fmt = {
67         .magic = CLOOP_MAGIC_ZLIB,
68         .default_sufx = DEFAULT_SUFX_ZLIB,
69         .f_init = &mkuz_zlib_init,
70         .f_compress = &mkuz_zlib_compress
71 };
72
73 static struct mkuz_format ulzma_fmt = {
74         .magic = CLOOP_MAGIC_LZMA,
75         .default_sufx = DEFAULT_SUFX_LZMA,
76         .f_init = &mkuz_lzma_init,
77         .f_compress = &mkuz_lzma_compress
78 };
79
/* Forward declarations of the file-local helpers defined after main(). */
static void cleanup(void);
static void usage(void);
static struct mkuz_blk *readblock(int fd, u_int32_t clstsize);

/*
 * Path that cleanup() unlinks at exit; NULL means there is nothing to
 * remove.
 */
static char *cleanfile = NULL;
85
86 static int
87 cmp_blkno(const struct mkuz_blk *bp, void *p)
88 {
89         uint32_t *ap;
90
91         ap = (uint32_t *)p;
92
93         return (bp->info.blkno == *ap);
94 }
95
96 int main(int argc, char **argv)
97 {
98         struct mkuz_cfg cfs;
99         char *oname;
100         uint64_t *toc;
101         int i, io, opt, tmp;
102         struct {
103                 int en;
104                 FILE *f;
105         } summary;
106         struct iovec iov[2];
107         uint64_t offset, last_offset;
108         struct cloop_header hdr;
109         struct mkuz_conveyor *cvp;
110         void *c_ctx;
111         struct mkuz_blk_info *chit;
112         size_t ncpusz, ncpu, magiclen;
113         double st, et;
114
115         st = getdtime();
116
117         ncpusz = sizeof(size_t);
118         if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
119                 ncpu = 1;
120         } else if (ncpu > MAX_WORKERS_AUTO) {
121                 ncpu = MAX_WORKERS_AUTO;
122         }
123
124         memset(&hdr, 0, sizeof(hdr));
125         cfs.blksz = DEFAULT_CLSTSIZE;
126         oname = NULL;
127         cfs.verbose = 0;
128         cfs.no_zcomp = 0;
129         cfs.en_dedup = 0;
130         summary.en = 0;
131         summary.f = stderr;
132         cfs.handler = &uzip_fmt;
133         cfs.nworkers = ncpu;
134         struct mkuz_blk *iblk, *oblk;
135
136         while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
137                 switch(opt) {
138                 case 'o':
139                         oname = optarg;
140                         break;
141
142                 case 's':
143                         tmp = atoi(optarg);
144                         if (tmp <= 0) {
145                                 errx(1, "invalid cluster size specified: %s",
146                                     optarg);
147                                 /* Not reached */
148                         }
149                         cfs.blksz = tmp;
150                         break;
151
152                 case 'v':
153                         cfs.verbose = 1;
154                         break;
155
156                 case 'Z':
157                         cfs.no_zcomp = 1;
158                         break;
159
160                 case 'd':
161                         cfs.en_dedup = 1;
162                         break;
163
164                 case 'L':
165                         cfs.handler = &ulzma_fmt;
166                         break;
167
168                 case 'S':
169                         summary.en = 1;
170                         summary.f = stdout;
171                         break;
172
173                 case 'j':
174                         tmp = atoi(optarg);
175                         if (tmp <= 0) {
176                                 errx(1, "invalid number of compression threads"
177                                     " specified: %s", optarg);
178                                 /* Not reached */
179                         }
180                         cfs.nworkers = tmp;
181                         break;
182
183                 default:
184                         usage();
185                         /* Not reached */
186                 }
187         }
188         argc -= optind;
189         argv += optind;
190
191         if (argc != 1) {
192                 usage();
193                 /* Not reached */
194         }
195
196         magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
197         assert(magiclen < sizeof(hdr.magic));
198
199         if (cfs.en_dedup != 0) {
200                 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
201                 hdr.magic[CLOOP_OFS_COMPR] =
202                     tolower(hdr.magic[CLOOP_OFS_COMPR]);
203         }
204
205         c_ctx = cfs.handler->f_init(cfs.blksz);
206
207         cfs.iname = argv[0];
208         if (oname == NULL) {
209                 asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx);
210                 if (oname == NULL) {
211                         err(1, "can't allocate memory");
212                         /* Not reached */
213                 }
214         }
215
216         signal(SIGHUP, exit);
217         signal(SIGINT, exit);
218         signal(SIGTERM, exit);
219         signal(SIGXCPU, exit);
220         signal(SIGXFSZ, exit);
221         atexit(cleanup);
222
223         cfs.fdr = open(cfs.iname, O_RDONLY);
224         if (cfs.fdr < 0) {
225                 err(1, "open(%s)", cfs.iname);
226                 /* Not reached */
227         }
228         cfs.isize = mkuz_get_insize(&cfs);
229         if (cfs.isize < 0) {
230                 errx(1, "can't determine input image size");
231                 /* Not reached */
232         }
233         hdr.nblocks = cfs.isize / cfs.blksz;
234         if ((cfs.isize % cfs.blksz) != 0) {
235                 if (cfs.verbose != 0)
236                         fprintf(stderr, "file size is not multiple "
237                         "of %d, padding data\n", cfs.blksz);
238                 hdr.nblocks++;
239         }
240         toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
241
242         cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
243                    S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
244         if (cfs.fdw < 0) {
245                 err(1, "open(%s)", oname);
246                 /* Not reached */
247         }
248         cleanfile = oname;
249
250         /* Prepare header that we will write later when we have index ready. */
251         iov[0].iov_base = (char *)&hdr;
252         iov[0].iov_len = sizeof(hdr);
253         iov[1].iov_base = (char *)toc;
254         iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
255         offset = iov[0].iov_len + iov[1].iov_len;
256
257         /* Reserve space for header */
258         lseek(cfs.fdw, offset, SEEK_SET);
259
260         if (cfs.verbose != 0) {
261                 fprintf(stderr, "data size %ju bytes, number of clusters "
262                     "%u, index length %zu bytes\n", cfs.isize,
263                     hdr.nblocks, iov[1].iov_len);
264         }
265
266         cvp = mkuz_conveyor_ctor(&cfs);
267
268         last_offset = 0;
269         iblk = oblk = NULL;
270         for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
271                 iblk = readblock(cfs.fdr, cfs.blksz);
272                 mkuz_fqueue_enq(cvp->wrk_queue, iblk);
273                 if (iblk != MKUZ_BLK_EOF &&
274                     (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
275                         continue;
276                 }
277 drain:
278                 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
279                 assert(oblk->info.blkno == (unsigned)io);
280                 oblk->info.offset = offset;
281                 chit = NULL;
282                 if (cfs.en_dedup != 0 && oblk->info.len > 0) {
283                         chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
284                         /*
285                          * There should be at least one non-empty block
286                          * between us and the backref'ed offset, otherwise
287                          * we won't be able to parse that sequence correctly
288                          * as it would be indistinguishible from another
289                          * empty block.
290                          */
291                         if (chit != NULL && chit->offset == last_offset) {
292                                 chit = NULL;
293                         }
294                 }
295                 if (chit != NULL) {
296                         toc[io] = htobe64(chit->offset);
297                         oblk->info.len = 0;
298                 } else {
299                         if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
300                             oblk->info.len) < 0) {
301                                 err(1, "write(%s)", oname);
302                                 /* Not reached */
303                         }
304                         toc[io] = htobe64(offset);
305                         last_offset = offset;
306                         offset += oblk->info.len;
307                 }
308                 if (cfs.verbose != 0) {
309                         fprintf(stderr, "cluster #%d, in %u bytes, "
310                             "out len=%lu offset=%lu", io, cfs.blksz,
311                             (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
312                         if (chit != NULL) {
313                                 fprintf(stderr, " (backref'ed to #%d)",
314                                     chit->blkno);
315                         }
316                         fprintf(stderr, "\n");
317                 }
318                 free(oblk);
319                 io += 1;
320                 if (iblk == MKUZ_BLK_EOF) {
321                         if (io < i)
322                                 goto drain;
323                         /* Last block, see if we need to add some padding */
324                         if ((offset % DEV_BSIZE) == 0)
325                                 continue;
326                         oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
327                         oblk->info.blkno = io;
328                         oblk->info.len = oblk->alen;
329                         if (cfs.verbose != 0) {
330                                 fprintf(stderr, "padding data with %lu bytes "
331                                     "so that file size is multiple of %d\n",
332                                     (u_long)oblk->alen, DEV_BSIZE);
333                         }
334                         mkuz_fqueue_enq(cvp->results, oblk);
335                         goto drain;
336                 }
337         }
338
339         close(cfs.fdr);
340
341         if (cfs.verbose != 0 || summary.en != 0) {
342                 et = getdtime();
343                 fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
344                     "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
345                     (long long)(cfs.isize - offset),
346                     100.0 * (long long)(cfs.isize - offset) /
347                     (float)cfs.isize, (float)cfs.isize / (et - st));
348         }
349
350         /* Convert to big endian */
351         hdr.blksz = htonl(cfs.blksz);
352         hdr.nblocks = htonl(hdr.nblocks);
353         /* Write headers into pre-allocated space */
354         lseek(cfs.fdw, 0, SEEK_SET);
355         if (writev(cfs.fdw, iov, 2) < 0) {
356                 err(1, "writev(%s)", oname);
357                 /* Not reached */
358         }
359         cleanfile = NULL;
360         close(cfs.fdw);
361
362         exit(0);
363 }
364
365 static struct mkuz_blk *
366 readblock(int fd, u_int32_t clstsize)
367 {
368         int numread;
369         struct mkuz_blk *rval;
370         static int blockcnt;
371         off_t cpos;
372
373         rval = mkuz_blk_ctor(clstsize);
374
375         rval->info.blkno = blockcnt;
376         blockcnt += 1;
377         cpos = lseek(fd, 0, SEEK_CUR);
378         if (cpos < 0) {
379                 err(1, "readblock: lseek() failed");
380                 /* Not reached */
381         }
382         rval->info.offset = cpos;
383
384         numread = read(fd, rval->data, clstsize);
385         if (numread < 0) {
386                 err(1, "readblock: read() failed");
387                 /* Not reached */
388         }
389         if (numread == 0) {
390                 free(rval);
391                 return MKUZ_BLK_EOF;
392         }
393         rval->info.len = numread;
394         return rval;
395 }
396
/*
 * Print the command synopsis on stderr and terminate with exit status 1.
 * Does not return.
 */
static void
usage(void)
{
        static const char synopsis[] =
            "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
            "[-j ncompr] infile\n";

        fputs(synopsis, stderr);
        exit(1);
}
405
/*
 * malloc() wrapper that never hands back NULL: when the allocation fails
 * the program is terminated through err() with exit status 1.
 *
 * Returns a pointer to size bytes of uninitialised memory.
 */
void *
mkuz_safe_malloc(size_t size)
{
        void *mem = malloc(size);

        if (mem == NULL)
                err(1, "can't allocate memory");
        return (mem);
}
418
/*
 * Allocate size bytes through mkuz_safe_malloc() and clear them.
 *
 * Returns a pointer to the zero-filled memory.
 */
void *
mkuz_safe_zmalloc(size_t size)
{
        void *mem = mkuz_safe_malloc(size);

        memset(mem, 0, size);
        return (mem);
}
428
429 static void
430 cleanup(void)
431 {
432
433         if (cleanfile != NULL)
434                 unlink(cleanfile);
435 }
436
/*
 * Test whether each of the first size bytes at memory equals val.
 *
 * Returns non-zero when all of them match and zero otherwise.  An empty
 * range (size == 0) matches trivially and memory is not dereferenced in
 * that case.
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
        const unsigned char *mm;

        /*
         * Without this guard a zero size would read one byte past the
         * range and ask memcmp() to compare size - 1 == SIZE_MAX bytes.
         */
        if (size == 0)
                return (1);

        mm = memory;
        /*
         * If the first byte is val and every byte equals its successor,
         * then all size bytes are val.
         */
        return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;
}