CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - usr.bin/mkuzip/mkuzip.c
MFC r315798:
[FreeBSD/stable/10.git] / usr.bin / mkuzip / mkuzip.c
1 /*
2  * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/types.h>
31 #include <sys/disk.h>
32 #include <sys/endian.h>
33 #include <sys/param.h>
34 #include <sys/sysctl.h>
35 #include <sys/stat.h>
36 #include <sys/uio.h>
37 #include <netinet/in.h>
38 #include <assert.h>
39 #include <ctype.h>
40 #include <err.h>
41 #include <fcntl.h>
42 #include <pthread.h>
43 #include <signal.h>
44 #include <stdint.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49
50 #include "mkuzip.h"
51 #include "mkuz_cloop.h"
52 #include "mkuz_blockcache.h"
53 #include "mkuz_zlib.h"
54 #include "mkuz_lzma.h"
55 #include "mkuz_blk.h"
56 #include "mkuz_cfg.h"
57 #include "mkuz_conveyor.h"
58 #include "mkuz_format.h"
59 #include "mkuz_fqueue.h"
60 #include "mkuz_time.h"
61
62 #define DEFAULT_CLSTSIZE        16384
63
64 static struct mkuz_format uzip_fmt = {
65         .magic = CLOOP_MAGIC_ZLIB,
66         .default_sufx = DEFAULT_SUFX_ZLIB,
67         .f_init = &mkuz_zlib_init,
68         .f_compress = &mkuz_zlib_compress
69 };
70
71 static struct mkuz_format ulzma_fmt = {
72         .magic = CLOOP_MAGIC_LZMA,
73         .default_sufx = DEFAULT_SUFX_LZMA,
74         .f_init = &mkuz_lzma_init,
75         .f_compress = &mkuz_lzma_compress
76 };
77
/* Forward declarations of the file-local helpers defined below main(). */
static void cleanup(void);
static struct mkuz_blk *readblock(int fd, u_int32_t clstsize);
static void usage(void);

/*
 * Path that cleanup() unlinks when the process exits; NULL (the implicit
 * initial value) means there is nothing to remove.
 */
static char *cleanfile;
83
84 static int
85 cmp_blkno(const struct mkuz_blk *bp, void *p)
86 {
87         uint32_t *ap;
88
89         ap = (uint32_t *)p;
90
91         return (bp->info.blkno == *ap);
92 }
93
94 int main(int argc, char **argv)
95 {
96         struct mkuz_cfg cfs;
97         char *iname, *oname;
98         uint64_t *toc;
99         int i, io, opt, tmp;
100         struct {
101                 int en;
102                 FILE *f;
103         } summary;
104         struct iovec iov[2];
105         struct stat sb;
106         uint64_t offset, last_offset;
107         struct cloop_header hdr;
108         struct mkuz_conveyor *cvp;
109         void *c_ctx;
110         struct mkuz_blk_info *chit;
111         size_t ncpusz, ncpu;
112         double st, et;
113
114         st = getdtime();
115
116         ncpusz = sizeof(size_t);
117         if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
118                 ncpu = 1;
119         } else if (ncpu > MAX_WORKERS_AUTO) {
120                 ncpu = MAX_WORKERS_AUTO;
121         }
122
123         memset(&hdr, 0, sizeof(hdr));
124         cfs.blksz = DEFAULT_CLSTSIZE;
125         oname = NULL;
126         cfs.verbose = 0;
127         cfs.no_zcomp = 0;
128         cfs.en_dedup = 0;
129         summary.en = 0;
130         summary.f = stderr;
131         cfs.handler = &uzip_fmt;
132         cfs.nworkers = ncpu;
133         struct mkuz_blk *iblk, *oblk;
134
135         while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
136                 switch(opt) {
137                 case 'o':
138                         oname = optarg;
139                         break;
140
141                 case 's':
142                         tmp = atoi(optarg);
143                         if (tmp <= 0) {
144                                 errx(1, "invalid cluster size specified: %s",
145                                     optarg);
146                                 /* Not reached */
147                         }
148                         cfs.blksz = tmp;
149                         break;
150
151                 case 'v':
152                         cfs.verbose = 1;
153                         break;
154
155                 case 'Z':
156                         cfs.no_zcomp = 1;
157                         break;
158
159                 case 'd':
160                         cfs.en_dedup = 1;
161                         break;
162
163                 case 'L':
164                         cfs.handler = &ulzma_fmt;
165                         break;
166
167                 case 'S':
168                         summary.en = 1;
169                         summary.f = stdout;
170                         break;
171
172                 case 'j':
173                         tmp = atoi(optarg);
174                         if (tmp <= 0) {
175                                 errx(1, "invalid number of compression threads"
176                                     " specified: %s", optarg);
177                                 /* Not reached */
178                         }
179                         cfs.nworkers = tmp;
180                         break;
181
182                 default:
183                         usage();
184                         /* Not reached */
185                 }
186         }
187         argc -= optind;
188         argv += optind;
189
190         if (argc != 1) {
191                 usage();
192                 /* Not reached */
193         }
194
195         strcpy(hdr.magic, cfs.handler->magic);
196
197         if (cfs.en_dedup != 0) {
198                 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
199                 hdr.magic[CLOOP_OFS_COMPR] =
200                     tolower(hdr.magic[CLOOP_OFS_COMPR]);
201         }
202
203         c_ctx = cfs.handler->f_init(cfs.blksz);
204
205         iname = argv[0];
206         if (oname == NULL) {
207                 asprintf(&oname, "%s%s", iname, cfs.handler->default_sufx);
208                 if (oname == NULL) {
209                         err(1, "can't allocate memory");
210                         /* Not reached */
211                 }
212         }
213
214         signal(SIGHUP, exit);
215         signal(SIGINT, exit);
216         signal(SIGTERM, exit);
217         signal(SIGXCPU, exit);
218         signal(SIGXFSZ, exit);
219         atexit(cleanup);
220
221         cfs.fdr = open(iname, O_RDONLY);
222         if (cfs.fdr < 0) {
223                 err(1, "open(%s)", iname);
224                 /* Not reached */
225         }
226         if (fstat(cfs.fdr, &sb) != 0) {
227                 err(1, "fstat(%s)", iname);
228                 /* Not reached */
229         }
230         if (S_ISCHR(sb.st_mode)) {
231                 off_t ms;
232
233                 if (ioctl(cfs.fdr, DIOCGMEDIASIZE, &ms) < 0) {
234                         err(1, "ioctl(DIOCGMEDIASIZE)");
235                         /* Not reached */
236                 }
237                 sb.st_size = ms;
238         } else if (!S_ISREG(sb.st_mode)) {
239                 fprintf(stderr, "%s: not a character device or regular file\n",
240                         iname);
241                 exit(1);
242         }
243         hdr.nblocks = sb.st_size / cfs.blksz;
244         if ((sb.st_size % cfs.blksz) != 0) {
245                 if (cfs.verbose != 0)
246                         fprintf(stderr, "file size is not multiple "
247                         "of %d, padding data\n", cfs.blksz);
248                 hdr.nblocks++;
249         }
250         toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
251
252         cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
253                    S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
254         if (cfs.fdw < 0) {
255                 err(1, "open(%s)", oname);
256                 /* Not reached */
257         }
258         cleanfile = oname;
259
260         /* Prepare header that we will write later when we have index ready. */
261         iov[0].iov_base = (char *)&hdr;
262         iov[0].iov_len = sizeof(hdr);
263         iov[1].iov_base = (char *)toc;
264         iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
265         offset = iov[0].iov_len + iov[1].iov_len;
266
267         /* Reserve space for header */
268         lseek(cfs.fdw, offset, SEEK_SET);
269
270         if (cfs.verbose != 0) {
271                 fprintf(stderr, "data size %ju bytes, number of clusters "
272                     "%u, index length %zu bytes\n", sb.st_size,
273                     hdr.nblocks, iov[1].iov_len);
274         }
275
276         cvp = mkuz_conveyor_ctor(&cfs);
277
278         last_offset = 0;
279         iblk = oblk = NULL;
280         for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
281                 iblk = readblock(cfs.fdr, cfs.blksz);
282                 mkuz_fqueue_enq(cvp->wrk_queue, iblk);
283                 if (iblk != MKUZ_BLK_EOF &&
284                     (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
285                         continue;
286                 }
287 drain:
288                 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
289                 assert(oblk->info.blkno == (unsigned)io);
290                 oblk->info.offset = offset;
291                 chit = NULL;
292                 if (cfs.en_dedup != 0 && oblk->info.len > 0) {
293                         chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
294                         /*
295                          * There should be at least one non-empty block
296                          * between us and the backref'ed offset, otherwise
297                          * we won't be able to parse that sequence correctly
298                          * as it would be indistinguishible from another
299                          * empty block.
300                          */
301                         if (chit != NULL && chit->offset == last_offset) {
302                                 chit = NULL;
303                         }
304                 }
305                 if (chit != NULL) {
306                         toc[io] = htobe64(chit->offset);
307                         oblk->info.len = 0;
308                 } else {
309                         if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
310                             oblk->info.len) < 0) {
311                                 err(1, "write(%s)", oname);
312                                 /* Not reached */
313                         }
314                         toc[io] = htobe64(offset);
315                         last_offset = offset;
316                         offset += oblk->info.len;
317                 }
318                 if (cfs.verbose != 0) {
319                         fprintf(stderr, "cluster #%d, in %u bytes, "
320                             "out len=%lu offset=%lu", io, cfs.blksz,
321                             (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
322                         if (chit != NULL) {
323                                 fprintf(stderr, " (backref'ed to #%d)",
324                                     chit->blkno);
325                         }
326                         fprintf(stderr, "\n");
327                 }
328                 free(oblk);
329                 io += 1;
330                 if (iblk == MKUZ_BLK_EOF) {
331                         if (io < i)
332                                 goto drain;
333                         /* Last block, see if we need to add some padding */
334                         if ((offset % DEV_BSIZE) == 0)
335                                 continue;
336                         oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
337                         oblk->info.blkno = io;
338                         oblk->info.len = oblk->alen;
339                         if (cfs.verbose != 0) {
340                                 fprintf(stderr, "padding data with %lu bytes "
341                                     "so that file size is multiple of %d\n",
342                                     (u_long)oblk->alen, DEV_BSIZE);
343                         }
344                         mkuz_fqueue_enq(cvp->results, oblk);
345                         goto drain;
346                 }
347         }
348
349         close(cfs.fdr);
350
351         if (cfs.verbose != 0 || summary.en != 0) {
352                 et = getdtime();
353                 fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
354                     "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
355                     (long long)(sb.st_size - offset),
356                     100.0 * (long long)(sb.st_size - offset) /
357                     (float)sb.st_size, (float)sb.st_size / (et - st));
358         }
359
360         /* Convert to big endian */
361         hdr.blksz = htonl(cfs.blksz);
362         hdr.nblocks = htonl(hdr.nblocks);
363         /* Write headers into pre-allocated space */
364         lseek(cfs.fdw, 0, SEEK_SET);
365         if (writev(cfs.fdw, iov, 2) < 0) {
366                 err(1, "writev(%s)", oname);
367                 /* Not reached */
368         }
369         cleanfile = NULL;
370         close(cfs.fdw);
371
372         exit(0);
373 }
374
375 static struct mkuz_blk *
376 readblock(int fd, u_int32_t clstsize)
377 {
378         int numread;
379         struct mkuz_blk *rval;
380         static int blockcnt;
381         off_t cpos;
382
383         rval = mkuz_blk_ctor(clstsize);
384
385         rval->info.blkno = blockcnt;
386         blockcnt += 1;
387         cpos = lseek(fd, 0, SEEK_CUR);
388         if (cpos < 0) {
389                 err(1, "readblock: lseek() failed");
390                 /* Not reached */
391         }
392         rval->info.offset = cpos;
393
394         numread = read(fd, rval->data, clstsize);
395         if (numread < 0) {
396                 err(1, "readblock: read() failed");
397                 /* Not reached */
398         }
399         if (numread == 0) {
400                 free(rval);
401                 return MKUZ_BLK_EOF;
402         }
403         rval->info.len = numread;
404         return rval;
405 }
406
/* Print the command-line synopsis to stderr and exit with status 1. */
static void
usage(void)
{
        static const char synopsis[] =
            "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
            "[-j ncompr] infile\n";

        fputs(synopsis, stderr);
        exit(1);
}
415
/*
 * malloc(3) wrapper that never returns NULL: when the allocation fails the
 * program is terminated through err(3) with exit status 1.
 */
void *
mkuz_safe_malloc(size_t size)
{
        void *mem = malloc(size);

        if (mem == NULL)
                err(1, "can't allocate memory");
        return (mem);
}
428
/*
 * Like mkuz_safe_malloc(), but the returned memory is cleared to zero
 * bytes before it is handed back.
 */
void *
mkuz_safe_zmalloc(size_t size)
{
        void *mem = mkuz_safe_malloc(size);

        memset(mem, 0, size);
        return (mem);
}
438
439 static void
440 cleanup(void)
441 {
442
443         if (cleanfile != NULL)
444                 unlink(cleanfile);
445 }
446
/*
 * Test whether all size bytes starting at memory are equal to val.
 *
 * Returns 1 when every byte matches and 0 otherwise.  An empty range
 * (size == 0) is reported as matching without touching the buffer;
 * without that guard the first byte would be read anyway and size - 1
 * would wrap around to SIZE_MAX in the memcmp() call.
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
    const unsigned char *mm;

    if (size == 0)
        return (1);

    mm = (const unsigned char *)memory;
    /*
     * The first byte equals val and every byte equals its successor,
     * hence all bytes equal val.
     */
    return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;
}