]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/libarchive/libarchive/archive_read_support_format_mtree.c
MFH r328332:
[FreeBSD/stable/10.git] / contrib / libarchive / libarchive / archive_read_support_format_mtree.c
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * Copyright (c) 2008 Joerg Sonnenberger
4  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD$");
30
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_FCNTL_H
38 #include <fcntl.h>
39 #endif
40 #include <stddef.h>
41 /* #include <stdint.h> */ /* See archive_platform.h */
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 #ifdef HAVE_STRING_H
46 #include <string.h>
47 #endif
48
49 #include "archive.h"
50 #include "archive_entry.h"
51 #include "archive_private.h"
52 #include "archive_read_private.h"
53 #include "archive_string.h"
54 #include "archive_pack_dev.h"
55
/* Fallbacks: define these <fcntl.h> flags as 0 when the platform
 * headers do not provide them. */
56 #ifndef O_BINARY
57 #define O_BINARY 0
58 #endif
59 #ifndef O_CLOEXEC
60 #define O_CLOEXEC       0
61 #endif
62
/*
 * One distinct bit per mtree keyword.
 * NOTE(review): presumably OR-ed into the `int *` bit set that
 * parse_line()/parse_keyword() receive, to record which keywords an
 * entry supplied -- the users are outside this view; confirm.
 */
63 #define MTREE_HAS_DEVICE        0x0001
64 #define MTREE_HAS_FFLAGS        0x0002
65 #define MTREE_HAS_GID           0x0004
66 #define MTREE_HAS_GNAME         0x0008
67 #define MTREE_HAS_MTIME         0x0010
68 #define MTREE_HAS_NLINK         0x0020
69 #define MTREE_HAS_PERM          0x0040
70 #define MTREE_HAS_SIZE          0x0080
71 #define MTREE_HAS_TYPE          0x0100
72 #define MTREE_HAS_UID           0x0200
73 #define MTREE_HAS_UNAME         0x0400
74
75 #define MTREE_HAS_OPTIONAL      0x0800
76 #define MTREE_HAS_NOCHANGE      0x1000 /* FreeBSD specific */
77
/* Number of slots in struct mtree's entry_hashtable[]. */
78 #define MTREE_HASHTABLE_SIZE 1024
79
/* Line-length limit (1 MiB): next_line() gives up with -1 once it has
 * scanned this many bytes without finding a line terminator. */
80 #define MAX_LINE_LEN            (1024 * 1024)
81
/*
 * Node of a singly linked list of keyword settings.
 *
 * `value` is a heap-allocated, NUL-terminated copy of the option text
 * as stored by add_option(); remove_option() matches it as either
 * "keyword" or "keyword=...".  Nodes and their values are released by
 * free_options() and remove_option().
 */
82 struct mtree_option {
83         struct mtree_option *next;      /* next setting, or NULL at the tail */
84         char *value;                    /* owned option text */
85 };
86
/*
 * One entry of the parsed mtree specification.
 *
 * Entries are chained through `next`, starting at struct mtree's
 * `entries`; cleanup() walks that chain and frees `name`, the
 * `options` list and the node itself.
 */
87 struct mtree_entry {
88         struct mtree_entry *next;       /* next entry in the list */
89         struct mtree_option *options;   /* settings attached to this entry */
90         char *name;                     /* heap-allocated; freed by cleanup() */
91         char full;      /* NOTE(review): set/read outside this view */
92         char used;      /* NOTE(review): set/read outside this view */
93         unsigned int name_hash; /* presumably hash(name) -- confirm */
94         struct mtree_entry *hashtable_next; /* presumably the bucket chain
                                             * in entry_hashtable -- confirm */
95 };
96
/*
 * Per-archive state of the mtree reader.  It is allocated zero-filled
 * by archive_read_support_format_mtree() and torn down by cleanup().
 * Members without a comment are only used outside the portion of the
 * file reviewed here.
 */
97 struct mtree {
98         struct archive_string    line;  /* released by cleanup() */
99         size_t                   buffsize;
100         char                    *buff; /* heap buffer; freed by cleanup() */
101         int64_t                  offset;
102         int                      fd;    /* set to -1 at registration */
103         int                      archive_format;
104         const char              *archive_format_name;
105         struct mtree_entry      *entries; /* head of the entry list */
106         struct mtree_entry      *this_entry;
107         struct mtree_entry      *entry_hashtable[MTREE_HASHTABLE_SIZE];
108         struct archive_string    current_dir;   /* released by cleanup() */
109         struct archive_string    contents_name; /* released by cleanup() */
110
111         struct archive_entry_linkresolver *resolver; /* freed by cleanup() */
112
113         int64_t                  cur_size;
114         char checkfs;   /* 1 when the "checkfs" option was given a
                         * non-empty value, otherwise 0 */
115 };
116
/*
 * Forward declarations of file-local helpers, so they can be
 * referenced before their definitions further down.
 */
117 static int      bid_keycmp(const char *, const char *, ssize_t);
118 static int      cleanup(struct archive_read *);
119 static int      detect_form(struct archive_read *, int *);
120 static unsigned int     hash(const char *);
121 static int      mtree_bid(struct archive_read *, int);
122 static int      parse_file(struct archive_read *, struct archive_entry *,
123                     struct mtree *, struct mtree_entry *, int *);
124 static void     parse_escapes(char *, struct mtree_entry *);
125 static int      parse_line(struct archive_read *, struct archive_entry *,
126                     struct mtree *, struct mtree_entry *, int *);
127 static int      parse_keyword(struct archive_read *, struct mtree *,
128                     struct archive_entry *, struct mtree_option *, int *);
129 static int      read_data(struct archive_read *a,
130                     const void **buff, size_t *size, int64_t *offset);
131 static ssize_t  readline(struct archive_read *, struct mtree *, char **, ssize_t);
132 static int      skip(struct archive_read *a);
133 static int      read_header(struct archive_read *,
134                     struct archive_entry *);
135 static int64_t  mtree_atol(char **, int base);
136
/*
 * Largest value a time_t can hold, widened to int64_t.
 *
 * TIME_T_MAX is not standardized, so when the platform does not define
 * it the limit is derived here.  TODO: Move this to configure time,
 * but be careful about cross-compile environments.
 */
static int64_t
get_time_t_max(void)
{
#if defined(TIME_T_MAX)
        return TIME_T_MAX;
#else
        /*
         * ISO C permits a floating-point time_t, but POSIX demands an
         * integer type; the tests below rely on the POSIX rule.
         */
        const int is_unsigned = (((time_t)0) < ((time_t)-1));

        if (is_unsigned) {
                /* All bits set is the maximum of an unsigned type. */
                return (~(time_t)0);
        }
        /* Signed: assume time_t has the width of int64_t or int32_t. */
        if (sizeof(time_t) != sizeof(int64_t))
                return (time_t)INT32_MAX;
        return (time_t)INT64_MAX;
#endif
}
166
/*
 * Smallest value a time_t can hold, widened to int64_t.
 *
 * Uses TIME_T_MIN when the platform defines it; otherwise the limit is
 * derived from the signedness and size of time_t.
 */
static int64_t
get_time_t_min(void)
{
#if defined(TIME_T_MIN)
        return TIME_T_MIN;
#else
        const int is_unsigned = (((time_t)0) < ((time_t)-1));

        if (is_unsigned) {
                /* An unsigned time_t cannot go below zero. */
                return (time_t)0;
        }
        /* Signed: assume time_t has the width of int64_t or int32_t. */
        if (sizeof(time_t) != sizeof(int64_t))
                return (time_t)INT32_MIN;
        return (time_t)INT64_MIN;
#endif
}
186
187 static int
188 archive_read_format_mtree_options(struct archive_read *a,
189     const char *key, const char *val)
190 {
191         struct mtree *mtree;
192
193         mtree = (struct mtree *)(a->format->data);
194         if (strcmp(key, "checkfs")  == 0) {
195                 /* Allows to read information missing from the mtree from the file system */
196                 if (val == NULL || val[0] == 0) {
197                         mtree->checkfs = 0;
198                 } else {
199                         mtree->checkfs = 1;
200                 }
201                 return (ARCHIVE_OK);
202         }
203
204         /* Note: The "warn" return is just to inform the options
205          * supervisor that we didn't handle it.  It will generate
206          * a suitable error if no one used this option. */
207         return (ARCHIVE_WARN);
208 }
209
210 static void
211 free_options(struct mtree_option *head)
212 {
213         struct mtree_option *next;
214
215         for (; head != NULL; head = next) {
216                 next = head->next;
217                 free(head->value);
218                 free(head);
219         }
220 }
221
222 int
223 archive_read_support_format_mtree(struct archive *_a)
224 {
225         struct archive_read *a = (struct archive_read *)_a;
226         struct mtree *mtree;
227         int r;
228
229         archive_check_magic(_a, ARCHIVE_READ_MAGIC,
230             ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
231
232         mtree = (struct mtree *)calloc(1, sizeof(*mtree));
233         if (mtree == NULL) {
234                 archive_set_error(&a->archive, ENOMEM,
235                     "Can't allocate mtree data");
236                 return (ARCHIVE_FATAL);
237         }
238         mtree->fd = -1;
239
240         r = __archive_read_register_format(a, mtree, "mtree",
241            mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL);
242
243         if (r != ARCHIVE_OK)
244                 free(mtree);
245         return (ARCHIVE_OK);
246 }
247
248 static int
249 cleanup(struct archive_read *a)
250 {
251         struct mtree *mtree;
252         struct mtree_entry *p, *q;
253
254         mtree = (struct mtree *)(a->format->data);
255
256         p = mtree->entries;
257         while (p != NULL) {
258                 q = p->next;
259                 free(p->name);
260                 free_options(p->options);
261                 free(p);
262                 p = q;
263         }
264         archive_string_free(&mtree->line);
265         archive_string_free(&mtree->current_dir);
266         archive_string_free(&mtree->contents_name);
267         archive_entry_linkresolver_free(mtree->resolver);
268
269         free(mtree->buff);
270         free(mtree);
271         (a->format->data) = NULL;
272         return (ARCHIVE_OK);
273 }
274
/*
 * Measure the line that starts at `b`, looking at no more than
 * `avail` bytes.
 *
 * Returns the line length including its terminator ("\r\n", a lone
 * '\r', or '\n'), `avail` when no terminator was found, or -1 when a
 * NUL byte is met first.  When `nlsize` is not NULL it receives the
 * terminator length: 2, 1, or 0 if there is none.
 */
static ssize_t
get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
{
        ssize_t pos;
        ssize_t result = avail; /* default: no terminator in range */
        ssize_t eol = 0;

        for (pos = 0; pos < avail; pos++) {
                const char c = b[pos];

                if (c == '\0') {
                        /* Non-ascii character or control character. */
                        result = -1;
                        break;
                }
                if (c == '\r' && avail - pos > 1 && b[pos + 1] == '\n') {
                        eol = 2;
                        result = pos + 2;
                        break;
                }
                if (c == '\r' || c == '\n') {
                        eol = 1;
                        result = pos + 1;
                        break;
                }
        }

        if (nlsize != NULL)
                *nlsize = eol;
        return (result);
}
308
/*
 * Find the end of the line that starts at *b, asking the reader for
 * more look-ahead bytes while no line terminator is in sight.
 *
 *   b       in/out: start of the line inside the look-ahead buffer;
 *           re-based whenever the buffer is fetched again.
 *   avail   in/out: bytes available starting at *b.
 *   ravail  in/out: bytes available in the whole look-ahead buffer.
 *   nl      out: terminator length as reported by get_line_size().
 *
 * Returns the line length including its terminator; -1 when
 * get_line_size() reports a NUL byte or when MAX_LINE_LEN bytes were
 * scanned without a terminator; 0 when a further read-ahead yields no
 * additional bytes.  If the end of input is reached with an
 * unterminated line, the return value equals *avail and *nl is 0.
 *
 * The diagram below shows how the quantities relate.
 */
309 /*
310  *  <---------------- ravail --------------------->
311  *  <-- diff ------> <---  avail ----------------->
312  *                   <---- len ----------->
313  * | Previous lines | line being parsed  nl extra |
314  *                  ^
315  *                  b
316  *
317  */
318 static ssize_t
319 next_line(struct archive_read *a,
320     const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
321 {
322         ssize_t len;
323         int quit;
324         
325         quit = 0;
326         if (*avail == 0) {
                /* Nothing buffered yet: force the refill loop below. */
327                 *nl = 0;
328                 len = 0;
329         } else
330                 len = get_line_size(*b, *avail, nl);
331         /*
332          * Read bytes more while it does not reach the end of line.
333          */
334         while (*nl == 0 && len == *avail && !quit) {
                /* Offset of the current line from the buffer start. */
335                 ssize_t diff = *ravail - *avail;
                /* Request size: *ravail rounded up to a multiple of 1024. */
336                 size_t nbytes_req = (*ravail+1023) & ~1023U;
337                 ssize_t tested;
338
339                 /*
340                  * Place an arbitrary limit on the line length.
341                  * mtree is almost free-form input and without line length limits,
342                  * it can consume a lot of memory.
343                  */
344                 if (len >= MAX_LINE_LEN)
345                         return (-1);
346
347                 /* Increase reading bytes if it is not enough to at least
348                  * new two lines. */
349                 if (nbytes_req < (size_t)*ravail + 160)
350                         nbytes_req <<= 1;
351
352                 *b = __archive_read_ahead(a, nbytes_req, avail);
353                 if (*b == NULL) {
                        /* The full request failed.  NOTE(review): this
                         * relies on the helper still storing the number
                         * of bytes it does have in *avail -- confirm. */
354                         if (*ravail >= *avail)
355                                 return (0);
356                         /* Reading bytes reaches the end of file. */
357                         *b = __archive_read_ahead(a, *avail, avail);
358                         quit = 1;
359                 }
                /* Re-base the pointer and count onto the current line. */
360                 *ravail = *avail;
361                 *b += diff;
362                 *avail -= diff;
363                 tested = len;/* Skip the bytes that were already examined. */
364                 len = get_line_size(*b + len, *avail - len, nl);
365                 if (len >= 0)
366                         len += tested;
367         }
368         return (len);
369 }
370
/*
 * Check whether the text at `p` (at most `len` bytes) begins with the
 * mtree keyword `key`.
 *
 * The keyword must be followed by '=', a blank, a line terminator, or
 * a backslash immediately before a line terminator.
 *
 * Returns the keyword length on a match and 0 otherwise.
 */
static int
bid_keycmp(const char *p, const char *key, ssize_t len)
{
        int matched = 0;

        for (; len > 0 && *p != '\0' && *key != '\0';
            --len, ++p, ++key, ++matched) {
                if (*p != *key)
                        return (0);/* Not match */
        }
        /* The input ran out before the whole keyword was seen. */
        if (*key != '\0')
                return (0);/* Not match */

        /* A following character should be specified characters */
        switch (p[0]) {
        case '=':
        case ' ':
        case '\t':
        case '\n':
        case '\r':
                return (matched);
        case '\\':
                if (p[1] == '\n' || p[1] == '\r')
                        return (matched);
                return (0);/* Not match */
        default:
                return (0);/* Not match */
        }
}
401
/*
 * Check whether the text at `p` (at most `len` bytes) starts with a
 * known mtree keyword.
 *
 * The candidates are grouped by first letter, so only keywords that
 * could match are handed to bid_keycmp().
 *
 * Returns the length of the recognized keyword, or 0 when there is
 * none.
 */
static int
bid_keyword(const char *p,  ssize_t len)
{
        static const char * const keys_c[] = {
                "content", "contents", "cksum", NULL
        };
        static const char * const keys_df[] = {
                "device", "flags", NULL
        };
        static const char * const keys_g[] = {
                "gid", "gname", NULL
        };
        static const char * const keys_il[] = {
                "ignore", "inode", "link", NULL
        };
        static const char * const keys_m[] = {
                "md5", "md5digest", "mode", NULL
        };
        static const char * const keys_no[] = {
                "nlink", "nochange", "optional", NULL
        };
        static const char * const keys_r[] = {
                "resdevice", "rmd160", "rmd160digest", NULL
        };
        static const char * const keys_s[] = {
                "sha1", "sha1digest",
                "sha256", "sha256digest",
                "sha384", "sha384digest",
                "sha512", "sha512digest",
                "size", NULL
        };
        static const char * const keys_t[] = {
                "tags", "time", "type", NULL
        };
        static const char * const keys_u[] = {
                "uid", "uname", NULL
        };
        const char * const *candidate;

        switch (*p) {
        case 'c':
                candidate = keys_c;
                break;
        case 'd':
        case 'f':
                candidate = keys_df;
                break;
        case 'g':
                candidate = keys_g;
                break;
        case 'i':
        case 'l':
                candidate = keys_il;
                break;
        case 'm':
                candidate = keys_m;
                break;
        case 'n':
        case 'o':
                candidate = keys_no;
                break;
        case 'r':
                candidate = keys_r;
                break;
        case 's':
                candidate = keys_s;
                break;
        case 't':
                candidate = keys_t;
                break;
        case 'u':
                candidate = keys_u;
                break;
        default:
                return (0);/* Unknown key */
        }

        /* The first candidate that matches decides the result. */
        for (; *candidate != NULL; candidate++) {
                int matched = bid_keycmp(p, *candidate, len);

                if (matched > 0)
                        return (matched);
        }
        return (0);/* Unknown key */
}
468
469 /*
 * Test whether the text at `p` (at most `len` bytes) is a sequence of
 * mtree keywords.
 *
 * Normally each item must look like "<blanks>keyword=value".  When
 * `unset` is non-zero a bare "<blanks>keyword" is accepted as well,
 * and the word "all" ends the scan at once with a result of 1.  When
 * `last_is_path` is non-zero the leading blanks are optional and the
 * scan returns as soon as the counted bytes are used up.
 *
 * Scanning stops at a line terminator or at a backslash directly
 * followed by one.
 *
 * Returns the number of keywords found, or -1 if the sequence is
 * malformed (missing blank, unknown keyword, or '=' with no value
 * outside of "unset" mode).
 */
476 static int
477 bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
478 {
479         int l;
480         int keycnt = 0;
481
482         while (len > 0 && *p) {
483                 int blank = 0;
484
485                 /* Test whether there are blank characters in the line. */
486                 while (len >0 && (*p == ' ' || *p == '\t')) {
487                         ++p;
488                         --len;
489                         blank = 1;
490                 }
                /* End of line, or a backslash continuation: done. */
491                 if (*p == '\n' || *p == '\r')
492                         break;
493                 if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))
494                         break;
495                 if (!blank && !last_is_path) /* No blank character. */
496                         return (-1);
                /* Path-last form: the counted bytes are exhausted. */
497                 if (last_is_path && len == 0)
498                                 return (keycnt);
499
500                 if (unset) {
                        /* "all" is only meaningful in unset mode. */
501                         l = bid_keycmp(p, "all", len);
502                         if (l > 0)
503                                 return (1);
504                 }
505                 /* Test whether there is a correct key in the line. */
506                 l = bid_keyword(p, len);
507                 if (l == 0)
508                         return (-1);/* Unknown keyword was found. */
509                 p += l;
510                 len -= l;
511                 keycnt++;
512
513                 /* Skip value */
514                 if (*p == '=') {
515                         int value = 0;
516                         ++p;
517                         --len;
                        /* Only blanks end a value here; a line terminator
                         * is consumed as part of it. */
518                         while (len > 0 && *p != ' ' && *p != '\t') {
519                                 ++p;
520                                 --len;
521                                 value = 1;
522                         }
523                         /* A keyword should have a its value unless
524                          * "/unset" operation. */ 
525                         if (!unset && value == 0)
526                                 return (-1);
527                 }
528         }
529         return (keycnt);
530 }
531
/*
 * Examine one candidate entry line of `len` bytes (the last `nl` of
 * which are the line terminator) and decide whether it looks like an
 * mtree entry.
 *
 * Two layouts are tried:
 *  - path first: a leading run of "safe" characters ended by white
 *    space (or by the end of the line), followed by keywords;
 *  - path last ("form D"): keywords first and, as the final field, a
 *    path that contains a '/' but does not start with one.
 *
 * *last_is_path is set to 1 when the second layout was used and to 0
 * otherwise.  Returns what bid_keyword_list() reports for the keyword
 * part (the keyword count, or -1), or -1 directly when neither layout
 * fits.
 */
532 static int
533 bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
534 {
535         int f = 0;
        /* 1 marks a byte that may appear in an unquoted path name. */
536         static const unsigned char safe_char[256] = {
537                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
538                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
539                 /* !"$%&'()*+,-./  EXCLUSION:( )(#) */
540                 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
541                 /* 0123456789:;<>?  EXCLUSION:(=) */
542                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */
543                 /* @ABCDEFGHIJKLMNO */
544                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
545                 /* PQRSTUVWXYZ[\]^_  */
546                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
547                 /* `abcdefghijklmno */
548                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
549                 /* pqrstuvwxyz{|}~ */
550                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
551                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
552                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
553                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
554                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
555                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
556                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
557                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
558                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
559         };
560         ssize_t ll;
561         const char *pp = p;
562         const char * const pp_end = pp + len;
563
564         *last_is_path = 0;
565         /*
566          * Skip the path-name which is quoted.
567          */
        /* f ends up 1 only if at least one safe byte was seen and the
         * run was ended by white space or by the end of the line. */
568         for (;pp < pp_end; ++pp) {
569                 if (!safe_char[*(const unsigned char *)pp]) {
570                         if (*pp != ' ' && *pp != '\t' && *pp != '\r'
571                             && *pp != '\n')
572                                 f = 0;
573                         break;
574                 }
575                 f = 1;
576         }
        /* Bytes left after the leading name: the keyword part. */
577         ll = pp_end - pp;
578
579         /* If a path-name was not found at the first, try to check
580          * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates,
581          * which places the path-name at the last. */
582         if (f == 0) {
                /* pb starts at the first byte of the line terminator. */
583                 const char *pb = p + len - nl;
584                 int name_len = 0;
585                 int slash;
586
587                 /* The form D accepts only a single line for an entry. */
588                 if (pb-2 >= p &&
589                     pb[-1] == '\\' && (pb[-2] == ' ' || pb[-2] == '\t'))
590                         return (-1);
591                 if (pb-1 >= p && pb[-1] == '\\')
592                         return (-1);
593
594                 slash = 0;
                /* Walk backwards over the last field, up to a blank or
                 * the start of the line. */
595                 while (p <= --pb && *pb != ' ' && *pb != '\t') {
596                         if (!safe_char[*(const unsigned char *)pb])
597                                 return (-1);
598                         name_len++;
599                         /* The pathname should have a slash in this
600                          * format. */
601                         if (*pb == '/')
602                                 slash = 1;
603                 }
604                 if (name_len == 0 || slash == 0)
605                         return (-1);
606                 /* If '/' is placed at the first in this field, this is not
607                  * a valid filename. */
608                 if (pb[1] == '/')
609                         return (-1);
                /* The keyword part is everything before the trailing path. */
610                 ll = len - nl - name_len;
611                 pp = p;
612                 *last_is_path = 1;
613         }
614
615         return (bid_keyword_list(pp, ll, 0, *last_is_path));
616 }
617
/* Number of valid entries after which detect_form() stops scanning
 * and accepts the input as mtree. */
618 #define MAX_BID_ENTRY   3
619
/*
 * Format "bid" callback: estimate how likely the input is mtree.
 *
 * Input starting with the "#mtree" signature scores 8 points per
 * signature byte.  Without the signature the decision is left to
 * detect_form().  Returns -1 if the signature-sized prefix cannot be
 * read.  `best_bid` is not used.
 */
static int
mtree_bid(struct archive_read *a, int best_bid)
{
        const char *signature = "#mtree";
        const size_t siglen = strlen(signature);
        const char *head;

        (void)best_bid; /* UNUSED */

        /* Now let's look at the actual header and see if it matches. */
        head = __archive_read_ahead(a, siglen, NULL);
        if (head == NULL)
                return (-1);

        if (memcmp(head, signature, siglen) != 0) {
                /*
                 * There is not a mtree signature. Let's try to detect
                 * mtree format.
                 */
                return (detect_form(a, NULL));
        }
        return (8 * (int)siglen);
}
641
/*
 * Decide heuristically, from the look-ahead data, whether the input
 * is an mtree specification that lacks the "#mtree" signature.
 *
 * Lines are examined one at a time: comments and empty lines are
 * skipped, "/set" and "/unset" lines must carry a valid keyword list,
 * and every other line must pass bid_entry().  Scanning stops at the
 * first line that does not fit, at the end of the data, or after
 * MAX_BID_ENTRY entries were accepted.
 *
 * `multiline` tracks backslash continuation: 0 = none, 1 = inside an
 * entry, 2 = inside a "/set" or "/unset" line.
 * `form_D` is 0 while undecided, 1 once a path-last entry was seen,
 * and -1 once a path-first entry with keywords was seen.
 *
 * Returns -1 if no data can be read, 32 if the input is accepted as
 * mtree, and 0 otherwise.  If `is_form_d` is not NULL it is set to 1
 * when the input was accepted and is in form D, and to 0 otherwise.
 */
642 static int
643 detect_form(struct archive_read *a, int *is_form_d)
644 {
645         const char *p;
646         ssize_t avail, ravail;
        /* detected_bytes is accumulated but not read in this function. */
647         ssize_t detected_bytes = 0, len, nl;
648         int entry_cnt = 0, multiline = 0;
649         int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
650                         * (In this source we call it `form D') . */
651
652         if (is_form_d != NULL)
653                 *is_form_d = 0;
654         p = __archive_read_ahead(a, 1, &avail);
655         if (p == NULL)
656                 return (-1);
657         ravail = avail;
658         for (;;) {
659                 len = next_line(a, &p, &avail, &ravail, &nl);
660                 /* The terminal character of the line should be
661                  * a new line character, '\r\n' or '\n'. */
662                 if (len <= 0 || nl == 0)
663                         break;
664                 if (!multiline) {
665                         /* Leading whitespace is never significant,
666                          * ignore it. */
667                         while (len > 0 && (*p == ' ' || *p == '\t')) {
668                                 ++p;
669                                 --avail;
670                                 --len;
671                         }
672                         /* Skip comment or empty line. */ 
673                         if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
674                                 p += len;
675                                 avail -= len;
676                                 continue;
677                         }
678                 } else {
679                         /* A continuance line; the terminal
680                          * character of previous line was '\' character. */
681                         if (bid_keyword_list(p, len, 0, 0) <= 0)
682                                 break;
683                         if (multiline == 1)
684                                 detected_bytes += len;
                        /* No trailing backslash: the continued line ends
                         * here.  Only a continued entry (multiline == 1)
                         * counts toward entry_cnt. */
685                         if (p[len-nl-1] != '\\') {
686                                 if (multiline == 1 &&
687                                     ++entry_cnt >= MAX_BID_ENTRY)
688                                         break;
689                                 multiline = 0;
690                         }
691                         p += len;
692                         avail -= len;
693                         continue;
694                 }
695                 if (p[0] != '/') {
                        /* An ordinary entry line. */
696                         int last_is_path, keywords;
697
698                         keywords = bid_entry(p, len, nl, &last_is_path);
699                         if (keywords >= 0) {
700                                 detected_bytes += len;
701                                 if (form_D == 0) {
702                                         if (last_is_path)
703                                                 form_D = 1;
704                                         else if (keywords > 0)
705                                                 /* This line is not `form D'. */
706                                                 form_D = -1;
707                                 } else if (form_D == 1) {
708                                         if (!last_is_path && keywords > 0)
709                                                 /* This line is not `form D'
710                                                  * and we cannot accept mixed
711                                                  * format. */
712                                                 break;
713                                 }
714                                 if (!last_is_path && p[len-nl-1] == '\\')
715                                         /* This line continues. */
716                                         multiline = 1;
717                                 else {
718                                         /* We've got plenty of correct lines
719                                          * to assume that this file is a mtree
720                                          * format. */
721                                         if (++entry_cnt >= MAX_BID_ENTRY)
722                                                 break;
723                                 }
724                         } else
725                                 break;
726                 } else if (len > 4 && strncmp(p, "/set", 4) == 0) {
727                         if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
728                                 break;
729                         /* This line continues. */
730                         if (p[len-nl-1] == '\\')
731                                 multiline = 2;
732                 } else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
733                         if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
734                                 break;
735                         /* This line continues. */
736                         if (p[len-nl-1] == '\\')
737                                 multiline = 2;
738                 } else
739                         break;
740
741                 /* Test next line. */
742                 p += len;
743                 avail -= len;
744         }
        /* Accept when enough entries were seen, or when the data ran out
         * (next_line() returned 0) after at least one valid entry. */
745         if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
746                 if (is_form_d != NULL) {
747                         if (form_D == 1)
748                                 *is_form_d = 1;
749                 }
750                 return (32);
751         }
752
753         return (0);
754 }
755
756 /*
757  * The extended mtree format permits multiple lines specifying
758  * attributes for each file.  For those entries, only the last line
759  * is actually used.  Practically speaking, that means we have
760  * to read the entire mtree file into memory up front.
761  *
762  * The parsing is done in two steps.  First, it is decided if a line
763  * changes the global defaults and if it is, processed accordingly.
764  * Otherwise, the options of the line are merged with the current
765  * global options.
766  */
767 static int
768 add_option(struct archive_read *a, struct mtree_option **global,
769     const char *value, size_t len)
770 {
771         struct mtree_option *opt;
772
773         if ((opt = malloc(sizeof(*opt))) == NULL) {
774                 archive_set_error(&a->archive, errno, "Can't allocate memory");
775                 return (ARCHIVE_FATAL);
776         }
777         if ((opt->value = malloc(len + 1)) == NULL) {
778                 free(opt);
779                 archive_set_error(&a->archive, errno, "Can't allocate memory");
780                 return (ARCHIVE_FATAL);
781         }
782         memcpy(opt->value, value, len);
783         opt->value[len] = '\0';
784         opt->next = *global;
785         *global = opt;
786         return (ARCHIVE_OK);
787 }
788
789 static void
790 remove_option(struct mtree_option **global, const char *value, size_t len)
791 {
792         struct mtree_option *iter, *last;
793
794         last = NULL;
795         for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
796                 if (strncmp(iter->value, value, len) == 0 &&
797                     (iter->value[len] == '\0' ||
798                      iter->value[len] == '='))
799                         break;
800         }
801         if (iter == NULL)
802                 return;
803         if (last == NULL)
804                 *global = iter->next;
805         else
806                 last->next = iter->next;
807
808         free(iter->value);
809         free(iter);
810 }
811
812 static int
813 process_global_set(struct archive_read *a,
814     struct mtree_option **global, const char *line)
815 {
816         const char *next, *eq;
817         size_t len;
818         int r;
819
820         line += 4;
821         for (;;) {
822                 next = line + strspn(line, " \t\r\n");
823                 if (*next == '\0')
824                         return (ARCHIVE_OK);
825                 line = next;
826                 next = line + strcspn(line, " \t\r\n");
827                 eq = strchr(line, '=');
828                 if (eq > next)
829                         len = next - line;
830                 else
831                         len = eq - line;
832
833                 remove_option(global, line, len);
834                 r = add_option(a, global, line, next - line);
835                 if (r != ARCHIVE_OK)
836                         return (r);
837                 line = next;
838         }
839 }
840
841 static int
842 process_global_unset(struct archive_read *a,
843     struct mtree_option **global, const char *line)
844 {
845         const char *next;
846         size_t len;
847
848         line += 6;
849         if (strchr(line, '=') != NULL) {
850                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
851                     "/unset shall not contain `='");
852                 return ARCHIVE_FATAL;
853         }
854
855         for (;;) {
856                 next = line + strspn(line, " \t\r\n");
857                 if (*next == '\0')
858                         return (ARCHIVE_OK);
859                 line = next;
860                 len = strcspn(line, " \t\r\n");
861
862                 if (len == 3 && strncmp(line, "all", 3) == 0) {
863                         free_options(*global);
864                         *global = NULL;
865                 } else {
866                         remove_option(global, line, len);
867                 }
868
869                 line += len;
870         }
871 }
872
873 static int
874 process_add_entry(struct archive_read *a, struct mtree *mtree,
875     struct mtree_option **global, const char *line, ssize_t line_len,
876     struct mtree_entry **last_entry, int is_form_d)
877 {
878         struct mtree_entry *entry, *ht_iter;
879         struct mtree_option *iter;
880         const char *next, *eq, *name, *end;
881         size_t name_len, len;
882         int r, i;
883         unsigned int ht_idx;
884
885         if ((entry = malloc(sizeof(*entry))) == NULL) {
886                 archive_set_error(&a->archive, errno, "Can't allocate memory");
887                 return (ARCHIVE_FATAL);
888         }
889         entry->next = NULL;
890         entry->options = NULL;
891         entry->name = NULL;
892         entry->used = 0;
893         entry->full = 0;
894         entry->name_hash = 0;
895         entry->hashtable_next = NULL;
896
897         /* Add this entry to list. */
898         if (*last_entry == NULL)
899                 mtree->entries = entry;
900         else
901                 (*last_entry)->next = entry;
902         *last_entry = entry;
903
904         if (is_form_d) {
905                 /* Filename is last item on line. */
906                 /* Adjust line_len to trim trailing whitespace */
907                 while (line_len > 0) {
908                         char last_character = line[line_len - 1];
909                         if (last_character == '\r'
910                             || last_character == '\n'
911                             || last_character == '\t'
912                             || last_character == ' ') {
913                                 line_len--;
914                         } else {
915                                 break;
916                         }
917                 }
918                 /* Name starts after the last whitespace separator */
919                 name = line;
920                 for (i = 0; i < line_len; i++) {
921                         if (line[i] == '\r'
922                             || line[i] == '\n'
923                             || line[i] == '\t'
924                             || line[i] == ' ') {
925                                 name = line + i + 1;
926                         }
927                 }
928                 name_len = line + line_len - name;
929                 end = name;
930         } else {
931                 /* Filename is first item on line */
932                 name_len = strcspn(line, " \t\r\n");
933                 name = line;
934                 line += name_len;
935                 end = line + line_len;
936         }
937         /* name/name_len is the name within the line. */
938         /* line..end brackets the entire line except the name */
939
940         if ((entry->name = malloc(name_len + 1)) == NULL) {
941                 archive_set_error(&a->archive, errno, "Can't allocate memory");
942                 return (ARCHIVE_FATAL);
943         }
944
945         memcpy(entry->name, name, name_len);
946         entry->name[name_len] = '\0';
947         parse_escapes(entry->name, entry);
948         entry->name_hash = hash(entry->name);
949
950         ht_idx = entry->name_hash % MTREE_HASHTABLE_SIZE;
951         if ((ht_iter = mtree->entry_hashtable[ht_idx]) != NULL) {
952                 while (ht_iter->hashtable_next)
953                         ht_iter = ht_iter->hashtable_next;
954                 ht_iter->hashtable_next = entry;
955         } else {
956                 mtree->entry_hashtable[ht_idx] = entry;
957         }
958
959         for (iter = *global; iter != NULL; iter = iter->next) {
960                 r = add_option(a, &entry->options, iter->value,
961                     strlen(iter->value));
962                 if (r != ARCHIVE_OK)
963                         return (r);
964         }
965
966         for (;;) {
967                 next = line + strspn(line, " \t\r\n");
968                 if (*next == '\0')
969                         return (ARCHIVE_OK);
970                 if (next >= end)
971                         return (ARCHIVE_OK);
972                 line = next;
973                 next = line + strcspn(line, " \t\r\n");
974                 eq = strchr(line, '=');
975                 if (eq == NULL || eq > next)
976                         len = next - line;
977                 else
978                         len = eq - line;
979
980                 remove_option(&entry->options, line, len);
981                 r = add_option(a, &entry->options, line, next - line);
982                 if (r != ARCHIVE_OK)
983                         return (r);
984                 line = next;
985         }
986 }
987
988 static int
989 read_mtree(struct archive_read *a, struct mtree *mtree)
990 {
991         ssize_t len;
992         uintmax_t counter;
993         char *p;
994         struct mtree_option *global;
995         struct mtree_entry *last_entry;
996         int r, is_form_d;
997
998         mtree->archive_format = ARCHIVE_FORMAT_MTREE;
999         mtree->archive_format_name = "mtree";
1000
1001         global = NULL;
1002         last_entry = NULL;
1003
1004         (void)detect_form(a, &is_form_d);
1005
1006         for (counter = 1; ; ++counter) {
1007                 len = readline(a, mtree, &p, 65536);
1008                 if (len == 0) {
1009                         mtree->this_entry = mtree->entries;
1010                         free_options(global);
1011                         return (ARCHIVE_OK);
1012                 }
1013                 if (len < 0) {
1014                         free_options(global);
1015                         return ((int)len);
1016                 }
1017                 /* Leading whitespace is never significant, ignore it. */
1018                 while (*p == ' ' || *p == '\t') {
1019                         ++p;
1020                         --len;
1021                 }
1022                 /* Skip content lines and blank lines. */
1023                 if (*p == '#')
1024                         continue;
1025                 if (*p == '\r' || *p == '\n' || *p == '\0')
1026                         continue;
1027                 if (*p != '/') {
1028                         r = process_add_entry(a, mtree, &global, p, len,
1029                             &last_entry, is_form_d);
1030                 } else if (len > 4 && strncmp(p, "/set", 4) == 0) {
1031                         if (p[4] != ' ' && p[4] != '\t')
1032                                 break;
1033                         r = process_global_set(a, &global, p);
1034                 } else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
1035                         if (p[6] != ' ' && p[6] != '\t')
1036                                 break;
1037                         r = process_global_unset(a, &global, p);
1038                 } else
1039                         break;
1040
1041                 if (r != ARCHIVE_OK) {
1042                         free_options(global);
1043                         return r;
1044                 }
1045         }
1046
1047         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1048             "Can't parse line %ju", counter);
1049         free_options(global);
1050         return (ARCHIVE_FATAL);
1051 }
1052
1053 /*
1054  * Read in the entire mtree file into memory on the first request.
1055  * Then use the next unused file to satisfy each header request.
1056  */
1057 static int
1058 read_header(struct archive_read *a, struct archive_entry *entry)
1059 {
1060         struct mtree *mtree;
1061         char *p;
1062         int r, use_next;
1063
1064         mtree = (struct mtree *)(a->format->data);
1065
1066         if (mtree->fd >= 0) {
1067                 close(mtree->fd);
1068                 mtree->fd = -1;
1069         }
1070
1071         if (mtree->entries == NULL) {
1072                 mtree->resolver = archive_entry_linkresolver_new();
1073                 if (mtree->resolver == NULL)
1074                         return ARCHIVE_FATAL;
1075                 archive_entry_linkresolver_set_strategy(mtree->resolver,
1076                     ARCHIVE_FORMAT_MTREE);
1077                 r = read_mtree(a, mtree);
1078                 if (r != ARCHIVE_OK)
1079                         return (r);
1080         }
1081
1082         a->archive.archive_format = mtree->archive_format;
1083         a->archive.archive_format_name = mtree->archive_format_name;
1084
1085         for (;;) {
1086                 if (mtree->this_entry == NULL)
1087                         return (ARCHIVE_EOF);
1088                 if (strcmp(mtree->this_entry->name, "..") == 0) {
1089                         mtree->this_entry->used = 1;
1090                         if (archive_strlen(&mtree->current_dir) > 0) {
1091                                 /* Roll back current path. */
1092                                 p = mtree->current_dir.s
1093                                     + mtree->current_dir.length - 1;
1094                                 while (p >= mtree->current_dir.s && *p != '/')
1095                                         --p;
1096                                 if (p >= mtree->current_dir.s)
1097                                         --p;
1098                                 mtree->current_dir.length
1099                                     = p - mtree->current_dir.s + 1;
1100                         }
1101                 }
1102                 if (!mtree->this_entry->used) {
1103                         use_next = 0;
1104                         r = parse_file(a, entry, mtree, mtree->this_entry,
1105                                 &use_next);
1106                         if (use_next == 0)
1107                                 return (r);
1108                 }
1109                 mtree->this_entry = mtree->this_entry->next;
1110         }
1111 }
1112
1113 /*
1114  * A single file can have multiple lines contribute specifications.
1115  * Parse as many lines as necessary, then pull additional information
1116  * from a backing file on disk as necessary.
1117  */
1118 static int
1119 parse_file(struct archive_read *a, struct archive_entry *entry,
1120     struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1121 {
1122         const char *path;
1123         struct stat st_storage, *st;
1124         struct mtree_entry *mp;
1125         struct archive_entry *sparse_entry;
1126         int r = ARCHIVE_OK, r1, parsed_kws;
1127
1128         mentry->used = 1;
1129
1130         /* Initialize reasonable defaults. */
1131         archive_entry_set_filetype(entry, AE_IFREG);
1132         archive_entry_set_size(entry, 0);
1133         archive_string_empty(&mtree->contents_name);
1134
1135         /* Parse options from this line. */
1136         parsed_kws = 0;
1137         r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1138
1139         if (mentry->full) {
1140                 archive_entry_copy_pathname(entry, mentry->name);
1141                 /*
1142                  * "Full" entries are allowed to have multiple lines
1143                  * and those lines aren't required to be adjacent.  We
1144                  * don't support multiple lines for "relative" entries
1145                  * nor do we make any attempt to merge data from
1146                  * separate "relative" and "full" entries.  (Merging
1147                  * "relative" and "full" entries would require dealing
1148                  * with pathname canonicalization, which is a very
1149                  * tricky subject.)
1150                  */
1151                 for (mp = mentry->hashtable_next; mp != NULL; mp = mp->hashtable_next) {
1152                         if (mp->full && !mp->used
1153                                         && mentry->name_hash == mp->name_hash
1154                                         && strcmp(mentry->name, mp->name) == 0) {
1155                                 /* Later lines override earlier ones. */
1156                                 mp->used = 1;
1157                                 r1 = parse_line(a, entry, mtree, mp,
1158                                     &parsed_kws);
1159                                 if (r1 < r)
1160                                         r = r1;
1161                         }
1162                 }
1163         } else {
1164                 /*
1165                  * Relative entries require us to construct
1166                  * the full path and possibly update the
1167                  * current directory.
1168                  */
1169                 size_t n = archive_strlen(&mtree->current_dir);
1170                 if (n > 0)
1171                         archive_strcat(&mtree->current_dir, "/");
1172                 archive_strcat(&mtree->current_dir, mentry->name);
1173                 archive_entry_copy_pathname(entry, mtree->current_dir.s);
1174                 if (archive_entry_filetype(entry) != AE_IFDIR)
1175                         mtree->current_dir.length = n;
1176         }
1177
1178         if (mtree->checkfs) {
1179                 /*
1180                  * Try to open and stat the file to get the real size
1181                  * and other file info.  It would be nice to avoid
1182                  * this here so that getting a listing of an mtree
1183                  * wouldn't require opening every referenced contents
1184                  * file.  But then we wouldn't know the actual
1185                  * contents size, so I don't see a really viable way
1186                  * around this.  (Also, we may want to someday pull
1187                  * other unspecified info from the contents file on
1188                  * disk.)
1189                  */
1190                 mtree->fd = -1;
1191                 if (archive_strlen(&mtree->contents_name) > 0)
1192                         path = mtree->contents_name.s;
1193                 else
1194                         path = archive_entry_pathname(entry);
1195
1196                 if (archive_entry_filetype(entry) == AE_IFREG ||
1197                                 archive_entry_filetype(entry) == AE_IFDIR) {
1198                         mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1199                         __archive_ensure_cloexec_flag(mtree->fd);
1200                         if (mtree->fd == -1 &&
1201                                 (errno != ENOENT ||
1202                                  archive_strlen(&mtree->contents_name) > 0)) {
1203                                 archive_set_error(&a->archive, errno,
1204                                                 "Can't open %s", path);
1205                                 r = ARCHIVE_WARN;
1206                         }
1207                 }
1208
1209                 st = &st_storage;
1210                 if (mtree->fd >= 0) {
1211                         if (fstat(mtree->fd, st) == -1) {
1212                                 archive_set_error(&a->archive, errno,
1213                                                 "Could not fstat %s", path);
1214                                 r = ARCHIVE_WARN;
1215                                 /* If we can't stat it, don't keep it open. */
1216                                 close(mtree->fd);
1217                                 mtree->fd = -1;
1218                                 st = NULL;
1219                         }
1220                 } else if (lstat(path, st) == -1) {
1221                         st = NULL;
1222                 }
1223
1224                 /*
1225                  * Check for a mismatch between the type in the specification
1226                  * and the type of the contents object on disk.
1227                  */
1228                 if (st != NULL) {
1229                         if (((st->st_mode & S_IFMT) == S_IFREG &&
1230                               archive_entry_filetype(entry) == AE_IFREG)
1231 #ifdef S_IFLNK
1232                           ||((st->st_mode & S_IFMT) == S_IFLNK &&
1233                               archive_entry_filetype(entry) == AE_IFLNK)
1234 #endif
1235 #ifdef S_IFSOCK
1236                           ||((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1237                               archive_entry_filetype(entry) == AE_IFSOCK)
1238 #endif
1239 #ifdef S_IFCHR
1240                           ||((st->st_mode & S_IFMT) == S_IFCHR &&
1241                               archive_entry_filetype(entry) == AE_IFCHR)
1242 #endif
1243 #ifdef S_IFBLK
1244                           ||((st->st_mode & S_IFMT) == S_IFBLK &&
1245                               archive_entry_filetype(entry) == AE_IFBLK)
1246 #endif
1247                           ||((st->st_mode & S_IFMT) == S_IFDIR &&
1248                               archive_entry_filetype(entry) == AE_IFDIR)
1249 #ifdef S_IFIFO
1250                           ||((st->st_mode & S_IFMT) == S_IFIFO &&
1251                               archive_entry_filetype(entry) == AE_IFIFO)
1252 #endif
1253                         ) {
1254                                 /* Types match. */
1255                         } else {
1256                                 /* Types don't match; bail out gracefully. */
1257                                 if (mtree->fd >= 0)
1258                                         close(mtree->fd);
1259                                 mtree->fd = -1;
1260                                 if (parsed_kws & MTREE_HAS_OPTIONAL) {
1261                                         /* It's not an error for an optional
1262                                          * entry to not match disk. */
1263                                         *use_next = 1;
1264                                 } else if (r == ARCHIVE_OK) {
1265                                         archive_set_error(&a->archive,
1266                                             ARCHIVE_ERRNO_MISC,
1267                                             "mtree specification has different"
1268                                             " type for %s",
1269                                             archive_entry_pathname(entry));
1270                                         r = ARCHIVE_WARN;
1271                                 }
1272                                 return (r);
1273                         }
1274                 }
1275
1276                 /*
1277                  * If there is a contents file on disk, pick some of the
1278                  * metadata from that file.  For most of these, we only
1279                  * set it from the contents if it wasn't already parsed
1280                  * from the specification.
1281                  */
1282                 if (st != NULL) {
1283                         if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1284                                 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1285                                 (archive_entry_filetype(entry) == AE_IFCHR ||
1286                                  archive_entry_filetype(entry) == AE_IFBLK))
1287                                 archive_entry_set_rdev(entry, st->st_rdev);
1288                         if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME))
1289                                 == 0 ||
1290                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1291                                 archive_entry_set_gid(entry, st->st_gid);
1292                         if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME))
1293                                 == 0 ||
1294                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1295                                 archive_entry_set_uid(entry, st->st_uid);
1296                         if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1297                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1298 #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1299                                 archive_entry_set_mtime(entry, st->st_mtime,
1300                                                 st->st_mtimespec.tv_nsec);
1301 #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1302                                 archive_entry_set_mtime(entry, st->st_mtime,
1303                                                 st->st_mtim.tv_nsec);
1304 #elif HAVE_STRUCT_STAT_ST_MTIME_N
1305                                 archive_entry_set_mtime(entry, st->st_mtime,
1306                                                 st->st_mtime_n);
1307 #elif HAVE_STRUCT_STAT_ST_UMTIME
1308                                 archive_entry_set_mtime(entry, st->st_mtime,
1309                                                 st->st_umtime*1000);
1310 #elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1311                                 archive_entry_set_mtime(entry, st->st_mtime,
1312                                                 st->st_mtime_usec*1000);
1313 #else
1314                                 archive_entry_set_mtime(entry, st->st_mtime, 0);
1315 #endif
1316                         }
1317                         if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1318                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1319                                 archive_entry_set_nlink(entry, st->st_nlink);
1320                         if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1321                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1322                                 archive_entry_set_perm(entry, st->st_mode);
1323                         if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1324                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1325                                 archive_entry_set_size(entry, st->st_size);
1326                         archive_entry_set_ino(entry, st->st_ino);
1327                         archive_entry_set_dev(entry, st->st_dev);
1328
1329                         archive_entry_linkify(mtree->resolver, &entry,
1330                                 &sparse_entry);
1331                 } else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1332                         /*
1333                          * Couldn't open the entry, stat it or the on-disk type
1334                          * didn't match.  If this entry is optional, just
1335                          * ignore it and read the next header entry.
1336                          */
1337                         *use_next = 1;
1338                         return ARCHIVE_OK;
1339                 }
1340         }
1341
1342         mtree->cur_size = archive_entry_size(entry);
1343         mtree->offset = 0;
1344
1345         return r;
1346 }
1347
1348 /*
1349  * Each line contains a sequence of keywords.
1350  */
1351 static int
1352 parse_line(struct archive_read *a, struct archive_entry *entry,
1353     struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1354 {
1355         struct mtree_option *iter;
1356         int r = ARCHIVE_OK, r1;
1357
1358         for (iter = mp->options; iter != NULL; iter = iter->next) {
1359                 r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1360                 if (r1 < r)
1361                         r = r1;
1362         }
1363         if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1364                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1365                     "Missing type keyword in mtree specification");
1366                 return (ARCHIVE_WARN);
1367         }
1368         return (r);
1369 }
1370
1371 /*
1372  * Device entries have one of the following forms:
1373  *  - raw dev_t
1374  *  - format,major,minor[,subdevice]
1375  * When parsing succeeded, `pdev' will contain the appropriate dev_t value.
1376  */
1377
/*
 * C90 has no strsep(), so this stand-in is built on strcspn().
 * Taken from http://unixpapa.com/incnote/string.html
 *
 * Returns the token starting at `*sp', terminating it in place at the
 * first byte found in `sep', and advances `*sp' past that separator
 * (or to the terminating NUL when no separator remains).  Returns
 * NULL when `sp' or `*sp' is NULL or the remaining string is empty.
 */
static char *
la_strsep(char **sp, const char *sep)
{
        char *token, *stop;

        if (sp == NULL)
                return(NULL);
        token = *sp;
        if (token == NULL || *token == '\0')
                return(NULL);

        stop = token + strcspn(token, sep);
        if (*stop == '\0') {
                /* Last token: leave *sp on the terminator. */
                *sp = stop;
        } else {
                *stop = '\0';
                *sp = stop + 1;
        }
        return(token);
}
1393
1394 static int
1395 parse_device(dev_t *pdev, struct archive *a, char *val)
1396 {
1397 #define MAX_PACK_ARGS 3
1398         unsigned long numbers[MAX_PACK_ARGS];
1399         char *p, *dev;
1400         int argc;
1401         pack_t *pack;
1402         dev_t result;
1403         const char *error = NULL;
1404
1405         memset(pdev, 0, sizeof(*pdev));
1406         if ((dev = strchr(val, ',')) != NULL) {
1407                 /*
1408                  * Device's major/minor are given in a specified format.
1409                  * Decode and pack it accordingly.
1410                  */
1411                 *dev++ = '\0';
1412                 if ((pack = pack_find(val)) == NULL) {
1413                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1414                             "Unknown format `%s'", val);
1415                         return ARCHIVE_WARN;
1416                 }
1417                 argc = 0;
1418                 while ((p = la_strsep(&dev, ",")) != NULL) {
1419                         if (*p == '\0') {
1420                                 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1421                                     "Missing number");
1422                                 return ARCHIVE_WARN;
1423                         }
1424                         if (argc >= MAX_PACK_ARGS) {
1425                                 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1426                                     "Too many arguments");
1427                                 return ARCHIVE_WARN;
1428                         }
1429                         numbers[argc++] = (unsigned long)mtree_atol(&p, 0);
1430                 }
1431                 if (argc < 2) {
1432                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1433                             "Not enough arguments");
1434                         return ARCHIVE_WARN;
1435                 }
1436                 result = (*pack)(argc, numbers, &error);
1437                 if (error != NULL) {
1438                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1439                             "%s", error);
1440                         return ARCHIVE_WARN;
1441                 }
1442         } else {
1443                 /* file system raw value. */
1444                 result = (dev_t)mtree_atol(&val, 0);
1445         }
1446         *pdev = result;
1447         return ARCHIVE_OK;
1448 #undef MAX_PACK_ARGS
1449 }
1450
1451 /*
1452  * Parse a single keyword and its value.
1453  */
1454 static int
1455 parse_keyword(struct archive_read *a, struct mtree *mtree,
1456     struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1457 {
1458         char *val, *key;
1459
1460         key = opt->value;
1461
1462         if (*key == '\0')
1463                 return (ARCHIVE_OK);
1464
1465         if (strcmp(key, "nochange") == 0) {
1466                 *parsed_kws |= MTREE_HAS_NOCHANGE;
1467                 return (ARCHIVE_OK);
1468         }
1469         if (strcmp(key, "optional") == 0) {
1470                 *parsed_kws |= MTREE_HAS_OPTIONAL;
1471                 return (ARCHIVE_OK);
1472         }
1473         if (strcmp(key, "ignore") == 0) {
1474                 /*
1475                  * The mtree processing is not recursive, so
1476                  * recursion will only happen for explicitly listed
1477                  * entries.
1478                  */
1479                 return (ARCHIVE_OK);
1480         }
1481
1482         val = strchr(key, '=');
1483         if (val == NULL) {
1484                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1485                     "Malformed attribute \"%s\" (%d)", key, key[0]);
1486                 return (ARCHIVE_WARN);
1487         }
1488
1489         *val = '\0';
1490         ++val;
1491
1492         switch (key[0]) {
1493         case 'c':
1494                 if (strcmp(key, "content") == 0
1495                     || strcmp(key, "contents") == 0) {
1496                         parse_escapes(val, NULL);
1497                         archive_strcpy(&mtree->contents_name, val);
1498                         break;
1499                 }
1500                 if (strcmp(key, "cksum") == 0)
1501                         break;
1502                 __LA_FALLTHROUGH;
1503         case 'd':
1504                 if (strcmp(key, "device") == 0) {
1505                         /* stat(2) st_rdev field, e.g. the major/minor IDs
1506                          * of a char/block special file */
1507                         int r;
1508                         dev_t dev;
1509
1510                         *parsed_kws |= MTREE_HAS_DEVICE;
1511                         r = parse_device(&dev, &a->archive, val);
1512                         if (r == ARCHIVE_OK)
1513                                 archive_entry_set_rdev(entry, dev);
1514                         return r;
1515                 }
1516                 __LA_FALLTHROUGH;
1517         case 'f':
1518                 if (strcmp(key, "flags") == 0) {
1519                         *parsed_kws |= MTREE_HAS_FFLAGS;
1520                         archive_entry_copy_fflags_text(entry, val);
1521                         break;
1522                 }
1523                 __LA_FALLTHROUGH;
1524         case 'g':
1525                 if (strcmp(key, "gid") == 0) {
1526                         *parsed_kws |= MTREE_HAS_GID;
1527                         archive_entry_set_gid(entry, mtree_atol(&val, 10));
1528                         break;
1529                 }
1530                 if (strcmp(key, "gname") == 0) {
1531                         *parsed_kws |= MTREE_HAS_GNAME;
1532                         archive_entry_copy_gname(entry, val);
1533                         break;
1534                 }
1535                 __LA_FALLTHROUGH;
1536         case 'i':
1537                 if (strcmp(key, "inode") == 0) {
1538                         archive_entry_set_ino(entry, mtree_atol(&val, 10));
1539                         break;
1540                 }
1541                 __LA_FALLTHROUGH;
1542         case 'l':
1543                 if (strcmp(key, "link") == 0) {
1544                         archive_entry_copy_symlink(entry, val);
1545                         break;
1546                 }
1547                 __LA_FALLTHROUGH;
1548         case 'm':
1549                 if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0)
1550                         break;
1551                 if (strcmp(key, "mode") == 0) {
1552                         if (val[0] >= '0' && val[0] <= '7') {
1553                                 *parsed_kws |= MTREE_HAS_PERM;
1554                                 archive_entry_set_perm(entry,
1555                                     (mode_t)mtree_atol(&val, 8));
1556                         } else {
1557                                 archive_set_error(&a->archive,
1558                                     ARCHIVE_ERRNO_FILE_FORMAT,
1559                                     "Symbolic or non-octal mode \"%s\" unsupported", val);
1560                                 return ARCHIVE_WARN;
1561                         }
1562                         break;
1563                 }
1564                 __LA_FALLTHROUGH;
1565         case 'n':
1566                 if (strcmp(key, "nlink") == 0) {
1567                         *parsed_kws |= MTREE_HAS_NLINK;
1568                         archive_entry_set_nlink(entry,
1569                                 (unsigned int)mtree_atol(&val, 10));
1570                         break;
1571                 }
1572                 __LA_FALLTHROUGH;
1573         case 'r':
1574                 if (strcmp(key, "resdevice") == 0) {
1575                         /* stat(2) st_dev field, e.g. the device ID where the
1576                          * inode resides */
1577                         int r;
1578                         dev_t dev;
1579
1580                         r = parse_device(&dev, &a->archive, val);
1581                         if (r == ARCHIVE_OK)
1582                                 archive_entry_set_dev(entry, dev);
1583                         return r;
1584                 }
1585                 if (strcmp(key, "rmd160") == 0 ||
1586                     strcmp(key, "rmd160digest") == 0)
1587                         break;
1588                 __LA_FALLTHROUGH;
1589         case 's':
1590                 if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0)
1591                         break;
1592                 if (strcmp(key, "sha256") == 0 ||
1593                     strcmp(key, "sha256digest") == 0)
1594                         break;
1595                 if (strcmp(key, "sha384") == 0 ||
1596                     strcmp(key, "sha384digest") == 0)
1597                         break;
1598                 if (strcmp(key, "sha512") == 0 ||
1599                     strcmp(key, "sha512digest") == 0)
1600                         break;
1601                 if (strcmp(key, "size") == 0) {
1602                         archive_entry_set_size(entry, mtree_atol(&val, 10));
1603                         break;
1604                 }
1605                 __LA_FALLTHROUGH;
1606         case 't':
1607                 if (strcmp(key, "tags") == 0) {
1608                         /*
1609                          * Comma delimited list of tags.
1610                          * Ignore the tags for now, but the interface
1611                          * should be extended to allow inclusion/exclusion.
1612                          */
1613                         break;
1614                 }
1615                 if (strcmp(key, "time") == 0) {
1616                         int64_t m;
1617                         int64_t my_time_t_max = get_time_t_max();
1618                         int64_t my_time_t_min = get_time_t_min();
1619                         long ns = 0;
1620
1621                         *parsed_kws |= MTREE_HAS_MTIME;
1622                         m = mtree_atol(&val, 10);
1623                         /* Replicate an old mtree bug:
1624                          * 123456789.1 represents 123456789
1625                          * seconds and 1 nanosecond. */
1626                         if (*val == '.') {
1627                                 ++val;
1628                                 ns = (long)mtree_atol(&val, 10);
1629                                 if (ns < 0)
1630                                         ns = 0;
1631                                 else if (ns > 999999999)
1632                                         ns = 999999999;
1633                         }
1634                         if (m > my_time_t_max)
1635                                 m = my_time_t_max;
1636                         else if (m < my_time_t_min)
1637                                 m = my_time_t_min;
1638                         archive_entry_set_mtime(entry, (time_t)m, ns);
1639                         break;
1640                 }
1641                 if (strcmp(key, "type") == 0) {
1642                         switch (val[0]) {
1643                         case 'b':
1644                                 if (strcmp(val, "block") == 0) {
1645                                         archive_entry_set_filetype(entry, AE_IFBLK);
1646                                         break;
1647                                 }
1648                                 __LA_FALLTHROUGH;
1649                         case 'c':
1650                                 if (strcmp(val, "char") == 0) {
1651                                         archive_entry_set_filetype(entry,
1652                                                 AE_IFCHR);
1653                                         break;
1654                                 }
1655                                 __LA_FALLTHROUGH;
1656                         case 'd':
1657                                 if (strcmp(val, "dir") == 0) {
1658                                         archive_entry_set_filetype(entry,
1659                                                 AE_IFDIR);
1660                                         break;
1661                                 }
1662                                 __LA_FALLTHROUGH;
1663                         case 'f':
1664                                 if (strcmp(val, "fifo") == 0) {
1665                                         archive_entry_set_filetype(entry,
1666                                                 AE_IFIFO);
1667                                         break;
1668                                 }
1669                                 if (strcmp(val, "file") == 0) {
1670                                         archive_entry_set_filetype(entry,
1671                                                 AE_IFREG);
1672                                         break;
1673                                 }
1674                                 __LA_FALLTHROUGH;
1675                         case 'l':
1676                                 if (strcmp(val, "link") == 0) {
1677                                         archive_entry_set_filetype(entry,
1678                                                 AE_IFLNK);
1679                                         break;
1680                                 }
1681                                 __LA_FALLTHROUGH;
1682                         default:
1683                                 archive_set_error(&a->archive,
1684                                     ARCHIVE_ERRNO_FILE_FORMAT,
1685                                     "Unrecognized file type \"%s\"; "
1686                                     "assuming \"file\"", val);
1687                                 archive_entry_set_filetype(entry, AE_IFREG);
1688                                 return (ARCHIVE_WARN);
1689                         }
1690                         *parsed_kws |= MTREE_HAS_TYPE;
1691                         break;
1692                 }
1693                 __LA_FALLTHROUGH;
1694         case 'u':
1695                 if (strcmp(key, "uid") == 0) {
1696                         *parsed_kws |= MTREE_HAS_UID;
1697                         archive_entry_set_uid(entry, mtree_atol(&val, 10));
1698                         break;
1699                 }
1700                 if (strcmp(key, "uname") == 0) {
1701                         *parsed_kws |= MTREE_HAS_UNAME;
1702                         archive_entry_copy_uname(entry, val);
1703                         break;
1704                 }
1705                 __LA_FALLTHROUGH;
1706         default:
1707                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1708                     "Unrecognized key %s=%s", key, val);
1709                 return (ARCHIVE_WARN);
1710         }
1711         return (ARCHIVE_OK);
1712 }
1713
1714 static int
1715 read_data(struct archive_read *a, const void **buff, size_t *size,
1716     int64_t *offset)
1717 {
1718         size_t bytes_to_read;
1719         ssize_t bytes_read;
1720         struct mtree *mtree;
1721
1722         mtree = (struct mtree *)(a->format->data);
1723         if (mtree->fd < 0) {
1724                 *buff = NULL;
1725                 *offset = 0;
1726                 *size = 0;
1727                 return (ARCHIVE_EOF);
1728         }
1729         if (mtree->buff == NULL) {
1730                 mtree->buffsize = 64 * 1024;
1731                 mtree->buff = malloc(mtree->buffsize);
1732                 if (mtree->buff == NULL) {
1733                         archive_set_error(&a->archive, ENOMEM,
1734                             "Can't allocate memory");
1735                         return (ARCHIVE_FATAL);
1736                 }
1737         }
1738
1739         *buff = mtree->buff;
1740         *offset = mtree->offset;
1741         if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1742                 bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1743         else
1744                 bytes_to_read = mtree->buffsize;
1745         bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1746         if (bytes_read < 0) {
1747                 archive_set_error(&a->archive, errno, "Can't read");
1748                 return (ARCHIVE_WARN);
1749         }
1750         if (bytes_read == 0) {
1751                 *size = 0;
1752                 return (ARCHIVE_EOF);
1753         }
1754         mtree->offset += bytes_read;
1755         *size = bytes_read;
1756         return (ARCHIVE_OK);
1757 }
1758
1759 /* Skip does nothing except possibly close the contents file. */
1760 static int
1761 skip(struct archive_read *a)
1762 {
1763         struct mtree *mtree;
1764
1765         mtree = (struct mtree *)(a->format->data);
1766         if (mtree->fd >= 0) {
1767                 close(mtree->fd);
1768                 mtree->fd = -1;
1769         }
1770         return (ARCHIVE_OK);
1771 }
1772
1773 /*
1774  * Since parsing backslash sequences always makes strings shorter,
1775  * we can always do this conversion in-place.
1776  */
1777 static void
1778 parse_escapes(char *src, struct mtree_entry *mentry)
1779 {
1780         char *dest = src;
1781         char c;
1782
1783         if (mentry != NULL && strcmp(src, ".") == 0)
1784                 mentry->full = 1;
1785
1786         while (*src != '\0') {
1787                 c = *src++;
1788                 if (c == '/' && mentry != NULL)
1789                         mentry->full = 1;
1790                 if (c == '\\') {
1791                         switch (src[0]) {
1792                         case '0':
1793                                 if (src[1] < '0' || src[1] > '7') {
1794                                         c = 0;
1795                                         ++src;
1796                                         break;
1797                                 }
1798                                 /* FALLTHROUGH */
1799                         case '1':
1800                         case '2':
1801                         case '3':
1802                                 if (src[1] >= '0' && src[1] <= '7' &&
1803                                     src[2] >= '0' && src[2] <= '7') {
1804                                         c = (src[0] - '0') << 6;
1805                                         c |= (src[1] - '0') << 3;
1806                                         c |= (src[2] - '0');
1807                                         src += 3;
1808                                 }
1809                                 break;
1810                         case 'a':
1811                                 c = '\a';
1812                                 ++src;
1813                                 break;
1814                         case 'b':
1815                                 c = '\b';
1816                                 ++src;
1817                                 break;
1818                         case 'f':
1819                                 c = '\f';
1820                                 ++src;
1821                                 break;
1822                         case 'n':
1823                                 c = '\n';
1824                                 ++src;
1825                                 break;
1826                         case 'r':
1827                                 c = '\r';
1828                                 ++src;
1829                                 break;
1830                         case 's':
1831                                 c = ' ';
1832                                 ++src;
1833                                 break;
1834                         case 't':
1835                                 c = '\t';
1836                                 ++src;
1837                                 break;
1838                         case 'v':
1839                                 c = '\v';
1840                                 ++src;
1841                                 break;
1842                         case '\\':
1843                                 c = '\\';
1844                                 ++src;
1845                                 break;
1846                         }
1847                 }
1848                 *dest++ = c;
1849         }
1850         *dest = '\0';
1851 }
1852
/*
 * Convert one ASCII digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15; any other
 * character yields -1.  Letters must map to 10..15: callers compare the
 * result against their base, so a letter reported as 0..5 would both
 * corrupt hexadecimal values and be accepted as a decimal or octal digit.
 */
static int
parsedigit(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	else if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	else
		return -1;
}
1866
/*
 * Parse a signed integer starting at '*p' and advance '*p' past the
 * characters that were consumed.
 *
 * 'base' is the radix.  A base of 0 auto-detects it: a leading "0x" or
 * "0X" selects hexadecimal (the prefix is consumed), any other leading
 * '0' selects octal, and everything else is decimal.  A single leading
 * '-' makes the value negative.  Parsing stops at the first character
 * that is not a digit of the chosen base.
 *
 * Out-of-range values saturate: INT64_MAX is returned on positive
 * overflow and INT64_MIN on negative overflow.  In that case '*p' is
 * left pointing at the digit that would have overflowed.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
mtree_atol(char **p, int base)
{
	int64_t l, limit;
	int digit, last_digit_limit;

	if (base == 0) {
		if (**p != '0')
			base = 10;
		else if ((*p)[1] == 'x' || (*p)[1] == 'X') {
			*p += 2;
			base = 16;
		} else {
			base = 8;
		}
	}

	if (**p == '-') {
		/*
		 * Accumulate downwards so that INT64_MIN itself can be
		 * represented.  INT64_MIN % base is zero or negative, so it
		 * is negated to get the largest final digit that still fits;
		 * comparing a non-negative digit against the raw remainder
		 * would flag every value at the limit as overflow.
		 */
		limit = INT64_MIN / base;
		last_digit_limit = -(int)(INT64_MIN % base);
		++(*p);

		l = 0;
		digit = parsedigit(**p);
		while (digit >= 0 && digit < base) {
			if (l < limit || (l == limit && digit > last_digit_limit))
				return INT64_MIN;
			l = (l * base) - digit;
			digit = parsedigit(*++(*p));
		}
		return l;
	} else {
		limit = INT64_MAX / base;
		last_digit_limit = (int)(INT64_MAX % base);

		l = 0;
		digit = parsedigit(**p);
		while (digit >= 0 && digit < base) {
			if (l > limit || (l == limit && digit > last_digit_limit))
				return INT64_MAX;
			l = (l * base) + digit;
			digit = parsedigit(*++(*p));
		}
		return l;
	}
}
1918
1919 /*
1920  * Returns length of line (including trailing newline)
1921  * or negative on error.  'start' argument is updated to
1922  * point to first character of line.
1923  */
1924 static ssize_t
1925 readline(struct archive_read *a, struct mtree *mtree, char **start,
1926     ssize_t limit)
1927 {
1928         ssize_t bytes_read;
1929         ssize_t total_size = 0;
1930         ssize_t find_off = 0;
1931         const void *t;
1932         void *nl;
1933         char *u;
1934
1935         /* Accumulate line in a line buffer. */
1936         for (;;) {
1937                 /* Read some more. */
1938                 t = __archive_read_ahead(a, 1, &bytes_read);
1939                 if (t == NULL)
1940                         return (0);
1941                 if (bytes_read < 0)
1942                         return (ARCHIVE_FATAL);
1943                 nl = memchr(t, '\n', bytes_read);
1944                 /* If we found '\n', trim the read to end exactly there. */
1945                 if (nl != NULL) {
1946                         bytes_read = ((const char *)nl) - ((const char *)t) + 1;
1947                 }
1948                 if (total_size + bytes_read + 1 > limit) {
1949                         archive_set_error(&a->archive,
1950                             ARCHIVE_ERRNO_FILE_FORMAT,
1951                             "Line too long");
1952                         return (ARCHIVE_FATAL);
1953                 }
1954                 if (archive_string_ensure(&mtree->line,
1955                         total_size + bytes_read + 1) == NULL) {
1956                         archive_set_error(&a->archive, ENOMEM,
1957                             "Can't allocate working buffer");
1958                         return (ARCHIVE_FATAL);
1959                 }
1960                 /* Append new bytes to string. */
1961                 memcpy(mtree->line.s + total_size, t, bytes_read);
1962                 __archive_read_consume(a, bytes_read);
1963                 total_size += bytes_read;
1964                 mtree->line.s[total_size] = '\0';
1965
1966                 for (u = mtree->line.s + find_off; *u; ++u) {
1967                         if (u[0] == '\n') {
1968                                 /* Ends with unescaped newline. */
1969                                 *start = mtree->line.s;
1970                                 return total_size;
1971                         } else if (u[0] == '#') {
1972                                 /* Ends with comment sequence #...\n */
1973                                 if (nl == NULL) {
1974                                         /* But we've not found the \n yet */
1975                                         break;
1976                                 }
1977                         } else if (u[0] == '\\') {
1978                                 if (u[1] == '\n') {
1979                                         /* Trim escaped newline. */
1980                                         total_size -= 2;
1981                                         mtree->line.s[total_size] = '\0';
1982                                         break;
1983                                 } else if (u[1] != '\0') {
1984                                         /* Skip the two-char escape sequence */
1985                                         ++u;
1986                                 }
1987                         }
1988                 }
1989                 find_off = u - mtree->line.s;
1990         }
1991 }
1992
/*
 * Hash a NUL-terminated string with the 32-bit variant of Peter
 * Weinberger's (PJW) algorithm, the one ELF uses for function names.
 * Whenever the top four bits become non-zero they are folded back into
 * the low bits and cleared, so the result always fits in 28 bits.
 */
static unsigned int
hash(const char *p)
{
	unsigned int acc = 0;
	unsigned int top;

	for (; *p != '\0'; ++p) {
		acc = (acc << 4) + *p;
		top = acc & 0xF0000000;
		if (top != 0) {
			acc ^= top >> 24;
			acc &= 0x0FFFFFFF;
		}
	}
	return acc;
}