]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/libarchive/libarchive/archive_read_support_format_mtree.c
MFC r306670:
[FreeBSD/stable/10.git] / contrib / libarchive / libarchive / archive_read_support_format_mtree.c
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * Copyright (c) 2008 Joerg Sonnenberger
4  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD$");
30
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_FCNTL_H
38 #include <fcntl.h>
39 #endif
40 #include <stddef.h>
41 /* #include <stdint.h> */ /* See archive_platform.h */
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 #ifdef HAVE_STRING_H
46 #include <string.h>
47 #endif
48
49 #include "archive.h"
50 #include "archive_entry.h"
51 #include "archive_private.h"
52 #include "archive_read_private.h"
53 #include "archive_string.h"
54 #include "archive_pack_dev.h"
55
/*
 * Fallback definitions: when the platform headers do not provide these
 * flags they are defined as 0.  (Presumably so they can be OR-ed into
 * open() flags without effect; the use site is outside this view.)
 */
56 #ifndef O_BINARY
57 #define O_BINARY 0
58 #endif
59 #ifndef O_CLOEXEC
60 #define O_CLOEXEC       0
61 #endif
62
/*
 * Single-bit flags, one per mtree keyword.  NOTE(review): presumably these
 * are OR-ed into the "int *" bitmask taken by parse_file()/parse_line()/
 * parse_keyword() to record which attributes a line supplied; confirm
 * against those functions.
 */
63 #define MTREE_HAS_DEVICE        0x0001
64 #define MTREE_HAS_FFLAGS        0x0002
65 #define MTREE_HAS_GID           0x0004
66 #define MTREE_HAS_GNAME         0x0008
67 #define MTREE_HAS_MTIME         0x0010
68 #define MTREE_HAS_NLINK         0x0020
69 #define MTREE_HAS_PERM          0x0040
70 #define MTREE_HAS_SIZE          0x0080
71 #define MTREE_HAS_TYPE          0x0100
72 #define MTREE_HAS_UID           0x0200
73 #define MTREE_HAS_UNAME         0x0400
74
/* Flags for keywords that take no attribute value ("optional", "nochange"). */
75 #define MTREE_HAS_OPTIONAL      0x0800
76 #define MTREE_HAS_NOCHANGE      0x1000 /* FreeBSD specific */
77
/*
 * One "keyword" or "keyword=value" option, kept in a singly linked list.
 * add_option() pushes new nodes at the head of a list; free_options()
 * releases a whole list.
 */
78 struct mtree_option {
79         struct mtree_option *next;     /* Next option, or NULL at the end. */
80         char *value;   /* malloc'ed, NUL-terminated option text; owned by the node. */
81 };
82
/*
 * One entry read from the mtree input, kept in a singly linked list that
 * hangs off struct mtree (see cleanup(), which frees name, options and the
 * node itself).
 */
83 struct mtree_entry {
84         struct mtree_entry *next;      /* Next entry in the list, or NULL. */
85         struct mtree_option *options;  /* Option list owned by this entry. */
86         char *name;                    /* Heap-allocated entry name; freed in cleanup(). */
87         char full;     /* Initialised to 0 by process_add_entry(); meaning set elsewhere -- TODO confirm. */
88         char used;     /* Initialised to 0 by process_add_entry(); meaning set elsewhere -- TODO confirm. */
89 };
90
/*
 * Per-archive state of the mtree reader.  Allocated zero-filled in
 * archive_read_support_format_mtree() and released in cleanup().
 */
91 struct mtree {
92         struct archive_string    line;         /* Freed in cleanup(); presumably the current input line. */
93         size_t                   buffsize;     /* Presumably the allocated size of buff -- confirm. */
94         char                    *buff;         /* Heap buffer; freed in cleanup(). */
95         int64_t                  offset;
96         int                      fd;           /* Set to -1 at creation (no descriptor open). */
97         int                      archive_format;
98         const char              *archive_format_name;
99         struct mtree_entry      *entries;      /* Head of the entry list freed by cleanup(). */
100         struct mtree_entry      *this_entry;
101         struct archive_string    current_dir;  /* Freed in cleanup(). */
102         struct archive_string    contents_name; /* Freed in cleanup(). */
103
104         struct archive_entry_linkresolver *resolver; /* Released with archive_entry_linkresolver_free(). */
105
106         int64_t                  cur_size;
107         char checkfs;  /* 1 when the "checkfs" option was given a non-empty value, else 0. */
108 };
109
/*
 * Forward declarations of file-local (static) functions.  mtree_bid,
 * read_header, read_data, skip and cleanup are the callbacks handed to
 * __archive_read_register_format() in archive_read_support_format_mtree().
 */
110 static int      bid_keycmp(const char *, const char *, ssize_t);
111 static int      cleanup(struct archive_read *);
112 static int      detect_form(struct archive_read *, int *);
113 static int      mtree_bid(struct archive_read *, int);
114 static int      parse_file(struct archive_read *, struct archive_entry *,
115                     struct mtree *, struct mtree_entry *, int *);
116 static void     parse_escapes(char *, struct mtree_entry *);
117 static int      parse_line(struct archive_read *, struct archive_entry *,
118                     struct mtree *, struct mtree_entry *, int *);
119 static int      parse_keyword(struct archive_read *, struct mtree *,
120                     struct archive_entry *, struct mtree_option *, int *);
121 static int      read_data(struct archive_read *a,
122                     const void **buff, size_t *size, int64_t *offset);
123 static ssize_t  readline(struct archive_read *, struct mtree *, char **, ssize_t);
124 static int      skip(struct archive_read *a);
125 static int      read_header(struct archive_read *,
126                     struct archive_entry *);
/* Numeric parsers; each takes a pointer to the caller's cursor. */
127 static int64_t  mtree_atol10(char **);
128 static int64_t  mtree_atol8(char **);
129 static int64_t  mtree_atol(char **);
130
/*
 * TIME_T_MAX / TIME_T_MIN are not standardized, so they are derived at
 * run time when the platform does not define them.
 * TODO: Move this to configure time, but be careful about
 * cross-compile environments.
 */

/*
 * Return the largest value a time_t can hold.
 *
 * Uses TIME_T_MAX when the platform defines it.  Otherwise time_t is
 * assumed to be an integer type (as POSIX requires): an unsigned time_t
 * yields all-ones, a signed one yields INT64_MAX or INT32_MAX depending
 * on whether it is as wide as int64_t.
 */
static int64_t
get_time_t_max(void)
{
#if defined(TIME_T_MAX)
        return TIME_T_MAX;
#else
        /* (time_t)-1 compares greater than zero only for unsigned types. */
        const int time_t_is_unsigned = ((time_t)0) < ((time_t)-1);

        if (time_t_is_unsigned)
                return (~(time_t)0);

        /* Signed: assume the width is that of int64_t or int32_t. */
        return (sizeof(time_t) == sizeof(int64_t))
            ? (time_t)INT64_MAX
            : (time_t)INT32_MAX;
#endif
}
160
/*
 * Return the smallest value a time_t can hold.
 *
 * Uses TIME_T_MIN when the platform defines it.  Otherwise an unsigned
 * time_t yields 0 and a signed one yields INT64_MIN or INT32_MIN,
 * depending on whether it is as wide as int64_t.
 */
static int64_t
get_time_t_min(void)
{
#if defined(TIME_T_MIN)
        return TIME_T_MIN;
#else
        /* (time_t)-1 compares greater than zero only for unsigned types. */
        const int time_t_is_unsigned = ((time_t)0) < ((time_t)-1);

        if (time_t_is_unsigned)
                return (time_t)0;

        return (sizeof(time_t) == sizeof(int64_t))
            ? (time_t)INT64_MIN
            : (time_t)INT32_MIN;
#endif
}
180
181 static int
182 archive_read_format_mtree_options(struct archive_read *a,
183     const char *key, const char *val)
184 {
185         struct mtree *mtree;
186
187         mtree = (struct mtree *)(a->format->data);
188         if (strcmp(key, "checkfs")  == 0) {
189                 /* Allows to read information missing from the mtree from the file system */
190                 if (val == NULL || val[0] == 0) {
191                         mtree->checkfs = 0;
192                 } else {
193                         mtree->checkfs = 1;
194                 }
195                 return (ARCHIVE_OK);
196         }
197
198         /* Note: The "warn" return is just to inform the options
199          * supervisor that we didn't handle it.  It will generate
200          * a suitable error if no one used this option. */
201         return (ARCHIVE_WARN);
202 }
203
204 static void
205 free_options(struct mtree_option *head)
206 {
207         struct mtree_option *next;
208
209         for (; head != NULL; head = next) {
210                 next = head->next;
211                 free(head->value);
212                 free(head);
213         }
214 }
215
216 int
217 archive_read_support_format_mtree(struct archive *_a)
218 {
219         struct archive_read *a = (struct archive_read *)_a;
220         struct mtree *mtree;
221         int r;
222
223         archive_check_magic(_a, ARCHIVE_READ_MAGIC,
224             ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
225
226         mtree = (struct mtree *)malloc(sizeof(*mtree));
227         if (mtree == NULL) {
228                 archive_set_error(&a->archive, ENOMEM,
229                     "Can't allocate mtree data");
230                 return (ARCHIVE_FATAL);
231         }
232         memset(mtree, 0, sizeof(*mtree));
233         mtree->fd = -1;
234
235         r = __archive_read_register_format(a, mtree, "mtree",
236            mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL);
237
238         if (r != ARCHIVE_OK)
239                 free(mtree);
240         return (ARCHIVE_OK);
241 }
242
243 static int
244 cleanup(struct archive_read *a)
245 {
246         struct mtree *mtree;
247         struct mtree_entry *p, *q;
248
249         mtree = (struct mtree *)(a->format->data);
250
251         p = mtree->entries;
252         while (p != NULL) {
253                 q = p->next;
254                 free(p->name);
255                 free_options(p->options);
256                 free(p);
257                 p = q;
258         }
259         archive_string_free(&mtree->line);
260         archive_string_free(&mtree->current_dir);
261         archive_string_free(&mtree->contents_name);
262         archive_entry_linkresolver_free(mtree->resolver);
263
264         free(mtree->buff);
265         free(mtree);
266         (a->format->data) = NULL;
267         return (ARCHIVE_OK);
268 }
269
/*
 * Measure the first line in the buffer b of avail bytes.
 *
 * Returns the line length including its terminator and stores the
 * terminator size in *nlsize (2 for "\r\n", 1 for a lone '\r' or '\n').
 * If no terminator is found within avail bytes, returns avail with
 * *nlsize = 0.  If a NUL byte is met first, returns -1 with *nlsize = 0.
 * nlsize may be NULL when the caller does not need the terminator size.
 */
static ssize_t
get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
{
        ssize_t pos;
        ssize_t terminator = 0;
        ssize_t result = avail;

        for (pos = 0; pos < avail; pos++) {
                const char c = b[pos];

                if (c == '\0') {
                        /* A NUL byte cannot be part of a text line. */
                        result = -1;
                        break;
                }
                if (c == '\r' && avail - pos > 1 && b[pos + 1] == '\n') {
                        terminator = 2;
                        result = pos + 2;
                        break;
                }
                if (c == '\r' || c == '\n') {
                        /* A lone CR is treated like LF. */
                        terminator = 1;
                        result = pos + 1;
                        break;
                }
        }

        if (nlsize != NULL)
                *nlsize = terminator;
        return (result);
}
303
/*
 *  <---------------- ravail --------------------->
 *  <-- diff ------> <---  avail ----------------->
 *                   <---- len ----------->
 * | Previous lines | line being parsed  nl extra |
 *                  ^
 *                  b
 *
 * Find the length of the line starting at *b, reading ahead further
 * while no line terminator is inside the *avail bytes seen so far.
 *
 * On return *b, *avail and *ravail describe the (possibly re-fetched)
 * read-ahead window and *nl holds the terminator size found by
 * get_line_size().  Returns the line length including the terminator,
 * a negative value when get_line_size() rejected the data, or 0 when
 * the read-ahead could not supply more bytes than were already seen.
 */
static ssize_t
next_line(struct archive_read *a,
    const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
{
        ssize_t len;
        int hit_eof = 0;

        if (*avail != 0)
                len = get_line_size(*b, *avail, nl);
        else {
                *nl = 0;
                len = 0;
        }

        /*
         * Keep reading more bytes until the end of the line is reached.
         */
        for (;;) {
                ssize_t consumed, already_scanned;
                size_t want;

                if (*nl != 0 || len != *avail || hit_eof)
                        break;

                consumed = *ravail - *avail;
                want = (*ravail+1023) & ~1023U;

                /* Ask for more if the request would not cover at
                 * least two more lines. */
                if (want < (size_t)*ravail + 160)
                        want <<= 1;

                *b = __archive_read_ahead(a, want, avail);
                if (*b == NULL) {
                        if (*ravail >= *avail)
                                return (0);
                        /* The read reached the end of file; take
                         * whatever is left. */
                        *b = __archive_read_ahead(a, *avail, avail);
                        hit_eof = 1;
                }
                *ravail = *avail;
                *b += consumed;
                *avail -= consumed;

                /* Do not rescan the bytes already examined. */
                already_scanned = len;
                len = get_line_size(*b + already_scanned,
                    *avail - already_scanned, nl);
                if (len >= 0)
                        len += already_scanned;
        }
        return (len);
}
357
/*
 * Compare the text at p with the mtree keyword key, looking at no more
 * than len bytes of p for the keyword itself.
 *
 * Returns the keyword length when p starts with key and the character
 * right after it is '=', a blank, a line terminator, or a backslash
 * followed by a line terminator.  Returns 0 otherwise.
 */
static int
bid_keycmp(const char *p, const char *key, ssize_t len)
{
        int matched;

        for (matched = 0; len > 0 && *p != '\0' && *key != '\0'; matched++) {
                if (*p != *key)
                        return (0);/* Mismatch inside the keyword. */
                ++p;
                ++key;
                --len;
        }
        if (*key != '\0')
                return (0);/* Input ended before the keyword did. */

        /* The keyword must be followed by one of the delimiters. */
        switch (p[0]) {
        case '=':
        case ' ':
        case '\t':
        case '\n':
        case '\r':
                return (matched);
        case '\\':
                if (p[1] == '\n' || p[1] == '\r')
                        return (matched);
                return (0);
        default:
                return (0);
        }
}
388
/*
 * Test whether the text at p starts with a known mtree keyword.
 * Returns the length of the keyword that bid_keycmp() accepted,
 * or 0 when none of the known keywords matches.
 */
static int
bid_keyword(const char *p,  ssize_t len)
{
        /* Every keyword the bidder recognizes, grouped by first letter. */
        static const char * const keywords[] = {
                "content", "contents", "cksum",
                "device", "flags",
                "gid", "gname",
                "ignore", "inode", "link",
                "md5", "md5digest", "mode",
                "nlink", "nochange", "optional",
                "resdevice", "rmd160", "rmd160digest",
                "sha1", "sha1digest",
                "sha256", "sha256digest",
                "sha384", "sha384digest",
                "sha512", "sha512digest",
                "size",
                "tags", "time", "type",
                "uid", "uname"
        };
        const size_t total = sizeof(keywords) / sizeof(keywords[0]);
        const char first = *p;
        size_t idx;

        for (idx = 0; idx < total; idx++) {
                int matched;

                /* Cheap filter: only keywords sharing the first letter
                 * can possibly match. */
                if (keywords[idx][0] != first)
                        continue;
                matched = bid_keycmp(p, keywords[idx], len);
                if (matched > 0)
                        return (matched);
        }
        return (0);/* Unknown key */
}
455
/*
 * Check that the text at p is a list of mtree keywords.
 *
 * Each item is expected as "<blanks>keyword=value"; with unset non-zero
 * a bare "<blanks>keyword" is accepted and "all" short-circuits with a
 * result of 1.  With last_is_path non-zero the first keyword need not
 * be preceded by a blank, and running out of len after blanks ends the
 * list successfully.
 *
 * Returns the number of keywords seen, or -1 on an unknown keyword,
 * a missing separator, or (unless unset) a keyword with an empty value.
 */
static int
bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
{
        int found = 0;

        while (len > 0 && *p != '\0') {
                int key_len;
                int saw_blank = 0;

                /* Consume the blanks that separate keywords. */
                for (; len > 0 && (*p == ' ' || *p == '\t'); ++p, --len)
                        saw_blank = 1;

                /* End of line, or a backslash continuation. */
                if (*p == '\n' || *p == '\r')
                        break;
                if (*p == '\\' && (p[1] == '\n' || p[1] == '\r'))
                        break;

                if (!(saw_blank || last_is_path))
                        return (-1);/* No blank character. */
                if (last_is_path && len == 0)
                        return (found);

                if (unset && bid_keycmp(p, "all", len) > 0)
                        return (1);

                /* The next token must be a known keyword. */
                key_len = bid_keyword(p, len);
                if (key_len == 0)
                        return (-1);/* Unknown keyword was found. */
                p += key_len;
                len -= key_len;
                found++;

                /* Skip the value, which runs up to the next blank. */
                if (*p == '=') {
                        int has_value = 0;

                        ++p;
                        --len;
                        for (; len > 0 && *p != ' ' && *p != '\t'; ++p, --len)
                                has_value = 1;
                        /* A keyword must carry a value unless this is
                         * an "/unset" operation. */
                        if (!unset && !has_value)
                                return (-1);
                }
        }
        return (found);
}
518
/*
 * Tell whether a byte may appear unquoted in a path name while bidding:
 * printable ASCII 0x21..0x7E except '#' (0x23) and '=' (0x3D).
 */
static int
bid_entry_safe_char(unsigned char c)
{
        return (c > 0x20 && c < 0x7f && c != 0x23 && c != 0x3d);
}

/*
 * Bid on one entry line of len bytes whose trailing nl bytes are the
 * line terminator.
 *
 * First the line is tried with the path name in front.  If no path is
 * found there, the layout written by NetBSD's "mtree -D" (form D) is
 * tried, where the path is the last field; *last_is_path is set to 1 in
 * that case and to 0 otherwise.
 *
 * Returns what bid_keyword_list() reports for the keyword part, or -1
 * when the line cannot be an entry.
 */
static int
bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
{
        const char *cursor = p;
        const char * const line_end = p + len;
        ssize_t rest;
        int path_in_front = 0;

        *last_is_path = 0;

        /*
         * Skip the leading path name, which is in quoted form.
         */
        while (cursor < line_end) {
                const unsigned char c = *(const unsigned char *)cursor;

                if (!bid_entry_safe_char(c)) {
                        /* A path only counts if white space ends it. */
                        if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
                                path_in_front = 0;
                        break;
                }
                path_in_front = 1;
                ++cursor;
        }
        rest = line_end - cursor;

        /* No path name in front: try form D, which places the
         * path name at the end of the line. */
        if (!path_in_front) {
                const ssize_t body = len - nl;
                ssize_t i;
                int name_len = 0;
                int has_slash = 0;

                /* Form D accepts only a single line per entry. */
                if (body >= 2 && p[body - 1] == '\\' &&
                    (p[body - 2] == ' ' || p[body - 2] == '\t'))
                        return (-1);
                if (body >= 1 && p[body - 1] == '\\')
                        return (-1);

                /* Walk backwards over the last field. */
                for (i = body - 1;
                    i >= 0 && p[i] != ' ' && p[i] != '\t'; --i) {
                        if (!bid_entry_safe_char((unsigned char)p[i]))
                                return (-1);
                        name_len++;
                        /* The path name must contain a slash in
                         * this format. */
                        if (p[i] == '/')
                                has_slash = 1;
                }
                if (name_len == 0 || !has_slash)
                        return (-1);
                /* A field starting with '/' is not a valid file name. */
                if (p[i + 1] == '/')
                        return (-1);

                rest = len - nl - name_len;
                cursor = p;
                *last_is_path = 1;
        }

        return (bid_keyword_list(cursor, rest, 0, *last_is_path));
}
604
/* Number of valid entries after which detect_form() stops scanning. */
#define MAX_BID_ENTRY   3

/*
 * Bid callback.  Returns -1 when the start of the input cannot be read,
 * 8 bits per signature byte when the input starts with "#mtree", and
 * otherwise whatever detect_form() decides from the content.
 */
static int
mtree_bid(struct archive_read *a, int best_bid)
{
        const char *signature = "#mtree";
        const size_t signature_len = strlen(signature);
        const char *head;

        (void)best_bid; /* UNUSED */

        /* Look at the actual header and see whether it matches. */
        head = __archive_read_ahead(a, signature_len, NULL);
        if (head == NULL)
                return (-1);

        if (memcmp(head, signature, signature_len) != 0) {
                /*
                 * No mtree signature; try to detect the format
                 * from the content instead.
                 */
                return (detect_form(a, NULL));
        }
        return (8 * (int)signature_len);
}
628
629 static int
630 detect_form(struct archive_read *a, int *is_form_d)
631 {
632         const char *p;
633         ssize_t avail, ravail;
634         ssize_t detected_bytes = 0, len, nl;
635         int entry_cnt = 0, multiline = 0;
636         int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
637                         * (In this source we call it `form D') . */
638
639         if (is_form_d != NULL)
640                 *is_form_d = 0;
641         p = __archive_read_ahead(a, 1, &avail);
642         if (p == NULL)
643                 return (-1);
644         ravail = avail;
645         for (;;) {
646                 len = next_line(a, &p, &avail, &ravail, &nl);
647                 /* The terminal character of the line should be
648                  * a new line character, '\r\n' or '\n'. */
649                 if (len <= 0 || nl == 0)
650                         break;
651                 if (!multiline) {
652                         /* Leading whitespace is never significant,
653                          * ignore it. */
654                         while (len > 0 && (*p == ' ' || *p == '\t')) {
655                                 ++p;
656                                 --avail;
657                                 --len;
658                         }
659                         /* Skip comment or empty line. */ 
660                         if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
661                                 p += len;
662                                 avail -= len;
663                                 continue;
664                         }
665                 } else {
666                         /* A continuance line; the terminal
667                          * character of previous line was '\' character. */
668                         if (bid_keyword_list(p, len, 0, 0) <= 0)
669                                 break;
670                         if (multiline == 1)
671                                 detected_bytes += len;
672                         if (p[len-nl-1] != '\\') {
673                                 if (multiline == 1 &&
674                                     ++entry_cnt >= MAX_BID_ENTRY)
675                                         break;
676                                 multiline = 0;
677                         }
678                         p += len;
679                         avail -= len;
680                         continue;
681                 }
682                 if (p[0] != '/') {
683                         int last_is_path, keywords;
684
685                         keywords = bid_entry(p, len, nl, &last_is_path);
686                         if (keywords >= 0) {
687                                 detected_bytes += len;
688                                 if (form_D == 0) {
689                                         if (last_is_path)
690                                                 form_D = 1;
691                                         else if (keywords > 0)
692                                                 /* This line is not `form D'. */
693                                                 form_D = -1;
694                                 } else if (form_D == 1) {
695                                         if (!last_is_path && keywords > 0)
696                                                 /* This this is not `form D'
697                                                  * and We cannot accept mixed
698                                                  * format. */
699                                                 break;
700                                 }
701                                 if (!last_is_path && p[len-nl-1] == '\\')
702                                         /* This line continues. */
703                                         multiline = 1;
704                                 else {
705                                         /* We've got plenty of correct lines
706                                          * to assume that this file is a mtree
707                                          * format. */
708                                         if (++entry_cnt >= MAX_BID_ENTRY)
709                                                 break;
710                                 }
711                         } else
712                                 break;
713                 } else if (strncmp(p, "/set", 4) == 0) {
714                         if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
715                                 break;
716                         /* This line continues. */
717                         if (p[len-nl-1] == '\\')
718                                 multiline = 2;
719                 } else if (strncmp(p, "/unset", 6) == 0) {
720                         if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
721                                 break;
722                         /* This line continues. */
723                         if (p[len-nl-1] == '\\')
724                                 multiline = 2;
725                 } else
726                         break;
727
728                 /* Test next line. */
729                 p += len;
730                 avail -= len;
731         }
732         if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
733                 if (is_form_d != NULL) {
734                         if (form_D == 1)
735                                 *is_form_d = 1;
736                 }
737                 return (32);
738         }
739
740         return (0);
741 }
742
743 /*
744  * The extended mtree format permits multiple lines specifying
745  * attributes for each file.  For those entries, only the last line
746  * is actually used.  Practically speaking, that means we have
747  * to read the entire mtree file into memory up front.
748  *
749  * The parsing is done in two steps.  First, it is decided if a line
750  * changes the global defaults and if it is, processed accordingly.
751  * Otherwise, the options of the line are merged with the current
752  * global options.
753  */
754 static int
755 add_option(struct archive_read *a, struct mtree_option **global,
756     const char *value, size_t len)
757 {
758         struct mtree_option *opt;
759
760         if ((opt = malloc(sizeof(*opt))) == NULL) {
761                 archive_set_error(&a->archive, errno, "Can't allocate memory");
762                 return (ARCHIVE_FATAL);
763         }
764         if ((opt->value = malloc(len + 1)) == NULL) {
765                 free(opt);
766                 archive_set_error(&a->archive, errno, "Can't allocate memory");
767                 return (ARCHIVE_FATAL);
768         }
769         memcpy(opt->value, value, len);
770         opt->value[len] = '\0';
771         opt->next = *global;
772         *global = opt;
773         return (ARCHIVE_OK);
774 }
775
776 static void
777 remove_option(struct mtree_option **global, const char *value, size_t len)
778 {
779         struct mtree_option *iter, *last;
780
781         last = NULL;
782         for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
783                 if (strncmp(iter->value, value, len) == 0 &&
784                     (iter->value[len] == '\0' ||
785                      iter->value[len] == '='))
786                         break;
787         }
788         if (iter == NULL)
789                 return;
790         if (last == NULL)
791                 *global = iter->next;
792         else
793                 last->next = iter->next;
794
795         free(iter->value);
796         free(iter);
797 }
798
799 static int
800 process_global_set(struct archive_read *a,
801     struct mtree_option **global, const char *line)
802 {
803         const char *next, *eq;
804         size_t len;
805         int r;
806
807         line += 4;
808         for (;;) {
809                 next = line + strspn(line, " \t\r\n");
810                 if (*next == '\0')
811                         return (ARCHIVE_OK);
812                 line = next;
813                 next = line + strcspn(line, " \t\r\n");
814                 eq = strchr(line, '=');
815                 if (eq > next)
816                         len = next - line;
817                 else
818                         len = eq - line;
819
820                 remove_option(global, line, len);
821                 r = add_option(a, global, line, next - line);
822                 if (r != ARCHIVE_OK)
823                         return (r);
824                 line = next;
825         }
826 }
827
828 static int
829 process_global_unset(struct archive_read *a,
830     struct mtree_option **global, const char *line)
831 {
832         const char *next;
833         size_t len;
834
835         line += 6;
836         if (strchr(line, '=') != NULL) {
837                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
838                     "/unset shall not contain `='");
839                 return ARCHIVE_FATAL;
840         }
841
842         for (;;) {
843                 next = line + strspn(line, " \t\r\n");
844                 if (*next == '\0')
845                         return (ARCHIVE_OK);
846                 line = next;
847                 len = strcspn(line, " \t\r\n");
848
849                 if (len == 3 && strncmp(line, "all", 3) == 0) {
850                         free_options(*global);
851                         *global = NULL;
852                 } else {
853                         remove_option(global, line, len);
854                 }
855
856                 line += len;
857         }
858 }
859
860 static int
861 process_add_entry(struct archive_read *a, struct mtree *mtree,
862     struct mtree_option **global, const char *line, ssize_t line_len,
863     struct mtree_entry **last_entry, int is_form_d)
864 {
865         struct mtree_entry *entry;
866         struct mtree_option *iter;
867         const char *next, *eq, *name, *end;
868         size_t name_len, len;
869         int r, i;
870
871         if ((entry = malloc(sizeof(*entry))) == NULL) {
872                 archive_set_error(&a->archive, errno, "Can't allocate memory");
873                 return (ARCHIVE_FATAL);
874         }
875         entry->next = NULL;
876         entry->options = NULL;
877         entry->name = NULL;
878         entry->used = 0;
879         entry->full = 0;
880
881         /* Add this entry to list. */
882         if (*last_entry == NULL)
883                 mtree->entries = entry;
884         else
885                 (*last_entry)->next = entry;
886         *last_entry = entry;
887
888         if (is_form_d) {
889                 /* Filename is last item on line. */
890                 /* Adjust line_len to trim trailing whitespace */
891                 while (line_len > 0) {
892                         char last_character = line[line_len - 1];
893                         if (last_character == '\r'
894                             || last_character == '\n'
895                             || last_character == '\t'
896                             || last_character == ' ') {
897                                 line_len--;
898                         } else {
899                                 break;
900                         }
901                 }
902                 /* Name starts after the last whitespace separator */
903                 name = line;
904                 for (i = 0; i < line_len; i++) {
905                         if (line[i] == '\r'
906                             || line[i] == '\n'
907                             || line[i] == '\t'
908                             || line[i] == ' ') {
909                                 name = line + i + 1;
910                         }
911                 }
912                 name_len = line + line_len - name;
913                 end = name;
914         } else {
915                 /* Filename is first item on line */
916                 name_len = strcspn(line, " \t\r\n");
917                 name = line;
918                 line += name_len;
919                 end = line + line_len;
920         }
921         /* name/name_len is the name within the line. */
922         /* line..end brackets the entire line except the name */
923
924         if ((entry->name = malloc(name_len + 1)) == NULL) {
925                 archive_set_error(&a->archive, errno, "Can't allocate memory");
926                 return (ARCHIVE_FATAL);
927         }
928
929         memcpy(entry->name, name, name_len);
930         entry->name[name_len] = '\0';
931         parse_escapes(entry->name, entry);
932
933         for (iter = *global; iter != NULL; iter = iter->next) {
934                 r = add_option(a, &entry->options, iter->value,
935                     strlen(iter->value));
936                 if (r != ARCHIVE_OK)
937                         return (r);
938         }
939
940         for (;;) {
941                 next = line + strspn(line, " \t\r\n");
942                 if (*next == '\0')
943                         return (ARCHIVE_OK);
944                 if (next >= end)
945                         return (ARCHIVE_OK);
946                 line = next;
947                 next = line + strcspn(line, " \t\r\n");
948                 eq = strchr(line, '=');
949                 if (eq == NULL || eq > next)
950                         len = next - line;
951                 else
952                         len = eq - line;
953
954                 remove_option(&entry->options, line, len);
955                 r = add_option(a, &entry->options, line, next - line);
956                 if (r != ARCHIVE_OK)
957                         return (r);
958                 line = next;
959         }
960 }
961
962 static int
963 read_mtree(struct archive_read *a, struct mtree *mtree)
964 {
965         ssize_t len;
966         uintmax_t counter;
967         char *p;
968         struct mtree_option *global;
969         struct mtree_entry *last_entry;
970         int r, is_form_d;
971
972         mtree->archive_format = ARCHIVE_FORMAT_MTREE;
973         mtree->archive_format_name = "mtree";
974
975         global = NULL;
976         last_entry = NULL;
977
978         (void)detect_form(a, &is_form_d);
979
980         for (counter = 1; ; ++counter) {
981                 len = readline(a, mtree, &p, 65536);
982                 if (len == 0) {
983                         mtree->this_entry = mtree->entries;
984                         free_options(global);
985                         return (ARCHIVE_OK);
986                 }
987                 if (len < 0) {
988                         free_options(global);
989                         return ((int)len);
990                 }
991                 /* Leading whitespace is never significant, ignore it. */
992                 while (*p == ' ' || *p == '\t') {
993                         ++p;
994                         --len;
995                 }
996                 /* Skip content lines and blank lines. */
997                 if (*p == '#')
998                         continue;
999                 if (*p == '\r' || *p == '\n' || *p == '\0')
1000                         continue;
1001                 if (*p != '/') {
1002                         r = process_add_entry(a, mtree, &global, p, len,
1003                             &last_entry, is_form_d);
1004                 } else if (strncmp(p, "/set", 4) == 0) {
1005                         if (p[4] != ' ' && p[4] != '\t')
1006                                 break;
1007                         r = process_global_set(a, &global, p);
1008                 } else if (strncmp(p, "/unset", 6) == 0) {
1009                         if (p[6] != ' ' && p[6] != '\t')
1010                                 break;
1011                         r = process_global_unset(a, &global, p);
1012                 } else
1013                         break;
1014
1015                 if (r != ARCHIVE_OK) {
1016                         free_options(global);
1017                         return r;
1018                 }
1019         }
1020
1021         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1022             "Can't parse line %ju", counter);
1023         free_options(global);
1024         return (ARCHIVE_FATAL);
1025 }
1026
1027 /*
1028  * Read in the entire mtree file into memory on the first request.
1029  * Then use the next unused file to satisfy each header request.
1030  */
1031 static int
1032 read_header(struct archive_read *a, struct archive_entry *entry)
1033 {
1034         struct mtree *mtree;
1035         char *p;
1036         int r, use_next;
1037
1038         mtree = (struct mtree *)(a->format->data);
1039
1040         if (mtree->fd >= 0) {
1041                 close(mtree->fd);
1042                 mtree->fd = -1;
1043         }
1044
1045         if (mtree->entries == NULL) {
1046                 mtree->resolver = archive_entry_linkresolver_new();
1047                 if (mtree->resolver == NULL)
1048                         return ARCHIVE_FATAL;
1049                 archive_entry_linkresolver_set_strategy(mtree->resolver,
1050                     ARCHIVE_FORMAT_MTREE);
1051                 r = read_mtree(a, mtree);
1052                 if (r != ARCHIVE_OK)
1053                         return (r);
1054         }
1055
1056         a->archive.archive_format = mtree->archive_format;
1057         a->archive.archive_format_name = mtree->archive_format_name;
1058
1059         for (;;) {
1060                 if (mtree->this_entry == NULL)
1061                         return (ARCHIVE_EOF);
1062                 if (strcmp(mtree->this_entry->name, "..") == 0) {
1063                         mtree->this_entry->used = 1;
1064                         if (archive_strlen(&mtree->current_dir) > 0) {
1065                                 /* Roll back current path. */
1066                                 p = mtree->current_dir.s
1067                                     + mtree->current_dir.length - 1;
1068                                 while (p >= mtree->current_dir.s && *p != '/')
1069                                         --p;
1070                                 if (p >= mtree->current_dir.s)
1071                                         --p;
1072                                 mtree->current_dir.length
1073                                     = p - mtree->current_dir.s + 1;
1074                         }
1075                 }
1076                 if (!mtree->this_entry->used) {
1077                         use_next = 0;
1078                         r = parse_file(a, entry, mtree, mtree->this_entry,
1079                                 &use_next);
1080                         if (use_next == 0)
1081                                 return (r);
1082                 }
1083                 mtree->this_entry = mtree->this_entry->next;
1084         }
1085 }
1086
1087 /*
1088  * A single file can have multiple lines contribute specifications.
1089  * Parse as many lines as necessary, then pull additional information
1090  * from a backing file on disk as necessary.
1091  */
1092 static int
1093 parse_file(struct archive_read *a, struct archive_entry *entry,
1094     struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1095 {
1096         const char *path;
1097         struct stat st_storage, *st;
1098         struct mtree_entry *mp;
1099         struct archive_entry *sparse_entry;
1100         int r = ARCHIVE_OK, r1, parsed_kws;
1101
1102         mentry->used = 1;
1103
1104         /* Initialize reasonable defaults. */
1105         archive_entry_set_filetype(entry, AE_IFREG);
1106         archive_entry_set_size(entry, 0);
1107         archive_string_empty(&mtree->contents_name);
1108
1109         /* Parse options from this line. */
1110         parsed_kws = 0;
1111         r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1112
1113         if (mentry->full) {
1114                 archive_entry_copy_pathname(entry, mentry->name);
1115                 /*
1116                  * "Full" entries are allowed to have multiple lines
1117                  * and those lines aren't required to be adjacent.  We
1118                  * don't support multiple lines for "relative" entries
1119                  * nor do we make any attempt to merge data from
1120                  * separate "relative" and "full" entries.  (Merging
1121                  * "relative" and "full" entries would require dealing
1122                  * with pathname canonicalization, which is a very
1123                  * tricky subject.)
1124                  */
1125                 for (mp = mentry->next; mp != NULL; mp = mp->next) {
1126                         if (mp->full && !mp->used
1127                             && strcmp(mentry->name, mp->name) == 0) {
1128                                 /* Later lines override earlier ones. */
1129                                 mp->used = 1;
1130                                 r1 = parse_line(a, entry, mtree, mp,
1131                                     &parsed_kws);
1132                                 if (r1 < r)
1133                                         r = r1;
1134                         }
1135                 }
1136         } else {
1137                 /*
1138                  * Relative entries require us to construct
1139                  * the full path and possibly update the
1140                  * current directory.
1141                  */
1142                 size_t n = archive_strlen(&mtree->current_dir);
1143                 if (n > 0)
1144                         archive_strcat(&mtree->current_dir, "/");
1145                 archive_strcat(&mtree->current_dir, mentry->name);
1146                 archive_entry_copy_pathname(entry, mtree->current_dir.s);
1147                 if (archive_entry_filetype(entry) != AE_IFDIR)
1148                         mtree->current_dir.length = n;
1149         }
1150
1151         if (mtree->checkfs) {
1152                 /*
1153                  * Try to open and stat the file to get the real size
1154                  * and other file info.  It would be nice to avoid
1155                  * this here so that getting a listing of an mtree
1156                  * wouldn't require opening every referenced contents
1157                  * file.  But then we wouldn't know the actual
1158                  * contents size, so I don't see a really viable way
1159                  * around this.  (Also, we may want to someday pull
1160                  * other unspecified info from the contents file on
1161                  * disk.)
1162                  */
1163                 mtree->fd = -1;
1164                 if (archive_strlen(&mtree->contents_name) > 0)
1165                         path = mtree->contents_name.s;
1166                 else
1167                         path = archive_entry_pathname(entry);
1168
1169                 if (archive_entry_filetype(entry) == AE_IFREG ||
1170                                 archive_entry_filetype(entry) == AE_IFDIR) {
1171                         mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1172                         __archive_ensure_cloexec_flag(mtree->fd);
1173                         if (mtree->fd == -1 &&
1174                                 (errno != ENOENT ||
1175                                  archive_strlen(&mtree->contents_name) > 0)) {
1176                                 archive_set_error(&a->archive, errno,
1177                                                 "Can't open %s", path);
1178                                 r = ARCHIVE_WARN;
1179                         }
1180                 }
1181
1182                 st = &st_storage;
1183                 if (mtree->fd >= 0) {
1184                         if (fstat(mtree->fd, st) == -1) {
1185                                 archive_set_error(&a->archive, errno,
1186                                                 "Could not fstat %s", path);
1187                                 r = ARCHIVE_WARN;
1188                                 /* If we can't stat it, don't keep it open. */
1189                                 close(mtree->fd);
1190                                 mtree->fd = -1;
1191                                 st = NULL;
1192                         }
1193                 } else if (lstat(path, st) == -1) {
1194                         st = NULL;
1195                 }
1196
1197                 /*
1198                  * Check for a mismatch between the type in the specification
1199                  * and the type of the contents object on disk.
1200                  */
1201                 if (st != NULL) {
1202                         if (((st->st_mode & S_IFMT) == S_IFREG &&
1203                               archive_entry_filetype(entry) == AE_IFREG)
1204 #ifdef S_IFLNK
1205                           ||((st->st_mode & S_IFMT) == S_IFLNK &&
1206                               archive_entry_filetype(entry) == AE_IFLNK)
1207 #endif
1208 #ifdef S_IFSOCK
1209                           ||((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1210                               archive_entry_filetype(entry) == AE_IFSOCK)
1211 #endif
1212 #ifdef S_IFCHR
1213                           ||((st->st_mode & S_IFMT) == S_IFCHR &&
1214                               archive_entry_filetype(entry) == AE_IFCHR)
1215 #endif
1216 #ifdef S_IFBLK
1217                           ||((st->st_mode & S_IFMT) == S_IFBLK &&
1218                               archive_entry_filetype(entry) == AE_IFBLK)
1219 #endif
1220                           ||((st->st_mode & S_IFMT) == S_IFDIR &&
1221                               archive_entry_filetype(entry) == AE_IFDIR)
1222 #ifdef S_IFIFO
1223                           ||((st->st_mode & S_IFMT) == S_IFIFO &&
1224                               archive_entry_filetype(entry) == AE_IFIFO)
1225 #endif
1226                         ) {
1227                                 /* Types match. */
1228                         } else {
1229                                 /* Types don't match; bail out gracefully. */
1230                                 if (mtree->fd >= 0)
1231                                         close(mtree->fd);
1232                                 mtree->fd = -1;
1233                                 if (parsed_kws & MTREE_HAS_OPTIONAL) {
1234                                         /* It's not an error for an optional
1235                                          * entry to not match disk. */
1236                                         *use_next = 1;
1237                                 } else if (r == ARCHIVE_OK) {
1238                                         archive_set_error(&a->archive,
1239                                             ARCHIVE_ERRNO_MISC,
1240                                             "mtree specification has different"
1241                                             " type for %s",
1242                                             archive_entry_pathname(entry));
1243                                         r = ARCHIVE_WARN;
1244                                 }
1245                                 return (r);
1246                         }
1247                 }
1248
1249                 /*
1250                  * If there is a contents file on disk, pick some of the
1251                  * metadata from that file.  For most of these, we only
1252                  * set it from the contents if it wasn't already parsed
1253                  * from the specification.
1254                  */
1255                 if (st != NULL) {
1256                         if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1257                                 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1258                                 (archive_entry_filetype(entry) == AE_IFCHR ||
1259                                  archive_entry_filetype(entry) == AE_IFBLK))
1260                                 archive_entry_set_rdev(entry, st->st_rdev);
1261                         if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME))
1262                                 == 0 ||
1263                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1264                                 archive_entry_set_gid(entry, st->st_gid);
1265                         if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME))
1266                                 == 0 ||
1267                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1268                                 archive_entry_set_uid(entry, st->st_uid);
1269                         if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1270                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1271 #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1272                                 archive_entry_set_mtime(entry, st->st_mtime,
1273                                                 st->st_mtimespec.tv_nsec);
1274 #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1275                                 archive_entry_set_mtime(entry, st->st_mtime,
1276                                                 st->st_mtim.tv_nsec);
1277 #elif HAVE_STRUCT_STAT_ST_MTIME_N
1278                                 archive_entry_set_mtime(entry, st->st_mtime,
1279                                                 st->st_mtime_n);
1280 #elif HAVE_STRUCT_STAT_ST_UMTIME
1281                                 archive_entry_set_mtime(entry, st->st_mtime,
1282                                                 st->st_umtime*1000);
1283 #elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1284                                 archive_entry_set_mtime(entry, st->st_mtime,
1285                                                 st->st_mtime_usec*1000);
1286 #else
1287                                 archive_entry_set_mtime(entry, st->st_mtime, 0);
1288 #endif
1289                         }
1290                         if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1291                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1292                                 archive_entry_set_nlink(entry, st->st_nlink);
1293                         if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1294                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1295                                 archive_entry_set_perm(entry, st->st_mode);
1296                         if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1297                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1298                                 archive_entry_set_size(entry, st->st_size);
1299                         archive_entry_set_ino(entry, st->st_ino);
1300                         archive_entry_set_dev(entry, st->st_dev);
1301
1302                         archive_entry_linkify(mtree->resolver, &entry,
1303                                 &sparse_entry);
1304                 } else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1305                         /*
1306                          * Couldn't open the entry, stat it or the on-disk type
1307                          * didn't match.  If this entry is optional, just
1308                          * ignore it and read the next header entry.
1309                          */
1310                         *use_next = 1;
1311                         return ARCHIVE_OK;
1312                 }
1313         }
1314
1315         mtree->cur_size = archive_entry_size(entry);
1316         mtree->offset = 0;
1317
1318         return r;
1319 }
1320
1321 /*
1322  * Each line contains a sequence of keywords.
1323  */
1324 static int
1325 parse_line(struct archive_read *a, struct archive_entry *entry,
1326     struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1327 {
1328         struct mtree_option *iter;
1329         int r = ARCHIVE_OK, r1;
1330
1331         for (iter = mp->options; iter != NULL; iter = iter->next) {
1332                 r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1333                 if (r1 < r)
1334                         r = r1;
1335         }
1336         if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1337                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1338                     "Missing type keyword in mtree specification");
1339                 return (ARCHIVE_WARN);
1340         }
1341         return (r);
1342 }
1343
1344 /*
1345  * Device entries have one of the following forms:
1346  *  - raw dev_t
1347  *  - format,major,minor[,subdevice]
1348  * When parsing succeeded, `pdev' will contain the appropriate dev_t value.
1349  */
1350
/*
 * Local stand-in for strsep(), which is not in C90; built on strcspn().
 * Taken from http://unixpapa.com/incnote/string.html
 *
 * Returns the token starting at *sp, NUL-terminating it in place at the
 * first character found in `sep', and advances *sp past that separator
 * (or to the terminating NUL when no separator was found).  Returns NULL
 * when sp or *sp is NULL, or when *sp already points at the end of the
 * string -- so an empty field at the very end is not reported.
 */
static char *
la_strsep(char **sp, const char *sep)
{
	char *token, *stop;

	if (sp == NULL)
		return(NULL);
	token = *sp;
	if (token == NULL || *token == '\0')
		return(NULL);

	stop = token + strcspn(token, sep);
	if (*stop != '\0') {
		/* Cut the token off and resume after the separator. */
		*stop = '\0';
		++stop;
	}
	*sp = stop;
	return(token);
}
1366
1367 static int
1368 parse_device(dev_t *pdev, struct archive *a, char *val)
1369 {
1370 #define MAX_PACK_ARGS 3
1371         unsigned long numbers[MAX_PACK_ARGS];
1372         char *p, *dev;
1373         int argc;
1374         pack_t *pack;
1375         dev_t result;
1376         const char *error = NULL;
1377
1378         memset(pdev, 0, sizeof(*pdev));
1379         if ((dev = strchr(val, ',')) != NULL) {
1380                 /*
1381                  * Device's major/minor are given in a specified format.
1382                  * Decode and pack it accordingly.
1383                  */
1384                 *dev++ = '\0';
1385                 if ((pack = pack_find(val)) == NULL) {
1386                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1387                             "Unknown format `%s'", val);
1388                         return ARCHIVE_WARN;
1389                 }
1390                 argc = 0;
1391                 while ((p = la_strsep(&dev, ",")) != NULL) {
1392                         if (*p == '\0') {
1393                                 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1394                                     "Missing number");
1395                                 return ARCHIVE_WARN;
1396                         }
1397                         if (argc >= MAX_PACK_ARGS) {
1398                                 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1399                                     "Too many arguments");
1400                                 return ARCHIVE_WARN;
1401                         }
1402                         numbers[argc++] = (unsigned long)mtree_atol(&p);
1403                 }
1404                 if (argc < 2) {
1405                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1406                             "Not enough arguments");
1407                         return ARCHIVE_WARN;
1408                 }
1409                 result = (*pack)(argc, numbers, &error);
1410                 if (error != NULL) {
1411                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1412                             "%s", error);
1413                         return ARCHIVE_WARN;
1414                 }
1415         } else {
1416                 /* file system raw value. */
1417                 result = (dev_t)mtree_atol(&val);
1418         }
1419         *pdev = result;
1420         return ARCHIVE_OK;
1421 #undef MAX_PACK_ARGS
1422 }
1423
1424 /*
1425  * Parse a single keyword and its value.
1426  */
1427 static int
1428 parse_keyword(struct archive_read *a, struct mtree *mtree,
1429     struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1430 {
1431         char *val, *key;
1432
1433         key = opt->value;
1434
1435         if (*key == '\0')
1436                 return (ARCHIVE_OK);
1437
1438         if (strcmp(key, "nochange") == 0) {
1439                 *parsed_kws |= MTREE_HAS_NOCHANGE;
1440                 return (ARCHIVE_OK);
1441         }
1442         if (strcmp(key, "optional") == 0) {
1443                 *parsed_kws |= MTREE_HAS_OPTIONAL;
1444                 return (ARCHIVE_OK);
1445         }
1446         if (strcmp(key, "ignore") == 0) {
1447                 /*
1448                  * The mtree processing is not recursive, so
1449                  * recursion will only happen for explicitly listed
1450                  * entries.
1451                  */
1452                 return (ARCHIVE_OK);
1453         }
1454
1455         val = strchr(key, '=');
1456         if (val == NULL) {
1457                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1458                     "Malformed attribute \"%s\" (%d)", key, key[0]);
1459                 return (ARCHIVE_WARN);
1460         }
1461
1462         *val = '\0';
1463         ++val;
1464
1465         switch (key[0]) {
1466         case 'c':
1467                 if (strcmp(key, "content") == 0
1468                     || strcmp(key, "contents") == 0) {
1469                         parse_escapes(val, NULL);
1470                         archive_strcpy(&mtree->contents_name, val);
1471                         break;
1472                 }
1473                 if (strcmp(key, "cksum") == 0)
1474                         break;
1475         case 'd':
1476                 if (strcmp(key, "device") == 0) {
1477                         /* stat(2) st_rdev field, e.g. the major/minor IDs
1478                          * of a char/block special file */
1479                         int r;
1480                         dev_t dev;
1481
1482                         *parsed_kws |= MTREE_HAS_DEVICE;
1483                         r = parse_device(&dev, &a->archive, val);
1484                         if (r == ARCHIVE_OK)
1485                                 archive_entry_set_rdev(entry, dev);
1486                         return r;
1487                 }
1488         case 'f':
1489                 if (strcmp(key, "flags") == 0) {
1490                         *parsed_kws |= MTREE_HAS_FFLAGS;
1491                         archive_entry_copy_fflags_text(entry, val);
1492                         break;
1493                 }
1494         case 'g':
1495                 if (strcmp(key, "gid") == 0) {
1496                         *parsed_kws |= MTREE_HAS_GID;
1497                         archive_entry_set_gid(entry, mtree_atol10(&val));
1498                         break;
1499                 }
1500                 if (strcmp(key, "gname") == 0) {
1501                         *parsed_kws |= MTREE_HAS_GNAME;
1502                         archive_entry_copy_gname(entry, val);
1503                         break;
1504                 }
1505         case 'i':
1506                 if (strcmp(key, "inode") == 0) {
1507                         archive_entry_set_ino(entry, mtree_atol10(&val));
1508                         break;
1509                 }
1510         case 'l':
1511                 if (strcmp(key, "link") == 0) {
1512                         archive_entry_copy_symlink(entry, val);
1513                         break;
1514                 }
1515         case 'm':
1516                 if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0)
1517                         break;
1518                 if (strcmp(key, "mode") == 0) {
1519                         if (val[0] >= '0' && val[0] <= '9') {
1520                                 *parsed_kws |= MTREE_HAS_PERM;
1521                                 archive_entry_set_perm(entry,
1522                                     (mode_t)mtree_atol8(&val));
1523                         } else {
1524                                 archive_set_error(&a->archive,
1525                                     ARCHIVE_ERRNO_FILE_FORMAT,
1526                                     "Symbolic mode \"%s\" unsupported", val);
1527                                 return ARCHIVE_WARN;
1528                         }
1529                         break;
1530                 }
1531         case 'n':
1532                 if (strcmp(key, "nlink") == 0) {
1533                         *parsed_kws |= MTREE_HAS_NLINK;
1534                         archive_entry_set_nlink(entry,
1535                                 (unsigned int)mtree_atol10(&val));
1536                         break;
1537                 }
1538         case 'r':
1539                 if (strcmp(key, "resdevice") == 0) {
1540                         /* stat(2) st_dev field, e.g. the device ID where the
1541                          * inode resides */
1542                         int r;
1543                         dev_t dev;
1544
1545                         r = parse_device(&dev, &a->archive, val);
1546                         if (r == ARCHIVE_OK)
1547                                 archive_entry_set_dev(entry, dev);
1548                         return r;
1549                 }
1550                 if (strcmp(key, "rmd160") == 0 ||
1551                     strcmp(key, "rmd160digest") == 0)
1552                         break;
1553         case 's':
1554                 if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0)
1555                         break;
1556                 if (strcmp(key, "sha256") == 0 ||
1557                     strcmp(key, "sha256digest") == 0)
1558                         break;
1559                 if (strcmp(key, "sha384") == 0 ||
1560                     strcmp(key, "sha384digest") == 0)
1561                         break;
1562                 if (strcmp(key, "sha512") == 0 ||
1563                     strcmp(key, "sha512digest") == 0)
1564                         break;
1565                 if (strcmp(key, "size") == 0) {
1566                         archive_entry_set_size(entry, mtree_atol10(&val));
1567                         break;
1568                 }
1569         case 't':
1570                 if (strcmp(key, "tags") == 0) {
1571                         /*
1572                          * Comma delimited list of tags.
1573                          * Ignore the tags for now, but the interface
1574                          * should be extended to allow inclusion/exclusion.
1575                          */
1576                         break;
1577                 }
1578                 if (strcmp(key, "time") == 0) {
1579                         int64_t m;
1580                         int64_t my_time_t_max = get_time_t_max();
1581                         int64_t my_time_t_min = get_time_t_min();
1582                         long ns = 0;
1583
1584                         *parsed_kws |= MTREE_HAS_MTIME;
1585                         m = mtree_atol10(&val);
1586                         /* Replicate an old mtree bug:
1587                          * 123456789.1 represents 123456789
1588                          * seconds and 1 nanosecond. */
1589                         if (*val == '.') {
1590                                 ++val;
1591                                 ns = (long)mtree_atol10(&val);
1592                         } else
1593                                 ns = 0;
1594                         if (m > my_time_t_max)
1595                                 m = my_time_t_max;
1596                         else if (m < my_time_t_min)
1597                                 m = my_time_t_min;
1598                         archive_entry_set_mtime(entry, (time_t)m, ns);
1599                         break;
1600                 }
1601                 if (strcmp(key, "type") == 0) {
1602                         switch (val[0]) {
1603                         case 'b':
1604                                 if (strcmp(val, "block") == 0) {
1605                                         archive_entry_set_filetype(entry, AE_IFBLK);
1606                                         break;
1607                                 }
1608                         case 'c':
1609                                 if (strcmp(val, "char") == 0) {
1610                                         archive_entry_set_filetype(entry,
1611                                                 AE_IFCHR);
1612                                         break;
1613                                 }
1614                         case 'd':
1615                                 if (strcmp(val, "dir") == 0) {
1616                                         archive_entry_set_filetype(entry,
1617                                                 AE_IFDIR);
1618                                         break;
1619                                 }
1620                         case 'f':
1621                                 if (strcmp(val, "fifo") == 0) {
1622                                         archive_entry_set_filetype(entry,
1623                                                 AE_IFIFO);
1624                                         break;
1625                                 }
1626                                 if (strcmp(val, "file") == 0) {
1627                                         archive_entry_set_filetype(entry,
1628                                                 AE_IFREG);
1629                                         break;
1630                                 }
1631                         case 'l':
1632                                 if (strcmp(val, "link") == 0) {
1633                                         archive_entry_set_filetype(entry,
1634                                                 AE_IFLNK);
1635                                         break;
1636                                 }
1637                         default:
1638                                 archive_set_error(&a->archive,
1639                                     ARCHIVE_ERRNO_FILE_FORMAT,
1640                                     "Unrecognized file type \"%s\"; "
1641                                     "assuming \"file\"", val);
1642                                 archive_entry_set_filetype(entry, AE_IFREG);
1643                                 return (ARCHIVE_WARN);
1644                         }
1645                         *parsed_kws |= MTREE_HAS_TYPE;
1646                         break;
1647                 }
1648         case 'u':
1649                 if (strcmp(key, "uid") == 0) {
1650                         *parsed_kws |= MTREE_HAS_UID;
1651                         archive_entry_set_uid(entry, mtree_atol10(&val));
1652                         break;
1653                 }
1654                 if (strcmp(key, "uname") == 0) {
1655                         *parsed_kws |= MTREE_HAS_UNAME;
1656                         archive_entry_copy_uname(entry, val);
1657                         break;
1658                 }
1659         default:
1660                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1661                     "Unrecognized key %s=%s", key, val);
1662                 return (ARCHIVE_WARN);
1663         }
1664         return (ARCHIVE_OK);
1665 }
1666
1667 static int
1668 read_data(struct archive_read *a, const void **buff, size_t *size,
1669     int64_t *offset)
1670 {
1671         size_t bytes_to_read;
1672         ssize_t bytes_read;
1673         struct mtree *mtree;
1674
1675         mtree = (struct mtree *)(a->format->data);
1676         if (mtree->fd < 0) {
1677                 *buff = NULL;
1678                 *offset = 0;
1679                 *size = 0;
1680                 return (ARCHIVE_EOF);
1681         }
1682         if (mtree->buff == NULL) {
1683                 mtree->buffsize = 64 * 1024;
1684                 mtree->buff = malloc(mtree->buffsize);
1685                 if (mtree->buff == NULL) {
1686                         archive_set_error(&a->archive, ENOMEM,
1687                             "Can't allocate memory");
1688                         return (ARCHIVE_FATAL);
1689                 }
1690         }
1691
1692         *buff = mtree->buff;
1693         *offset = mtree->offset;
1694         if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1695                 bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1696         else
1697                 bytes_to_read = mtree->buffsize;
1698         bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1699         if (bytes_read < 0) {
1700                 archive_set_error(&a->archive, errno, "Can't read");
1701                 return (ARCHIVE_WARN);
1702         }
1703         if (bytes_read == 0) {
1704                 *size = 0;
1705                 return (ARCHIVE_EOF);
1706         }
1707         mtree->offset += bytes_read;
1708         *size = bytes_read;
1709         return (ARCHIVE_OK);
1710 }
1711
1712 /* Skip does nothing except possibly close the contents file. */
1713 static int
1714 skip(struct archive_read *a)
1715 {
1716         struct mtree *mtree;
1717
1718         mtree = (struct mtree *)(a->format->data);
1719         if (mtree->fd >= 0) {
1720                 close(mtree->fd);
1721                 mtree->fd = -1;
1722         }
1723         return (ARCHIVE_OK);
1724 }
1725
1726 /*
1727  * Since parsing backslash sequences always makes strings shorter,
1728  * we can always do this conversion in-place.
1729  */
1730 static void
1731 parse_escapes(char *src, struct mtree_entry *mentry)
1732 {
1733         char *dest = src;
1734         char c;
1735
1736         if (mentry != NULL && strcmp(src, ".") == 0)
1737                 mentry->full = 1;
1738
1739         while (*src != '\0') {
1740                 c = *src++;
1741                 if (c == '/' && mentry != NULL)
1742                         mentry->full = 1;
1743                 if (c == '\\') {
1744                         switch (src[0]) {
1745                         case '0':
1746                                 if (src[1] < '0' || src[1] > '7') {
1747                                         c = 0;
1748                                         ++src;
1749                                         break;
1750                                 }
1751                                 /* FALLTHROUGH */
1752                         case '1':
1753                         case '2':
1754                         case '3':
1755                                 if (src[1] >= '0' && src[1] <= '7' &&
1756                                     src[2] >= '0' && src[2] <= '7') {
1757                                         c = (src[0] - '0') << 6;
1758                                         c |= (src[1] - '0') << 3;
1759                                         c |= (src[2] - '0');
1760                                         src += 3;
1761                                 }
1762                                 break;
1763                         case 'a':
1764                                 c = '\a';
1765                                 ++src;
1766                                 break;
1767                         case 'b':
1768                                 c = '\b';
1769                                 ++src;
1770                                 break;
1771                         case 'f':
1772                                 c = '\f';
1773                                 ++src;
1774                                 break;
1775                         case 'n':
1776                                 c = '\n';
1777                                 ++src;
1778                                 break;
1779                         case 'r':
1780                                 c = '\r';
1781                                 ++src;
1782                                 break;
1783                         case 's':
1784                                 c = ' ';
1785                                 ++src;
1786                                 break;
1787                         case 't':
1788                                 c = '\t';
1789                                 ++src;
1790                                 break;
1791                         case 'v':
1792                                 c = '\v';
1793                                 ++src;
1794                                 break;
1795                         case '\\':
1796                                 c = '\\';
1797                                 ++src;
1798                                 break;
1799                         }
1800                 }
1801                 *dest++ = c;
1802         }
1803         *dest = '\0';
1804 }
1805
/*
 * Parse a non-negative octal number starting at *p and advance *p past
 * the digits that were consumed.  Returns 0 if *p does not start with an
 * octal digit.  A value that would exceed INT64_MAX saturates to
 * INT64_MAX, and *p is left on the first digit that did not fit.
 *
 * Deliberately locale-independent: strtol() honours the locale and so is
 * not a drop-in replacement.
 */
static int64_t
mtree_atol8(char **p)
{
	const int radix = 8;
	const int64_t max_prefix = INT64_MAX / radix;
	const int64_t max_final_digit = INT64_MAX % radix;
	int64_t value = 0;
	int d;

	for (d = **p - '0'; d >= 0 && d < radix; d = *++(*p) - '0') {
		if (value > max_prefix ||
		    (value == max_prefix && d > max_final_digit)) {
			/* Truncate on overflow. */
			return (INT64_MAX);
		}
		value = (value * radix) + d;
	}
	return (value);
}
1833
/*
 * Parse an optionally '-'-prefixed decimal number starting at *p and
 * advance *p past the characters that were consumed.  Returns 0 if no
 * digit follows the optional sign.  Out-of-range values saturate to
 * INT64_MAX or INT64_MIN, leaving *p on the first digit that did not fit.
 *
 * Negative numbers are accumulated as negative values so that INT64_MIN
 * can be reached without overflowing a signed 64-bit integer.
 *
 * Deliberately locale-independent: strtol() honours the locale and so is
 * not a drop-in replacement.
 */
static int64_t
mtree_atol10(char **p)
{
	const int base = 10;
	int64_t l, limit;
	int digit, last_digit_limit;

	l = 0;
	if (**p == '-') {
		limit = INT64_MIN / base;
		last_digit_limit = (int)-(INT64_MIN % base);
		++(*p);

		digit = **p - '0';
		while (digit >= 0 && digit < base) {
			if (l < limit ||
			    (l == limit && digit > last_digit_limit))
				return (INT64_MIN);
			l = (l * base) - digit;
			digit = *++(*p) - '0';
		}
		return (l);
	}

	limit = INT64_MAX / base;
	last_digit_limit = (int)(INT64_MAX % base);

	digit = **p - '0';
	while (digit >= 0 && digit < base) {
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (INT64_MAX);
		l = (l * base) + digit;
		digit = *++(*p) - '0';
	}
	return (l);
}
1868
/*
 * Convert one hexadecimal digit character to its numeric value.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 for any other
 * character.
 */
static int
parsehex(char c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	/* Letters start at ten, not zero. */
	if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	return (-1);
}
1882
/*
 * Parse an optionally '-'-prefixed hexadecimal number (no "0x" prefix)
 * starting at *p and advance *p past the characters that were consumed.
 * Digits are decoded with parsehex().  Returns 0 if no hex digit follows
 * the optional sign.  Out-of-range values saturate to INT64_MAX or
 * INT64_MIN, leaving *p on the first digit that did not fit.
 *
 * Negative numbers are accumulated as negative values so that INT64_MIN
 * can be reached without overflowing a signed 64-bit integer.
 *
 * Deliberately locale-independent: strtol() honours the locale and so is
 * not a drop-in replacement.
 */
static int64_t
mtree_atol16(char **p)
{
	const int base = 16;
	int64_t l, limit;
	int digit, last_digit_limit;

	l = 0;
	if (**p == '-') {
		limit = INT64_MIN / base;
		last_digit_limit = (int)-(INT64_MIN % base);
		++(*p);

		digit = parsehex(**p);
		while (digit >= 0 && digit < base) {
			if (l < limit ||
			    (l == limit && digit > last_digit_limit))
				return (INT64_MIN);
			l = (l * base) - digit;
			digit = parsehex(*++(*p));
		}
		return (l);
	}

	limit = INT64_MAX / base;
	last_digit_limit = (int)(INT64_MAX % base);

	digit = parsehex(**p);
	while (digit >= 0 && digit < base) {
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (INT64_MAX);
		l = (l * base) + digit;
		digit = parsehex(*++(*p));
	}
	return (l);
}
1917
/*
 * Parse a number at *p, choosing the radix from its prefix the way C
 * integer literals do: "0x"/"0X" selects hexadecimal (the prefix is
 * skipped), any other leading '0' selects octal, and everything else is
 * handed to the decimal parser.  *p is advanced by the parser chosen.
 */
static int64_t
mtree_atol(char **p)
{
	const char *s = *p;

	if (s[0] == '0') {
		if (s[1] == 'x' || s[1] == 'X') {
			*p += 2;
			return (mtree_atol16(p));
		}
		return (mtree_atol8(p));
	}
	return (mtree_atol10(p));
}
1929
1930 /*
1931  * Returns length of line (including trailing newline)
1932  * or negative on error.  'start' argument is updated to
1933  * point to first character of line.
1934  */
1935 static ssize_t
1936 readline(struct archive_read *a, struct mtree *mtree, char **start,
1937     ssize_t limit)
1938 {
1939         ssize_t bytes_read;
1940         ssize_t total_size = 0;
1941         ssize_t find_off = 0;
1942         const void *t;
1943         void *nl;
1944         char *u;
1945
1946         /* Accumulate line in a line buffer. */
1947         for (;;) {
1948                 /* Read some more. */
1949                 t = __archive_read_ahead(a, 1, &bytes_read);
1950                 if (t == NULL)
1951                         return (0);
1952                 if (bytes_read < 0)
1953                         return (ARCHIVE_FATAL);
1954                 nl = memchr(t, '\n', bytes_read);
1955                 /* If we found '\n', trim the read to end exactly there. */
1956                 if (nl != NULL) {
1957                         bytes_read = ((const char *)nl) - ((const char *)t) + 1;
1958                 }
1959                 if (total_size + bytes_read + 1 > limit) {
1960                         archive_set_error(&a->archive,
1961                             ARCHIVE_ERRNO_FILE_FORMAT,
1962                             "Line too long");
1963                         return (ARCHIVE_FATAL);
1964                 }
1965                 if (archive_string_ensure(&mtree->line,
1966                         total_size + bytes_read + 1) == NULL) {
1967                         archive_set_error(&a->archive, ENOMEM,
1968                             "Can't allocate working buffer");
1969                         return (ARCHIVE_FATAL);
1970                 }
1971                 /* Append new bytes to string. */
1972                 memcpy(mtree->line.s + total_size, t, bytes_read);
1973                 __archive_read_consume(a, bytes_read);
1974                 total_size += bytes_read;
1975                 mtree->line.s[total_size] = '\0';
1976
1977                 for (u = mtree->line.s + find_off; *u; ++u) {
1978                         if (u[0] == '\n') {
1979                                 /* Ends with unescaped newline. */
1980                                 *start = mtree->line.s;
1981                                 return total_size;
1982                         } else if (u[0] == '#') {
1983                                 /* Ends with comment sequence #...\n */
1984                                 if (nl == NULL) {
1985                                         /* But we've not found the \n yet */
1986                                         break;
1987                                 }
1988                         } else if (u[0] == '\\') {
1989                                 if (u[1] == '\n') {
1990                                         /* Trim escaped newline. */
1991                                         total_size -= 2;
1992                                         mtree->line.s[total_size] = '\0';
1993                                         break;
1994                                 } else if (u[1] != '\0') {
1995                                         /* Skip the two-char escape sequence */
1996                                         ++u;
1997                                 }
1998                         }
1999                 }
2000                 find_off = u - mtree->line.s;
2001         }
2002 }