]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/libarchive/libarchive/archive_read_support_format_mtree.c
MFC r299529,r299540,r299576,r299896:
[FreeBSD/stable/10.git] / contrib / libarchive / libarchive / archive_read_support_format_mtree.c
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * Copyright (c) 2008 Joerg Sonnenberger
4  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD$");
30
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_FCNTL_H
38 #include <fcntl.h>
39 #endif
40 #include <stddef.h>
41 /* #include <stdint.h> */ /* See archive_platform.h */
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 #ifdef HAVE_STRING_H
46 #include <string.h>
47 #endif
48
49 #include "archive.h"
50 #include "archive_entry.h"
51 #include "archive_private.h"
52 #include "archive_read_private.h"
53 #include "archive_string.h"
54 #include "archive_pack_dev.h"
55
56 #ifndef O_BINARY
57 #define O_BINARY 0
58 #endif
59 #ifndef O_CLOEXEC
60 #define O_CLOEXEC       0
61 #endif
62
63 #define MTREE_HAS_DEVICE        0x0001
64 #define MTREE_HAS_FFLAGS        0x0002
65 #define MTREE_HAS_GID           0x0004
66 #define MTREE_HAS_GNAME         0x0008
67 #define MTREE_HAS_MTIME         0x0010
68 #define MTREE_HAS_NLINK         0x0020
69 #define MTREE_HAS_PERM          0x0040
70 #define MTREE_HAS_SIZE          0x0080
71 #define MTREE_HAS_TYPE          0x0100
72 #define MTREE_HAS_UID           0x0200
73 #define MTREE_HAS_UNAME         0x0400
74
75 #define MTREE_HAS_OPTIONAL      0x0800
76 #define MTREE_HAS_NOCHANGE      0x1000 /* FreeBSD specific */
77
78 struct mtree_option {
79         struct mtree_option *next;
80         char *value;
81 };
82
83 struct mtree_entry {
84         struct mtree_entry *next;
85         struct mtree_option *options;
86         char *name;
87         char full;
88         char used;
89 };
90
91 struct mtree {
92         struct archive_string    line;
93         size_t                   buffsize;
94         char                    *buff;
95         int64_t                  offset;
96         int                      fd;
97         int                      archive_format;
98         const char              *archive_format_name;
99         struct mtree_entry      *entries;
100         struct mtree_entry      *this_entry;
101         struct archive_string    current_dir;
102         struct archive_string    contents_name;
103
104         struct archive_entry_linkresolver *resolver;
105
106         int64_t                  cur_size;
107         char checkfs;
108 };
109
110 static int      bid_keycmp(const char *, const char *, ssize_t);
111 static int      cleanup(struct archive_read *);
112 static int      detect_form(struct archive_read *, int *);
113 static int      mtree_bid(struct archive_read *, int);
114 static int      parse_file(struct archive_read *, struct archive_entry *,
115                     struct mtree *, struct mtree_entry *, int *);
116 static void     parse_escapes(char *, struct mtree_entry *);
117 static int      parse_line(struct archive_read *, struct archive_entry *,
118                     struct mtree *, struct mtree_entry *, int *);
119 static int      parse_keyword(struct archive_read *, struct mtree *,
120                     struct archive_entry *, struct mtree_option *, int *);
121 static int      read_data(struct archive_read *a,
122                     const void **buff, size_t *size, int64_t *offset);
123 static ssize_t  readline(struct archive_read *, struct mtree *, char **, ssize_t);
124 static int      skip(struct archive_read *a);
125 static int      read_header(struct archive_read *,
126                     struct archive_entry *);
127 static int64_t  mtree_atol10(char **);
128 static int64_t  mtree_atol8(char **);
129 static int64_t  mtree_atol(char **);
130
131 /*
132  * There's no standard for TIME_T_MAX/TIME_T_MIN.  So we compute them
133  * here.  TODO: Move this to configure time, but be careful
134  * about cross-compile environments.
135  */
136 static int64_t
137 get_time_t_max(void)
138 {
139 #if defined(TIME_T_MAX)
140         return TIME_T_MAX;
141 #else
142         /* ISO C allows time_t to be a floating-point type,
143            but POSIX requires an integer type.  The following
144            should work on any system that follows the POSIX
145            conventions. */
146         if (((time_t)0) < ((time_t)-1)) {
147                 /* Time_t is unsigned */
148                 return (~(time_t)0);
149         } else {
150                 /* Time_t is signed. */
151                 /* Assume it's the same as int64_t or int32_t */
152                 if (sizeof(time_t) == sizeof(int64_t)) {
153                         return (time_t)INT64_MAX;
154                 } else {
155                         return (time_t)INT32_MAX;
156                 }
157         }
158 #endif
159 }
160
161 static int64_t
162 get_time_t_min(void)
163 {
164 #if defined(TIME_T_MIN)
165         return TIME_T_MIN;
166 #else
167         if (((time_t)0) < ((time_t)-1)) {
168                 /* Time_t is unsigned */
169                 return (time_t)0;
170         } else {
171                 /* Time_t is signed. */
172                 if (sizeof(time_t) == sizeof(int64_t)) {
173                         return (time_t)INT64_MIN;
174                 } else {
175                         return (time_t)INT32_MIN;
176                 }
177         }
178 #endif
179 }
180
181 static int
182 archive_read_format_mtree_options(struct archive_read *a,
183     const char *key, const char *val)
184 {
185         struct mtree *mtree;
186
187         mtree = (struct mtree *)(a->format->data);
188         if (strcmp(key, "checkfs")  == 0) {
189                 /* Allows to read information missing from the mtree from the file system */
190                 if (val == NULL || val[0] == 0) {
191                         mtree->checkfs = 0;
192                 } else {
193                         mtree->checkfs = 1;
194                 }
195                 return (ARCHIVE_OK);
196         }
197
198         /* Note: The "warn" return is just to inform the options
199          * supervisor that we didn't handle it.  It will generate
200          * a suitable error if no one used this option. */
201         return (ARCHIVE_WARN);
202 }
203
204 static void
205 free_options(struct mtree_option *head)
206 {
207         struct mtree_option *next;
208
209         for (; head != NULL; head = next) {
210                 next = head->next;
211                 free(head->value);
212                 free(head);
213         }
214 }
215
216 int
217 archive_read_support_format_mtree(struct archive *_a)
218 {
219         struct archive_read *a = (struct archive_read *)_a;
220         struct mtree *mtree;
221         int r;
222
223         archive_check_magic(_a, ARCHIVE_READ_MAGIC,
224             ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
225
226         mtree = (struct mtree *)malloc(sizeof(*mtree));
227         if (mtree == NULL) {
228                 archive_set_error(&a->archive, ENOMEM,
229                     "Can't allocate mtree data");
230                 return (ARCHIVE_FATAL);
231         }
232         memset(mtree, 0, sizeof(*mtree));
233         mtree->fd = -1;
234
235         r = __archive_read_register_format(a, mtree, "mtree",
236            mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL);
237
238         if (r != ARCHIVE_OK)
239                 free(mtree);
240         return (ARCHIVE_OK);
241 }
242
243 static int
244 cleanup(struct archive_read *a)
245 {
246         struct mtree *mtree;
247         struct mtree_entry *p, *q;
248
249         mtree = (struct mtree *)(a->format->data);
250
251         p = mtree->entries;
252         while (p != NULL) {
253                 q = p->next;
254                 free(p->name);
255                 free_options(p->options);
256                 free(p);
257                 p = q;
258         }
259         archive_string_free(&mtree->line);
260         archive_string_free(&mtree->current_dir);
261         archive_string_free(&mtree->contents_name);
262         archive_entry_linkresolver_free(mtree->resolver);
263
264         free(mtree->buff);
265         free(mtree);
266         (a->format->data) = NULL;
267         return (ARCHIVE_OK);
268 }
269
270 static ssize_t
271 get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
272 {
273         ssize_t len;
274
275         len = 0;
276         while (len < avail) {
277                 switch (*b) {
278                 case '\0':/* Non-ascii character or control character. */
279                         if (nlsize != NULL)
280                                 *nlsize = 0;
281                         return (-1);
282                 case '\r':
283                         if (avail-len > 1 && b[1] == '\n') {
284                                 if (nlsize != NULL)
285                                         *nlsize = 2;
286                                 return (len+2);
287                         }
288                         /* FALL THROUGH */
289                 case '\n':
290                         if (nlsize != NULL)
291                                 *nlsize = 1;
292                         return (len+1);
293                 default:
294                         b++;
295                         len++;
296                         break;
297                 }
298         }
299         if (nlsize != NULL)
300                 *nlsize = 0;
301         return (avail);
302 }
303
304 static ssize_t
305 next_line(struct archive_read *a,
306     const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
307 {
308         ssize_t len;
309         int quit;
310         
311         quit = 0;
312         if (*avail == 0) {
313                 *nl = 0;
314                 len = 0;
315         } else
316                 len = get_line_size(*b, *avail, nl);
317         /*
318          * Read bytes more while it does not reach the end of line.
319          */
320         while (*nl == 0 && len == *avail && !quit) {
321                 ssize_t diff = *ravail - *avail;
322                 size_t nbytes_req = (*ravail+1023) & ~1023U;
323                 ssize_t tested;
324
325                 /* Increase reading bytes if it is not enough to at least
326                  * new two lines. */
327                 if (nbytes_req < (size_t)*ravail + 160)
328                         nbytes_req <<= 1;
329
330                 *b = __archive_read_ahead(a, nbytes_req, avail);
331                 if (*b == NULL) {
332                         if (*ravail >= *avail)
333                                 return (0);
334                         /* Reading bytes reaches the end of file. */
335                         *b = __archive_read_ahead(a, *avail, avail);
336                         quit = 1;
337                 }
338                 *ravail = *avail;
339                 *b += diff;
340                 *avail -= diff;
341                 tested = len;/* Skip some bytes we already determinated. */
342                 len = get_line_size(*b, *avail, nl);
343                 if (len >= 0)
344                         len += tested;
345         }
346         return (len);
347 }
348
349 /*
350  * Compare characters with a mtree keyword.
351  * Returns the length of a mtree keyword if matched.
352  * Returns 0 if not matched.
353  */
354 static int
355 bid_keycmp(const char *p, const char *key, ssize_t len)
356 {
357         int match_len = 0;
358
359         while (len > 0 && *p && *key) {
360                 if (*p == *key) {
361                         --len;
362                         ++p;
363                         ++key;
364                         ++match_len;
365                         continue;
366                 }
367                 return (0);/* Not match */
368         }
369         if (*key != '\0')
370                 return (0);/* Not match */
371
372         /* A following character should be specified characters */
373         if (p[0] == '=' || p[0] == ' ' || p[0] == '\t' ||
374             p[0] == '\n' || p[0] == '\r' ||
375            (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r')))
376                 return (match_len);
377         return (0);/* Not match */
378 }
379
380 /*
381  * Test whether the characters 'p' has is mtree keyword.
382  * Returns the length of a detected keyword.
383  * Returns 0 if any keywords were not found.
384  */
385 static int
386 bid_keyword(const char *p,  ssize_t len)
387 {
388         static const char *keys_c[] = {
389                 "content", "contents", "cksum", NULL
390         };
391         static const char *keys_df[] = {
392                 "device", "flags", NULL
393         };
394         static const char *keys_g[] = {
395                 "gid", "gname", NULL
396         };
397         static const char *keys_il[] = {
398                 "ignore", "inode", "link", NULL
399         };
400         static const char *keys_m[] = {
401                 "md5", "md5digest", "mode", NULL
402         };
403         static const char *keys_no[] = {
404                 "nlink", "nochange", "optional", NULL
405         };
406         static const char *keys_r[] = {
407                 "resdevice", "rmd160", "rmd160digest", NULL
408         };
409         static const char *keys_s[] = {
410                 "sha1", "sha1digest",
411                 "sha256", "sha256digest",
412                 "sha384", "sha384digest",
413                 "sha512", "sha512digest",
414                 "size", NULL
415         };
416         static const char *keys_t[] = {
417                 "tags", "time", "type", NULL
418         };
419         static const char *keys_u[] = {
420                 "uid", "uname", NULL
421         };
422         const char **keys;
423         int i;
424
425         switch (*p) {
426         case 'c': keys = keys_c; break;
427         case 'd': case 'f': keys = keys_df; break;
428         case 'g': keys = keys_g; break;
429         case 'i': case 'l': keys = keys_il; break;
430         case 'm': keys = keys_m; break;
431         case 'n': case 'o': keys = keys_no; break;
432         case 'r': keys = keys_r; break;
433         case 's': keys = keys_s; break;
434         case 't': keys = keys_t; break;
435         case 'u': keys = keys_u; break;
436         default: return (0);/* Unknown key */
437         }
438
439         for (i = 0; keys[i] != NULL; i++) {
440                 int l = bid_keycmp(p, keys[i], len);
441                 if (l > 0)
442                         return (l);
443         }
444         return (0);/* Unknown key */
445 }
446
447 /*
448  * Test whether there is a set of mtree keywords.
449  * Returns the number of keyword.
450  * Returns -1 if we got incorrect sequence.
451  * This function expects a set of "<space characters>keyword=value".
452  * When "unset" is specified, expects a set of "<space characters>keyword".
453  */
454 static int
455 bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
456 {
457         int l;
458         int keycnt = 0;
459
460         while (len > 0 && *p) {
461                 int blank = 0;
462
463                 /* Test whether there are blank characters in the line. */
464                 while (len >0 && (*p == ' ' || *p == '\t')) {
465                         ++p;
466                         --len;
467                         blank = 1;
468                 }
469                 if (*p == '\n' || *p == '\r')
470                         break;
471                 if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))
472                         break;
473                 if (!blank && !last_is_path) /* No blank character. */
474                         return (-1);
475                 if (last_is_path && len == 0)
476                                 return (keycnt);
477
478                 if (unset) {
479                         l = bid_keycmp(p, "all", len);
480                         if (l > 0)
481                                 return (1);
482                 }
483                 /* Test whether there is a correct key in the line. */
484                 l = bid_keyword(p, len);
485                 if (l == 0)
486                         return (-1);/* Unknown keyword was found. */
487                 p += l;
488                 len -= l;
489                 keycnt++;
490
491                 /* Skip value */
492                 if (*p == '=') {
493                         int value = 0;
494                         ++p;
495                         --len;
496                         while (len > 0 && *p != ' ' && *p != '\t') {
497                                 ++p;
498                                 --len;
499                                 value = 1;
500                         }
501                         /* A keyword should have a its value unless
502                          * "/unset" operation. */ 
503                         if (!unset && value == 0)
504                                 return (-1);
505                 }
506         }
507         return (keycnt);
508 }
509
510 static int
511 bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
512 {
513         int f = 0;
514         static const unsigned char safe_char[256] = {
515                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
516                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
517                 /* !"$%&'()*+,-./  EXCLUSION:( )(#) */
518                 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
519                 /* 0123456789:;<>?  EXCLUSION:(=) */
520                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */
521                 /* @ABCDEFGHIJKLMNO */
522                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
523                 /* PQRSTUVWXYZ[\]^_  */
524                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
525                 /* `abcdefghijklmno */
526                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
527                 /* pqrstuvwxyz{|}~ */
528                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
529                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
530                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
531                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
532                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
533                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
534                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
535                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
536                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
537         };
538         ssize_t ll;
539         const char *pp = p;
540         const char * const pp_end = pp + len;
541
542         *last_is_path = 0;
543         /*
544          * Skip the path-name which is quoted.
545          */
546         for (;pp < pp_end; ++pp) {
547                 if (!safe_char[*(const unsigned char *)pp]) {
548                         if (*pp != ' ' && *pp != '\t' && *pp != '\r'
549                             && *pp != '\n')
550                                 f = 0;
551                         break;
552                 }
553                 f = 1;
554         }
555         ll = pp_end - pp;
556
557         /* If a path-name was not found at the first, try to check
558          * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates,
559          * which places the path-name at the last. */
560         if (f == 0) {
561                 const char *pb = p + len - nl;
562                 int name_len = 0;
563                 int slash;
564
565                 /* The form D accepts only a single line for an entry. */
566                 if (pb-2 >= p &&
567                     pb[-1] == '\\' && (pb[-2] == ' ' || pb[-2] == '\t'))
568                         return (-1);
569                 if (pb-1 >= p && pb[-1] == '\\')
570                         return (-1);
571
572                 slash = 0;
573                 while (p <= --pb && *pb != ' ' && *pb != '\t') {
574                         if (!safe_char[*(const unsigned char *)pb])
575                                 return (-1);
576                         name_len++;
577                         /* The pathname should have a slash in this
578                          * format. */
579                         if (*pb == '/')
580                                 slash = 1;
581                 }
582                 if (name_len == 0 || slash == 0)
583                         return (-1);
584                 /* If '/' is placed at the first in this field, this is not
585                  * a valid filename. */
586                 if (pb[1] == '/')
587                         return (-1);
588                 ll = len - nl - name_len;
589                 pp = p;
590                 *last_is_path = 1;
591         }
592
593         return (bid_keyword_list(pp, ll, 0, *last_is_path));
594 }
595
596 #define MAX_BID_ENTRY   3
597
598 static int
599 mtree_bid(struct archive_read *a, int best_bid)
600 {
601         const char *signature = "#mtree";
602         const char *p;
603
604         (void)best_bid; /* UNUSED */
605
606         /* Now let's look at the actual header and see if it matches. */
607         p = __archive_read_ahead(a, strlen(signature), NULL);
608         if (p == NULL)
609                 return (-1);
610
611         if (memcmp(p, signature, strlen(signature)) == 0)
612                 return (8 * (int)strlen(signature));
613
614         /*
615          * There is not a mtree signature. Let's try to detect mtree format.
616          */
617         return (detect_form(a, NULL));
618 }
619
620 static int
621 detect_form(struct archive_read *a, int *is_form_d)
622 {
623         const char *p;
624         ssize_t avail, ravail;
625         ssize_t detected_bytes = 0, len, nl;
626         int entry_cnt = 0, multiline = 0;
627         int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
628                         * (In this source we call it `form D') . */
629
630         if (is_form_d != NULL)
631                 *is_form_d = 0;
632         p = __archive_read_ahead(a, 1, &avail);
633         if (p == NULL)
634                 return (-1);
635         ravail = avail;
636         for (;;) {
637                 len = next_line(a, &p, &avail, &ravail, &nl);
638                 /* The terminal character of the line should be
639                  * a new line character, '\r\n' or '\n'. */
640                 if (len <= 0 || nl == 0)
641                         break;
642                 if (!multiline) {
643                         /* Leading whitespace is never significant,
644                          * ignore it. */
645                         while (len > 0 && (*p == ' ' || *p == '\t')) {
646                                 ++p;
647                                 --avail;
648                                 --len;
649                         }
650                         /* Skip comment or empty line. */ 
651                         if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
652                                 p += len;
653                                 avail -= len;
654                                 continue;
655                         }
656                 } else {
657                         /* A continuance line; the terminal
658                          * character of previous line was '\' character. */
659                         if (bid_keyword_list(p, len, 0, 0) <= 0)
660                                 break;
661                         if (multiline == 1)
662                                 detected_bytes += len;
663                         if (p[len-nl-1] != '\\') {
664                                 if (multiline == 1 &&
665                                     ++entry_cnt >= MAX_BID_ENTRY)
666                                         break;
667                                 multiline = 0;
668                         }
669                         p += len;
670                         avail -= len;
671                         continue;
672                 }
673                 if (p[0] != '/') {
674                         int last_is_path, keywords;
675
676                         keywords = bid_entry(p, len, nl, &last_is_path);
677                         if (keywords >= 0) {
678                                 detected_bytes += len;
679                                 if (form_D == 0) {
680                                         if (last_is_path)
681                                                 form_D = 1;
682                                         else if (keywords > 0)
683                                                 /* This line is not `form D'. */
684                                                 form_D = -1;
685                                 } else if (form_D == 1) {
686                                         if (!last_is_path && keywords > 0)
687                                                 /* This this is not `form D'
688                                                  * and We cannot accept mixed
689                                                  * format. */
690                                                 break;
691                                 }
692                                 if (!last_is_path && p[len-nl-1] == '\\')
693                                         /* This line continues. */
694                                         multiline = 1;
695                                 else {
696                                         /* We've got plenty of correct lines
697                                          * to assume that this file is a mtree
698                                          * format. */
699                                         if (++entry_cnt >= MAX_BID_ENTRY)
700                                                 break;
701                                 }
702                         } else
703                                 break;
704                 } else if (strncmp(p, "/set", 4) == 0) {
705                         if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
706                                 break;
707                         /* This line continues. */
708                         if (p[len-nl-1] == '\\')
709                                 multiline = 2;
710                 } else if (strncmp(p, "/unset", 6) == 0) {
711                         if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
712                                 break;
713                         /* This line continues. */
714                         if (p[len-nl-1] == '\\')
715                                 multiline = 2;
716                 } else
717                         break;
718
719                 /* Test next line. */
720                 p += len;
721                 avail -= len;
722         }
723         if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
724                 if (is_form_d != NULL) {
725                         if (form_D == 1)
726                                 *is_form_d = 1;
727                 }
728                 return (32);
729         }
730
731         return (0);
732 }
733
734 /*
735  * The extended mtree format permits multiple lines specifying
736  * attributes for each file.  For those entries, only the last line
737  * is actually used.  Practically speaking, that means we have
738  * to read the entire mtree file into memory up front.
739  *
740  * The parsing is done in two steps.  First, it is decided if a line
741  * changes the global defaults and if it is, processed accordingly.
742  * Otherwise, the options of the line are merged with the current
743  * global options.
744  */
745 static int
746 add_option(struct archive_read *a, struct mtree_option **global,
747     const char *value, size_t len)
748 {
749         struct mtree_option *opt;
750
751         if ((opt = malloc(sizeof(*opt))) == NULL) {
752                 archive_set_error(&a->archive, errno, "Can't allocate memory");
753                 return (ARCHIVE_FATAL);
754         }
755         if ((opt->value = malloc(len + 1)) == NULL) {
756                 free(opt);
757                 archive_set_error(&a->archive, errno, "Can't allocate memory");
758                 return (ARCHIVE_FATAL);
759         }
760         memcpy(opt->value, value, len);
761         opt->value[len] = '\0';
762         opt->next = *global;
763         *global = opt;
764         return (ARCHIVE_OK);
765 }
766
767 static void
768 remove_option(struct mtree_option **global, const char *value, size_t len)
769 {
770         struct mtree_option *iter, *last;
771
772         last = NULL;
773         for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
774                 if (strncmp(iter->value, value, len) == 0 &&
775                     (iter->value[len] == '\0' ||
776                      iter->value[len] == '='))
777                         break;
778         }
779         if (iter == NULL)
780                 return;
781         if (last == NULL)
782                 *global = iter->next;
783         else
784                 last->next = iter->next;
785
786         free(iter->value);
787         free(iter);
788 }
789
790 static int
791 process_global_set(struct archive_read *a,
792     struct mtree_option **global, const char *line)
793 {
794         const char *next, *eq;
795         size_t len;
796         int r;
797
798         line += 4;
799         for (;;) {
800                 next = line + strspn(line, " \t\r\n");
801                 if (*next == '\0')
802                         return (ARCHIVE_OK);
803                 line = next;
804                 next = line + strcspn(line, " \t\r\n");
805                 eq = strchr(line, '=');
806                 if (eq > next)
807                         len = next - line;
808                 else
809                         len = eq - line;
810
811                 remove_option(global, line, len);
812                 r = add_option(a, global, line, next - line);
813                 if (r != ARCHIVE_OK)
814                         return (r);
815                 line = next;
816         }
817 }
818
819 static int
820 process_global_unset(struct archive_read *a,
821     struct mtree_option **global, const char *line)
822 {
823         const char *next;
824         size_t len;
825
826         line += 6;
827         if (strchr(line, '=') != NULL) {
828                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
829                     "/unset shall not contain `='");
830                 return ARCHIVE_FATAL;
831         }
832
833         for (;;) {
834                 next = line + strspn(line, " \t\r\n");
835                 if (*next == '\0')
836                         return (ARCHIVE_OK);
837                 line = next;
838                 len = strcspn(line, " \t\r\n");
839
840                 if (len == 3 && strncmp(line, "all", 3) == 0) {
841                         free_options(*global);
842                         *global = NULL;
843                 } else {
844                         remove_option(global, line, len);
845                 }
846
847                 line += len;
848         }
849 }
850
851 static int
852 process_add_entry(struct archive_read *a, struct mtree *mtree,
853     struct mtree_option **global, const char *line, ssize_t line_len,
854     struct mtree_entry **last_entry, int is_form_d)
855 {
856         struct mtree_entry *entry;
857         struct mtree_option *iter;
858         const char *next, *eq, *name, *end;
859         size_t name_len, len;
860         int r, i;
861
862         if ((entry = malloc(sizeof(*entry))) == NULL) {
863                 archive_set_error(&a->archive, errno, "Can't allocate memory");
864                 return (ARCHIVE_FATAL);
865         }
866         entry->next = NULL;
867         entry->options = NULL;
868         entry->name = NULL;
869         entry->used = 0;
870         entry->full = 0;
871
872         /* Add this entry to list. */
873         if (*last_entry == NULL)
874                 mtree->entries = entry;
875         else
876                 (*last_entry)->next = entry;
877         *last_entry = entry;
878
879         if (is_form_d) {
880                 /* Filename is last item on line. */
881                 /* Adjust line_len to trim trailing whitespace */
882                 while (line_len > 0) {
883                         char last_character = line[line_len - 1];
884                         if (last_character == '\r'
885                             || last_character == '\n'
886                             || last_character == '\t'
887                             || last_character == ' ') {
888                                 line_len--;
889                         } else {
890                                 break;
891                         }
892                 }
893                 /* Name starts after the last whitespace separator */
894                 name = line;
895                 for (i = 0; i < line_len; i++) {
896                         if (line[i] == '\r'
897                             || line[i] == '\n'
898                             || line[i] == '\t'
899                             || line[i] == ' ') {
900                                 name = line + i + 1;
901                         }
902                 }
903                 name_len = line + line_len - name;
904                 end = name;
905         } else {
906                 /* Filename is first item on line */
907                 name_len = strcspn(line, " \t\r\n");
908                 name = line;
909                 line += name_len;
910                 end = line + line_len;
911         }
912         /* name/name_len is the name within the line. */
913         /* line..end brackets the entire line except the name */
914
915         if ((entry->name = malloc(name_len + 1)) == NULL) {
916                 archive_set_error(&a->archive, errno, "Can't allocate memory");
917                 return (ARCHIVE_FATAL);
918         }
919
920         memcpy(entry->name, name, name_len);
921         entry->name[name_len] = '\0';
922         parse_escapes(entry->name, entry);
923
924         for (iter = *global; iter != NULL; iter = iter->next) {
925                 r = add_option(a, &entry->options, iter->value,
926                     strlen(iter->value));
927                 if (r != ARCHIVE_OK)
928                         return (r);
929         }
930
931         for (;;) {
932                 next = line + strspn(line, " \t\r\n");
933                 if (*next == '\0')
934                         return (ARCHIVE_OK);
935                 if (next >= end)
936                         return (ARCHIVE_OK);
937                 line = next;
938                 next = line + strcspn(line, " \t\r\n");
939                 eq = strchr(line, '=');
940                 if (eq == NULL || eq > next)
941                         len = next - line;
942                 else
943                         len = eq - line;
944
945                 remove_option(&entry->options, line, len);
946                 r = add_option(a, &entry->options, line, next - line);
947                 if (r != ARCHIVE_OK)
948                         return (r);
949                 line = next;
950         }
951 }
952
953 static int
954 read_mtree(struct archive_read *a, struct mtree *mtree)
955 {
956         ssize_t len;
957         uintmax_t counter;
958         char *p;
959         struct mtree_option *global;
960         struct mtree_entry *last_entry;
961         int r, is_form_d;
962
963         mtree->archive_format = ARCHIVE_FORMAT_MTREE;
964         mtree->archive_format_name = "mtree";
965
966         global = NULL;
967         last_entry = NULL;
968
969         (void)detect_form(a, &is_form_d);
970
971         for (counter = 1; ; ++counter) {
972                 len = readline(a, mtree, &p, 65536);
973                 if (len == 0) {
974                         mtree->this_entry = mtree->entries;
975                         free_options(global);
976                         return (ARCHIVE_OK);
977                 }
978                 if (len < 0) {
979                         free_options(global);
980                         return ((int)len);
981                 }
982                 /* Leading whitespace is never significant, ignore it. */
983                 while (*p == ' ' || *p == '\t') {
984                         ++p;
985                         --len;
986                 }
987                 /* Skip content lines and blank lines. */
988                 if (*p == '#')
989                         continue;
990                 if (*p == '\r' || *p == '\n' || *p == '\0')
991                         continue;
992                 if (*p != '/') {
993                         r = process_add_entry(a, mtree, &global, p, len,
994                             &last_entry, is_form_d);
995                 } else if (strncmp(p, "/set", 4) == 0) {
996                         if (p[4] != ' ' && p[4] != '\t')
997                                 break;
998                         r = process_global_set(a, &global, p);
999                 } else if (strncmp(p, "/unset", 6) == 0) {
1000                         if (p[6] != ' ' && p[6] != '\t')
1001                                 break;
1002                         r = process_global_unset(a, &global, p);
1003                 } else
1004                         break;
1005
1006                 if (r != ARCHIVE_OK) {
1007                         free_options(global);
1008                         return r;
1009                 }
1010         }
1011
1012         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1013             "Can't parse line %ju", counter);
1014         free_options(global);
1015         return (ARCHIVE_FATAL);
1016 }
1017
1018 /*
1019  * Read in the entire mtree file into memory on the first request.
1020  * Then use the next unused file to satisfy each header request.
1021  */
1022 static int
1023 read_header(struct archive_read *a, struct archive_entry *entry)
1024 {
1025         struct mtree *mtree;
1026         char *p;
1027         int r, use_next;
1028
1029         mtree = (struct mtree *)(a->format->data);
1030
1031         if (mtree->fd >= 0) {
1032                 close(mtree->fd);
1033                 mtree->fd = -1;
1034         }
1035
1036         if (mtree->entries == NULL) {
1037                 mtree->resolver = archive_entry_linkresolver_new();
1038                 if (mtree->resolver == NULL)
1039                         return ARCHIVE_FATAL;
1040                 archive_entry_linkresolver_set_strategy(mtree->resolver,
1041                     ARCHIVE_FORMAT_MTREE);
1042                 r = read_mtree(a, mtree);
1043                 if (r != ARCHIVE_OK)
1044                         return (r);
1045         }
1046
1047         a->archive.archive_format = mtree->archive_format;
1048         a->archive.archive_format_name = mtree->archive_format_name;
1049
1050         for (;;) {
1051                 if (mtree->this_entry == NULL)
1052                         return (ARCHIVE_EOF);
1053                 if (strcmp(mtree->this_entry->name, "..") == 0) {
1054                         mtree->this_entry->used = 1;
1055                         if (archive_strlen(&mtree->current_dir) > 0) {
1056                                 /* Roll back current path. */
1057                                 p = mtree->current_dir.s
1058                                     + mtree->current_dir.length - 1;
1059                                 while (p >= mtree->current_dir.s && *p != '/')
1060                                         --p;
1061                                 if (p >= mtree->current_dir.s)
1062                                         --p;
1063                                 mtree->current_dir.length
1064                                     = p - mtree->current_dir.s + 1;
1065                         }
1066                 }
1067                 if (!mtree->this_entry->used) {
1068                         use_next = 0;
1069                         r = parse_file(a, entry, mtree, mtree->this_entry,
1070                                 &use_next);
1071                         if (use_next == 0)
1072                                 return (r);
1073                 }
1074                 mtree->this_entry = mtree->this_entry->next;
1075         }
1076 }
1077
1078 /*
1079  * A single file can have multiple lines contribute specifications.
1080  * Parse as many lines as necessary, then pull additional information
1081  * from a backing file on disk as necessary.
1082  */
1083 static int
1084 parse_file(struct archive_read *a, struct archive_entry *entry,
1085     struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1086 {
1087         const char *path;
1088         struct stat st_storage, *st;
1089         struct mtree_entry *mp;
1090         struct archive_entry *sparse_entry;
1091         int r = ARCHIVE_OK, r1, parsed_kws;
1092
1093         mentry->used = 1;
1094
1095         /* Initialize reasonable defaults. */
1096         archive_entry_set_filetype(entry, AE_IFREG);
1097         archive_entry_set_size(entry, 0);
1098         archive_string_empty(&mtree->contents_name);
1099
1100         /* Parse options from this line. */
1101         parsed_kws = 0;
1102         r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1103
1104         if (mentry->full) {
1105                 archive_entry_copy_pathname(entry, mentry->name);
1106                 /*
1107                  * "Full" entries are allowed to have multiple lines
1108                  * and those lines aren't required to be adjacent.  We
1109                  * don't support multiple lines for "relative" entries
1110                  * nor do we make any attempt to merge data from
1111                  * separate "relative" and "full" entries.  (Merging
1112                  * "relative" and "full" entries would require dealing
1113                  * with pathname canonicalization, which is a very
1114                  * tricky subject.)
1115                  */
1116                 for (mp = mentry->next; mp != NULL; mp = mp->next) {
1117                         if (mp->full && !mp->used
1118                             && strcmp(mentry->name, mp->name) == 0) {
1119                                 /* Later lines override earlier ones. */
1120                                 mp->used = 1;
1121                                 r1 = parse_line(a, entry, mtree, mp,
1122                                     &parsed_kws);
1123                                 if (r1 < r)
1124                                         r = r1;
1125                         }
1126                 }
1127         } else {
1128                 /*
1129                  * Relative entries require us to construct
1130                  * the full path and possibly update the
1131                  * current directory.
1132                  */
1133                 size_t n = archive_strlen(&mtree->current_dir);
1134                 if (n > 0)
1135                         archive_strcat(&mtree->current_dir, "/");
1136                 archive_strcat(&mtree->current_dir, mentry->name);
1137                 archive_entry_copy_pathname(entry, mtree->current_dir.s);
1138                 if (archive_entry_filetype(entry) != AE_IFDIR)
1139                         mtree->current_dir.length = n;
1140         }
1141
1142         if (mtree->checkfs) {
1143                 /*
1144                  * Try to open and stat the file to get the real size
1145                  * and other file info.  It would be nice to avoid
1146                  * this here so that getting a listing of an mtree
1147                  * wouldn't require opening every referenced contents
1148                  * file.  But then we wouldn't know the actual
1149                  * contents size, so I don't see a really viable way
1150                  * around this.  (Also, we may want to someday pull
1151                  * other unspecified info from the contents file on
1152                  * disk.)
1153                  */
1154                 mtree->fd = -1;
1155                 if (archive_strlen(&mtree->contents_name) > 0)
1156                         path = mtree->contents_name.s;
1157                 else
1158                         path = archive_entry_pathname(entry);
1159
1160                 if (archive_entry_filetype(entry) == AE_IFREG ||
1161                                 archive_entry_filetype(entry) == AE_IFDIR) {
1162                         mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1163                         __archive_ensure_cloexec_flag(mtree->fd);
1164                         if (mtree->fd == -1 &&
1165                                 (errno != ENOENT ||
1166                                  archive_strlen(&mtree->contents_name) > 0)) {
1167                                 archive_set_error(&a->archive, errno,
1168                                                 "Can't open %s", path);
1169                                 r = ARCHIVE_WARN;
1170                         }
1171                 }
1172
1173                 st = &st_storage;
1174                 if (mtree->fd >= 0) {
1175                         if (fstat(mtree->fd, st) == -1) {
1176                                 archive_set_error(&a->archive, errno,
1177                                                 "Could not fstat %s", path);
1178                                 r = ARCHIVE_WARN;
1179                                 /* If we can't stat it, don't keep it open. */
1180                                 close(mtree->fd);
1181                                 mtree->fd = -1;
1182                                 st = NULL;
1183                         }
1184                 } else if (lstat(path, st) == -1) {
1185                         st = NULL;
1186                 }
1187
1188                 /*
1189                  * Check for a mismatch between the type in the specification
1190                  * and the type of the contents object on disk.
1191                  */
1192                 if (st != NULL) {
1193                         if (((st->st_mode & S_IFMT) == S_IFREG &&
1194                               archive_entry_filetype(entry) == AE_IFREG)
1195 #ifdef S_IFLNK
1196                           ||((st->st_mode & S_IFMT) == S_IFLNK &&
1197                               archive_entry_filetype(entry) == AE_IFLNK)
1198 #endif
1199 #ifdef S_IFSOCK
1200                           ||((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1201                               archive_entry_filetype(entry) == AE_IFSOCK)
1202 #endif
1203 #ifdef S_IFCHR
1204                           ||((st->st_mode & S_IFMT) == S_IFCHR &&
1205                               archive_entry_filetype(entry) == AE_IFCHR)
1206 #endif
1207 #ifdef S_IFBLK
1208                           ||((st->st_mode & S_IFMT) == S_IFBLK &&
1209                               archive_entry_filetype(entry) == AE_IFBLK)
1210 #endif
1211                           ||((st->st_mode & S_IFMT) == S_IFDIR &&
1212                               archive_entry_filetype(entry) == AE_IFDIR)
1213 #ifdef S_IFIFO
1214                           ||((st->st_mode & S_IFMT) == S_IFIFO &&
1215                               archive_entry_filetype(entry) == AE_IFIFO)
1216 #endif
1217                         ) {
1218                                 /* Types match. */
1219                         } else {
1220                                 /* Types don't match; bail out gracefully. */
1221                                 if (mtree->fd >= 0)
1222                                         close(mtree->fd);
1223                                 mtree->fd = -1;
1224                                 if (parsed_kws & MTREE_HAS_OPTIONAL) {
1225                                         /* It's not an error for an optional
1226                                          * entry to not match disk. */
1227                                         *use_next = 1;
1228                                 } else if (r == ARCHIVE_OK) {
1229                                         archive_set_error(&a->archive,
1230                                             ARCHIVE_ERRNO_MISC,
1231                                             "mtree specification has different"
1232                                             " type for %s",
1233                                             archive_entry_pathname(entry));
1234                                         r = ARCHIVE_WARN;
1235                                 }
1236                                 return (r);
1237                         }
1238                 }
1239
1240                 /*
1241                  * If there is a contents file on disk, pick some of the
1242                  * metadata from that file.  For most of these, we only
1243                  * set it from the contents if it wasn't already parsed
1244                  * from the specification.
1245                  */
1246                 if (st != NULL) {
1247                         if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1248                                 (parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1249                                 (archive_entry_filetype(entry) == AE_IFCHR ||
1250                                  archive_entry_filetype(entry) == AE_IFBLK))
1251                                 archive_entry_set_rdev(entry, st->st_rdev);
1252                         if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME))
1253                                 == 0 ||
1254                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1255                                 archive_entry_set_gid(entry, st->st_gid);
1256                         if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME))
1257                                 == 0 ||
1258                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1259                                 archive_entry_set_uid(entry, st->st_uid);
1260                         if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1261                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1262 #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1263                                 archive_entry_set_mtime(entry, st->st_mtime,
1264                                                 st->st_mtimespec.tv_nsec);
1265 #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1266                                 archive_entry_set_mtime(entry, st->st_mtime,
1267                                                 st->st_mtim.tv_nsec);
1268 #elif HAVE_STRUCT_STAT_ST_MTIME_N
1269                                 archive_entry_set_mtime(entry, st->st_mtime,
1270                                                 st->st_mtime_n);
1271 #elif HAVE_STRUCT_STAT_ST_UMTIME
1272                                 archive_entry_set_mtime(entry, st->st_mtime,
1273                                                 st->st_umtime*1000);
1274 #elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1275                                 archive_entry_set_mtime(entry, st->st_mtime,
1276                                                 st->st_mtime_usec*1000);
1277 #else
1278                                 archive_entry_set_mtime(entry, st->st_mtime, 0);
1279 #endif
1280                         }
1281                         if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1282                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1283                                 archive_entry_set_nlink(entry, st->st_nlink);
1284                         if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1285                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1286                                 archive_entry_set_perm(entry, st->st_mode);
1287                         if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1288                             (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1289                                 archive_entry_set_size(entry, st->st_size);
1290                         archive_entry_set_ino(entry, st->st_ino);
1291                         archive_entry_set_dev(entry, st->st_dev);
1292
1293                         archive_entry_linkify(mtree->resolver, &entry,
1294                                 &sparse_entry);
1295                 } else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1296                         /*
1297                          * Couldn't open the entry, stat it or the on-disk type
1298                          * didn't match.  If this entry is optional, just
1299                          * ignore it and read the next header entry.
1300                          */
1301                         *use_next = 1;
1302                         return ARCHIVE_OK;
1303                 }
1304         }
1305
1306         mtree->cur_size = archive_entry_size(entry);
1307         mtree->offset = 0;
1308
1309         return r;
1310 }
1311
1312 /*
1313  * Each line contains a sequence of keywords.
1314  */
1315 static int
1316 parse_line(struct archive_read *a, struct archive_entry *entry,
1317     struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1318 {
1319         struct mtree_option *iter;
1320         int r = ARCHIVE_OK, r1;
1321
1322         for (iter = mp->options; iter != NULL; iter = iter->next) {
1323                 r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1324                 if (r1 < r)
1325                         r = r1;
1326         }
1327         if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1328                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1329                     "Missing type keyword in mtree specification");
1330                 return (ARCHIVE_WARN);
1331         }
1332         return (r);
1333 }
1334
1335 /*
1336  * Device entries have one of the following forms:
1337  *  - raw dev_t
1338  *  - format,major,minor[,subdevice]
1339  * When parsing succeeded, `pdev' will contain the appropriate dev_t value.
1340  */
1341
1342 /* strsep() is not in C90, but strcspn() is. */
1343 /* Taken from http://unixpapa.com/incnote/string.html */
1344 static char *
1345 la_strsep(char **sp, const char *sep)
1346 {
1347         char *p, *s;
1348         if (sp == NULL || *sp == NULL || **sp == '\0')
1349                 return(NULL);
1350         s = *sp;
1351         p = s + strcspn(s, sep);
1352         if (*p != '\0')
1353                 *p++ = '\0';
1354         *sp = p;
1355         return(s);
1356 }
1357
1358 static int
1359 parse_device(dev_t *pdev, struct archive *a, char *val)
1360 {
1361 #define MAX_PACK_ARGS 3
1362         unsigned long numbers[MAX_PACK_ARGS];
1363         char *p, *dev;
1364         int argc;
1365         pack_t *pack;
1366         dev_t result;
1367         const char *error = NULL;
1368
1369         memset(pdev, 0, sizeof(*pdev));
1370         if ((dev = strchr(val, ',')) != NULL) {
1371                 /*
1372                  * Device's major/minor are given in a specified format.
1373                  * Decode and pack it accordingly.
1374                  */
1375                 *dev++ = '\0';
1376                 if ((pack = pack_find(val)) == NULL) {
1377                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1378                             "Unknown format `%s'", val);
1379                         return ARCHIVE_WARN;
1380                 }
1381                 argc = 0;
1382                 while ((p = la_strsep(&dev, ",")) != NULL) {
1383                         if (*p == '\0') {
1384                                 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1385                                     "Missing number");
1386                                 return ARCHIVE_WARN;
1387                         }
1388                         numbers[argc++] = (unsigned long)mtree_atol(&p);
1389                         if (argc > MAX_PACK_ARGS) {
1390                                 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1391                                     "Too many arguments");
1392                                 return ARCHIVE_WARN;
1393                         }
1394                 }
1395                 if (argc < 2) {
1396                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1397                             "Not enough arguments");
1398                         return ARCHIVE_WARN;
1399                 }
1400                 result = (*pack)(argc, numbers, &error);
1401                 if (error != NULL) {
1402                         archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1403                             "%s", error);
1404                         return ARCHIVE_WARN;
1405                 }
1406         } else {
1407                 /* file system raw value. */
1408                 result = (dev_t)mtree_atol(&val);
1409         }
1410         *pdev = result;
1411         return ARCHIVE_OK;
1412 #undef MAX_PACK_ARGS
1413 }
1414
1415 /*
1416  * Parse a single keyword and its value.
1417  */
1418 static int
1419 parse_keyword(struct archive_read *a, struct mtree *mtree,
1420     struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1421 {
1422         char *val, *key;
1423
1424         key = opt->value;
1425
1426         if (*key == '\0')
1427                 return (ARCHIVE_OK);
1428
1429         if (strcmp(key, "nochange") == 0) {
1430                 *parsed_kws |= MTREE_HAS_NOCHANGE;
1431                 return (ARCHIVE_OK);
1432         }
1433         if (strcmp(key, "optional") == 0) {
1434                 *parsed_kws |= MTREE_HAS_OPTIONAL;
1435                 return (ARCHIVE_OK);
1436         }
1437         if (strcmp(key, "ignore") == 0) {
1438                 /*
1439                  * The mtree processing is not recursive, so
1440                  * recursion will only happen for explicitly listed
1441                  * entries.
1442                  */
1443                 return (ARCHIVE_OK);
1444         }
1445
1446         val = strchr(key, '=');
1447         if (val == NULL) {
1448                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1449                     "Malformed attribute \"%s\" (%d)", key, key[0]);
1450                 return (ARCHIVE_WARN);
1451         }
1452
1453         *val = '\0';
1454         ++val;
1455
1456         switch (key[0]) {
1457         case 'c':
1458                 if (strcmp(key, "content") == 0
1459                     || strcmp(key, "contents") == 0) {
1460                         parse_escapes(val, NULL);
1461                         archive_strcpy(&mtree->contents_name, val);
1462                         break;
1463                 }
1464                 if (strcmp(key, "cksum") == 0)
1465                         break;
1466         case 'd':
1467                 if (strcmp(key, "device") == 0) {
1468                         /* stat(2) st_rdev field, e.g. the major/minor IDs
1469                          * of a char/block special file */
1470                         int r;
1471                         dev_t dev;
1472
1473                         *parsed_kws |= MTREE_HAS_DEVICE;
1474                         r = parse_device(&dev, &a->archive, val);
1475                         if (r == ARCHIVE_OK)
1476                                 archive_entry_set_rdev(entry, dev);
1477                         return r;
1478                 }
1479         case 'f':
1480                 if (strcmp(key, "flags") == 0) {
1481                         *parsed_kws |= MTREE_HAS_FFLAGS;
1482                         archive_entry_copy_fflags_text(entry, val);
1483                         break;
1484                 }
1485         case 'g':
1486                 if (strcmp(key, "gid") == 0) {
1487                         *parsed_kws |= MTREE_HAS_GID;
1488                         archive_entry_set_gid(entry, mtree_atol10(&val));
1489                         break;
1490                 }
1491                 if (strcmp(key, "gname") == 0) {
1492                         *parsed_kws |= MTREE_HAS_GNAME;
1493                         archive_entry_copy_gname(entry, val);
1494                         break;
1495                 }
1496         case 'i':
1497                 if (strcmp(key, "inode") == 0) {
1498                         archive_entry_set_ino(entry, mtree_atol10(&val));
1499                         break;
1500                 }
1501         case 'l':
1502                 if (strcmp(key, "link") == 0) {
1503                         archive_entry_copy_symlink(entry, val);
1504                         break;
1505                 }
1506         case 'm':
1507                 if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0)
1508                         break;
1509                 if (strcmp(key, "mode") == 0) {
1510                         if (val[0] >= '0' && val[0] <= '9') {
1511                                 *parsed_kws |= MTREE_HAS_PERM;
1512                                 archive_entry_set_perm(entry,
1513                                     (mode_t)mtree_atol8(&val));
1514                         } else {
1515                                 archive_set_error(&a->archive,
1516                                     ARCHIVE_ERRNO_FILE_FORMAT,
1517                                     "Symbolic mode \"%s\" unsupported", val);
1518                                 return ARCHIVE_WARN;
1519                         }
1520                         break;
1521                 }
1522         case 'n':
1523                 if (strcmp(key, "nlink") == 0) {
1524                         *parsed_kws |= MTREE_HAS_NLINK;
1525                         archive_entry_set_nlink(entry,
1526                                 (unsigned int)mtree_atol10(&val));
1527                         break;
1528                 }
1529         case 'r':
1530                 if (strcmp(key, "resdevice") == 0) {
1531                         /* stat(2) st_dev field, e.g. the device ID where the
1532                          * inode resides */
1533                         int r;
1534                         dev_t dev;
1535
1536                         r = parse_device(&dev, &a->archive, val);
1537                         if (r == ARCHIVE_OK)
1538                                 archive_entry_set_dev(entry, dev);
1539                         return r;
1540                 }
1541                 if (strcmp(key, "rmd160") == 0 ||
1542                     strcmp(key, "rmd160digest") == 0)
1543                         break;
1544         case 's':
1545                 if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0)
1546                         break;
1547                 if (strcmp(key, "sha256") == 0 ||
1548                     strcmp(key, "sha256digest") == 0)
1549                         break;
1550                 if (strcmp(key, "sha384") == 0 ||
1551                     strcmp(key, "sha384digest") == 0)
1552                         break;
1553                 if (strcmp(key, "sha512") == 0 ||
1554                     strcmp(key, "sha512digest") == 0)
1555                         break;
1556                 if (strcmp(key, "size") == 0) {
1557                         archive_entry_set_size(entry, mtree_atol10(&val));
1558                         break;
1559                 }
1560         case 't':
1561                 if (strcmp(key, "tags") == 0) {
1562                         /*
1563                          * Comma delimited list of tags.
1564                          * Ignore the tags for now, but the interface
1565                          * should be extended to allow inclusion/exclusion.
1566                          */
1567                         break;
1568                 }
1569                 if (strcmp(key, "time") == 0) {
1570                         int64_t m;
1571                         int64_t my_time_t_max = get_time_t_max();
1572                         int64_t my_time_t_min = get_time_t_min();
1573                         long ns = 0;
1574
1575                         *parsed_kws |= MTREE_HAS_MTIME;
1576                         m = mtree_atol10(&val);
1577                         /* Replicate an old mtree bug:
1578                          * 123456789.1 represents 123456789
1579                          * seconds and 1 nanosecond. */
1580                         if (*val == '.') {
1581                                 ++val;
1582                                 ns = (long)mtree_atol10(&val);
1583                         } else
1584                                 ns = 0;
1585                         if (m > my_time_t_max)
1586                                 m = my_time_t_max;
1587                         else if (m < my_time_t_min)
1588                                 m = my_time_t_min;
1589                         archive_entry_set_mtime(entry, (time_t)m, ns);
1590                         break;
1591                 }
1592                 if (strcmp(key, "type") == 0) {
1593                         switch (val[0]) {
1594                         case 'b':
1595                                 if (strcmp(val, "block") == 0) {
1596                                         archive_entry_set_filetype(entry, AE_IFBLK);
1597                                         break;
1598                                 }
1599                         case 'c':
1600                                 if (strcmp(val, "char") == 0) {
1601                                         archive_entry_set_filetype(entry,
1602                                                 AE_IFCHR);
1603                                         break;
1604                                 }
1605                         case 'd':
1606                                 if (strcmp(val, "dir") == 0) {
1607                                         archive_entry_set_filetype(entry,
1608                                                 AE_IFDIR);
1609                                         break;
1610                                 }
1611                         case 'f':
1612                                 if (strcmp(val, "fifo") == 0) {
1613                                         archive_entry_set_filetype(entry,
1614                                                 AE_IFIFO);
1615                                         break;
1616                                 }
1617                                 if (strcmp(val, "file") == 0) {
1618                                         archive_entry_set_filetype(entry,
1619                                                 AE_IFREG);
1620                                         break;
1621                                 }
1622                         case 'l':
1623                                 if (strcmp(val, "link") == 0) {
1624                                         archive_entry_set_filetype(entry,
1625                                                 AE_IFLNK);
1626                                         break;
1627                                 }
1628                         default:
1629                                 archive_set_error(&a->archive,
1630                                     ARCHIVE_ERRNO_FILE_FORMAT,
1631                                     "Unrecognized file type \"%s\"; "
1632                                     "assuming \"file\"", val);
1633                                 archive_entry_set_filetype(entry, AE_IFREG);
1634                                 return (ARCHIVE_WARN);
1635                         }
1636                         *parsed_kws |= MTREE_HAS_TYPE;
1637                         break;
1638                 }
1639         case 'u':
1640                 if (strcmp(key, "uid") == 0) {
1641                         *parsed_kws |= MTREE_HAS_UID;
1642                         archive_entry_set_uid(entry, mtree_atol10(&val));
1643                         break;
1644                 }
1645                 if (strcmp(key, "uname") == 0) {
1646                         *parsed_kws |= MTREE_HAS_UNAME;
1647                         archive_entry_copy_uname(entry, val);
1648                         break;
1649                 }
1650         default:
1651                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1652                     "Unrecognized key %s=%s", key, val);
1653                 return (ARCHIVE_WARN);
1654         }
1655         return (ARCHIVE_OK);
1656 }
1657
1658 static int
1659 read_data(struct archive_read *a, const void **buff, size_t *size,
1660     int64_t *offset)
1661 {
1662         size_t bytes_to_read;
1663         ssize_t bytes_read;
1664         struct mtree *mtree;
1665
1666         mtree = (struct mtree *)(a->format->data);
1667         if (mtree->fd < 0) {
1668                 *buff = NULL;
1669                 *offset = 0;
1670                 *size = 0;
1671                 return (ARCHIVE_EOF);
1672         }
1673         if (mtree->buff == NULL) {
1674                 mtree->buffsize = 64 * 1024;
1675                 mtree->buff = malloc(mtree->buffsize);
1676                 if (mtree->buff == NULL) {
1677                         archive_set_error(&a->archive, ENOMEM,
1678                             "Can't allocate memory");
1679                         return (ARCHIVE_FATAL);
1680                 }
1681         }
1682
1683         *buff = mtree->buff;
1684         *offset = mtree->offset;
1685         if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1686                 bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1687         else
1688                 bytes_to_read = mtree->buffsize;
1689         bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1690         if (bytes_read < 0) {
1691                 archive_set_error(&a->archive, errno, "Can't read");
1692                 return (ARCHIVE_WARN);
1693         }
1694         if (bytes_read == 0) {
1695                 *size = 0;
1696                 return (ARCHIVE_EOF);
1697         }
1698         mtree->offset += bytes_read;
1699         *size = bytes_read;
1700         return (ARCHIVE_OK);
1701 }
1702
1703 /* Skip does nothing except possibly close the contents file. */
1704 static int
1705 skip(struct archive_read *a)
1706 {
1707         struct mtree *mtree;
1708
1709         mtree = (struct mtree *)(a->format->data);
1710         if (mtree->fd >= 0) {
1711                 close(mtree->fd);
1712                 mtree->fd = -1;
1713         }
1714         return (ARCHIVE_OK);
1715 }
1716
1717 /*
1718  * Since parsing backslash sequences always makes strings shorter,
1719  * we can always do this conversion in-place.
1720  */
1721 static void
1722 parse_escapes(char *src, struct mtree_entry *mentry)
1723 {
1724         char *dest = src;
1725         char c;
1726
1727         if (mentry != NULL && strcmp(src, ".") == 0)
1728                 mentry->full = 1;
1729
1730         while (*src != '\0') {
1731                 c = *src++;
1732                 if (c == '/' && mentry != NULL)
1733                         mentry->full = 1;
1734                 if (c == '\\') {
1735                         switch (src[0]) {
1736                         case '0':
1737                                 if (src[1] < '0' || src[1] > '7') {
1738                                         c = 0;
1739                                         ++src;
1740                                         break;
1741                                 }
1742                                 /* FALLTHROUGH */
1743                         case '1':
1744                         case '2':
1745                         case '3':
1746                                 if (src[1] >= '0' && src[1] <= '7' &&
1747                                     src[2] >= '0' && src[2] <= '7') {
1748                                         c = (src[0] - '0') << 6;
1749                                         c |= (src[1] - '0') << 3;
1750                                         c |= (src[2] - '0');
1751                                         src += 3;
1752                                 }
1753                                 break;
1754                         case 'a':
1755                                 c = '\a';
1756                                 ++src;
1757                                 break;
1758                         case 'b':
1759                                 c = '\b';
1760                                 ++src;
1761                                 break;
1762                         case 'f':
1763                                 c = '\f';
1764                                 ++src;
1765                                 break;
1766                         case 'n':
1767                                 c = '\n';
1768                                 ++src;
1769                                 break;
1770                         case 'r':
1771                                 c = '\r';
1772                                 ++src;
1773                                 break;
1774                         case 's':
1775                                 c = ' ';
1776                                 ++src;
1777                                 break;
1778                         case 't':
1779                                 c = '\t';
1780                                 ++src;
1781                                 break;
1782                         case 'v':
1783                                 c = '\v';
1784                                 ++src;
1785                                 break;
1786                         case '\\':
1787                                 c = '\\';
1788                                 ++src;
1789                                 break;
1790                         }
1791                 }
1792                 *dest++ = c;
1793         }
1794         *dest = '\0';
1795 }
1796
1797 /*
1798  * Note that this implementation does not (and should not!) obey
1799  * locale settings; you cannot simply substitute strtol here, since
1800  * it does obey locale.
1801  */
1802 static int64_t
1803 mtree_atol8(char **p)
1804 {
1805         int64_t l, limit, last_digit_limit;
1806         int digit, base;
1807
1808         base = 8;
1809         limit = INT64_MAX / base;
1810         last_digit_limit = INT64_MAX % base;
1811
1812         l = 0;
1813         digit = **p - '0';
1814         while (digit >= 0 && digit < base) {
1815                 if (l>limit || (l == limit && digit > last_digit_limit)) {
1816                         l = INT64_MAX; /* Truncate on overflow. */
1817                         break;
1818                 }
1819                 l = (l * base) + digit;
1820                 digit = *++(*p) - '0';
1821         }
1822         return (l);
1823 }
1824
1825 /*
1826  * Note that this implementation does not (and should not!) obey
1827  * locale settings; you cannot simply substitute strtol here, since
1828  * it does obey locale.
1829  */
1830 static int64_t
1831 mtree_atol10(char **p)
1832 {
1833         int64_t l, limit, last_digit_limit;
1834         int base, digit, sign;
1835
1836         base = 10;
1837
1838         if (**p == '-') {
1839                 sign = -1;
1840                 limit = ((uint64_t)(INT64_MAX) + 1) / base;
1841                 last_digit_limit = ((uint64_t)(INT64_MAX) + 1) % base;
1842                 ++(*p);
1843         } else {
1844                 sign = 1;
1845                 limit = INT64_MAX / base;
1846                 last_digit_limit = INT64_MAX % base;
1847         }
1848
1849         l = 0;
1850         digit = **p - '0';
1851         while (digit >= 0 && digit < base) {
1852                 if (l > limit || (l == limit && digit > last_digit_limit))
1853                         return (sign < 0) ? INT64_MIN : INT64_MAX;
1854                 l = (l * base) + digit;
1855                 digit = *++(*p) - '0';
1856         }
1857         return (sign < 0) ? -l : l;
1858 }
1859
1860 /* Parse a hex digit. */
1861 static int
1862 parsehex(char c)
1863 {
1864         if (c >= '0' && c <= '9')
1865                 return c - '0';
1866         else if (c >= 'a' && c <= 'f')
1867                 return c - 'a';
1868         else if (c >= 'A' && c <= 'F')
1869                 return c - 'A';
1870         else
1871                 return -1;
1872 }
1873
1874 /*
1875  * Note that this implementation does not (and should not!) obey
1876  * locale settings; you cannot simply substitute strtol here, since
1877  * it does obey locale.
1878  */
1879 static int64_t
1880 mtree_atol16(char **p)
1881 {
1882         int64_t l, limit, last_digit_limit;
1883         int base, digit, sign;
1884
1885         base = 16;
1886
1887         if (**p == '-') {
1888                 sign = -1;
1889                 limit = ((uint64_t)(INT64_MAX) + 1) / base;
1890                 last_digit_limit = ((uint64_t)(INT64_MAX) + 1) % base;
1891                 ++(*p);
1892         } else {
1893                 sign = 1;
1894                 limit = INT64_MAX / base;
1895                 last_digit_limit = INT64_MAX % base;
1896         }
1897
1898         l = 0;
1899         digit = parsehex(**p);
1900         while (digit >= 0 && digit < base) {
1901                 if (l > limit || (l == limit && digit > last_digit_limit))
1902                         return (sign < 0) ? INT64_MIN : INT64_MAX;
1903                 l = (l * base) + digit;
1904                 digit = parsehex(*++(*p));
1905         }
1906         return (sign < 0) ? -l : l;
1907 }
1908
1909 static int64_t
1910 mtree_atol(char **p)
1911 {
1912         if (**p != '0')
1913                 return mtree_atol10(p);
1914         if ((*p)[1] == 'x' || (*p)[1] == 'X') {
1915                 *p += 2;
1916                 return mtree_atol16(p);
1917         }
1918         return mtree_atol8(p);
1919 }
1920
1921 /*
1922  * Returns length of line (including trailing newline)
1923  * or negative on error.  'start' argument is updated to
1924  * point to first character of line.
1925  */
1926 static ssize_t
1927 readline(struct archive_read *a, struct mtree *mtree, char **start,
1928     ssize_t limit)
1929 {
1930         ssize_t bytes_read;
1931         ssize_t total_size = 0;
1932         ssize_t find_off = 0;
1933         const void *t;
1934         void *nl;
1935         char *u;
1936
1937         /* Accumulate line in a line buffer. */
1938         for (;;) {
1939                 /* Read some more. */
1940                 t = __archive_read_ahead(a, 1, &bytes_read);
1941                 if (t == NULL)
1942                         return (0);
1943                 if (bytes_read < 0)
1944                         return (ARCHIVE_FATAL);
1945                 nl = memchr(t, '\n', bytes_read);
1946                 /* If we found '\n', trim the read to end exactly there. */
1947                 if (nl != NULL) {
1948                         bytes_read = ((const char *)nl) - ((const char *)t) + 1;
1949                 }
1950                 if (total_size + bytes_read + 1 > limit) {
1951                         archive_set_error(&a->archive,
1952                             ARCHIVE_ERRNO_FILE_FORMAT,
1953                             "Line too long");
1954                         return (ARCHIVE_FATAL);
1955                 }
1956                 if (archive_string_ensure(&mtree->line,
1957                         total_size + bytes_read + 1) == NULL) {
1958                         archive_set_error(&a->archive, ENOMEM,
1959                             "Can't allocate working buffer");
1960                         return (ARCHIVE_FATAL);
1961                 }
1962                 /* Append new bytes to string. */
1963                 memcpy(mtree->line.s + total_size, t, bytes_read);
1964                 __archive_read_consume(a, bytes_read);
1965                 total_size += bytes_read;
1966                 mtree->line.s[total_size] = '\0';
1967
1968                 for (u = mtree->line.s + find_off; *u; ++u) {
1969                         if (u[0] == '\n') {
1970                                 /* Ends with unescaped newline. */
1971                                 *start = mtree->line.s;
1972                                 return total_size;
1973                         } else if (u[0] == '#') {
1974                                 /* Ends with comment sequence #...\n */
1975                                 if (nl == NULL) {
1976                                         /* But we've not found the \n yet */
1977                                         break;
1978                                 }
1979                         } else if (u[0] == '\\') {
1980                                 if (u[1] == '\n') {
1981                                         /* Trim escaped newline. */
1982                                         total_size -= 2;
1983                                         mtree->line.s[total_size] = '\0';
1984                                         break;
1985                                 } else if (u[1] != '\0') {
1986                                         /* Skip the two-char escape sequence */
1987                                         ++u;
1988                                 }
1989                         }
1990                 }
1991                 find_off = u - mtree->line.s;
1992         }
1993 }