2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD$");
31 #ifdef HAVE_SYS_STAT_H
48 #include "archive_entry.h"
49 #include "archive_private.h"
50 #include "archive_read_private.h"
53 int64_t entry_bytes_remaining;
54 /* unconsumed is purely to track data we've gotten from readahead,
55 * but haven't yet marked as consumed. Must be paired with
56 * entry_bytes_remaining usage/modification.
58 size_t entry_bytes_unconsumed;
60 int64_t entry_padding;
63 char read_global_header;
67 * Define structure of the "ar" header.
69 #define AR_name_offset 0
70 #define AR_name_size 16
71 #define AR_date_offset 16
72 #define AR_date_size 12
73 #define AR_uid_offset 28
75 #define AR_gid_offset 34
77 #define AR_mode_offset 40
78 #define AR_mode_size 8
79 #define AR_size_offset 48
80 #define AR_size_size 10
81 #define AR_fmag_offset 58
82 #define AR_fmag_size 2
84 static int archive_read_format_ar_bid(struct archive_read *a, int);
85 static int archive_read_format_ar_cleanup(struct archive_read *a);
86 static int archive_read_format_ar_read_data(struct archive_read *a,
87 const void **buff, size_t *size, int64_t *offset);
88 static int archive_read_format_ar_skip(struct archive_read *a);
89 static int archive_read_format_ar_read_header(struct archive_read *a,
90 struct archive_entry *e);
91 static uint64_t ar_atol8(const char *p, unsigned char_cnt);
92 static uint64_t ar_atol10(const char *p, unsigned char_cnt);
93 static int ar_parse_gnu_filename_table(struct archive_read *a);
94 static int ar_parse_common_header(struct ar *ar, struct archive_entry *,
98 archive_read_support_format_ar(struct archive *_a)
100 struct archive_read *a = (struct archive_read *)_a;
104 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
105 ARCHIVE_STATE_NEW, "archive_read_support_format_ar");
107 ar = (struct ar *)calloc(1, sizeof(*ar));
109 archive_set_error(&a->archive, ENOMEM,
110 "Can't allocate ar data");
111 return (ARCHIVE_FATAL);
115 r = __archive_read_register_format(a,
118 archive_read_format_ar_bid,
120 archive_read_format_ar_read_header,
121 archive_read_format_ar_read_data,
122 archive_read_format_ar_skip,
124 archive_read_format_ar_cleanup,
128 if (r != ARCHIVE_OK) {
136 archive_read_format_ar_cleanup(struct archive_read *a)
140 ar = (struct ar *)(a->format->data);
143 (a->format->data) = NULL;
148 archive_read_format_ar_bid(struct archive_read *a, int best_bid)
152 (void)best_bid; /* UNUSED */
155 * Verify the 8-byte file signature.
156 * TODO: Do we need to check more than this?
158 if ((h = __archive_read_ahead(a, 8, NULL)) == NULL)
160 if (memcmp(h, "!<arch>\n", 8) == 0) {
167 _ar_read_header(struct archive_read *a, struct archive_entry *entry,
168 struct ar *ar, const char *h, size_t *unconsumed)
170 char filename[AR_name_size + 1];
171 uint64_t number; /* Used to hold parsed numbers before validation. */
172 size_t bsd_name_length, entry_size;
177 /* Verify the magic signature on the file header. */
178 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
179 archive_set_error(&a->archive, EINVAL,
180 "Incorrect file header signature");
181 return (ARCHIVE_FATAL);
184 /* Copy filename into work buffer. */
185 strncpy(filename, h + AR_name_offset, AR_name_size);
186 filename[AR_name_size] = '\0';
189 * Guess the format variant based on the filename.
191 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
192 /* We don't already know the variant, so let's guess. */
194 * Biggest clue is presence of '/': GNU starts special
195 * filenames with '/', appends '/' as terminator to
196 * non-special names, so anything with '/' should be
197 * GNU except for BSD long filenames.
199 if (strncmp(filename, "#1/", 3) == 0)
200 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
201 else if (strchr(filename, '/') != NULL)
202 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
203 else if (strncmp(filename, "__.SYMDEF", 9) == 0)
204 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
206 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
207 * if name exactly fills 16-byte field? If so, we
208 * can't assume entries without '/' are BSD. XXX
212 /* Update format name from the code. */
213 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
214 a->archive.archive_format_name = "ar (GNU/SVR4)";
215 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
216 a->archive.archive_format_name = "ar (BSD)";
218 a->archive.archive_format_name = "ar";
221 * Remove trailing spaces from the filename. GNU and BSD
222 * variants both pad filename area out with spaces.
223 * This will only be wrong if GNU/SVR4 'ar' implementations
224 * omit trailing '/' for 16-char filenames and we have
225 * a 16-char filename that ends in ' '.
227 p = filename + AR_name_size - 1;
228 while (p >= filename && *p == ' ') {
234 * Remove trailing slash unless first character is '/'.
235 * (BSD entries never end in '/', so this will only trim
236 * GNU-format entries. GNU special entries start with '/'
237 * and are not terminated in '/', so we don't trim anything
238 * that starts with '/'.)
240 if (filename[0] != '/' && p > filename && *p == '/') {
245 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
246 "Found entry with empty filename");
247 return (ARCHIVE_FATAL);
251 * '//' is the GNU filename table.
252 * Later entries can refer to names in this table.
254 if (strcmp(filename, "//") == 0) {
255 /* This must come before any call to _read_ahead. */
256 ar_parse_common_header(ar, entry, h);
257 archive_entry_copy_pathname(entry, filename);
258 archive_entry_set_filetype(entry, AE_IFREG);
259 /* Get the size of the filename table. */
260 number = ar_atol10(h + AR_size_offset, AR_size_size);
261 if (number > SIZE_MAX || number > 1024 * 1024 * 1024) {
262 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
263 "Filename table too large");
264 return (ARCHIVE_FATAL);
266 entry_size = (size_t)number;
267 if (entry_size == 0) {
268 archive_set_error(&a->archive, EINVAL,
269 "Invalid string table");
270 return (ARCHIVE_FATAL);
272 if (ar->strtab != NULL) {
273 archive_set_error(&a->archive, EINVAL,
274 "More than one string tables exist");
275 return (ARCHIVE_FATAL);
278 /* Read the filename table into memory. */
279 st = malloc(entry_size);
281 archive_set_error(&a->archive, ENOMEM,
282 "Can't allocate filename table buffer");
283 return (ARCHIVE_FATAL);
286 ar->strtab_size = entry_size;
289 __archive_read_consume(a, *unconsumed);
293 if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
294 return (ARCHIVE_FATAL);
295 memcpy(st, b, entry_size);
296 __archive_read_consume(a, entry_size);
297 /* All contents are consumed. */
298 ar->entry_bytes_remaining = 0;
299 archive_entry_set_size(entry, ar->entry_bytes_remaining);
301 /* Parse the filename table. */
302 return (ar_parse_gnu_filename_table(a));
306 * GNU variant handles long filenames by storing /<number>
307 * to indicate a name stored in the filename table.
308 * XXX TODO: Verify that it's all digits... Don't be fooled
311 if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
312 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
314 * If we can't look up the real name, warn and return
315 * the entry with the wrong name.
317 if (ar->strtab == NULL || number >= ar->strtab_size) {
318 archive_set_error(&a->archive, EINVAL,
319 "Can't find long filename for GNU/SVR4 archive entry");
320 archive_entry_copy_pathname(entry, filename);
321 /* Parse the time, owner, mode, size fields. */
322 ar_parse_common_header(ar, entry, h);
323 return (ARCHIVE_FATAL);
326 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
327 /* Parse the time, owner, mode, size fields. */
328 return (ar_parse_common_header(ar, entry, h));
332 * BSD handles long filenames by storing "#1/" followed by the
333 * length of filename as a decimal number, then prepends the
334 * filename to the file contents.
336 if (strncmp(filename, "#1/", 3) == 0) {
337 /* Parse the time, owner, mode, size fields. */
338 /* This must occur before _read_ahead is called again. */
339 ar_parse_common_header(ar, entry, h);
341 /* Parse the size of the name, adjust the file size. */
342 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
343 /* Sanity check the filename length:
344 * = Must be <= SIZE_MAX - 1
346 * = Cannot be bigger than the entire entry
348 if (number > SIZE_MAX - 1
349 || number > 1024 * 1024
350 || (int64_t)number > ar->entry_bytes_remaining) {
351 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
352 "Bad input file size");
353 return (ARCHIVE_FATAL);
355 bsd_name_length = (size_t)number;
356 ar->entry_bytes_remaining -= bsd_name_length;
357 /* Adjust file size reported to client. */
358 archive_entry_set_size(entry, ar->entry_bytes_remaining);
361 __archive_read_consume(a, *unconsumed);
365 /* Read the long name into memory. */
366 if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
367 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
368 "Truncated input file");
369 return (ARCHIVE_FATAL);
371 /* Store it in the entry. */
372 p = (char *)malloc(bsd_name_length + 1);
374 archive_set_error(&a->archive, ENOMEM,
375 "Can't allocate fname buffer");
376 return (ARCHIVE_FATAL);
378 strncpy(p, b, bsd_name_length);
379 p[bsd_name_length] = '\0';
381 __archive_read_consume(a, bsd_name_length);
383 archive_entry_copy_pathname(entry, p);
389 * "/" is the SVR4/GNU archive symbol table.
390 * "/SYM64/" is the SVR4/GNU 64-bit variant archive symbol table.
392 if (strcmp(filename, "/") == 0 || strcmp(filename, "/SYM64/") == 0) {
393 archive_entry_copy_pathname(entry, filename);
394 /* Parse the time, owner, mode, size fields. */
395 r = ar_parse_common_header(ar, entry, h);
396 /* Force the file type to a regular file. */
397 archive_entry_set_filetype(entry, AE_IFREG);
402 * "__.SYMDEF" is a BSD archive symbol table.
404 if (strcmp(filename, "__.SYMDEF") == 0) {
405 archive_entry_copy_pathname(entry, filename);
406 /* Parse the time, owner, mode, size fields. */
407 return (ar_parse_common_header(ar, entry, h));
411 * Otherwise, this is a standard entry. The filename
412 * has already been trimmed as much as possible, based
413 * on our current knowledge of the format.
415 archive_entry_copy_pathname(entry, filename);
416 return (ar_parse_common_header(ar, entry, h));
420 archive_read_format_ar_read_header(struct archive_read *a,
421 struct archive_entry *entry)
423 struct ar *ar = (struct ar*)(a->format->data);
425 const void *header_data;
428 if (!ar->read_global_header) {
430 * We are now at the beginning of the archive,
431 * so we first need to consume the ar global header.
433 __archive_read_consume(a, 8);
434 ar->read_global_header = 1;
435 /* Set a default format code for now. */
436 a->archive.archive_format = ARCHIVE_FORMAT_AR;
439 /* Read the header for the next file entry. */
440 if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL)
442 return (ARCHIVE_EOF);
446 ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed);
449 __archive_read_consume(a, unconsumed);
456 ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
461 /* Copy remaining header */
462 archive_entry_set_filetype(entry, AE_IFREG);
463 archive_entry_set_mtime(entry,
464 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
465 archive_entry_set_uid(entry,
466 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
467 archive_entry_set_gid(entry,
468 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
469 archive_entry_set_mode(entry,
470 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
471 n = ar_atol10(h + AR_size_offset, AR_size_size);
473 ar->entry_offset = 0;
474 ar->entry_padding = n % 2;
475 archive_entry_set_size(entry, n);
476 ar->entry_bytes_remaining = n;
481 archive_read_format_ar_read_data(struct archive_read *a,
482 const void **buff, size_t *size, int64_t *offset)
487 ar = (struct ar *)(a->format->data);
489 if (ar->entry_bytes_unconsumed) {
490 __archive_read_consume(a, ar->entry_bytes_unconsumed);
491 ar->entry_bytes_unconsumed = 0;
494 if (ar->entry_bytes_remaining > 0) {
495 *buff = __archive_read_ahead(a, 1, &bytes_read);
496 if (bytes_read == 0) {
497 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
498 "Truncated ar archive");
499 return (ARCHIVE_FATAL);
502 return (ARCHIVE_FATAL);
503 if (bytes_read > ar->entry_bytes_remaining)
504 bytes_read = (ssize_t)ar->entry_bytes_remaining;
506 ar->entry_bytes_unconsumed = bytes_read;
507 *offset = ar->entry_offset;
508 ar->entry_offset += bytes_read;
509 ar->entry_bytes_remaining -= bytes_read;
512 int64_t skipped = __archive_read_consume(a, ar->entry_padding);
514 ar->entry_padding -= skipped;
516 if (ar->entry_padding) {
518 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
519 "Truncated ar archive- failed consuming padding");
521 return (ARCHIVE_FATAL);
525 *offset = ar->entry_offset;
526 return (ARCHIVE_EOF);
531 archive_read_format_ar_skip(struct archive_read *a)
533 int64_t bytes_skipped;
536 ar = (struct ar *)(a->format->data);
538 bytes_skipped = __archive_read_consume(a,
539 ar->entry_bytes_remaining + ar->entry_padding
540 + ar->entry_bytes_unconsumed);
541 if (bytes_skipped < 0)
542 return (ARCHIVE_FATAL);
544 ar->entry_bytes_remaining = 0;
545 ar->entry_bytes_unconsumed = 0;
546 ar->entry_padding = 0;
552 ar_parse_gnu_filename_table(struct archive_read *a)
558 ar = (struct ar*)(a->format->data);
559 size = ar->strtab_size;
561 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
565 goto bad_string_table;
570 * GNU ar always pads the table to an even size.
571 * The pad character is either '\n' or '`'.
573 if (p != ar->strtab + size && *p != '\n' && *p != '`')
574 goto bad_string_table;
576 /* Enforce zero termination. */
577 ar->strtab[size - 1] = '\0';
582 archive_set_error(&a->archive, EINVAL,
583 "Invalid string table");
586 return (ARCHIVE_FATAL);
/*
 * Parse an unsigned octal number from a fixed-width, non-NUL-terminated
 * header field.
 *
 * p        - start of the field.
 * char_cnt - width of the field in bytes; no byte at or beyond
 *            p[char_cnt] is ever examined.
 *
 * Leading spaces and tabs are skipped.  Parsing stops at the first byte
 * that is not an octal digit or when the field is exhausted.  Returns the
 * parsed value, 0 if the field holds no digits, or UINT64_MAX if the
 * value does not fit in 64 bits.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	const unsigned int base = 8;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;

	/*
	 * Test the remaining count before dereferencing so that a field
	 * made up entirely of blanks does not cause a read past its end.
	 */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0') {
		unsigned int digit = (unsigned int)(*p - '0');

		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
/*
 * Parse an unsigned decimal number from a fixed-width, non-NUL-terminated
 * header field.
 *
 * p        - start of the field.
 * char_cnt - width of the field in bytes; no byte at or beyond
 *            p[char_cnt] is ever examined.
 *
 * Leading spaces and tabs are skipped.  Parsing stops at the first byte
 * that is not a decimal digit or when the field is exhausted.  Returns
 * the parsed value, 0 if the field holds no digits, or UINT64_MAX if the
 * value does not fit in 64 bits.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	const unsigned int base = 10;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;

	/*
	 * Test the remaining count before dereferencing so that a field
	 * made up entirely of blanks does not cause a read past its end.
	 */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0') {
		unsigned int digit = (unsigned int)(*p - '0');

		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}