2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD$");
31 #ifdef HAVE_SYS_STAT_H
45 #include "archive_entry.h"
46 #include "archive_private.h"
47 #include "archive_read_private.h"
/*
 * Bytes of the current entry still to be delivered or skipped.  It is set
 * from the header size field in ar_parse_common_header(), reduced as entry
 * data (or a BSD long name) is consumed, and zeroed by
 * archive_read_format_ar_skip().
 */
50 off_t entry_bytes_remaining;
58 * Define structure of the "ar" header.
/*
 * Field layout of the fixed-size ar member header, in bytes from the start
 * of the header.  The final field (fmag) ends at byte 60, which matches the
 * 60-byte read performed by archive_read_format_ar_read_header().
 * NOTE(review): AR_uid_size and AR_gid_size are used by
 * ar_parse_common_header() but their definitions are not visible in this
 * excerpt; confirm they are defined alongside these.
 */
/* Member name field. */
60 #define AR_name_offset 0
61 #define AR_name_size 16
/* Modification time field, parsed with ar_atol10(). */
62 #define AR_date_offset 16
63 #define AR_date_size 12
/* Owner and group id fields. */
64 #define AR_uid_offset 28
66 #define AR_gid_offset 34
/* File mode field, parsed with ar_atol8(). */
68 #define AR_mode_offset 40
69 #define AR_mode_size 8
/* Entry size field, parsed with ar_atol10(). */
70 #define AR_size_offset 48
71 #define AR_size_size 10
/* Two-byte header terminator checked by the read_header callback. */
72 #define AR_fmag_offset 58
73 #define AR_fmag_size 2
/*
 * Locale-independent test for an ASCII decimal digit.
 *
 * The whole expansion is parenthesized so the macro composes correctly with
 * surrounding operators such as `!isdigit(c)` or `isdigit(c) == 0`; without
 * the outer parentheses those expressions bind to only part of the test.
 * The argument is evaluated twice, so do not pass an expression with side
 * effects.
 */
#define isdigit(x) ((x) >= '0' && (x) <= '9')
/*
 * Reader callbacks.  archive_read_support_format_ar() hands bid,
 * read_header, read_data, skip and cleanup to
 * __archive_read_register_format().
 */
77 static int archive_read_format_ar_bid(struct archive_read *a);
78 static int archive_read_format_ar_cleanup(struct archive_read *a);
79 static int archive_read_format_ar_read_data(struct archive_read *a,
80 const void **buff, size_t *size, off_t *offset);
81 static int archive_read_format_ar_skip(struct archive_read *a);
82 static int archive_read_format_ar_read_header(struct archive_read *a,
83 struct archive_entry *e);
/*
 * File-local helpers: numeric field parsers bounded by a character count,
 * plus the GNU filename table and common header field parsers.
 */
84 static uint64_t ar_atol8(const char *p, unsigned char_cnt);
85 static uint64_t ar_atol10(const char *p, unsigned char_cnt);
86 static int ar_parse_gnu_filename_table(struct archive_read *a);
87 static int ar_parse_common_header(struct ar *ar, struct archive_entry *,
/*
 * Enable reading of ar archives on the given archive handle.
 *
 * Allocates the private struct ar state and zero-fills it, then registers
 * the bid, read_header, read_data, skip and cleanup callbacks through
 * __archive_read_register_format().  An allocation failure is reported
 * with ENOMEM and ARCHIVE_FATAL.
 * NOTE(review): the handling of a registration failure (r != ARCHIVE_OK)
 * is not visible in this excerpt.
 */
91 archive_read_support_format_ar(struct archive *_a)
93 struct archive_read *a = (struct archive_read *)_a;
97 ar = (struct ar *)malloc(sizeof(*ar));
99 archive_set_error(&a->archive, ENOMEM,
100 "Can't allocate ar data");
101 return (ARCHIVE_FATAL);
/* Start from an all-zero state. */
103 memset(ar, 0, sizeof(*ar));
106 r = __archive_read_register_format(a,
108 archive_read_format_ar_bid,
109 archive_read_format_ar_read_header,
110 archive_read_format_ar_read_data,
111 archive_read_format_ar_skip,
112 archive_read_format_ar_cleanup);
114 if (r != ARCHIVE_OK) {
/*
 * Cleanup callback: fetches the private struct ar from a->format->data and
 * resets that pointer to NULL.
 * NOTE(review): the statements between those two steps (presumably the
 * release of the state and its string table) are not visible in this
 * excerpt.
 */
122 archive_read_format_ar_cleanup(struct archive_read *a)
126 ar = (struct ar *)(a->format->data);
130 (a->format->data) = NULL;
/*
 * Bid callback: decides whether the input looks like an ar archive.
 *
 * When a format code has already been set on the archive, its base
 * (masked with ARCHIVE_FORMAT_BASE_MASK) is compared first.  The function
 * then peeks at the first 8 bytes with __archive_read_ahead() and compares
 * them against the global ar signature.
 * NOTE(review): the actual bid values returned are not visible in this
 * excerpt.
 */
135 archive_read_format_ar_bid(struct archive_read *a)
140 if (a->archive.archive_format != 0 &&
141 (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) !=
145 ar = (struct ar *)(a->format->data);
148 * Verify the 8-byte file signature.
149 * TODO: Do we need to check more than this?
151 if ((h = __archive_read_ahead(a, 8, NULL)) == NULL)
153 if (strncmp((const char*)h, "!<arch>\n", 8) == 0) {
/*
 * Read-header callback: parse the next ar member header into entry.
 *
 * Steps visible in this function:
 *  - At file position 0, consume the 8-byte global archive header and set
 *    the provisional format code ARCHIVE_FORMAT_AR.
 *  - Read ahead and consume the 60-byte member header.  ARCHIVE_EOF is
 *    returned when it is unavailable and ARCHIVE_WARN when the trailing
 *    magic bytes are wrong.
 *  - Copy the 16-byte name field into a NUL-terminated work buffer and,
 *    while the format is still the generic ARCHIVE_FORMAT_AR, guess the
 *    GNU/SVR4 or BSD variant from it; then set the format name string.
 *  - Strip trailing spaces and, unless the name begins with a slash, one
 *    trailing slash.
 *  - Dispatch on the name:
 *      double slash        GNU filename table; its size is validated, the
 *                          table is copied into a freshly allocated buffer
 *                          and handed to ar_parse_gnu_filename_table()
 *      slash plus digits   GNU long name, looked up in ar->strtab at the
 *                          parsed offset; ARCHIVE_WARN when the table is
 *                          missing or the offset exceeds ar->strtab_size
 *      #1/ plus length     BSD long name stored ahead of the file data;
 *                          the name length is subtracted from the entry
 *                          size reported to the client
 *      lone slash          SVR4/GNU symbol table, forced to AE_IFREG
 *      __.SYMDEF           BSD symbol table
 *      anything else       ordinary member, using the trimmed name
 *
 * The time, owner, mode and size fields are filled in through
 * ar_parse_common_header() on every path.
 * NOTE(review): this excerpt omits some lines of the function (local
 * declarations, braces and a few statements), so the summary above covers
 * only what is visible here.
 */
160 archive_read_format_ar_read_header(struct archive_read *a,
161 struct archive_entry *entry)
163 char filename[AR_name_size + 1];
165 uint64_t number; /* Used to hold parsed numbers before validation. */
167 size_t bsd_name_length, entry_size;
173 ar = (struct ar*)(a->format->data);
175 if (a->archive.file_position == 0) {
177 * We are now at the beginning of the archive,
178 * so we need first consume the ar global header.
180 __archive_read_consume(a, 8);
181 /* Set a default format code for now. */
182 a->archive.archive_format = ARCHIVE_FORMAT_AR;
185 /* Read the header for the next file entry. */
186 if ((b = __archive_read_ahead(a, 60, &bytes_read)) == NULL)
188 return (ARCHIVE_EOF);
189 __archive_read_consume(a, 60);
192 /* Verify the magic signature on the file header. */
193 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
194 archive_set_error(&a->archive, EINVAL,
195 "Incorrect file header signature");
196 return (ARCHIVE_WARN);
199 /* Copy filename into work buffer. */
200 strncpy(filename, h + AR_name_offset, AR_name_size);
201 filename[AR_name_size] = '\0';
204 * Guess the format variant based on the filename.
206 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
207 /* We don't already know the variant, so let's guess. */
209 * Biggest clue is presence of '/': GNU starts special
210 * filenames with '/', appends '/' as terminator to
211 * non-special names, so anything with '/' should be
212 * GNU except for BSD long filenames.
214 if (strncmp(filename, "#1/", 3) == 0)
215 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
216 else if (strchr(filename, '/') != NULL)
217 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
218 else if (strncmp(filename, "__.SYMDEF", 9) == 0)
219 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
221 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
222 * if name exactly fills 16-byte field? If so, we
223 * can't assume entries without '/' are BSD. XXX
227 /* Update format name from the code. */
228 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
229 a->archive.archive_format_name = "ar (GNU/SVR4)";
230 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
231 a->archive.archive_format_name = "ar (BSD)";
233 a->archive.archive_format_name = "ar";
236 * Remove trailing spaces from the filename. GNU and BSD
237 * variants both pad filename area out with spaces.
238 * This will only be wrong if GNU/SVR4 'ar' implementations
239 * omit trailing '/' for 16-char filenames and we have
240 * a 16-char filename that ends in ' '.
242 p = filename + AR_name_size - 1;
243 while (p >= filename && *p == ' ') {
249 * Remove trailing slash unless first character is '/'.
250 * (BSD entries never end in '/', so this will only trim
251 * GNU-format entries. GNU special entries start with '/'
252 * and are not terminated in '/', so we don't trim anything
253 * that starts with '/'.)
255 if (filename[0] != '/' && *p == '/')
259 * '//' is the GNU filename table.
260 * Later entries can refer to names in this table.
262 if (strcmp(filename, "//") == 0) {
263 /* This must come before any call to _read_ahead. */
264 ar_parse_common_header(ar, entry, h);
265 archive_entry_copy_pathname(entry, filename);
266 archive_entry_set_filetype(entry, AE_IFREG);
267 /* Get the size of the filename table. */
268 number = ar_atol10(h + AR_size_offset, AR_size_size);
269 if (number > SIZE_MAX) {
270 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
271 "Filename table too large");
272 return (ARCHIVE_FATAL);
274 entry_size = (size_t)number;
275 if (entry_size == 0) {
276 archive_set_error(&a->archive, EINVAL,
277 "Invalid string table");
278 return (ARCHIVE_WARN);
280 if (ar->strtab != NULL) {
281 archive_set_error(&a->archive, EINVAL,
282 "More than one string tables exist");
283 return (ARCHIVE_WARN);
286 /* Read the filename table into memory. */
287 st = malloc(entry_size);
289 archive_set_error(&a->archive, ENOMEM,
290 "Can't allocate filename table buffer");
291 return (ARCHIVE_FATAL);
294 ar->strtab_size = entry_size;
295 if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
296 return (ARCHIVE_FATAL);
297 memcpy(st, b, entry_size);
298 __archive_read_consume(a, entry_size);
299 /* All contents are consumed. */
300 ar->entry_bytes_remaining = 0;
301 archive_entry_set_size(entry, ar->entry_bytes_remaining);
303 /* Parse the filename table. */
304 return (ar_parse_gnu_filename_table(a));
308 * GNU variant handles long filenames by storing /<number>
309 * to indicate a name stored in the filename table.
311 if (filename[0] == '/' && isdigit(filename[1])) {
312 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
314 * If we can't look up the real name, warn and return
315 * the entry with the wrong name.
317 if (ar->strtab == NULL || number > ar->strtab_size) {
318 archive_set_error(&a->archive, EINVAL,
319 "Can't find long filename for entry");
320 archive_entry_copy_pathname(entry, filename);
321 /* Parse the time, owner, mode, size fields. */
322 ar_parse_common_header(ar, entry, h);
323 return (ARCHIVE_WARN);
326 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
327 /* Parse the time, owner, mode, size fields. */
328 return (ar_parse_common_header(ar, entry, h));
332 * BSD handles long filenames by storing "#1/" followed by the
333 * length of filename as a decimal number, then prepends the
334 * the filename to the file contents.
336 if (strncmp(filename, "#1/", 3) == 0) {
337 /* Parse the time, owner, mode, size fields. */
338 /* This must occur before _read_ahead is called again. */
339 ar_parse_common_header(ar, entry, h);
341 /* Parse the size of the name, adjust the file size. */
342 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
343 bsd_name_length = (size_t)number;
344 /* Guard against the filename + trailing NUL
345 * overflowing a size_t and against the filename size
346 * being larger than the entire entry. */
347 if (number > (uint64_t)(bsd_name_length + 1)
348 || (off_t)bsd_name_length > ar->entry_bytes_remaining) {
349 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
350 "Bad input file size");
351 return (ARCHIVE_FATAL);
353 ar->entry_bytes_remaining -= bsd_name_length;
354 /* Adjust file size reported to client. */
355 archive_entry_set_size(entry, ar->entry_bytes_remaining);
357 /* Read the long name into memory. */
358 if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
359 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
360 "Truncated input file");
361 return (ARCHIVE_FATAL);
363 __archive_read_consume(a, bsd_name_length);
365 /* Store it in the entry. */
366 p = (char *)malloc(bsd_name_length + 1);
368 archive_set_error(&a->archive, ENOMEM,
369 "Can't allocate fname buffer");
370 return (ARCHIVE_FATAL);
372 strncpy(p, b, bsd_name_length);
373 p[bsd_name_length] = '\0';
374 archive_entry_copy_pathname(entry, p);
380 * "/" is the SVR4/GNU archive symbol table.
382 if (strcmp(filename, "/") == 0) {
383 archive_entry_copy_pathname(entry, "/");
384 /* Parse the time, owner, mode, size fields. */
385 r = ar_parse_common_header(ar, entry, h);
386 /* Force the file type to a regular file. */
387 archive_entry_set_filetype(entry, AE_IFREG);
392 * "__.SYMDEF" is a BSD archive symbol table.
394 if (strcmp(filename, "__.SYMDEF") == 0) {
395 archive_entry_copy_pathname(entry, filename);
396 /* Parse the time, owner, mode, size fields. */
397 return (ar_parse_common_header(ar, entry, h));
401 * Otherwise, this is a standard entry. The filename
402 * has already been trimmed as much as possible, based
403 * on our current knowledge of the format.
405 archive_entry_copy_pathname(entry, filename);
406 return (ar_parse_common_header(ar, entry, h));
/*
 * Fill in the entry fields shared by every kind of ar member.
 *
 * The date, uid and gid header fields are parsed with ar_atol10() and the
 * mode field with ar_atol8(); the results become the entry mtime, uid, gid
 * and mode.  The size field n is parsed with ar_atol10() and used to reset
 * the per-entry read state in ar: the offset restarts at 0, the remaining
 * byte count becomes n, and the padding becomes n % 2 (one pad byte after
 * an odd-sized body).
 * NOTE(review): the third parameter (the header pointer h used below) and
 * the return statement are not visible in this excerpt.
 */
410 ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
415 /* Copy remaining header */
416 archive_entry_set_mtime(entry,
417 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
418 archive_entry_set_uid(entry,
419 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
420 archive_entry_set_gid(entry,
421 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
422 archive_entry_set_mode(entry,
423 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
424 n = ar_atol10(h + AR_size_offset, AR_size_size);
/* Reset the per-entry read state for the new member. */
426 ar->entry_offset = 0;
427 ar->entry_padding = n % 2;
428 archive_entry_set_size(entry, n);
429 ar->entry_bytes_remaining = n;
/*
 * Read-data callback: hand the caller the next chunk of the current entry.
 *
 * While entry bytes remain, the function peeks at whatever is buffered
 * (at least one byte requested) with __archive_read_ahead(), clamps the
 * chunk to the bytes remaining in the entry, reports the current entry
 * offset through *offset, advances the offset and remaining counters, and
 * consumes the chunk.  A zero-length read is reported as a truncated
 * archive with ARCHIVE_FATAL.
 *
 * Once the body is exhausted, any padding is read and consumed, *offset is
 * set to the final entry offset and ARCHIVE_EOF is returned.
 * NOTE(review): the assignments to *size and the successful return from
 * the data branch are not visible in this excerpt.
 */
434 archive_read_format_ar_read_data(struct archive_read *a,
435 const void **buff, size_t *size, off_t *offset)
440 ar = (struct ar *)(a->format->data);
442 if (ar->entry_bytes_remaining > 0) {
443 *buff = __archive_read_ahead(a, 1, &bytes_read);
444 if (bytes_read == 0) {
445 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
446 "Truncated ar archive");
447 return (ARCHIVE_FATAL);
450 return (ARCHIVE_FATAL);
/* Never return bytes that belong to the next member. */
451 if (bytes_read > ar->entry_bytes_remaining)
452 bytes_read = (ssize_t)ar->entry_bytes_remaining;
454 *offset = ar->entry_offset;
455 ar->entry_offset += bytes_read;
456 ar->entry_bytes_remaining -= bytes_read;
457 __archive_read_consume(a, (size_t)bytes_read);
/* Body fully delivered: discard the padding that follows it. */
460 while (ar->entry_padding > 0) {
461 *buff = __archive_read_ahead(a, 1, &bytes_read);
463 return (ARCHIVE_FATAL);
464 if (bytes_read > ar->entry_padding)
465 bytes_read = (ssize_t)ar->entry_padding;
466 __archive_read_consume(a, (size_t)bytes_read);
467 ar->entry_padding -= bytes_read;
471 *offset = ar->entry_offset;
472 return (ARCHIVE_EOF);
/*
 * Skip callback: discard whatever is left of the current entry.
 *
 * Asks __archive_read_skip() to skip the unread body bytes plus the
 * padding in a single request.  A negative result is returned as
 * ARCHIVE_FATAL; otherwise both counters are reset to zero.
 * NOTE(review): the final return statement is not visible in this excerpt.
 */
477 archive_read_format_ar_skip(struct archive_read *a)
482 ar = (struct ar *)(a->format->data);
484 bytes_skipped = __archive_read_skip(a,
485 ar->entry_bytes_remaining + ar->entry_padding);
486 if (bytes_skipped < 0)
487 return (ARCHIVE_FATAL);
489 ar->entry_bytes_remaining = 0;
490 ar->entry_padding = 0;
/*
 * Validate the GNU filename table held in ar->strtab (ar->strtab_size
 * bytes) so later entries can reference names inside it.
 *
 * The table is scanned from its start up to, but not including, its last
 * byte.  After the scan a trailing newline is required unless the scan
 * pointer already sits at the end of the table, and the last byte is then
 * overwritten with NUL so every lookup is terminated.  Any validation
 * failure jumps to bad_string_table, which records EINVAL and returns
 * ARCHIVE_WARN.
 * NOTE(review): the body of the scan loop, the success return and any
 * cleanup done under bad_string_table are not visible in this excerpt.
 */
496 ar_parse_gnu_filename_table(struct archive_read *a)
502 ar = (struct ar*)(a->format->data);
503 size = ar->strtab_size;
505 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
509 goto bad_string_table;
514 * Sanity check, last two chars must be `/\n' or '\n\n',
515 * depending on whether the string table is padded by a '\n'
516 * (string table produced by GNU ar always has a even size).
518 if (p != ar->strtab + size && *p != '\n')
519 goto bad_string_table;
521 /* Enforce zero termination. */
522 ar->strtab[size - 1] = '\0';
527 archive_set_error(&a->archive, EINVAL,
528 "Invalid string table");
531 return (ARCHIVE_WARN);
/*
 * Parse an unsigned number from a fixed-width, unterminated header field.
 *
 * p        start of the field
 * char_cnt maximum number of characters that belong to the field
 *
 * Leading spaces and tabs are skipped, then digits below the base are
 * accumulated.  If the next digit would overflow a uint64_t the result is
 * pinned to UINT64_MAX.
 * NOTE(review): the assignment of base is not visible in this excerpt;
 * presumably 8, going by the function name.
 * NOTE(review): both loops read *p before testing char_cnt, so a field
 * that is entirely blank or entirely digits causes a read of the byte
 * just past the field; confirm every caller has a readable byte there.
 */
535 ar_atol8(const char *p, unsigned char_cnt)
537 uint64_t l, limit, last_digit_limit;
538 unsigned int digit, base;
/* Largest accumulator value that can still take one more digit. */
541 limit = UINT64_MAX / base;
542 last_digit_limit = UINT64_MAX % base;
544 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0)
549 while (*p >= '0' && digit < base && char_cnt-- > 0) {
550 if (l>limit || (l == limit && digit > last_digit_limit)) {
551 l = UINT64_MAX; /* Truncate on overflow. */
554 l = (l * base) + digit;
/*
 * Parse an unsigned number from a fixed-width, unterminated header field.
 *
 * p        start of the field
 * char_cnt maximum number of characters that belong to the field
 *
 * Leading spaces and tabs are skipped, then digits below the base are
 * accumulated.  If the next digit would overflow a uint64_t the result is
 * pinned to UINT64_MAX.
 * NOTE(review): the assignment of base is not visible in this excerpt;
 * presumably 10, going by the function name.
 * NOTE(review): both loops read *p before testing char_cnt, so a field
 * that is entirely blank or entirely digits causes a read of the byte
 * just past the field; confirm every caller has a readable byte there.
 */
561 ar_atol10(const char *p, unsigned char_cnt)
563 uint64_t l, limit, last_digit_limit;
564 unsigned int base, digit;
/* Largest accumulator value that can still take one more digit. */
567 limit = UINT64_MAX / base;
568 last_digit_limit = UINT64_MAX % base;
570 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0)
574 while (*p >= '0' && digit < base && char_cnt-- > 0) {
575 if (l > limit || (l == limit && digit > last_digit_limit)) {
576 l = UINT64_MAX; /* Truncate on overflow. */
579 l = (l * base) + digit;