2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD$");
29 #ifdef HAVE_SYS_STAT_H
44 #include "archive_entry.h"
47 * This is mostly a pretty straightforward hash table implementation.
48 * The only interesting bit is the different strategies used to
49 * match up links. These strategies match those used by various
51 * tar - content stored with first link, remainder refer back to it.
52 * This requires us to match each subsequent link up with the
54 * cpio - Old cpio just stored body with each link, match-ups were
55 * implicit. This is trivial.
56 * new cpio - New cpio only stores body with last link, match-ups
57 * are implicit. This is actually quite tricky; see the notes
61 /* Users pass us a format code, we translate that into a strategy here. */
/*
 * The values below are stored in the resolver's `strategy` field by
 * archive_entry_linkresolver_set_strategy() and switched on by
 * archive_entry_linkify().
 */
/* Body stays with the first link; later links become size-less hardlinks. */
62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
/* Later links get a hardlink target; the visible MTREE case does not unset the size. */
63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
/* Every link carries its own body, so nothing has to be matched up. */
64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
/* Body is stored only with the last link of a file. */
65 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
67 /* Initial size of link cache. */
/*
 * Must be a positive power of two (archive_entry_linkresolver_new() checks
 * this): bucket indices are computed as hash & (number_buckets - 1).
 */
68 #define links_cache_initial_size 1024
/* Neighbours in the doubly-linked chain of one hash bucket. */
71 struct links_entry *next;
72 struct links_entry *previous;
/* Result of archive_entry_clone() on the entry passed to insert_entry(). */
73 struct archive_entry *canonical;
/*
 * Second entry held by this record; next_entry() uses NULL / non-NULL here
 * to tell "partial" records from "deferred" ones.
 */
74 struct archive_entry *entry;
/* Set to nlink - 1 on insertion. */
76 unsigned int links; /* # links not yet seen */
/* State of one hardlink resolver: a chained hash table of links_entry records. */
79 struct archive_entry_linkresolver {
/* Array of number_buckets chain heads. */
80 struct links_entry **buckets;
/*
 * Record whose canonical/entry are released at the start of the next
 * find_entry(), next_entry() or archive_entry_partial_links() call.
 */
81 struct links_entry *spare;
/* Records currently linked into the table (++ on insert, -- on removal). */
82 unsigned long number_entries;
/* Always a power of two; doubled by grow_hash(). */
83 size_t number_buckets;
/*
 * Mode bits for next_entry().  next_entry() tests NEXT_ENTRY_DEFERRED for
 * records whose `entry` is non-NULL and NEXT_ENTRY_PARTIAL for records
 * whose `entry` is NULL.
 */
87 #define NEXT_ENTRY_DEFERRED 1
88 #define NEXT_ENTRY_PARTIAL 2
/* Both kinds of record; used when the resolver is torn down. */
89 #define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
/* Forward declarations of the file-local hash-table helpers defined below. */
91 static struct links_entry *find_entry(struct archive_entry_linkresolver *,
92 struct archive_entry *);
93 static void grow_hash(struct archive_entry_linkresolver *);
94 static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
95 struct archive_entry *);
96 static struct links_entry *next_entry(struct archive_entry_linkresolver *,
/*
 * Create a new link resolver.
 *
 * The resolver is zero-allocated and receives a zeroed bucket array of
 * links_cache_initial_size slots.
 * NOTE(review): the failure paths (bad initial size, failed calloc) are not
 * visible here -- confirm what is handed back to the caller in those cases.
 */
99 struct archive_entry_linkresolver *
100 archive_entry_linkresolver_new(void)
102 struct archive_entry_linkresolver *res;
/*
 * The size is a compile-time constant, so this only guards against a bad
 * edit of links_cache_initial_size; bucket selection masks the hash with
 * (number_buckets - 1) and therefore needs a power of two.
 */
104 /* Check for positive power-of-two */
105 if (links_cache_initial_size == 0 ||
106 (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
/* calloc() zero-fills, so fields not assigned below start out zeroed. */
109 res = calloc(1, sizeof(struct archive_entry_linkresolver));
112 res->number_buckets = links_cache_initial_size;
/* One chain-head pointer per bucket, all initially empty. */
113 res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
114 if (res->buckets == NULL) {
/*
 * Choose the hardlink strategy for a given archive format code and store it
 * in res->strategy.
 *
 * Visible mapping:
 *   7ZIP, AR, ZIP               -> LIKE_OLD_CPIO
 *   CPIO SVR4 (NOCRC / CRC)     -> LIKE_NEW_CPIO
 *   MTREE                       -> LIKE_MTREE
 *   ISO9660, SHAR, TAR, XAR     -> LIKE_TAR
 * NOTE(review): the two remaining LIKE_OLD_CPIO assignments presumably
 * belong to the "other cpio variant" and "unknown format" default labels;
 * those labels are not visible here -- confirm.
 */
122 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
/* Strip the variant bits so that all flavours of one format family match. */
125 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
128 case ARCHIVE_FORMAT_7ZIP:
129 case ARCHIVE_FORMAT_AR:
130 case ARCHIVE_FORMAT_ZIP:
131 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
133 case ARCHIVE_FORMAT_CPIO:
/* Within the cpio family the specific SVR4 variants get their own strategy. */
135 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
136 case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
137 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
140 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
144 case ARCHIVE_FORMAT_MTREE:
145 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
147 case ARCHIVE_FORMAT_ISO9660:
148 case ARCHIVE_FORMAT_SHAR:
149 case ARCHIVE_FORMAT_TAR:
150 case ARCHIVE_FORMAT_XAR:
151 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
154 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
/*
 * Tear down a link resolver.
 *
 * Every record still in the cache is pulled out with
 * next_entry(res, NEXT_ENTRY_ALL) and the `entry` it holds is freed.
 * NOTE(review): release of the bucket array and of `res` itself is not
 * visible here -- confirm.
 */
160 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
162 struct links_entry *le;
/* NEXT_ENTRY_ALL selects both deferred and partial records. */
167 while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
168 archive_entry_free(le->entry);
/*
 * Apply the resolver's hardlink strategy to *e.
 *
 * *f is reset to NULL on entry and is only used when a second entry has to
 * be handed back as well.  Entries with a single link, and directories and
 * block/character devices, are tested for before the strategy switch.
 * NOTE(review): the early returns and the guards around the find_entry()
 * results are not visible here, so the exact hand-off of *e and *f in the
 * new-cpio case should be confirmed before relying on these notes.
 */
174 archive_entry_linkify(struct archive_entry_linkresolver *res,
175 struct archive_entry **e, struct archive_entry **f)
177 struct links_entry *le;
178 struct archive_entry *t;
180 *f = NULL; /* Default: Don't return a second entry. */
/*
 * Pull one deferred record out of the cache (presumably the path taken
 * when the caller signals end of input -- confirm).
 */
183 le = next_entry(res, NEXT_ENTRY_DEFERRED);
191 /* If it has only one link, then we're done. */
192 if (archive_entry_nlink(*e) == 1)
194 /* Directories, devices never have hardlinks. */
195 if (archive_entry_filetype(*e) == AE_IFDIR
196 || archive_entry_filetype(*e) == AE_IFBLK
197 || archive_entry_filetype(*e) == AE_IFCHR)
200 switch (res->strategy) {
201 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
/*
 * find_entry() matches on device and inode; a hit means an earlier link
 * of this file is cached.  The entry then loses its size and points at the
 * canonical pathname; insert_entry() presumably runs only on a miss.
 */
202 le = find_entry(res, *e);
204 archive_entry_unset_size(*e);
205 archive_entry_copy_hardlink(*e,
206 archive_entry_pathname(le->canonical));
208 insert_entry(res, *e);
210 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
/* Same lookup as the tar case, but the size is left untouched. */
211 le = find_entry(res, *e);
213 archive_entry_copy_hardlink(*e,
214 archive_entry_pathname(le->canonical));
216 insert_entry(res, *e);
218 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
219 /* This one is trivial. */
221 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
/* New cpio stores the body with the last link, so earlier links are held back. */
222 le = find_entry(res, *e);
225 * Put the new entry in le, return the
231 /* Make the old entry into a hardlink. */
232 archive_entry_unset_size(*e);
233 archive_entry_copy_hardlink(*e,
234 archive_entry_pathname(le->canonical));
235 /* If we ran out of links, return the
236 * final entry as well. */
237 if (le->links == 0) {
243 * If we haven't seen it, tuck it away
246 le = insert_entry(res, *e);
/*
 * Look up `entry` in the links cache by device and inode number.
 *
 * Any record parked in res->spare by an earlier call has its entries
 * released first.  Per the comment inside the loop, a match has its link
 * count decremented and is unlinked from its bucket when the count reaches
 * zero, with the free of the record itself deferred.
 * NOTE(review): the return statements are not visible here -- presumably
 * the matching record, or NULL when nothing matches.
 */
257 static struct links_entry *
258 find_entry(struct archive_entry_linkresolver *res,
259 struct archive_entry *entry)
261 struct links_entry *le;
266 /* Free a held entry. */
267 if (res->spare != NULL) {
268 archive_entry_free(res->spare->canonical);
269 archive_entry_free(res->spare->entry);
274 dev = archive_entry_dev(entry);
275 ino = archive_entry_ino64(entry);
/* insert_entry() hashes with the same dev ^ ino combination. */
276 hash = (size_t)(dev ^ ino);
278 /* Try to locate this entry in the links cache. */
/* number_buckets is a power of two, so the mask picks a valid bucket. */
279 bucket = hash & (res->number_buckets - 1);
280 for (le = res->buckets[bucket]; le != NULL; le = le->next) {
282 && dev == archive_entry_dev(le->canonical)
283 && ino == archive_entry_ino64(le->canonical)) {
285 * Decrement link count each time and release
286 * the entry if it hits zero. This saves
287 * memory and is necessary for detecting
293 /* Remove it from this hash bucket. */
/* Doubly-linked unlink; the bucket head moves on if le was first in the chain. */
294 if (le->previous != NULL)
295 le->previous->next = le->next;
296 if (le->next != NULL)
297 le->next->previous = le->previous;
298 if (res->buckets[bucket] == le)
299 res->buckets[bucket] = le->next;
300 res->number_entries--;
301 /* Defer freeing this entry. */
/*
 * Remove the next record selected by `mode` from the links cache.
 *
 * Any record parked in res->spare by an earlier call has its entries
 * released first.  Buckets are scanned from index 0 upwards.
 * NOTE(review): the return statements are not visible here -- presumably
 * the removed record, or NULL once no record matches `mode`.
 */
309 static struct links_entry *
310 next_entry(struct archive_entry_linkresolver *res, int mode)
312 struct links_entry *le;
315 /* Free a held entry. */
316 if (res->spare != NULL) {
317 archive_entry_free(res->spare->canonical);
318 archive_entry_free(res->spare->entry);
323 /* Look for next non-empty bucket in the links cache. */
324 for (bucket = 0; bucket < res->number_buckets; bucket++) {
325 for (le = res->buckets[bucket]; le != NULL; le = le->next) {
/*
 * Mode filter: a record holding an `entry` needs NEXT_ENTRY_DEFERRED,
 * a record without one needs NEXT_ENTRY_PARTIAL (non-matching records
 * are presumably skipped -- the statement is not visible here).
 */
326 if (le->entry != NULL &&
327 (mode & NEXT_ENTRY_DEFERRED) == 0)
329 if (le->entry == NULL &&
330 (mode & NEXT_ENTRY_PARTIAL) == 0)
332 /* Remove it from this hash bucket. */
333 if (le->next != NULL)
334 le->next->previous = le->previous;
335 if (le->previous != NULL)
336 le->previous->next = le->next;
338 res->buckets[bucket] = le->next;
/* Keep the entry count in step with the table contents. */
339 res->number_entries--;
340 /* Defer freeing this entry. */
/*
 * Add a record for `entry` to the links cache.
 *
 * The new record is zero-allocated, stores a clone of `entry` as its
 * canonical entry, is pushed onto the head of its bucket chain, and starts
 * with links = nlink - 1.
 * NOTE(review): the allocation-failure handling and the return value are
 * not visible here -- presumably the new record, or NULL on failure.
 */
348 static struct links_entry *
349 insert_entry(struct archive_entry_linkresolver *res,
350 struct archive_entry *entry)
352 struct links_entry *le;
355 /* Add this entry to the links cache. */
356 le = calloc(1, sizeof(struct links_entry));
/* canonical holds the result of archive_entry_clone(), not `entry` itself. */
359 le->canonical = archive_entry_clone(entry);
361 /* If the links cache is getting too full, enlarge the hash table. */
/* Threshold: more than two records per bucket on average. */
362 if (res->number_entries > res->number_buckets * 2)
/*
 * The bucket is chosen after any growth above, so the mask uses the
 * current number_buckets.
 */
365 hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry);
366 bucket = hash & (res->number_buckets - 1);
368 /* If we could allocate the entry, record it. */
/* Push at the head of the chain: the old head gets le as its predecessor. */
369 if (res->buckets[bucket] != NULL)
370 res->buckets[bucket]->previous = le;
371 res->number_entries++;
372 le->next = res->buckets[bucket];
374 res->buckets[bucket] = le;
/* The entry being inserted counts as one link already seen. */
376 le->links = archive_entry_nlink(entry) - 1;
/*
 * Double the number of hash buckets and move every record into the new
 * bucket array.
 *
 * Records are re-bucketed from the hash stored in each record, so entries
 * are not consulted again.
 * NOTE(review): what happens on size overflow or allocation failure is not
 * visible here -- presumably the table is left at its current size.
 */
381 grow_hash(struct archive_entry_linkresolver *res)
383 struct links_entry *le, **new_buckets;
387 /* Try to enlarge the bucket list. */
388 new_size = res->number_buckets * 2;
/* A result smaller than the input means the doubling wrapped around. */
389 if (new_size < res->number_buckets)
391 new_buckets = calloc(new_size, sizeof(struct links_entry *));
393 if (new_buckets == NULL)
396 for (i = 0; i < res->number_buckets; i++) {
/* Pop records off the old chain one at a time until it is empty. */
397 while (res->buckets[i] != NULL) {
398 /* Remove entry from old bucket. */
399 le = res->buckets[i];
400 res->buckets[i] = le->next;
402 /* Add entry to new bucket. */
/* new_size is still a power of two, so the same masking scheme applies. */
403 bucket = le->hash & (new_size - 1);
405 if (new_buckets[bucket] != NULL)
406 new_buckets[bucket]->previous = le;
407 le->next = new_buckets[bucket];
409 new_buckets[bucket] = le;
/* Switch the resolver over to the enlarged table. */
413 res->buckets = new_buckets;
414 res->number_buckets = new_size;
/*
 * Fetch the next "partial" record from the resolver, i.e. one selected by
 * next_entry(res, NEXT_ENTRY_PARTIAL).
 *
 * Any record parked in res->spare by an earlier call has its entries
 * released first.
 * NOTE(review): the remaining parameter(s), the assignment of `e` and the
 * return statement are not visible here -- presumably the record's
 * canonical entry is returned, or NULL when no partial record is left.
 */
417 struct archive_entry *
418 archive_entry_partial_links(struct archive_entry_linkresolver *res,
421 struct archive_entry *e;
422 struct links_entry *le;
424 /* Free a held entry. */
425 if (res->spare != NULL) {
426 archive_entry_free(res->spare->canonical);
427 archive_entry_free(res->spare->entry);
432 le = next_entry(res, NEXT_ENTRY_PARTIAL);
/*
 * Detach the canonical entry from the record (presumably so the later
 * "free a held entry" step does not free it -- confirm).
 */
437 le->canonical = NULL;