CyberLeo.Net >> Repos - FreeBSD/releng/9.2.git/blob - contrib/libarchive/libarchive/test/test_pax_filename_encoding.c
- Copy stable/9 to releng/9.2 as part of the 9.2-RELEASE cycle.
[FreeBSD/releng/9.2.git] / contrib / libarchive / libarchive / test / test_pax_filename_encoding.c
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27
28 #include <locale.h>
29
30 /*
31  * Pax interchange is supposed to encode filenames into
32  * UTF-8.  Of course, that's not always possible.  This
33  * test is intended to verify that filenames always get
34  * stored and restored correctly, regardless of the encodings.
35  */
36
37 /*
38  * Read a manually-created archive that has filenames that are
39  * stored in binary instead of UTF-8 and verify that we get
40  * the right filename returned and that we get a warning only
41  * if the header isn't marked as binary.
42  */
43 static void
44 test_pax_filename_encoding_1(void)
45 {
46         static const char testname[] = "test_pax_filename_encoding.tar";
47         /*
48          * \314\214 is a valid 2-byte UTF-8 sequence.
49          * \374 is invalid in UTF-8.
50          */
51         char filename[] = "abc\314\214mno\374xyz";
52         struct archive *a;
53         struct archive_entry *entry;
54
55         /*
56          * Read an archive that has non-UTF8 pax filenames in it.
57          */
58         extract_reference_file(testname);
59         a = archive_read_new();
60         assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a));
61         assertEqualInt(ARCHIVE_OK, archive_read_support_compression_all(a));
62         assertEqualInt(ARCHIVE_OK,
63             archive_read_open_filename(a, testname, 10240));
64         /*
65          * First entry in this test archive has an invalid UTF-8 sequence
66          * in it, but the header is not marked as hdrcharset=BINARY, so that
67          * requires a warning.
68          */
69         failure("Invalid UTF8 in a pax archive pathname should cause a warning");
70         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
71         assertEqualString(filename, archive_entry_pathname(entry));
72         /*
73          * Second entry is identical except that it does have
74          * hdrcharset=BINARY, so no warning should be generated.
75          */
76         failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n"
77             " characters in it without generating a warning");
78         assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
79         assertEqualString(filename, archive_entry_pathname(entry));
80         archive_read_finish(a);
81 }
82
83 /*
84  * Set the locale and write a pathname containing invalid characters.
85  * This should work; the underlying implementation should automatically
86  * fall back to storing the pathname in binary.
87  */
88 static void
89 test_pax_filename_encoding_2(void)
90 {
91         char filename[] = "abc\314\214mno\374xyz";
92         struct archive *a;
93         struct archive_entry *entry;
94         char buff[65536];
95         char longname[] = "abc\314\214mno\374xyz"
96             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
97             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
98             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
99             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
100             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
101             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
102             ;
103         size_t used;
104
105         /*
106          * We need a starting locale which has invalid sequences.
107          * de_DE.UTF-8 seems to be commonly supported.
108          */
109         /* If it doesn't exist, just warn and return. */
110         if (LOCALE_UTF8 == NULL
111             || NULL == setlocale(LC_ALL, LOCALE_UTF8)) {
112                 skipping("invalid encoding tests require a suitable locale;"
113                     " %s not available on this system", LOCALE_UTF8);
114                 return;
115         }
116
117         assert((a = archive_write_new()) != NULL);
118         assertEqualIntA(a, 0, archive_write_set_format_pax(a));
119         assertEqualIntA(a, 0, archive_write_set_compression_none(a));
120         assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
121         assertEqualInt(0,
122             archive_write_open_memory(a, buff, sizeof(buff), &used));
123
124         assert((entry = archive_entry_new()) != NULL);
125         /* Set pathname, gname, uname, hardlink to nonconvertible values. */
126         archive_entry_copy_pathname(entry, filename);
127         archive_entry_copy_gname(entry, filename);
128         archive_entry_copy_uname(entry, filename);
129         archive_entry_copy_hardlink(entry, filename);
130         archive_entry_set_filetype(entry, AE_IFREG);
131         failure("This should generate a warning for nonconvertible names.");
132         assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
133         archive_entry_free(entry);
134
135         assert((entry = archive_entry_new()) != NULL);
136         /* Set path, gname, uname, and symlink to nonconvertible values. */
137         archive_entry_copy_pathname(entry, filename);
138         archive_entry_copy_gname(entry, filename);
139         archive_entry_copy_uname(entry, filename);
140         archive_entry_copy_symlink(entry, filename);
141         archive_entry_set_filetype(entry, AE_IFLNK);
142         failure("This should generate a warning for nonconvertible names.");
143         assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
144         archive_entry_free(entry);
145
146         assert((entry = archive_entry_new()) != NULL);
147         /* Set pathname to a very long nonconvertible value. */
148         archive_entry_copy_pathname(entry, longname);
149         archive_entry_set_filetype(entry, AE_IFREG);
150         failure("This should generate a warning for nonconvertible names.");
151         assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
152         archive_entry_free(entry);
153
154         assertEqualInt(0, archive_write_close(a));
155         assertEqualInt(0, archive_write_finish(a));
156
157         /*
158          * Now read the entries back.
159          */
160
161         assert((a = archive_read_new()) != NULL);
162         assertEqualInt(0, archive_read_support_format_tar(a));
163         assertEqualInt(0, archive_read_open_memory(a, buff, used));
164
165         assertEqualInt(0, archive_read_next_header(a, &entry));
166         assertEqualString(filename, archive_entry_pathname(entry));
167         assertEqualString(filename, archive_entry_gname(entry));
168         assertEqualString(filename, archive_entry_uname(entry));
169         assertEqualString(filename, archive_entry_hardlink(entry));
170
171         assertEqualInt(0, archive_read_next_header(a, &entry));
172         assertEqualString(filename, archive_entry_pathname(entry));
173         assertEqualString(filename, archive_entry_gname(entry));
174         assertEqualString(filename, archive_entry_uname(entry));
175         assertEqualString(filename, archive_entry_symlink(entry));
176
177         assertEqualInt(0, archive_read_next_header(a, &entry));
178         assertEqualString(longname, archive_entry_pathname(entry));
179
180         assertEqualInt(0, archive_read_close(a));
181         assertEqualInt(0, archive_read_finish(a));
182 }
183
184 /*
185  * Create an entry starting from a wide-character Unicode pathname,
186  * read it back into "C" locale, which doesn't support the name.
187  * TODO: Figure out the "right" behavior here.
188  */
189 static void
190 test_pax_filename_encoding_3(void)
191 {
192         wchar_t badname[] = L"xxxAyyyBzzz";
193         const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
194         struct archive *a;
195         struct archive_entry *entry;
196         char buff[65536];
197         size_t used;
198
199         badname[3] = 0x1234;
200         badname[7] = 0x5678;
201
202         /* If it doesn't exist, just warn and return. */
203         if (NULL == setlocale(LC_ALL, "C")) {
204                 skipping("Can't set \"C\" locale, so can't exercise "
205                     "certain character-conversion failures");
206                 return;
207         }
208
209         /* If wctomb is broken, warn and return. */
210         if (wctomb(buff, 0x1234) > 0) {
211                 skipping("Cannot test conversion failures because \"C\" "
212                     "locale on this system has no invalid characters.");
213                 return;
214         }
215
216         /* If wctomb is broken, warn and return. */
217         if (wctomb(buff, 0x1234) > 0) {
218                 skipping("Cannot test conversion failures because \"C\" "
219                     "locale on this system has no invalid characters.");
220                 return;
221         }
222
223         /* Skip test if archive_entry_update_pathname_utf8() is broken. */
224         /* In particular, this is currently broken on Win32 because
225          * setlocale() does not set the default encoding for CP_ACP. */
226         entry = archive_entry_new();
227         if (archive_entry_update_pathname_utf8(entry, badname_utf8)) {
228                 archive_entry_free(entry);
229                 skipping("Cannot test conversion failures.");
230                 return;
231         }
232         archive_entry_free(entry);
233
234         assert((a = archive_write_new()) != NULL);
235         assertEqualIntA(a, 0, archive_write_set_format_pax(a));
236         assertEqualIntA(a, 0, archive_write_set_compression_none(a));
237         assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
238         assertEqualInt(0,
239             archive_write_open_memory(a, buff, sizeof(buff), &used));
240
241         assert((entry = archive_entry_new()) != NULL);
242         /* Set pathname to non-convertible wide value. */
243         archive_entry_copy_pathname_w(entry, badname);
244         archive_entry_set_filetype(entry, AE_IFREG);
245         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
246         archive_entry_free(entry);
247
248         assert((entry = archive_entry_new()) != NULL);
249         archive_entry_copy_pathname_w(entry, L"abc");
250         /* Set gname to non-convertible wide value. */
251         archive_entry_copy_gname_w(entry, badname);
252         archive_entry_set_filetype(entry, AE_IFREG);
253         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
254         archive_entry_free(entry);
255
256         assert((entry = archive_entry_new()) != NULL);
257         archive_entry_copy_pathname_w(entry, L"abc");
258         /* Set uname to non-convertible wide value. */
259         archive_entry_copy_uname_w(entry, badname);
260         archive_entry_set_filetype(entry, AE_IFREG);
261         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
262         archive_entry_free(entry);
263
264         assert((entry = archive_entry_new()) != NULL);
265         archive_entry_copy_pathname_w(entry, L"abc");
266         /* Set hardlink to non-convertible wide value. */
267         archive_entry_copy_hardlink_w(entry, badname);
268         archive_entry_set_filetype(entry, AE_IFREG);
269         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
270         archive_entry_free(entry);
271
272         assert((entry = archive_entry_new()) != NULL);
273         archive_entry_copy_pathname_w(entry, L"abc");
274         /* Set symlink to non-convertible wide value. */
275         archive_entry_copy_symlink_w(entry, badname);
276         archive_entry_set_filetype(entry, AE_IFLNK);
277         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
278         archive_entry_free(entry);
279
280         assertEqualInt(0, archive_write_close(a));
281         assertEqualInt(0, archive_write_finish(a));
282
283         /*
284          * Now read the entries back.
285          */
286
287         assert((a = archive_read_new()) != NULL);
288         assertEqualInt(0, archive_read_support_format_tar(a));
289         assertEqualInt(0, archive_read_open_memory(a, buff, used));
290
291         failure("A non-convertible pathname should cause a warning.");
292         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
293         assertEqualWString(badname, archive_entry_pathname_w(entry));
294         failure("If native locale can't convert, we should get UTF-8 back.");
295         assertEqualString(badname_utf8, archive_entry_pathname(entry));
296
297         failure("A non-convertible gname should cause a warning.");
298         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
299         assertEqualWString(badname, archive_entry_gname_w(entry));
300         failure("If native locale can't convert, we should get UTF-8 back.");
301         assertEqualString(badname_utf8, archive_entry_gname(entry));
302
303         failure("A non-convertible uname should cause a warning.");
304         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
305         assertEqualWString(badname, archive_entry_uname_w(entry));
306         failure("If native locale can't convert, we should get UTF-8 back.");
307         assertEqualString(badname_utf8, archive_entry_uname(entry));
308
309         failure("A non-convertible hardlink should cause a warning.");
310         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
311         assertEqualWString(badname, archive_entry_hardlink_w(entry));
312         failure("If native locale can't convert, we should get UTF-8 back.");
313         assertEqualString(badname_utf8, archive_entry_hardlink(entry));
314
315         failure("A non-convertible symlink should cause a warning.");
316         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
317         assertEqualWString(badname, archive_entry_symlink_w(entry));
318         assertEqualWString(NULL, archive_entry_hardlink_w(entry));
319         failure("If native locale can't convert, we should get UTF-8 back.");
320         assertEqualString(badname_utf8, archive_entry_symlink(entry));
321
322         assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));
323
324         assertEqualInt(0, archive_read_close(a));
325         assertEqualInt(0, archive_read_finish(a));
326 }
327
328 DEFINE_TEST(test_pax_filename_encoding)
329 {
330         test_pax_filename_encoding_1();
331         test_pax_filename_encoding_2();
332         test_pax_filename_encoding_3();
333 }