2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 __FBSDID("$FreeBSD$");
/*
 * Pax interchange is supposed to encode filenames into
 * UTF-8.  Of course, that's not always possible.  This
 * test is intended to verify that filenames always get
 * stored and restored correctly, regardless of the encodings.
 */
38 * Read a manually-created archive that has filenames that are
39 * stored in binary instead of UTF-8 and verify that we get
40 * the right filename returned and that we get a warning only
41 * if the header isn't marked as binary.
44 test_pax_filename_encoding_1(void)
46 static const char testname[] = "test_pax_filename_encoding.tar";
48 * \314\214 is a valid 2-byte UTF-8 sequence.
49 * \374 is invalid in UTF-8.
51 char filename[] = "abc\314\214mno\374xyz";
53 struct archive_entry *entry;
56 * Read an archive that has non-UTF8 pax filenames in it.
58 extract_reference_file(testname);
59 a = archive_read_new();
60 assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a));
61 assertEqualInt(ARCHIVE_OK, archive_read_support_compression_all(a));
62 assertEqualInt(ARCHIVE_OK,
63 archive_read_open_filename(a, testname, 10240));
65 * First entry in this test archive has an invalid UTF-8 sequence
66 * in it, but the header is not marked as hdrcharset=BINARY, so that
69 failure("Invalid UTF8 in a pax archive pathname should cause a warning");
70 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
71 assertEqualString(filename, archive_entry_pathname(entry));
73 * Second entry is identical except that it does have
74 * hdrcharset=BINARY, so no warning should be generated.
76 failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n"
77 " characters in it without generating a warning");
78 assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
79 assertEqualString(filename, archive_entry_pathname(entry));
80 archive_read_finish(a);
84 * Set the locale and write a pathname containing invalid characters.
85 * This should work; the underlying implementation should automatically
86 * fall back to storing the pathname in binary.
89 test_pax_filename_encoding_2(void)
91 char filename[] = "abc\314\214mno\374xyz";
93 struct archive_entry *entry;
95 char longname[] = "abc\314\214mno\374xyz"
96 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
97 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
98 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
99 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
100 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
101 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
106 * We need a starting locale which has invalid sequences.
107 * de_DE.UTF-8 seems to be commonly supported.
109 /* If it doesn't exist, just warn and return. */
110 if (LOCALE_UTF8 == NULL
111 || NULL == setlocale(LC_ALL, LOCALE_UTF8)) {
112 skipping("invalid encoding tests require a suitable locale;"
113 " %s not available on this system", LOCALE_UTF8);
117 assert((a = archive_write_new()) != NULL);
118 assertEqualIntA(a, 0, archive_write_set_format_pax(a));
119 assertEqualIntA(a, 0, archive_write_set_compression_none(a));
120 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
122 archive_write_open_memory(a, buff, sizeof(buff), &used));
124 assert((entry = archive_entry_new()) != NULL);
125 /* Set pathname, gname, uname, hardlink to nonconvertible values. */
126 archive_entry_copy_pathname(entry, filename);
127 archive_entry_copy_gname(entry, filename);
128 archive_entry_copy_uname(entry, filename);
129 archive_entry_copy_hardlink(entry, filename);
130 archive_entry_set_filetype(entry, AE_IFREG);
131 failure("This should generate a warning for nonconvertible names.");
132 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
133 archive_entry_free(entry);
135 assert((entry = archive_entry_new()) != NULL);
136 /* Set path, gname, uname, and symlink to nonconvertible values. */
137 archive_entry_copy_pathname(entry, filename);
138 archive_entry_copy_gname(entry, filename);
139 archive_entry_copy_uname(entry, filename);
140 archive_entry_copy_symlink(entry, filename);
141 archive_entry_set_filetype(entry, AE_IFLNK);
142 failure("This should generate a warning for nonconvertible names.");
143 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
144 archive_entry_free(entry);
146 assert((entry = archive_entry_new()) != NULL);
147 /* Set pathname to a very long nonconvertible value. */
148 archive_entry_copy_pathname(entry, longname);
149 archive_entry_set_filetype(entry, AE_IFREG);
150 failure("This should generate a warning for nonconvertible names.");
151 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
152 archive_entry_free(entry);
154 assertEqualInt(0, archive_write_close(a));
155 assertEqualInt(0, archive_write_finish(a));
158 * Now read the entries back.
161 assert((a = archive_read_new()) != NULL);
162 assertEqualInt(0, archive_read_support_format_tar(a));
163 assertEqualInt(0, archive_read_open_memory(a, buff, used));
165 assertEqualInt(0, archive_read_next_header(a, &entry));
166 assertEqualString(filename, archive_entry_pathname(entry));
167 assertEqualString(filename, archive_entry_gname(entry));
168 assertEqualString(filename, archive_entry_uname(entry));
169 assertEqualString(filename, archive_entry_hardlink(entry));
171 assertEqualInt(0, archive_read_next_header(a, &entry));
172 assertEqualString(filename, archive_entry_pathname(entry));
173 assertEqualString(filename, archive_entry_gname(entry));
174 assertEqualString(filename, archive_entry_uname(entry));
175 assertEqualString(filename, archive_entry_symlink(entry));
177 assertEqualInt(0, archive_read_next_header(a, &entry));
178 assertEqualString(longname, archive_entry_pathname(entry));
180 assertEqualInt(0, archive_read_close(a));
181 assertEqualInt(0, archive_read_finish(a));
185 * Create an entry starting from a wide-character Unicode pathname,
186 * read it back into "C" locale, which doesn't support the name.
187 * TODO: Figure out the "right" behavior here.
190 test_pax_filename_encoding_3(void)
192 wchar_t badname[] = L"xxxAyyyBzzz";
193 const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
195 struct archive_entry *entry;
202 /* If it doesn't exist, just warn and return. */
203 if (NULL == setlocale(LC_ALL, "C")) {
204 skipping("Can't set \"C\" locale, so can't exercise "
205 "certain character-conversion failures");
209 /* If wctomb is broken, warn and return. */
210 if (wctomb(buff, 0x1234) > 0) {
211 skipping("Cannot test conversion failures because \"C\" "
212 "locale on this system has no invalid characters.");
216 /* If wctomb is broken, warn and return. */
217 if (wctomb(buff, 0x1234) > 0) {
218 skipping("Cannot test conversion failures because \"C\" "
219 "locale on this system has no invalid characters.");
223 /* Skip test if archive_entry_update_pathname_utf8() is broken. */
224 /* In particular, this is currently broken on Win32 because
225 * setlocale() does not set the default encoding for CP_ACP. */
226 entry = archive_entry_new();
227 if (archive_entry_update_pathname_utf8(entry, badname_utf8)) {
228 archive_entry_free(entry);
229 skipping("Cannot test conversion failures.");
232 archive_entry_free(entry);
234 assert((a = archive_write_new()) != NULL);
235 assertEqualIntA(a, 0, archive_write_set_format_pax(a));
236 assertEqualIntA(a, 0, archive_write_set_compression_none(a));
237 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
239 archive_write_open_memory(a, buff, sizeof(buff), &used));
241 assert((entry = archive_entry_new()) != NULL);
242 /* Set pathname to non-convertible wide value. */
243 archive_entry_copy_pathname_w(entry, badname);
244 archive_entry_set_filetype(entry, AE_IFREG);
245 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
246 archive_entry_free(entry);
248 assert((entry = archive_entry_new()) != NULL);
249 archive_entry_copy_pathname_w(entry, L"abc");
250 /* Set gname to non-convertible wide value. */
251 archive_entry_copy_gname_w(entry, badname);
252 archive_entry_set_filetype(entry, AE_IFREG);
253 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
254 archive_entry_free(entry);
256 assert((entry = archive_entry_new()) != NULL);
257 archive_entry_copy_pathname_w(entry, L"abc");
258 /* Set uname to non-convertible wide value. */
259 archive_entry_copy_uname_w(entry, badname);
260 archive_entry_set_filetype(entry, AE_IFREG);
261 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
262 archive_entry_free(entry);
264 assert((entry = archive_entry_new()) != NULL);
265 archive_entry_copy_pathname_w(entry, L"abc");
266 /* Set hardlink to non-convertible wide value. */
267 archive_entry_copy_hardlink_w(entry, badname);
268 archive_entry_set_filetype(entry, AE_IFREG);
269 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
270 archive_entry_free(entry);
272 assert((entry = archive_entry_new()) != NULL);
273 archive_entry_copy_pathname_w(entry, L"abc");
274 /* Set symlink to non-convertible wide value. */
275 archive_entry_copy_symlink_w(entry, badname);
276 archive_entry_set_filetype(entry, AE_IFLNK);
277 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
278 archive_entry_free(entry);
280 assertEqualInt(0, archive_write_close(a));
281 assertEqualInt(0, archive_write_finish(a));
284 * Now read the entries back.
287 assert((a = archive_read_new()) != NULL);
288 assertEqualInt(0, archive_read_support_format_tar(a));
289 assertEqualInt(0, archive_read_open_memory(a, buff, used));
291 failure("A non-convertible pathname should cause a warning.");
292 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
293 assertEqualWString(badname, archive_entry_pathname_w(entry));
294 failure("If native locale can't convert, we should get UTF-8 back.");
295 assertEqualString(badname_utf8, archive_entry_pathname(entry));
297 failure("A non-convertible gname should cause a warning.");
298 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
299 assertEqualWString(badname, archive_entry_gname_w(entry));
300 failure("If native locale can't convert, we should get UTF-8 back.");
301 assertEqualString(badname_utf8, archive_entry_gname(entry));
303 failure("A non-convertible uname should cause a warning.");
304 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
305 assertEqualWString(badname, archive_entry_uname_w(entry));
306 failure("If native locale can't convert, we should get UTF-8 back.");
307 assertEqualString(badname_utf8, archive_entry_uname(entry));
309 failure("A non-convertible hardlink should cause a warning.");
310 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
311 assertEqualWString(badname, archive_entry_hardlink_w(entry));
312 failure("If native locale can't convert, we should get UTF-8 back.");
313 assertEqualString(badname_utf8, archive_entry_hardlink(entry));
315 failure("A non-convertible symlink should cause a warning.");
316 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
317 assertEqualWString(badname, archive_entry_symlink_w(entry));
318 assertEqualWString(NULL, archive_entry_hardlink_w(entry));
319 failure("If native locale can't convert, we should get UTF-8 back.");
320 assertEqualString(badname_utf8, archive_entry_symlink(entry));
322 assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));
324 assertEqualInt(0, archive_read_close(a));
325 assertEqualInt(0, archive_read_finish(a));
/*
 * Test entry point: run the three pax filename-encoding scenarios in
 * their original order.  The order is kept fixed because the second and
 * third scenarios each call setlocale(LC_ALL, ...) and do not restore
 * the previous locale.
 */
DEFINE_TEST(test_pax_filename_encoding)
{
	/* Read a reference archive containing binary (non-UTF-8) names. */
	test_pax_filename_encoding_1();
	/* Round-trip nonconvertible narrow-string names. */
	test_pax_filename_encoding_2();
	/* Round-trip nonconvertible wide-character names in the "C" locale. */
	test_pax_filename_encoding_3();
}