2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 __FBSDID("$FreeBSD$");
/*
 * Pax interchange is supposed to encode filenames into
 * UTF-8.  Of course, that's not always possible.  This
 * test is intended to verify that filenames always get
 * stored and restored correctly, regardless of the encodings.
 */
38 * Read a manually-created archive that has filenames that are
39 * stored in binary instead of UTF-8 and verify that we get
40 * the right filename returned and that we get a warning only
41 * if the header isn't marked as binary.
43 DEFINE_TEST(test_pax_filename_encoding_1)
45 static const char testname[] = "test_pax_filename_encoding.tar.gz";
47 * \314\214 is a valid 2-byte UTF-8 sequence.
48 * \374 is invalid in UTF-8.
50 char filename[] = "abc\314\214mno\374xyz";
52 struct archive_entry *entry;
55 * Read an archive that has non-UTF8 pax filenames in it.
57 extract_reference_file(testname);
58 a = archive_read_new();
59 assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a));
60 assertEqualInt(ARCHIVE_OK, archive_read_support_compression_gzip(a));
61 assertEqualInt(ARCHIVE_OK,
62 archive_read_open_filename(a, testname, 10240));
64 * First entry in this test archive has an invalid UTF-8 sequence
65 * in it, but the header is not marked as hdrcharset=BINARY, so that
68 failure("Invalid UTF8 in a pax archive pathname should cause a warning");
69 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
70 assertEqualString(filename, archive_entry_pathname(entry));
72 * Second entry is identical except that it does have
73 * hdrcharset=BINARY, so no warning should be generated.
75 failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n"
76 " characters in it without generating a warning");
77 assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
78 assertEqualString(filename, archive_entry_pathname(entry));
79 archive_read_finish(a);
83 * Set the locale and write a pathname containing invalid characters.
84 * This should work; the underlying implementation should automatically
85 * fall back to storing the pathname in binary.
87 DEFINE_TEST(test_pax_filename_encoding_2)
89 char filename[] = "abc\314\214mno\374xyz";
91 struct archive_entry *entry;
93 char longname[] = "abc\314\214mno\374xyz"
94 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
95 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
96 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
97 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
98 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
99 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
104 * We need a starting locale which has invalid sequences.
105 * de_DE.UTF-8 seems to be commonly supported.
107 /* If it doesn't exist, just warn and return. */
108 if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) {
109 skipping("invalid encoding tests require a suitable locale;"
110 " de_DE.UTF-8 not available on this system");
114 assert((a = archive_write_new()) != NULL);
115 assertEqualIntA(a, 0, archive_write_set_format_pax(a));
116 assertEqualIntA(a, 0, archive_write_set_compression_none(a));
117 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
119 archive_write_open_memory(a, buff, sizeof(buff), &used));
121 assert((entry = archive_entry_new()) != NULL);
122 /* Set pathname, gname, uname, hardlink to nonconvertible values. */
123 archive_entry_copy_pathname(entry, filename);
124 archive_entry_copy_gname(entry, filename);
125 archive_entry_copy_uname(entry, filename);
126 archive_entry_copy_hardlink(entry, filename);
127 archive_entry_set_filetype(entry, AE_IFREG);
128 failure("This should generate a warning for nonconvertible names.");
129 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
130 archive_entry_free(entry);
132 assert((entry = archive_entry_new()) != NULL);
133 /* Set path, gname, uname, and symlink to nonconvertible values. */
134 archive_entry_copy_pathname(entry, filename);
135 archive_entry_copy_gname(entry, filename);
136 archive_entry_copy_uname(entry, filename);
137 archive_entry_copy_symlink(entry, filename);
138 archive_entry_set_filetype(entry, AE_IFLNK);
139 failure("This should generate a warning for nonconvertible names.");
140 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
141 archive_entry_free(entry);
143 assert((entry = archive_entry_new()) != NULL);
144 /* Set pathname to a very long nonconvertible value. */
145 archive_entry_copy_pathname(entry, longname);
146 archive_entry_set_filetype(entry, AE_IFREG);
147 failure("This should generate a warning for nonconvertible names.");
148 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
149 archive_entry_free(entry);
151 assertEqualInt(0, archive_write_close(a));
152 #if ARCHIVE_VERSION_NUMBER < 2000000
153 archive_write_finish(a);
155 assertEqualInt(0, archive_write_finish(a));
159 * Now read the entries back.
162 assert((a = archive_read_new()) != NULL);
163 assertEqualInt(0, archive_read_support_format_tar(a));
164 assertEqualInt(0, archive_read_open_memory(a, buff, used));
166 assertEqualInt(0, archive_read_next_header(a, &entry));
167 assertEqualString(filename, archive_entry_pathname(entry));
168 assertEqualString(filename, archive_entry_gname(entry));
169 assertEqualString(filename, archive_entry_uname(entry));
170 assertEqualString(filename, archive_entry_hardlink(entry));
172 assertEqualInt(0, archive_read_next_header(a, &entry));
173 assertEqualString(filename, archive_entry_pathname(entry));
174 assertEqualString(filename, archive_entry_gname(entry));
175 assertEqualString(filename, archive_entry_uname(entry));
176 assertEqualString(filename, archive_entry_symlink(entry));
178 assertEqualInt(0, archive_read_next_header(a, &entry));
179 assertEqualString(longname, archive_entry_pathname(entry));
181 assertEqualInt(0, archive_read_close(a));
182 #if ARCHIVE_VERSION_NUMBER < 2000000
183 archive_read_finish(a);
185 assertEqualInt(0, archive_read_finish(a));
190 * Create an entry starting from a wide-character Unicode pathname,
191 * read it back into "C" locale, which doesn't support the name.
192 * TODO: Figure out the "right" behavior here.
194 DEFINE_TEST(test_pax_filename_encoding_3)
196 wchar_t badname[] = L"xxxAyyyBzzz";
197 const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
199 struct archive_entry *entry;
206 /* If it doesn't exist, just warn and return. */
207 if (NULL == setlocale(LC_ALL, "C")) {
208 skipping("Can't set \"C\" locale, so can't exercise "
209 "certain character-conversion failures");
213 assert((a = archive_write_new()) != NULL);
214 assertEqualIntA(a, 0, archive_write_set_format_pax(a));
215 assertEqualIntA(a, 0, archive_write_set_compression_none(a));
216 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
218 archive_write_open_memory(a, buff, sizeof(buff), &used));
220 assert((entry = archive_entry_new()) != NULL);
221 /* Set pathname to non-convertible wide value. */
222 archive_entry_copy_pathname_w(entry, badname);
223 archive_entry_set_filetype(entry, AE_IFREG);
224 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
225 archive_entry_free(entry);
227 assert((entry = archive_entry_new()) != NULL);
228 archive_entry_copy_pathname_w(entry, L"abc");
229 /* Set gname to non-convertible wide value. */
230 archive_entry_copy_gname_w(entry, badname);
231 archive_entry_set_filetype(entry, AE_IFREG);
232 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
233 archive_entry_free(entry);
235 assert((entry = archive_entry_new()) != NULL);
236 archive_entry_copy_pathname_w(entry, L"abc");
237 /* Set uname to non-convertible wide value. */
238 archive_entry_copy_uname_w(entry, badname);
239 archive_entry_set_filetype(entry, AE_IFREG);
240 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
241 archive_entry_free(entry);
243 assert((entry = archive_entry_new()) != NULL);
244 archive_entry_copy_pathname_w(entry, L"abc");
245 /* Set hardlink to non-convertible wide value. */
246 archive_entry_copy_hardlink_w(entry, badname);
247 archive_entry_set_filetype(entry, AE_IFREG);
248 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
249 archive_entry_free(entry);
251 assert((entry = archive_entry_new()) != NULL);
252 archive_entry_copy_pathname_w(entry, L"abc");
253 /* Set symlink to non-convertible wide value. */
254 archive_entry_copy_symlink_w(entry, badname);
255 archive_entry_set_filetype(entry, AE_IFLNK);
256 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
257 archive_entry_free(entry);
259 assertEqualInt(0, archive_write_close(a));
260 #if ARCHIVE_VERSION_NUMBER < 2000000
261 archive_write_finish(a);
263 assertEqualInt(0, archive_write_finish(a));
267 * Now read the entries back.
270 assert((a = archive_read_new()) != NULL);
271 assertEqualInt(0, archive_read_support_format_tar(a));
272 assertEqualInt(0, archive_read_open_memory(a, buff, used));
274 failure("A non-convertible pathname should cause a warning.");
275 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
276 assertEqualWString(badname, archive_entry_pathname_w(entry));
277 failure("If native locale can't convert, we should get UTF-8 back.");
278 assertEqualString(badname_utf8, archive_entry_pathname(entry));
280 failure("A non-convertible gname should cause a warning.");
281 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
282 assertEqualWString(badname, archive_entry_gname_w(entry));
283 failure("If native locale can't convert, we should get UTF-8 back.");
284 assertEqualString(badname_utf8, archive_entry_gname(entry));
286 failure("A non-convertible uname should cause a warning.");
287 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
288 assertEqualWString(badname, archive_entry_uname_w(entry));
289 failure("If native locale can't convert, we should get UTF-8 back.");
290 assertEqualString(badname_utf8, archive_entry_uname(entry));
292 failure("A non-convertible hardlink should cause a warning.");
293 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
294 assertEqualWString(badname, archive_entry_hardlink_w(entry));
295 failure("If native locale can't convert, we should get UTF-8 back.");
296 assertEqualString(badname_utf8, archive_entry_hardlink(entry));
298 failure("A non-convertible symlink should cause a warning.");
299 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
300 assertEqualWString(badname, archive_entry_symlink_w(entry));
301 assertEqualWString(NULL, archive_entry_hardlink_w(entry));
302 failure("If native locale can't convert, we should get UTF-8 back.");
303 assertEqualString(badname_utf8, archive_entry_symlink(entry));
305 assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));
307 assertEqualInt(0, archive_read_close(a));
308 #if ARCHIVE_VERSION_NUMBER < 2000000
309 archive_read_finish(a);
311 assertEqualInt(0, archive_read_finish(a));