CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - lib/libarchive/test/test_pax_filename_encoding.c
Create releng/7.2 from stable/7 in preparation for 7.2-RELEASE.
[FreeBSD/releng/7.2.git] / lib / libarchive / test / test_pax_filename_encoding.c
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27
28 #include <locale.h>
29
30 /*
31  * Pax interchange is supposed to encode filenames into
32  * UTF-8.  Of course, that's not always possible.  This
33  * test is intended to verify that filenames always get
34  * stored and restored correctly, regardless of the encodings.
35  */
36
37 /*
38  * Read a manually-created archive that has filenames that are
39  * stored in binary instead of UTF-8 and verify that we get
40  * the right filename returned and that we get a warning only
41  * if the header isn't marked as binary.
42  */
43 DEFINE_TEST(test_pax_filename_encoding_1)
44 {
45         static const char testname[] = "test_pax_filename_encoding.tar.gz";
46         /*
47          * \314\214 is a valid 2-byte UTF-8 sequence.
48          * \374 is invalid in UTF-8.
49          */
50         char filename[] = "abc\314\214mno\374xyz";
51         struct archive *a;
52         struct archive_entry *entry;
53
54         /*
55          * Read an archive that has non-UTF8 pax filenames in it.
56          */
57         extract_reference_file(testname);
58         a = archive_read_new();
59         assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a));
60         assertEqualInt(ARCHIVE_OK, archive_read_support_compression_gzip(a));
61         assertEqualInt(ARCHIVE_OK,
62             archive_read_open_filename(a, testname, 10240));
63         /*
64          * First entry in this test archive has an invalid UTF-8 sequence
65          * in it, but the header is not marked as hdrcharset=BINARY, so that
66          * requires a warning.
67          */
68         failure("Invalid UTF8 in a pax archive pathname should cause a warning");
69         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
70         assertEqualString(filename, archive_entry_pathname(entry));
71         /*
72          * Second entry is identical except that it does have
73          * hdrcharset=BINARY, so no warning should be generated.
74          */
75         failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n"
76             " characters in it without generating a warning");
77         assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
78         assertEqualString(filename, archive_entry_pathname(entry));
79         archive_read_finish(a);
80 }
81
82 /*
83  * Set the locale and write a pathname containing invalid characters.
84  * This should work; the underlying implementation should automatically
85  * fall back to storing the pathname in binary.
86  */
87 DEFINE_TEST(test_pax_filename_encoding_2)
88 {
89         char filename[] = "abc\314\214mno\374xyz";
90         struct archive *a;
91         struct archive_entry *entry;
92         char buff[65536];
93         char longname[] = "abc\314\214mno\374xyz"
94             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
95             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
96             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
97             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
98             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
99             "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
100             ;
101         size_t used;
102
103         /*
104          * We need a starting locale which has invalid sequences.
105          * de_DE.UTF-8 seems to be commonly supported.
106          */
107         /* If it doesn't exist, just warn and return. */
108         if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) {
109                 skipping("invalid encoding tests require a suitable locale;"
110                     " de_DE.UTF-8 not available on this system");
111                 return;
112         }
113
114         assert((a = archive_write_new()) != NULL);
115         assertEqualIntA(a, 0, archive_write_set_format_pax(a));
116         assertEqualIntA(a, 0, archive_write_set_compression_none(a));
117         assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
118         assertEqualInt(0,
119             archive_write_open_memory(a, buff, sizeof(buff), &used));
120
121         assert((entry = archive_entry_new()) != NULL);
122         /* Set pathname, gname, uname, hardlink to nonconvertible values. */
123         archive_entry_copy_pathname(entry, filename);
124         archive_entry_copy_gname(entry, filename);
125         archive_entry_copy_uname(entry, filename);
126         archive_entry_copy_hardlink(entry, filename);
127         archive_entry_set_filetype(entry, AE_IFREG);
128         failure("This should generate a warning for nonconvertible names.");
129         assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
130         archive_entry_free(entry);
131
132         assert((entry = archive_entry_new()) != NULL);
133         /* Set path, gname, uname, and symlink to nonconvertible values. */
134         archive_entry_copy_pathname(entry, filename);
135         archive_entry_copy_gname(entry, filename);
136         archive_entry_copy_uname(entry, filename);
137         archive_entry_copy_symlink(entry, filename);
138         archive_entry_set_filetype(entry, AE_IFLNK);
139         failure("This should generate a warning for nonconvertible names.");
140         assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
141         archive_entry_free(entry);
142
143         assert((entry = archive_entry_new()) != NULL);
144         /* Set pathname to a very long nonconvertible value. */
145         archive_entry_copy_pathname(entry, longname);
146         archive_entry_set_filetype(entry, AE_IFREG);
147         failure("This should generate a warning for nonconvertible names.");
148         assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
149         archive_entry_free(entry);
150
151         assertEqualInt(0, archive_write_close(a));
152 #if ARCHIVE_VERSION_NUMBER < 2000000
153         archive_write_finish(a);
154 #else
155         assertEqualInt(0, archive_write_finish(a));
156 #endif
157
158         /*
159          * Now read the entries back.
160          */
161
162         assert((a = archive_read_new()) != NULL);
163         assertEqualInt(0, archive_read_support_format_tar(a));
164         assertEqualInt(0, archive_read_open_memory(a, buff, used));
165
166         assertEqualInt(0, archive_read_next_header(a, &entry));
167         assertEqualString(filename, archive_entry_pathname(entry));
168         assertEqualString(filename, archive_entry_gname(entry));
169         assertEqualString(filename, archive_entry_uname(entry));
170         assertEqualString(filename, archive_entry_hardlink(entry));
171
172         assertEqualInt(0, archive_read_next_header(a, &entry));
173         assertEqualString(filename, archive_entry_pathname(entry));
174         assertEqualString(filename, archive_entry_gname(entry));
175         assertEqualString(filename, archive_entry_uname(entry));
176         assertEqualString(filename, archive_entry_symlink(entry));
177
178         assertEqualInt(0, archive_read_next_header(a, &entry));
179         assertEqualString(longname, archive_entry_pathname(entry));
180
181         assertEqualInt(0, archive_read_close(a));
182 #if ARCHIVE_VERSION_NUMBER < 2000000
183         archive_read_finish(a);
184 #else
185         assertEqualInt(0, archive_read_finish(a));
186 #endif
187 }
188
189 /*
190  * Create an entry starting from a wide-character Unicode pathname,
191  * read it back into "C" locale, which doesn't support the name.
192  * TODO: Figure out the "right" behavior here.
193  */
194 DEFINE_TEST(test_pax_filename_encoding_3)
195 {
196         wchar_t badname[] = L"xxxAyyyBzzz";
197         const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
198         struct archive *a;
199         struct archive_entry *entry;
200         char buff[65536];
201         size_t used;
202
203         badname[3] = 0x1234;
204         badname[7] = 0x5678;
205
206         /* If it doesn't exist, just warn and return. */
207         if (NULL == setlocale(LC_ALL, "C")) {
208                 skipping("Can't set \"C\" locale, so can't exercise "
209                     "certain character-conversion failures");
210                 return;
211         }
212
213         assert((a = archive_write_new()) != NULL);
214         assertEqualIntA(a, 0, archive_write_set_format_pax(a));
215         assertEqualIntA(a, 0, archive_write_set_compression_none(a));
216         assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
217         assertEqualInt(0,
218             archive_write_open_memory(a, buff, sizeof(buff), &used));
219
220         assert((entry = archive_entry_new()) != NULL);
221         /* Set pathname to non-convertible wide value. */
222         archive_entry_copy_pathname_w(entry, badname);
223         archive_entry_set_filetype(entry, AE_IFREG);
224         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
225         archive_entry_free(entry);
226
227         assert((entry = archive_entry_new()) != NULL);
228         archive_entry_copy_pathname_w(entry, L"abc");
229         /* Set gname to non-convertible wide value. */
230         archive_entry_copy_gname_w(entry, badname);
231         archive_entry_set_filetype(entry, AE_IFREG);
232         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
233         archive_entry_free(entry);
234
235         assert((entry = archive_entry_new()) != NULL);
236         archive_entry_copy_pathname_w(entry, L"abc");
237         /* Set uname to non-convertible wide value. */
238         archive_entry_copy_uname_w(entry, badname);
239         archive_entry_set_filetype(entry, AE_IFREG);
240         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
241         archive_entry_free(entry);
242
243         assert((entry = archive_entry_new()) != NULL);
244         archive_entry_copy_pathname_w(entry, L"abc");
245         /* Set hardlink to non-convertible wide value. */
246         archive_entry_copy_hardlink_w(entry, badname);
247         archive_entry_set_filetype(entry, AE_IFREG);
248         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
249         archive_entry_free(entry);
250
251         assert((entry = archive_entry_new()) != NULL);
252         archive_entry_copy_pathname_w(entry, L"abc");
253         /* Set symlink to non-convertible wide value. */
254         archive_entry_copy_symlink_w(entry, badname);
255         archive_entry_set_filetype(entry, AE_IFLNK);
256         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
257         archive_entry_free(entry);
258
259         assertEqualInt(0, archive_write_close(a));
260 #if ARCHIVE_VERSION_NUMBER < 2000000
261         archive_write_finish(a);
262 #else
263         assertEqualInt(0, archive_write_finish(a));
264 #endif
265
266         /*
267          * Now read the entries back.
268          */
269
270         assert((a = archive_read_new()) != NULL);
271         assertEqualInt(0, archive_read_support_format_tar(a));
272         assertEqualInt(0, archive_read_open_memory(a, buff, used));
273
274         failure("A non-convertible pathname should cause a warning.");
275         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
276         assertEqualWString(badname, archive_entry_pathname_w(entry));
277         failure("If native locale can't convert, we should get UTF-8 back.");
278         assertEqualString(badname_utf8, archive_entry_pathname(entry));
279
280         failure("A non-convertible gname should cause a warning.");
281         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
282         assertEqualWString(badname, archive_entry_gname_w(entry));
283         failure("If native locale can't convert, we should get UTF-8 back.");
284         assertEqualString(badname_utf8, archive_entry_gname(entry));
285
286         failure("A non-convertible uname should cause a warning.");
287         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
288         assertEqualWString(badname, archive_entry_uname_w(entry));
289         failure("If native locale can't convert, we should get UTF-8 back.");
290         assertEqualString(badname_utf8, archive_entry_uname(entry));
291
292         failure("A non-convertible hardlink should cause a warning.");
293         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
294         assertEqualWString(badname, archive_entry_hardlink_w(entry));
295         failure("If native locale can't convert, we should get UTF-8 back.");
296         assertEqualString(badname_utf8, archive_entry_hardlink(entry));
297
298         failure("A non-convertible symlink should cause a warning.");
299         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
300         assertEqualWString(badname, archive_entry_symlink_w(entry));
301         assertEqualWString(NULL, archive_entry_hardlink_w(entry));
302         failure("If native locale can't convert, we should get UTF-8 back.");
303         assertEqualString(badname_utf8, archive_entry_symlink(entry));
304
305         assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));
306
307         assertEqualInt(0, archive_read_close(a));
308 #if ARCHIVE_VERSION_NUMBER < 2000000
309         archive_read_finish(a);
310 #else
311         assertEqualInt(0, archive_read_finish(a));
312 #endif
313 }