2 * Copyright (c) 2011 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
 * The sample tar file was made in LANG=KOI8-R and it contains two
 * files whose filenames are stored in different charsets.
 * - the filename of the first file is stored in BINARY mode.
 * - the filename of the second file is stored in UTF-8.
 *
 * Whether or not the hdrcharset option is specified, we will correctly
 * read the filename of the second file, which is stored in UTF-8 by default.
 */
41 test_read_format_tar_filename_KOI8R_CP866(const char *refname)
44 struct archive_entry *ae;
47 * Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option.
48 * We should correctly read two filenames.
50 if (NULL == setlocale(LC_ALL, "Russian_Russia.866") &&
51 NULL == setlocale(LC_ALL, "ru_RU.CP866")) {
52 skipping("ru_RU.CP866 locale not available on this system.");
56 /* Test if the platform can convert from UTF-8. */
57 assert((a = archive_read_new()) != NULL);
58 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
59 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
60 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
61 skipping("This system cannot convert character-set"
62 " from UTF-8 to CP866.");
65 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
67 assert((a = archive_read_new()) != NULL);
68 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
69 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
70 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
71 skipping("This system cannot convert character-set"
72 " from KOI8-R to CP866.");
75 assertEqualIntA(a, ARCHIVE_OK,
76 archive_read_open_filename(a, refname, 10240));
78 /* Verify regular first file. */
79 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
80 assertEqualString("\x8f\x90\x88\x82\x85\x92",
81 archive_entry_pathname(ae));
82 assertEqualInt(6, archive_entry_size(ae));
83 assertEqualInt(archive_entry_is_encrypted(ae), 0);
84 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
86 /* Verify regular second file. */
87 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
88 assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
89 archive_entry_pathname(ae));
90 assertEqualInt(6, archive_entry_size(ae));
91 assertEqualInt(archive_entry_is_encrypted(ae), 0);
92 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
96 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
98 /* Verify archive format. */
99 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
100 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
103 /* Close the archive. */
104 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
106 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
110 * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option.
111 * The filename we can properly read is only second file.
114 assert((a = archive_read_new()) != NULL);
115 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
116 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
117 assertEqualIntA(a, ARCHIVE_OK,
118 archive_read_open_filename(a, refname, 10240));
121 * Verify regular first file.
122 * The filename is not translated to CP866 because hdrcharset
123 * attribute is BINARY and there is not way to know its charset.
125 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
126 /* A filename is in KOI8-R. */
127 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
128 archive_entry_pathname(ae));
129 assertEqualInt(6, archive_entry_size(ae));
130 assertEqualInt(archive_entry_is_encrypted(ae), 0);
131 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
134 * Verify regular second file.
135 * The filename is translated from UTF-8 to CP866
137 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
138 assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
139 archive_entry_pathname(ae));
140 assertEqualInt(6, archive_entry_size(ae));
141 assertEqualInt(archive_entry_is_encrypted(ae), 0);
142 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
145 /* End of archive. */
146 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
148 /* Verify archive format. */
149 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
150 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
153 /* Close the archive. */
154 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
155 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
159 test_read_format_tar_filename_KOI8R_UTF8(const char *refname)
162 struct archive_entry *ae;
165 * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option.
166 * We should correctly read two filenames.
168 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
169 skipping("en_US.UTF-8 locale not available on this system.");
173 assert((a = archive_read_new()) != NULL);
174 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
175 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
176 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
177 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
178 skipping("This system cannot convert character-set"
179 " from KOI8-R to UTF-8.");
182 assertEqualIntA(a, ARCHIVE_OK,
183 archive_read_open_filename(a, refname, 10240));
185 /* Verify regular file. */
186 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
187 assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2",
188 archive_entry_pathname(ae));
189 assertEqualInt(6, archive_entry_size(ae));
190 assertEqualInt(archive_entry_is_encrypted(ae), 0);
191 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
193 /* Verify regular file. */
194 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
195 assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
196 archive_entry_pathname(ae));
197 assertEqualInt(6, archive_entry_size(ae));
198 assertEqualInt(archive_entry_is_encrypted(ae), 0);
199 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
201 /* Verify encryption status */
202 assertEqualInt(archive_entry_is_encrypted(ae), 0);
203 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
205 /* End of archive. */
206 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
208 /* Verify archive format. */
209 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
210 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
213 /* Verify encryption status */
214 assertEqualInt(archive_entry_is_encrypted(ae), 0);
215 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
217 /* Close the archive. */
218 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
219 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
222 * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option.
223 * The filename we can properly read is only second file.
226 assert((a = archive_read_new()) != NULL);
227 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
228 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
229 assertEqualIntA(a, ARCHIVE_OK,
230 archive_read_open_filename(a, refname, 10240));
233 * Verify regular first file.
234 * The filename is not translated to UTF-8 because hdrcharset
235 * attribute is BINARY and there is not way to know its charset.
237 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
238 /* A filename is in KOI8-R. */
239 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
240 archive_entry_pathname(ae));
241 assertEqualInt(6, archive_entry_size(ae));
243 /* Verify encryption status */
244 assertEqualInt(archive_entry_is_encrypted(ae), 0);
245 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
248 * Verify regular second file.
250 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
251 assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
252 archive_entry_pathname(ae));
253 assertEqualInt(6, archive_entry_size(ae));
256 /* End of archive. */
257 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
259 /* Verify archive format. */
260 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
261 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
264 /* Close the archive. */
265 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
266 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
270 test_read_format_tar_filename_KOI8R_CP1251(const char *refname)
273 struct archive_entry *ae;
276 * Read filename in CP1251 with "hdrcharset=KOI8-R" option.
277 * We should correctly read two filenames.
279 if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
280 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
281 skipping("CP1251 locale not available on this system.");
285 /* Test if the platform can convert from UTF-8. */
286 assert((a = archive_read_new()) != NULL);
287 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
288 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
289 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
290 skipping("This system cannot convert character-set"
291 " from UTF-8 to CP1251.");
294 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
296 assert((a = archive_read_new()) != NULL);
297 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
298 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
299 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
300 skipping("This system cannot convert character-set"
301 " from KOI8-R to CP1251.");
304 assertEqualIntA(a, ARCHIVE_OK,
305 archive_read_open_filename(a, refname, 10240));
307 /* Verify regular first file. */
308 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
309 assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
310 archive_entry_pathname(ae));
311 assertEqualInt(6, archive_entry_size(ae));
312 assertEqualInt(archive_entry_is_encrypted(ae), 0);
313 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
315 /* Verify regular second file. */
316 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
317 assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
318 archive_entry_pathname(ae));
319 assertEqualInt(6, archive_entry_size(ae));
320 assertEqualInt(archive_entry_is_encrypted(ae), 0);
321 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
324 /* End of archive. */
325 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
327 /* Verify archive format. */
328 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
329 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
332 /* Close the archive. */
333 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
335 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
338 * Read filename in CP1251 without "hdrcharset=KOI8-R" option.
339 * The filename we can properly read is only second file.
342 assert((a = archive_read_new()) != NULL);
343 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
344 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
345 assertEqualIntA(a, ARCHIVE_OK,
346 archive_read_open_filename(a, refname, 10240));
349 * Verify regular first file.
350 * The filename is not translated to CP1251 because hdrcharset
351 * attribute is BINARY and there is not way to know its charset.
353 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
354 /* A filename is in KOI8-R. */
355 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
356 archive_entry_pathname(ae));
357 assertEqualInt(6, archive_entry_size(ae));
358 assertEqualInt(archive_entry_is_encrypted(ae), 0);
359 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
362 * Verify regular second file.
364 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
365 assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
366 archive_entry_pathname(ae));
367 assertEqualInt(6, archive_entry_size(ae));
368 assertEqualInt(archive_entry_is_encrypted(ae), 0);
369 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
372 /* End of archive. */
373 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
375 /* Verify archive format. */
376 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
377 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
380 /* Close the archive. */
381 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
382 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
/*
 * Test entry point: calls extract_reference_file() on the KOI8-R sample
 * archive name, then runs the CP866, UTF-8 and CP1251 checks against it,
 * in that order.
 */
DEFINE_TEST(test_read_format_tar_filename)
{
	const char *refname = "test_read_format_tar_filename_koi8r.tar.Z";

	extract_reference_file(refname);
	test_read_format_tar_filename_KOI8R_CP866(refname);
	test_read_format_tar_filename_KOI8R_UTF8(refname);
	test_read_format_tar_filename_KOI8R_CP1251(refname);
}