2 * Copyright (c) 2011 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Source-revision ident tag; the __FBSDID macro is defined outside this
 * file (presumably by the test harness header -- confirm). */
26 __FBSDID("$FreeBSD$");
/*
 * Test: zip filename handling under the en_US.UTF-8 locale.
 *
 * Writes three single-entry archives into a memory buffer and inspects the
 * raw bytes: buff[7] is compared against the expected value of bit 11 of
 * the general purpose flag (0x08 = filename is UTF-8) and the name stored
 * at buff + 30 is compared against the expected bytes.
 *   1. UTF-8 name, hdrcharset=UTF-8 set   -> flag 0x08, name unchanged.
 *   2. UTF-8 name, option not set         -> flag 0x08, name unchanged.
 *   3. ASCII name, option not set         -> flag 0,    name unchanged.
 * The test is skipped when the locale or the hdrcharset=UTF-8 option is
 * not available.
 */
30 DEFINE_TEST(test_zip_filename_encoding_UTF8)
33 struct archive_entry *entry;
/* The test needs a UTF-8 locale; skip when it cannot be selected. */
37 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
38 skipping("en_US.UTF-8 locale not available on this system.");
43 * Verify that UTF-8 filenames are correctly stored with
44 * hdrcharset=UTF-8 option.
46 a = archive_write_new();
47 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* If the option is rejected the case is skipped; the skip message
 * attributes this to missing character-set conversion support. */
48 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
49 skipping("This system cannot convert character-set"
51 archive_write_free(a);
/* Direct the archive output into buff so its bytes can be inspected. */
54 assertEqualInt(ARCHIVE_OK,
55 archive_write_open_memory(a, buff, sizeof(buff), &used));
57 entry = archive_entry_new2(a);
58 /* Set a UTF-8 filename (three characters, two bytes each). */
59 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
60 archive_entry_set_filetype(entry, AE_IFREG);
61 archive_entry_set_size(entry, 0);
62 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
63 archive_entry_free(entry);
/* archive_write_free() is asserted to succeed before buff is examined. */
64 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
66 /* Bit 11 of the general purpose flag (0x08 in buff[7]) should be set,
67 * which indicates the filename charset is UTF-8. */
68 assertEqualInt(0x08, buff[7]);
/* The stored name (at offset 30) must be the same six UTF-8 bytes. */
69 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
72 * Verify that UTF-8 filenames are correctly stored without
73 * hdrcharset=UTF-8 option.
75 a = archive_write_new();
76 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* No hdrcharset option in this case: the locale alone is UTF-8. */
77 assertEqualInt(ARCHIVE_OK,
78 archive_write_open_memory(a, buff, sizeof(buff), &used));
80 entry = archive_entry_new2(a);
81 /* Set a UTF-8 filename. */
82 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
83 archive_entry_set_filetype(entry, AE_IFREG);
84 archive_entry_set_size(entry, 0);
85 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
86 archive_entry_free(entry);
87 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
89 /* Bit 11 of the general purpose flag (0x08 in buff[7]) should be set,
90 * which indicates the filename charset is UTF-8. */
91 assertEqualInt(0x08, buff[7]);
92 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
95 * Verify that A bit 11 of general purpose flag is not set
96 * when ASCII filenames are stored.
98 a = archive_write_new();
99 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
100 assertEqualInt(ARCHIVE_OK,
101 archive_write_open_memory(a, buff, sizeof(buff), &used));
103 entry = archive_entry_new2(a);
104 /* Set an ASCII filename. */
105 archive_entry_set_pathname(entry, "abcABC");
106 archive_entry_set_filetype(entry, AE_IFREG);
107 archive_entry_set_size(entry, 0);
108 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
109 archive_entry_free(entry);
110 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
112 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
113 * which indicates the filename charset is unknown. */
114 assertEqualInt(0, buff[7]);
115 assertEqualMem(buff + 30, "abcABC", 6);
/*
 * Test: zip filename handling under the ru_RU.KOI8-R locale.
 *
 * Writes three single-entry archives into a memory buffer and checks
 * buff[7] (bit 11 of the general purpose flag, 0x08 = UTF-8) and the name
 * stored at buff + 30.
 *   1. KOI8-R name, hdrcharset=UTF-8 set  -> flag 0x08, name in UTF-8.
 *   2. KOI8-R name, option not set        -> flag 0,    name unchanged.
 *   3. ASCII name, hdrcharset=UTF-8 set   -> flag 0,    name unchanged.
 * The test is skipped when the locale or the hdrcharset=UTF-8 option is
 * not available.
 */
118 DEFINE_TEST(test_zip_filename_encoding_KOI8R)
121 struct archive_entry *entry;
/* The test needs the KOI8-R locale; skip when it cannot be selected. */
125 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
126 skipping("KOI8-R locale not available on this system.");
131 * Verify that KOI8-R filenames are correctly translated to UTF-8.
133 a = archive_write_new();
134 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* If the option is rejected, skip and release the writer. */
135 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
136 skipping("This system cannot convert character-set"
137 " from KOI8-R to UTF-8.");
138 archive_write_free(a);
/* Direct the archive output into buff so its bytes can be inspected. */
141 assertEqualInt(ARCHIVE_OK,
142 archive_write_open_memory(a, buff, sizeof(buff), &used));
144 entry = archive_entry_new2(a);
145 /* Set a KOI8-R filename (three single-byte characters). */
146 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
147 archive_entry_set_filetype(entry, AE_IFREG);
148 archive_entry_set_size(entry, 0);
149 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
150 archive_entry_free(entry);
151 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
153 /* Bit 11 of the general purpose flag (0x08 in buff[7]) should be set,
154 * which indicates the filename charset is UTF-8. */
155 assertEqualInt(0x08, buff[7]);
156 /* Above three characters in KOI8-R should translate to the following
157 * three characters (two bytes each) in UTF-8. */
158 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
161 * Verify that KOI8-R filenames are not translated to UTF-8.
163 a = archive_write_new();
164 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* No hdrcharset option in this case. */
165 assertEqualInt(ARCHIVE_OK,
166 archive_write_open_memory(a, buff, sizeof(buff), &used));
168 entry = archive_entry_new2(a);
169 /* Set a KOI8-R filename. */
170 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
171 archive_entry_set_filetype(entry, AE_IFREG);
172 archive_entry_set_size(entry, 0);
173 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
174 archive_entry_free(entry);
175 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
177 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
178 * which indicates the filename charset is unknown. */
179 assertEqualInt(0, buff[7]);
180 /* Above three characters in KOI8-R should be stored as-is, without
181 * translation to any other character-set. */
182 assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3);
185 * Verify that A bit 11 of general purpose flag is not set
186 * when ASCII filenames are stored even if hdrcharset=UTF-8
189 a = archive_write_new();
190 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* If the option is rejected, skip and release the writer. */
191 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
192 skipping("This system cannot convert character-set"
193 " from KOI8-R to UTF-8.");
194 archive_write_free(a);
197 assertEqualInt(ARCHIVE_OK,
198 archive_write_open_memory(a, buff, sizeof(buff), &used));
200 entry = archive_entry_new2(a);
201 /* Set an ASCII filename. */
202 archive_entry_set_pathname(entry, "abcABC");
203 archive_entry_set_filetype(entry, AE_IFREG);
204 archive_entry_set_size(entry, 0);
205 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
206 archive_entry_free(entry);
207 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
209 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
210 * which indicates the filename charset is unknown. */
211 assertEqualInt(0, buff[7]);
212 assertEqualMem(buff + 30, "abcABC", 6);
216 * Do not translate CP1251 into CP866 on non-Windows platforms.
/*
 * Test: zip filename handling under the ru_RU.CP1251 locale.
 *
 * Writes one single-entry archive into a memory buffer without the
 * hdrcharset option and checks that buff[7] is 0 (bit 11 of the general
 * purpose flag clear) and that the three CP1251 bytes of the name appear
 * unchanged at buff + 30.
 * The test is skipped when the locale cannot be selected.
 */
218 DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251)
221 struct archive_entry *entry;
/* NOTE(review): the locale requested here is "ru_RU.CP1251", but the skip
 * message below names "Russian_Russia"; the text looks copied from the
 * Russian_Russia test and should probably name ru_RU.CP1251. */
225 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
226 skipping("Russian_Russia locale not available on this system.");
231 * Verify that CP1251 filenames are not translated into any
232 * other character-set, in particular, CP866.
234 a = archive_write_new();
235 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* Direct the archive output into buff so its bytes can be inspected. */
236 assertEqualInt(ARCHIVE_OK,
237 archive_write_open_memory(a, buff, sizeof(buff), &used));
239 entry = archive_entry_new2(a);
240 /* Set a CP1251 filename (three single-byte characters). */
241 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
242 archive_entry_set_filetype(entry, AE_IFREG);
243 archive_entry_set_size(entry, 0);
244 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
245 archive_entry_free(entry);
246 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
248 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
249 * which indicates the filename charset is unknown. */
250 assertEqualInt(0, buff[7]);
251 /* Above three characters in CP1251 should not translate into
252 * any other character-set. */
253 assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3);
257 * Other archiver applications on Windows translate CP1251 filenames
258 * into CP866 filenames and store them in the zip file.
259 * Verify that the same behavior works here.
/*
 * Test: zip filename handling under the "Russian_Russia" locale.
 *
 * Writes two single-entry archives into a memory buffer and checks
 * buff[7] (bit 11 of the general purpose flag, 0x08 = UTF-8) and the name
 * stored at buff + 30.
 *   1. CP1251 name, hdrcharset=UTF-8 set  -> flag 0x08, name in UTF-8.
 *   2. CP1251 name, option not set        -> flag 0,    name in CP866.
 * The test is skipped when the locale or the hdrcharset=UTF-8 option is
 * not available.
 */
261 DEFINE_TEST(test_zip_filename_encoding_Russian_Russia)
264 struct archive_entry *entry;
/* The test needs the Russian_Russia locale; skip when it cannot be
 * selected. */
268 if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
269 skipping("Russian_Russia locale not available on this system.");
274 * Verify that Russian_Russia(CP1251) filenames are correctly translated
277 a = archive_write_new();
278 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* First case: request UTF-8 header names. If the option is rejected,
 * skip and release the writer. */
279 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
280 skipping("This system cannot convert character-set"
281 " from Russian_Russia.CP1251 to UTF-8.");
282 archive_write_free(a);
/* Direct the archive output into buff so its bytes can be inspected. */
285 assertEqualInt(ARCHIVE_OK,
286 archive_write_open_memory(a, buff, sizeof(buff), &used));
288 entry = archive_entry_new2(a);
289 /* Set a CP1251 filename (three single-byte characters). */
290 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
291 archive_entry_set_filetype(entry, AE_IFREG);
292 archive_entry_set_size(entry, 0);
293 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
294 archive_entry_free(entry);
295 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
297 /* Bit 11 of the general purpose flag (0x08 in buff[7]) should be set,
298 * which indicates the filename charset is UTF-8. */
299 assertEqualInt(0x08, buff[7]);
300 /* Above three characters in CP1251 should translate to the following
301 * three characters (two bytes each) in UTF-8. */
302 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
305 * Verify that Russian_Russia(CP1251) filenames are correctly translated
308 a = archive_write_new();
309 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* Second case: no hdrcharset option; the name is expected in CP866. */
310 assertEqualInt(ARCHIVE_OK,
311 archive_write_open_memory(a, buff, sizeof(buff), &used));
313 entry = archive_entry_new2(a);
314 /* Set a CP1251 filename. */
315 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
316 archive_entry_set_filetype(entry, AE_IFREG);
317 archive_entry_set_size(entry, 0);
318 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
319 archive_entry_free(entry);
320 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
322 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
323 * which indicates the filename charset is unknown. */
324 assertEqualInt(0, buff[7]);
325 /* Above three characters in CP1251 should translate to the following
326 * three characters in CP866. */
327 assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3);
/*
 * Test: zip filename handling under the ja_JP.eucJP locale.
 *
 * Writes three single-entry archives into a memory buffer and checks
 * buff[7] (bit 11 of the general purpose flag, 0x08 = UTF-8) and the name
 * stored at buff + 30.
 *   1. EUC-JP name, hdrcharset=UTF-8 set  -> flag 0x08, name in UTF-8.
 *   2. EUC-JP name, option not set        -> flag 0,    name unchanged.
 *   3. ASCII name, hdrcharset=UTF-8 set   -> flag 0,    name unchanged.
 * The test is skipped when the locale or the hdrcharset=UTF-8 option is
 * not available.
 */
330 DEFINE_TEST(test_zip_filename_encoding_EUCJP)
333 struct archive_entry *entry;
/* The test needs the eucJP locale; skip when it cannot be selected. */
337 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
338 skipping("eucJP locale not available on this system.");
343 * Verify that EUC-JP filenames are correctly translated to UTF-8.
345 a = archive_write_new();
346 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* If the option is rejected, skip and release the writer. */
347 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
348 skipping("This system cannot convert character-set"
349 " from eucJP to UTF-8.");
350 archive_write_free(a);
/* Direct the archive output into buff so its bytes can be inspected. */
353 assertEqualInt(ARCHIVE_OK,
354 archive_write_open_memory(a, buff, sizeof(buff), &used));
356 entry = archive_entry_new2(a);
357 /* Set an EUC-JP filename (two EUC-JP bytes followed by ".txt"). */
358 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
359 /* Regular file of size zero; only header bytes are inspected below. */
360 archive_entry_set_filetype(entry, AE_IFREG);
361 archive_entry_set_size(entry, 0);
362 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
363 archive_entry_free(entry);
364 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
366 /* Bit 11 of the general purpose flag (0x08 in buff[7]) should be set,
367 * which indicates the filename charset is UTF-8. */
368 assertEqualInt(0x08, buff[7]);
369 /* Check the UTF-8 form of the name (three UTF-8 bytes plus ".txt"). */
370 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
373 * Verify that EUC-JP filenames are not translated to UTF-8.
375 a = archive_write_new();
376 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* No hdrcharset option in this case. */
377 assertEqualInt(ARCHIVE_OK,
378 archive_write_open_memory(a, buff, sizeof(buff), &used));
380 entry = archive_entry_new2(a);
381 /* Set an EUC-JP filename. */
382 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
383 /* Regular file of size zero; only header bytes are inspected below. */
384 archive_entry_set_filetype(entry, AE_IFREG);
385 archive_entry_set_size(entry, 0);
386 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
387 archive_entry_free(entry);
388 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
390 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
391 * which indicates the filename charset is unknown. */
392 assertEqualInt(0, buff[7]);
393 /* The EUC-JP bytes of the name should be stored as-is (six bytes in
394 * total), without translation to any other character-set. */
395 assertEqualMem(buff + 30, "\xC9\xBD.txt", 6);
398 * Verify that A bit 11 of general purpose flag is not set
399 * when ASCII filenames are stored even if hdrcharset=UTF-8
402 a = archive_write_new();
403 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* If the option is rejected, skip and release the writer. */
404 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
405 skipping("This system cannot convert character-set"
406 " from eucJP to UTF-8.");
407 archive_write_free(a);
410 assertEqualInt(ARCHIVE_OK,
411 archive_write_open_memory(a, buff, sizeof(buff), &used));
413 entry = archive_entry_new2(a);
414 /* Set an ASCII filename. */
415 archive_entry_set_pathname(entry, "abcABC");
416 /* Regular file of size zero; only header bytes are inspected below. */
417 archive_entry_set_filetype(entry, AE_IFREG);
418 archive_entry_set_size(entry, 0);
419 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
420 archive_entry_free(entry);
421 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
423 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
424 * which indicates the filename charset is unknown. */
425 assertEqualInt(0, buff[7]);
426 assertEqualMem(buff + 30, "abcABC", 6);
/*
 * Test: zip filename handling under a CP932/SJIS locale.
 *
 * Writes three single-entry archives into a memory buffer and checks
 * buff[7] (bit 11 of the general purpose flag, 0x08 = UTF-8) and the name
 * stored at buff + 30.
 *   1. CP932 name, hdrcharset=UTF-8 set   -> flag 0x08, name in UTF-8.
 *   2. CP932 name, option not set         -> flag 0,    name unchanged.
 *   3. ASCII name, hdrcharset=UTF-8 set   -> flag 0,    name unchanged.
 * The test is skipped when neither locale name can be selected or the
 * hdrcharset=UTF-8 option is not available.
 */
429 DEFINE_TEST(test_zip_filename_encoding_CP932)
432 struct archive_entry *entry;
/* Try "Japanese_Japan" first, then "ja_JP.SJIS"; skip only if both
 * fail. */
436 if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
437 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
438 skipping("CP932/SJIS locale not available on this system.");
443 * Verify that EUC-JP filenames are correctly translated to UTF-8.
445 a = archive_write_new();
446 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* NOTE(review): the case description above says EUC-JP, but the name set
 * below is CP932/SJIS; the wording looks copied from the EUC-JP test. */
/* If the option is rejected, skip and release the writer. */
447 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
448 skipping("This system cannot convert character-set"
449 " from CP932/SJIS to UTF-8.");
450 archive_write_free(a);
/* Direct the archive output into buff so its bytes can be inspected. */
453 assertEqualInt(ARCHIVE_OK,
454 archive_write_open_memory(a, buff, sizeof(buff), &used));
456 entry = archive_entry_new2(a);
457 /* Set a CP932/SJIS filename. The second byte, 0x5C, has the same value
 * as the ASCII backslash -- presumably chosen to exercise multibyte
 * handling; confirm. */
458 archive_entry_set_pathname(entry, "\x95\x5C.txt");
459 /* Regular file of size zero; only header bytes are inspected below. */
460 archive_entry_set_filetype(entry, AE_IFREG);
461 archive_entry_set_size(entry, 0);
462 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
463 archive_entry_free(entry);
464 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
466 /* Bit 11 of the general purpose flag (0x08 in buff[7]) should be set,
467 * which indicates the filename charset is UTF-8. */
468 assertEqualInt(0x08, buff[7]);
469 /* Check the UTF-8 form of the name (three UTF-8 bytes plus ".txt"). */
470 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
473 * Verify that CP932/SJIS filenames are not translated to UTF-8.
475 a = archive_write_new();
476 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* No hdrcharset option in this case. */
477 assertEqualInt(ARCHIVE_OK,
478 archive_write_open_memory(a, buff, sizeof(buff), &used));
480 entry = archive_entry_new2(a);
481 /* Set a CP932/SJIS filename. */
482 archive_entry_set_pathname(entry, "\x95\x5C.txt");
483 /* Regular file of size zero; only header bytes are inspected below. */
484 archive_entry_set_filetype(entry, AE_IFREG);
485 archive_entry_set_size(entry, 0);
486 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
487 archive_entry_free(entry);
488 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
490 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
491 * which indicates the filename charset is unknown. */
492 assertEqualInt(0, buff[7]);
493 /* The CP932/SJIS bytes of the name should be stored as-is (six bytes
494 * in total), without translation to any other character-set. */
495 assertEqualMem(buff + 30, "\x95\x5C.txt", 6);
498 * Verify that A bit 11 of general purpose flag is not set
499 * when ASCII filenames are stored even if hdrcharset=UTF-8
502 a = archive_write_new();
503 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
/* If the option is rejected, skip and release the writer. */
504 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
505 skipping("This system cannot convert character-set"
506 " from CP932/SJIS to UTF-8.");
507 archive_write_free(a);
510 assertEqualInt(ARCHIVE_OK,
511 archive_write_open_memory(a, buff, sizeof(buff), &used));
513 entry = archive_entry_new2(a);
514 /* Set an ASCII filename. */
515 archive_entry_set_pathname(entry, "abcABC");
516 /* Regular file of size zero; only header bytes are inspected below. */
517 archive_entry_set_filetype(entry, AE_IFREG);
518 archive_entry_set_size(entry, 0);
519 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
520 archive_entry_free(entry);
521 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
523 /* Bit 11 of the general purpose flag should be 0 (buff[7] == 0),
524 * which indicates the filename charset is unknown. */
525 assertEqualInt(0, buff[7]);
526 assertEqualMem(buff + 30, "abcABC", 6);