2 * Copyright (c) 2011 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 __FBSDID("$FreeBSD$");
/*
 * Checks ZIP filename storage under the en_US.UTF-8 locale: a UTF-8 name
 * written with and without the hdrcharset=UTF-8 option, then an ASCII name.
 * The whole test is skipped when the locale cannot be selected.
 */
31 test_zip_filename_encoding_UTF8(void)
34 struct archive_entry *entry;
38 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
39 skipping("en_US.UTF-8 locale not available on this system.");
44 * Verify that UTF-8 filenames are correctly stored with
45 * hdrcharset=UTF-8 option.
47 a = archive_write_new();
48 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
49 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
50 skipping("This system cannot convert character-set"
52 archive_write_free(a);
55 assertEqualInt(ARCHIVE_OK,
56 archive_write_open_memory(a, buff, sizeof(buff), &used));
58 entry = archive_entry_new2(a);
59 /* Set a UTF-8 filename (6 bytes). */
60 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
61 archive_entry_set_filetype(entry, AE_IFREG);
62 archive_entry_set_size(entry, 0);
63 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
64 archive_entry_free(entry);
65 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
67 /* buff[7], the high byte of the general purpose flag, must be 0x08:
68 * bit 11 is set, which marks the filename charset as UTF-8. */
69 assertEqualInt(0x08, buff[7]);
70 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
73 * Verify that UTF-8 filenames are correctly stored without
74 * hdrcharset=UTF-8 option.
76 a = archive_write_new();
77 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
78 assertEqualInt(ARCHIVE_OK,
79 archive_write_open_memory(a, buff, sizeof(buff), &used));
81 entry = archive_entry_new2(a);
82 /* Set the same UTF-8 filename (6 bytes). */
83 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
84 archive_entry_set_filetype(entry, AE_IFREG);
85 archive_entry_set_size(entry, 0);
86 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
87 archive_entry_free(entry);
88 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
90 /* buff[7], the high byte of the general purpose flag, must be 0x08:
91 * bit 11 is set, which marks the filename charset as UTF-8. */
92 assertEqualInt(0x08, buff[7]);
93 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
96 * Verify that A bit 11 of general purpose flag is not set
97 * when ASCII filenames are stored.
99 a = archive_write_new();
100 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
101 assertEqualInt(ARCHIVE_OK,
102 archive_write_open_memory(a, buff, sizeof(buff), &used));
104 entry = archive_entry_new2(a);
105 /* Set an ASCII filename. */
106 archive_entry_set_pathname(entry, "abcABC");
107 archive_entry_set_filetype(entry, AE_IFREG);
108 archive_entry_set_size(entry, 0);
109 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
110 archive_entry_free(entry);
111 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
113 /* buff[7], the high byte of the general purpose flag, must be 0:
114 * bit 11 is clear, so the filename charset is left unspecified. */
115 assertEqualInt(0, buff[7]);
116 assertEqualMem(buff + 30, "abcABC", 6);
/*
 * Checks ZIP filename storage under the ru_RU.KOI8-R locale: a KOI8-R name
 * with hdrcharset=UTF-8 (expected to be converted), the same name without
 * the option (expected to be stored as-is), and an ASCII name with the
 * option.  The whole test is skipped when the locale cannot be selected.
 */
120 test_zip_filename_encoding_KOI8R(void)
123 struct archive_entry *entry;
127 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
128 skipping("KOI8-R locale not available on this system.");
133 * Verify that KOI8-R filenames are correctly translated to UTF-8.
135 a = archive_write_new();
136 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
137 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
138 skipping("This system cannot convert character-set"
139 " from KOI8-R to UTF-8.");
140 archive_write_free(a);
143 assertEqualInt(ARCHIVE_OK,
144 archive_write_open_memory(a, buff, sizeof(buff), &used));
146 entry = archive_entry_new2(a);
147 /* Set a KOI8-R filename (3 bytes). */
148 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
149 archive_entry_set_filetype(entry, AE_IFREG);
150 archive_entry_set_size(entry, 0);
151 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
152 archive_entry_free(entry);
153 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
155 /* buff[7], the high byte of the general purpose flag, must be 0x08:
156 * bit 11 is set, which marks the filename charset as UTF-8. */
157 assertEqualInt(0x08, buff[7]);
158 /* The three KOI8-R characters above should translate to the following
159 * three characters (two bytes each) in UTF-8. */
160 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
163 * Verify that KOI8-R filenames are not translated to UTF-8.
165 a = archive_write_new();
166 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
167 assertEqualInt(ARCHIVE_OK,
168 archive_write_open_memory(a, buff, sizeof(buff), &used));
170 entry = archive_entry_new2(a);
171 /* Set the same KOI8-R filename (3 bytes). */
172 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
173 archive_entry_set_filetype(entry, AE_IFREG);
174 archive_entry_set_size(entry, 0);
175 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
176 archive_entry_free(entry);
177 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
179 /* buff[7], the high byte of the general purpose flag, must be 0:
180 * bit 11 is clear, so the filename charset is left unspecified. */
181 assertEqualInt(0, buff[7]);
182 /* The three KOI8-R bytes above must be stored unchanged, with no
183 * character-set conversion. */
184 assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3);
187 * Verify that A bit 11 of general purpose flag is not set
188 * when ASCII filenames are stored even if hdrcharset=UTF-8
191 a = archive_write_new();
192 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
193 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
194 skipping("This system cannot convert character-set"
195 " from KOI8-R to UTF-8.");
196 archive_write_free(a);
199 assertEqualInt(ARCHIVE_OK,
200 archive_write_open_memory(a, buff, sizeof(buff), &used));
202 entry = archive_entry_new2(a);
203 /* Set an ASCII filename. */
204 archive_entry_set_pathname(entry, "abcABC");
205 archive_entry_set_filetype(entry, AE_IFREG);
206 archive_entry_set_size(entry, 0);
207 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
208 archive_entry_free(entry);
209 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
211 /* buff[7], the high byte of the general purpose flag, must be 0:
212 * bit 11 is clear, so the filename charset is left unspecified. */
213 assertEqualInt(0, buff[7]);
214 assertEqualMem(buff + 30, "abcABC", 6);
218 * Do not translate CP1251 into CP866 on non-Windows platforms.
221 test_zip_filename_encoding_ru_RU_CP1251(void)
224 struct archive_entry *entry;
228 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
229 skipping("ru_RU.CP1251 locale not available on this system.");
234 * Verify that CP1251 filenames are not translated into any
235 * other character-set, in particular, CP866.
237 a = archive_write_new();
238 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
239 assertEqualInt(ARCHIVE_OK,
240 archive_write_open_memory(a, buff, sizeof(buff), &used));
242 entry = archive_entry_new2(a);
243 /* Set a CP1251 filename (3 bytes). */
244 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
245 archive_entry_set_filetype(entry, AE_IFREG);
246 archive_entry_set_size(entry, 0);
247 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
248 archive_entry_free(entry);
249 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
251 /* buff[7], the high byte of the general purpose flag, must be 0:
252 * bit 11 is clear, so the filename charset is left unspecified. */
253 assertEqualInt(0, buff[7]);
254 /* The three CP1251 bytes above must be stored unchanged, with no
255 * conversion into any other character-set. */
256 assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3);
260 * Other archiver applications on Windows translate CP1251 filenames
261 * into CP866 filenames and store them in the zip file.
262 * Test that the above behavior works.
265 test_zip_filename_encoding_Russian_Russia(void)
268 struct archive_entry *entry;
272 if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
273 skipping("Russian_Russia locale not available on this system.");
278 * Verify that Russian_Russia(CP1251) filenames are correctly translated
281 a = archive_write_new();
282 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
283 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
284 skipping("This system cannot convert character-set"
285 " from Russian_Russia.CP1251 to UTF-8.");
286 archive_write_free(a);
289 assertEqualInt(ARCHIVE_OK,
290 archive_write_open_memory(a, buff, sizeof(buff), &used));
292 entry = archive_entry_new2(a);
293 /* Set a CP1251 filename (3 bytes). */
294 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
295 archive_entry_set_filetype(entry, AE_IFREG);
296 archive_entry_set_size(entry, 0);
297 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
298 archive_entry_free(entry);
299 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
301 /* buff[7], the high byte of the general purpose flag, must be 0x08:
302 * bit 11 is set, which marks the filename charset as UTF-8. */
303 assertEqualInt(0x08, buff[7]);
304 /* The three CP1251 characters above should translate to the following
305 * three characters (two bytes each) in UTF-8. */
306 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
309 * Verify that Russian_Russia(CP1251) filenames are correctly translated
312 a = archive_write_new();
313 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
314 assertEqualInt(ARCHIVE_OK,
315 archive_write_open_memory(a, buff, sizeof(buff), &used));
317 entry = archive_entry_new2(a);
318 /* Set the same CP1251 filename (3 bytes). */
319 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
320 archive_entry_set_filetype(entry, AE_IFREG);
321 archive_entry_set_size(entry, 0);
322 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
323 archive_entry_free(entry);
324 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
326 /* buff[7], the high byte of the general purpose flag, must be 0:
327 * bit 11 is clear, so the filename charset is left unspecified. */
328 assertEqualInt(0, buff[7]);
329 /* The three CP1251 characters above should translate to the following
330 * three single-byte characters in CP866. */
331 assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3);
/*
 * Checks ZIP filename storage under the ja_JP.eucJP locale: an EUC-JP name
 * with hdrcharset=UTF-8 (expected to be converted), the same name without
 * the option (expected to be stored as-is), and an ASCII name with the
 * option.  The whole test is skipped when the locale cannot be selected.
 */
335 test_zip_filename_encoding_EUCJP(void)
338 struct archive_entry *entry;
342 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
343 skipping("eucJP locale not available on this system.");
348 * Verify that EUC-JP filenames are correctly translated to UTF-8.
350 a = archive_write_new();
351 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
352 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
353 skipping("This system cannot convert character-set"
354 " from eucJP to UTF-8.");
355 archive_write_free(a);
358 assertEqualInt(ARCHIVE_OK,
359 archive_write_open_memory(a, buff, sizeof(buff), &used));
361 entry = archive_entry_new2(a);
362 /* Set an EUC-JP filename (2 non-ASCII bytes plus ".txt"). */
363 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
364 /* Describe the entry as an empty regular file. */
365 archive_entry_set_filetype(entry, AE_IFREG);
366 archive_entry_set_size(entry, 0);
367 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
368 archive_entry_free(entry);
369 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
371 /* buff[7], the high byte of the general purpose flag, must be 0x08:
372 * bit 11 is set, which marks the filename charset as UTF-8. */
373 assertEqualInt(0x08, buff[7]);
374 /* The stored name must be the UTF-8 form (7 bytes). */
375 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
378 * Verify that EUC-JP filenames are not translated to UTF-8.
380 a = archive_write_new();
381 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
382 assertEqualInt(ARCHIVE_OK,
383 archive_write_open_memory(a, buff, sizeof(buff), &used));
385 entry = archive_entry_new2(a);
386 /* Set the same EUC-JP filename. */
387 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
388 /* Describe the entry as an empty regular file. */
389 archive_entry_set_filetype(entry, AE_IFREG);
390 archive_entry_set_size(entry, 0);
391 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
392 archive_entry_free(entry);
393 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
395 /* buff[7], the high byte of the general purpose flag, must be 0:
396 * bit 11 is clear, so the filename charset is left unspecified. */
397 assertEqualInt(0, buff[7]);
398 /* The EUC-JP name above must be stored unchanged (6 bytes), with no
399 * character-set conversion. */
400 assertEqualMem(buff + 30, "\xC9\xBD.txt", 6);
403 * Verify that A bit 11 of general purpose flag is not set
404 * when ASCII filenames are stored even if hdrcharset=UTF-8
407 a = archive_write_new();
408 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
409 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
410 skipping("This system cannot convert character-set"
411 " from eucJP to UTF-8.");
412 archive_write_free(a);
415 assertEqualInt(ARCHIVE_OK,
416 archive_write_open_memory(a, buff, sizeof(buff), &used));
418 entry = archive_entry_new2(a);
419 /* Set an ASCII filename. */
420 archive_entry_set_pathname(entry, "abcABC");
421 /* Describe the entry as an empty regular file. */
422 archive_entry_set_filetype(entry, AE_IFREG);
423 archive_entry_set_size(entry, 0);
424 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
425 archive_entry_free(entry);
426 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
428 /* buff[7], the high byte of the general purpose flag, must be 0:
429 * bit 11 is clear, so the filename charset is left unspecified. */
430 assertEqualInt(0, buff[7]);
431 assertEqualMem(buff + 30, "abcABC", 6);
/*
 * Checks ZIP filename storage under a CP932/SJIS locale ("Japanese_Japan",
 * falling back to "ja_JP.SJIS"): a CP932/SJIS name with hdrcharset=UTF-8
 * (expected to be converted), the same name without the option (expected to
 * be stored as-is), and an ASCII name with the option.  The whole test is
 * skipped when neither locale can be selected.
 */
435 test_zip_filename_encoding_CP932(void)
438 struct archive_entry *entry;
442 if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
443 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
444 skipping("CP932/SJIS locale not available on this system.");
449 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
451 a = archive_write_new();
452 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
453 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
454 skipping("This system cannot convert character-set"
455 " from CP932/SJIS to UTF-8.");
456 archive_write_free(a);
459 assertEqualInt(ARCHIVE_OK,
460 archive_write_open_memory(a, buff, sizeof(buff), &used));
462 entry = archive_entry_new2(a);
463 /* Set a CP932/SJIS filename (2 leading bytes plus ".txt"). */
464 archive_entry_set_pathname(entry, "\x95\x5C.txt");
465 /* Describe the entry as an empty regular file. */
466 archive_entry_set_filetype(entry, AE_IFREG);
467 archive_entry_set_size(entry, 0);
468 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
469 archive_entry_free(entry);
470 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
472 /* buff[7], the high byte of the general purpose flag, must be 0x08:
473 * bit 11 is set, which marks the filename charset as UTF-8. */
474 assertEqualInt(0x08, buff[7]);
475 /* The stored name must be the UTF-8 form (7 bytes). */
476 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
479 * Verify that CP932/SJIS filenames are not translated to UTF-8.
481 a = archive_write_new();
482 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
483 assertEqualInt(ARCHIVE_OK,
484 archive_write_open_memory(a, buff, sizeof(buff), &used));
486 entry = archive_entry_new2(a);
487 /* Set the same CP932/SJIS filename. */
488 archive_entry_set_pathname(entry, "\x95\x5C.txt");
489 /* Describe the entry as an empty regular file. */
490 archive_entry_set_filetype(entry, AE_IFREG);
491 archive_entry_set_size(entry, 0);
492 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
493 archive_entry_free(entry);
494 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
496 /* buff[7], the high byte of the general purpose flag, must be 0:
497 * bit 11 is clear, so the filename charset is left unspecified. */
498 assertEqualInt(0, buff[7]);
499 /* The CP932/SJIS name above must be stored unchanged (6 bytes), with
500 * no character-set conversion. */
501 assertEqualMem(buff + 30, "\x95\x5C.txt", 6);
504 * Verify that A bit 11 of general purpose flag is not set
505 * when ASCII filenames are stored even if hdrcharset=UTF-8
508 a = archive_write_new();
509 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
510 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
511 skipping("This system cannot convert character-set"
512 " from CP932/SJIS to UTF-8.");
513 archive_write_free(a);
516 assertEqualInt(ARCHIVE_OK,
517 archive_write_open_memory(a, buff, sizeof(buff), &used));
519 entry = archive_entry_new2(a);
520 /* Set an ASCII filename. */
521 archive_entry_set_pathname(entry, "abcABC");
522 /* Describe the entry as an empty regular file. */
523 archive_entry_set_filetype(entry, AE_IFREG);
524 archive_entry_set_size(entry, 0);
525 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
526 archive_entry_free(entry);
527 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
529 /* buff[7], the high byte of the general purpose flag, must be 0:
530 * bit 11 is clear, so the filename charset is left unspecified. */
531 assertEqualInt(0, buff[7]);
532 assertEqualMem(buff + 30, "abcABC", 6);
/*
 * Test entry point: runs each locale-specific filename-encoding check in
 * turn.  Every helper calls setlocale() itself before doing any work.
 */
535 DEFINE_TEST(test_zip_filename_encoding)
537 test_zip_filename_encoding_UTF8();
538 test_zip_filename_encoding_KOI8R();
539 test_zip_filename_encoding_ru_RU_CP1251();
540 test_zip_filename_encoding_Russian_Russia();
541 test_zip_filename_encoding_EUCJP();
542 test_zip_filename_encoding_CP932();