]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/libarchive/libarchive/test/test_zip_filename_encoding.c
MFC r368207,368607:
[FreeBSD/stable/10.git] / contrib / libarchive / libarchive / test / test_zip_filename_encoding.c
1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27
28 #include <locale.h>
29
30 DEFINE_TEST(test_zip_filename_encoding_UTF8)
31 {
32         struct archive *a;
33         struct archive_entry *entry;
34         char buff[4096];
35         size_t used;
36
37         if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
38                 skipping("en_US.UTF-8 locale not available on this system.");
39                 return;
40         }
41
42         /*
43          * Verify that UTF-8 filenames are correctly stored with
44          * hdrcharset=UTF-8 option.
45          */
46         a = archive_write_new();
47         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
48         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
49                 skipping("This system cannot convert character-set"
50                     " for UTF-8.");
51                 archive_write_free(a);
52                 return;
53         }
54         assertEqualInt(ARCHIVE_OK,
55             archive_write_open_memory(a, buff, sizeof(buff), &used));
56
57         entry = archive_entry_new2(a);
58         /* Set a UTF-8 filename. */
59         archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
60         archive_entry_set_filetype(entry, AE_IFREG);
61         archive_entry_set_size(entry, 0);
62         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
63         archive_entry_free(entry);
64         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
65
66         /* A bit 11 of general purpose flag should be 0x08,
67          * which indicates the filename charset is UTF-8. */
68         assertEqualInt(0x08, buff[7]);
69         assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
70
71         /*
72          * Verify that UTF-8 filenames are correctly stored without
73          * hdrcharset=UTF-8 option.
74          */
75         a = archive_write_new();
76         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
77         assertEqualInt(ARCHIVE_OK,
78             archive_write_open_memory(a, buff, sizeof(buff), &used));
79
80         entry = archive_entry_new2(a);
81         /* Set a UTF-8 filename. */
82         archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
83         archive_entry_set_filetype(entry, AE_IFREG);
84         archive_entry_set_size(entry, 0);
85         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
86         archive_entry_free(entry);
87         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
88
89         /* A bit 11 of general purpose flag should be 0x08,
90          * which indicates the filename charset is UTF-8. */
91         assertEqualInt(0x08, buff[7]);
92         assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
93
94         /*
95          * Verify that A bit 11 of general purpose flag is not set
96          * when ASCII filenames are stored.
97          */
98         a = archive_write_new();
99         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
100         assertEqualInt(ARCHIVE_OK,
101             archive_write_open_memory(a, buff, sizeof(buff), &used));
102
103         entry = archive_entry_new2(a);
104         /* Set an ASCII filename. */
105         archive_entry_set_pathname(entry, "abcABC");
106         archive_entry_set_filetype(entry, AE_IFREG);
107         archive_entry_set_size(entry, 0);
108         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
109         archive_entry_free(entry);
110         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
111
112         /* A bit 11 of general purpose flag should be 0,
113          * which indicates the filename charset is unknown. */
114         assertEqualInt(0, buff[7]);
115         assertEqualMem(buff + 30, "abcABC", 6);
116 }
117
118 DEFINE_TEST(test_zip_filename_encoding_KOI8R)
119 {
120         struct archive *a;
121         struct archive_entry *entry;
122         char buff[4096];
123         size_t used;
124
125         if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
126                 skipping("KOI8-R locale not available on this system.");
127                 return;
128         }
129
130         /*
131          * Verify that KOI8-R filenames are correctly translated to UTF-8.
132          */
133         a = archive_write_new();
134         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
135         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
136                 skipping("This system cannot convert character-set"
137                     " from KOI8-R to UTF-8.");
138                 archive_write_free(a);
139                 return;
140         }
141         assertEqualInt(ARCHIVE_OK,
142             archive_write_open_memory(a, buff, sizeof(buff), &used));
143
144         entry = archive_entry_new2(a);
145         /* Set a KOI8-R filename. */
146         archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
147         archive_entry_set_filetype(entry, AE_IFREG);
148         archive_entry_set_size(entry, 0);
149         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
150         archive_entry_free(entry);
151         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
152
153         /* A bit 11 of general purpose flag should be 0x08,
154          * which indicates the filename charset is UTF-8. */
155         assertEqualInt(0x08, buff[7]);
156         /* Above three characters in KOI8-R should translate to the following
157          * three characters (two bytes each) in UTF-8. */
158         assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
159
160         /*
161          * Verify that KOI8-R filenames are not translated to UTF-8.
162          */
163         a = archive_write_new();
164         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
165         assertEqualInt(ARCHIVE_OK,
166             archive_write_open_memory(a, buff, sizeof(buff), &used));
167
168         entry = archive_entry_new2(a);
169         /* Set a KOI8-R filename. */
170         archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
171         archive_entry_set_filetype(entry, AE_IFREG);
172         archive_entry_set_size(entry, 0);
173         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
174         archive_entry_free(entry);
175         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
176
177         /* A bit 11 of general purpose flag should be 0,
178          * which indicates the filename charset is unknown. */
179         assertEqualInt(0, buff[7]);
180         /* Above three characters in KOI8-R should not translate to
181          * any character-set. */
182         assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3);
183
184         /*
185          * Verify that A bit 11 of general purpose flag is not set
186          * when ASCII filenames are stored even if hdrcharset=UTF-8
187          * is specified.
188          */
189         a = archive_write_new();
190         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
191         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
192                 skipping("This system cannot convert character-set"
193                     " from KOI8-R to UTF-8.");
194                 archive_write_free(a);
195                 return;
196         }
197         assertEqualInt(ARCHIVE_OK,
198             archive_write_open_memory(a, buff, sizeof(buff), &used));
199
200         entry = archive_entry_new2(a);
201         /* Set an ASCII filename. */
202         archive_entry_set_pathname(entry, "abcABC");
203         archive_entry_set_filetype(entry, AE_IFREG);
204         archive_entry_set_size(entry, 0);
205         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
206         archive_entry_free(entry);
207         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
208
209         /* A bit 11 of general purpose flag should be 0,
210          * which indicates the filename charset is unknown. */
211         assertEqualInt(0, buff[7]);
212         assertEqualMem(buff + 30, "abcABC", 6);
213 }
214
215 /*
216  * Do not translate CP1251 into CP866 if non Windows platform.
217  */
218 DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251)
219 {
220         struct archive *a;
221         struct archive_entry *entry;
222         char buff[4096];
223         size_t used;
224
225         if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
226                 skipping("Russian_Russia locale not available on this system.");
227                 return;
228         }
229
230         /*
231          * Verify that CP1251 filenames are not translated into any
232          * other character-set, in particular, CP866.
233          */
234         a = archive_write_new();
235         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
236         assertEqualInt(ARCHIVE_OK,
237             archive_write_open_memory(a, buff, sizeof(buff), &used));
238
239         entry = archive_entry_new2(a);
240         /* Set a CP1251 filename. */
241         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
242         archive_entry_set_filetype(entry, AE_IFREG);
243         archive_entry_set_size(entry, 0);
244         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
245         archive_entry_free(entry);
246         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
247
248         /* A bit 11 of general purpose flag should be 0,
249          * which indicates the filename charset is unknown. */
250         assertEqualInt(0, buff[7]);
251         /* Above three characters in CP1251 should not translate into
252          * any other character-set. */
253         assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3);
254 }
255
256 /*
257  * Other archiver applications on Windows translate CP1251 filenames
258  * into CP866 filenames and store it in the zip file.
259  * Test above behavior works well.
260  */
261 DEFINE_TEST(test_zip_filename_encoding_Russian_Russia)
262 {
263         struct archive *a;
264         struct archive_entry *entry;
265         char buff[4096];
266         size_t used;
267
268         if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
269                 skipping("Russian_Russia locale not available on this system.");
270                 return;
271         }
272
273         /*
274          * Verify that Russian_Russia(CP1251) filenames are correctly translated
275          * to UTF-8.
276          */
277         a = archive_write_new();
278         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
279         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
280                 skipping("This system cannot convert character-set"
281                     " from Russian_Russia.CP1251 to UTF-8.");
282                 archive_write_free(a);
283                 return;
284         }
285         assertEqualInt(ARCHIVE_OK,
286             archive_write_open_memory(a, buff, sizeof(buff), &used));
287
288         entry = archive_entry_new2(a);
289         /* Set a CP1251 filename. */
290         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
291         archive_entry_set_filetype(entry, AE_IFREG);
292         archive_entry_set_size(entry, 0);
293         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
294         archive_entry_free(entry);
295         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
296
297         /* A bit 11 of general purpose flag should be 0x08,
298          * which indicates the filename charset is UTF-8. */
299         assertEqualInt(0x08, buff[7]);
300         /* Above three characters in CP1251 should translate to the following
301          * three characters (two bytes each) in UTF-8. */
302         assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
303
304         /*
305          * Verify that Russian_Russia(CP1251) filenames are correctly translated
306          * to CP866.
307          */
308         a = archive_write_new();
309         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
310         assertEqualInt(ARCHIVE_OK,
311             archive_write_open_memory(a, buff, sizeof(buff), &used));
312
313         entry = archive_entry_new2(a);
314         /* Set a CP1251 filename. */
315         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
316         archive_entry_set_filetype(entry, AE_IFREG);
317         archive_entry_set_size(entry, 0);
318         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
319         archive_entry_free(entry);
320         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
321
322         /* A bit 11 of general purpose flag should be 0,
323          * which indicates the filename charset is unknown. */
324         assertEqualInt(0, buff[7]);
325         /* Above three characters in CP1251 should translate to the following
326          * three characters in CP866. */
327         assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3);
328 }
329
330 DEFINE_TEST(test_zip_filename_encoding_EUCJP)
331 {
332         struct archive *a;
333         struct archive_entry *entry;
334         char buff[4096];
335         size_t used;
336
337         if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
338                 skipping("eucJP locale not available on this system.");
339                 return;
340         }
341
342         /*
343          * Verify that EUC-JP filenames are correctly translated to UTF-8.
344          */
345         a = archive_write_new();
346         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
347         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
348                 skipping("This system cannot convert character-set"
349                     " from eucJP to UTF-8.");
350                 archive_write_free(a);
351                 return;
352         }
353         assertEqualInt(ARCHIVE_OK,
354             archive_write_open_memory(a, buff, sizeof(buff), &used));
355
356         entry = archive_entry_new2(a);
357         /* Set an EUC-JP filename. */
358         archive_entry_set_pathname(entry, "\xC9\xBD.txt");
359         /* Check the Unicode version. */
360         archive_entry_set_filetype(entry, AE_IFREG);
361         archive_entry_set_size(entry, 0);
362         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
363         archive_entry_free(entry);
364         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
365
366         /* A bit 11 of general purpose flag should be 0x08,
367          * which indicates the filename charset is UTF-8. */
368         assertEqualInt(0x08, buff[7]);
369         /* Check UTF-8 version. */
370         assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
371
372         /*
373          * Verify that EUC-JP filenames are not translated to UTF-8.
374          */
375         a = archive_write_new();
376         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
377         assertEqualInt(ARCHIVE_OK,
378             archive_write_open_memory(a, buff, sizeof(buff), &used));
379
380         entry = archive_entry_new2(a);
381         /* Set an EUC-JP filename. */
382         archive_entry_set_pathname(entry, "\xC9\xBD.txt");
383         /* Check the Unicode version. */
384         archive_entry_set_filetype(entry, AE_IFREG);
385         archive_entry_set_size(entry, 0);
386         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
387         archive_entry_free(entry);
388         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
389
390         /* A bit 11 of general purpose flag should be 0,
391          * which indicates the filename charset is unknown. */
392         assertEqualInt(0, buff[7]);
393         /* Above three characters in EUC-JP should not translate to
394          * any character-set. */
395         assertEqualMem(buff + 30, "\xC9\xBD.txt", 6);
396
397         /*
398          * Verify that A bit 11 of general purpose flag is not set
399          * when ASCII filenames are stored even if hdrcharset=UTF-8
400          * is specified.
401          */
402         a = archive_write_new();
403         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
404         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
405                 skipping("This system cannot convert character-set"
406                     " from eucJP to UTF-8.");
407                 archive_write_free(a);
408                 return;
409         }
410         assertEqualInt(ARCHIVE_OK,
411             archive_write_open_memory(a, buff, sizeof(buff), &used));
412
413         entry = archive_entry_new2(a);
414         /* Set an ASCII filename. */
415         archive_entry_set_pathname(entry, "abcABC");
416         /* Check the Unicode version. */
417         archive_entry_set_filetype(entry, AE_IFREG);
418         archive_entry_set_size(entry, 0);
419         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
420         archive_entry_free(entry);
421         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
422
423         /* A bit 11 of general purpose flag should be 0,
424          * which indicates the filename charset is unknown. */
425         assertEqualInt(0, buff[7]);
426         assertEqualMem(buff + 30, "abcABC", 6);
427 }
428
429 DEFINE_TEST(test_zip_filename_encoding_CP932)
430 {
431         struct archive *a;
432         struct archive_entry *entry;
433         char buff[4096];
434         size_t used;
435
436         if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
437             NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
438                 skipping("CP932/SJIS locale not available on this system.");
439                 return;
440         }
441
442         /*
443          * Verify that EUC-JP filenames are correctly translated to UTF-8.
444          */
445         a = archive_write_new();
446         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
447         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
448                 skipping("This system cannot convert character-set"
449                     " from CP932/SJIS to UTF-8.");
450                 archive_write_free(a);
451                 return;
452         }
453         assertEqualInt(ARCHIVE_OK,
454             archive_write_open_memory(a, buff, sizeof(buff), &used));
455
456         entry = archive_entry_new2(a);
457         /* Set a CP932/SJIS filename. */
458         archive_entry_set_pathname(entry, "\x95\x5C.txt");
459         /* Check the Unicode version. */
460         archive_entry_set_filetype(entry, AE_IFREG);
461         archive_entry_set_size(entry, 0);
462         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
463         archive_entry_free(entry);
464         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
465
466         /* A bit 11 of general purpose flag should be 0x08,
467          * which indicates the filename charset is UTF-8. */
468         assertEqualInt(0x08, buff[7]);
469         /* Check UTF-8 version. */
470         assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7);
471
472         /*
473          * Verify that CP932/SJIS filenames are not translated to UTF-8.
474          */
475         a = archive_write_new();
476         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
477         assertEqualInt(ARCHIVE_OK,
478             archive_write_open_memory(a, buff, sizeof(buff), &used));
479
480         entry = archive_entry_new2(a);
481         /* Set a CP932/SJIS filename. */
482         archive_entry_set_pathname(entry, "\x95\x5C.txt");
483         /* Check the Unicode version. */
484         archive_entry_set_filetype(entry, AE_IFREG);
485         archive_entry_set_size(entry, 0);
486         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
487         archive_entry_free(entry);
488         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
489
490         /* A bit 11 of general purpose flag should be 0,
491          * which indicates the filename charset is unknown. */
492         assertEqualInt(0, buff[7]);
493         /* Above three characters in CP932/SJIS should not translate to
494          * any character-set. */
495         assertEqualMem(buff + 30, "\x95\x5C.txt", 6);
496
497         /*
498          * Verify that A bit 11 of general purpose flag is not set
499          * when ASCII filenames are stored even if hdrcharset=UTF-8
500          * is specified.
501          */
502         a = archive_write_new();
503         assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
504         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
505                 skipping("This system cannot convert character-set"
506                     " from CP932/SJIS to UTF-8.");
507                 archive_write_free(a);
508                 return;
509         }
510         assertEqualInt(ARCHIVE_OK,
511             archive_write_open_memory(a, buff, sizeof(buff), &used));
512
513         entry = archive_entry_new2(a);
514         /* Set an ASCII filename. */
515         archive_entry_set_pathname(entry, "abcABC");
516         /* Check the Unicode version. */
517         archive_entry_set_filetype(entry, AE_IFREG);
518         archive_entry_set_size(entry, 0);
519         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
520         archive_entry_free(entry);
521         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
522
523         /* A bit 11 of general purpose flag should be 0,
524          * which indicates the filename charset is unknown. */
525         assertEqualInt(0, buff[7]);
526         assertEqualMem(buff + 30, "abcABC", 6);
527 }