]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/libarchive/libarchive/test/test_ustar_filename_encoding.c
MFC r299529,r299540,r299576,r299896:
[FreeBSD/stable/10.git] / contrib / libarchive / libarchive / test / test_ustar_filename_encoding.c
1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27
28 #include <locale.h>
29
30 DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)
31 {
32         struct archive *a;
33         struct archive_entry *entry;
34         char buff[4096];
35         size_t used;
36
37         if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
38                 skipping("en_US.UTF-8 locale not available on this system.");
39                 return;
40         }
41
42         /*
43          * Verify that UTF-8 filenames are correctly translated into CP866
44          * and stored with hdrcharset=CP866 option.
45          */
46         a = archive_write_new();
47         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
48         if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
49                 skipping("This system cannot convert character-set"
50                     " from UTF-8 to CP866.");
51                 archive_write_free(a);
52                 return;
53         }
54         assertEqualInt(ARCHIVE_OK,
55             archive_write_open_memory(a, buff, sizeof(buff), &used));
56
57         entry = archive_entry_new2(a);
58         /* Set a UTF-8 filename. */
59         archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
60         archive_entry_set_filetype(entry, AE_IFREG);
61         archive_entry_set_size(entry, 0);
62         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
63         archive_entry_free(entry);
64         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
65
66         /* Above three characters in UTF-8 should translate to the following
67          * three characters in CP866. */
68         assertEqualMem(buff, "\xAF\xE0\xA8", 3);
69 }
70
71 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)
72 {
73         struct archive *a;
74         struct archive_entry *entry;
75         char buff[4096];
76         size_t used;
77
78         if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
79                 skipping("KOI8-R locale not available on this system.");
80                 return;
81         }
82
83         /*
84          * Verify that KOI8-R filenames are correctly translated into UTF-8
85          * and stored with hdrcharset=UTF-8 option.
86          */
87         a = archive_write_new();
88         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
89         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
90                 skipping("This system cannot convert character-set"
91                     " from KOI8-R to UTF-8.");
92                 archive_write_free(a);
93                 return;
94         }
95         assertEqualInt(ARCHIVE_OK,
96             archive_write_open_memory(a, buff, sizeof(buff), &used));
97
98         entry = archive_entry_new2(a);
99         /* Set a KOI8-R filename. */
100         archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
101         archive_entry_set_filetype(entry, AE_IFREG);
102         archive_entry_set_size(entry, 0);
103         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
104         archive_entry_free(entry);
105         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
106
107         /* Above three characters in KOI8-R should translate to the following
108          * three characters (two bytes each) in UTF-8. */
109         assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
110 }
111
112 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)
113 {
114         struct archive *a;
115         struct archive_entry *entry;
116         char buff[4096];
117         size_t used;
118
119         if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
120                 skipping("KOI8-R locale not available on this system.");
121                 return;
122         }
123
124         /*
125          * Verify that KOI8-R filenames are correctly translated into CP866
126          * and stored with hdrcharset=CP866 option.
127          */
128         a = archive_write_new();
129         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
130         if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
131                 skipping("This system cannot convert character-set"
132                     " from KOI8-R to CP866.");
133                 archive_write_free(a);
134                 return;
135         }
136         assertEqualInt(ARCHIVE_OK,
137             archive_write_open_memory(a, buff, sizeof(buff), &used));
138
139         entry = archive_entry_new2(a);
140         /* Set a KOI8-R filename. */
141         archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
142         archive_entry_set_filetype(entry, AE_IFREG);
143         archive_entry_set_size(entry, 0);
144         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
145         archive_entry_free(entry);
146         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
147
148         /* Above three characters in KOI8-R should translate to the following
149          * three characters in CP866. */
150         assertEqualMem(buff, "\xAF\xE0\xA8", 3);
151 }
152
153 DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)
154 {
155         struct archive *a;
156         struct archive_entry *entry;
157         char buff[4096];
158         size_t used;
159
160         if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
161             NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
162                 skipping("KOI8-R locale not available on this system.");
163                 return;
164         }
165
166         /*
167          * Verify that CP1251 filenames are correctly translated into UTF-8
168          * and stored with hdrcharset=UTF-8 option.
169          */
170         a = archive_write_new();
171         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
172         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
173                 skipping("This system cannot convert character-set"
174                     " from KOI8-R to UTF-8.");
175                 archive_write_free(a);
176                 return;
177         }
178         assertEqualInt(ARCHIVE_OK,
179             archive_write_open_memory(a, buff, sizeof(buff), &used));
180
181         entry = archive_entry_new2(a);
182         /* Set a KOI8-R filename. */
183         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
184         archive_entry_set_filetype(entry, AE_IFREG);
185         archive_entry_set_size(entry, 0);
186         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
187         archive_entry_free(entry);
188         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
189
190         /* Above three characters in CP1251 should translate to the following
191          * three characters (two bytes each) in UTF-8. */
192         assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
193 }
194
195 /*
196  * Do not translate CP1251 into CP866 if non Windows platform.
197  */
198 DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)
199 {
200         struct archive *a;
201         struct archive_entry *entry;
202         char buff[4096];
203         size_t used;
204
205         if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
206                 skipping("KOI8-R locale not available on this system.");
207                 return;
208         }
209
210         /*
211          * Verify that CP1251 filenames are not translated into any
212          * other character-set, in particular, CP866.
213          */
214         a = archive_write_new();
215         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
216         assertEqualInt(ARCHIVE_OK,
217             archive_write_open_memory(a, buff, sizeof(buff), &used));
218
219         entry = archive_entry_new2(a);
220         /* Set a KOI8-R filename. */
221         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
222         archive_entry_set_filetype(entry, AE_IFREG);
223         archive_entry_set_size(entry, 0);
224         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
225         archive_entry_free(entry);
226         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
227
228         /* Above three characters in CP1251 should not translate to
229          * any other character-set. */
230         assertEqualMem(buff, "\xEF\xF0\xE8", 3);
231 }
232
233 /*
234  * Other archiver applications on Windows translate CP1251 filenames
235  * into CP866 filenames and store it in the ustar file.
236  * Test above behavior works well.
237  */
238 DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)
239 {
240         struct archive *a;
241         struct archive_entry *entry;
242         char buff[4096];
243         size_t used;
244
245         if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
246                 skipping("Russian_Russia locale not available on this system.");
247                 return;
248         }
249
250         /*
251          * Verify that Russian_Russia(CP1251) filenames are correctly translated
252          * to CP866.
253          */
254         a = archive_write_new();
255         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
256         assertEqualInt(ARCHIVE_OK,
257             archive_write_open_memory(a, buff, sizeof(buff), &used));
258
259         entry = archive_entry_new2(a);
260         /* Set a CP1251 filename. */
261         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
262         archive_entry_set_filetype(entry, AE_IFREG);
263         archive_entry_set_size(entry, 0);
264         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
265         archive_entry_free(entry);
266         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
267
268         /* Above three characters in CP1251 should translate to the following
269          * three characters in CP866. */
270         assertEqualMem(buff, "\xAF\xE0\xA8", 3);
271 }
272
273 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)
274 {
275         struct archive *a;
276         struct archive_entry *entry;
277         char buff[4096];
278         size_t used;
279
280         if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
281                 skipping("eucJP locale not available on this system.");
282                 return;
283         }
284
285         /*
286          * Verify that EUC-JP filenames are correctly translated to UTF-8.
287          */
288         a = archive_write_new();
289         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
290         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
291                 skipping("This system cannot convert character-set"
292                     " from eucJP to UTF-8.");
293                 archive_write_free(a);
294                 return;
295         }
296         assertEqualInt(ARCHIVE_OK,
297             archive_write_open_memory(a, buff, sizeof(buff), &used));
298
299         entry = archive_entry_new2(a);
300         /* Set an EUC-JP filename. */
301         archive_entry_set_pathname(entry, "\xC9\xBD.txt");
302         /* Check the Unicode version. */
303         archive_entry_set_filetype(entry, AE_IFREG);
304         archive_entry_set_size(entry, 0);
305         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
306         archive_entry_free(entry);
307         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
308
309         /* Check UTF-8 version. */
310         assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
311 }
312
313 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)
314 {
315         struct archive *a;
316         struct archive_entry *entry;
317         char buff[4096];
318         size_t used;
319
320         if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
321                 skipping("eucJP locale not available on this system.");
322                 return;
323         }
324
325         /*
326          * Verify that EUC-JP filenames are correctly translated to CP932.
327          */
328         a = archive_write_new();
329         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
330         if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
331                 skipping("This system cannot convert character-set"
332                     " from eucJP to CP932.");
333                 archive_write_free(a);
334                 return;
335         }
336         assertEqualInt(ARCHIVE_OK,
337             archive_write_open_memory(a, buff, sizeof(buff), &used));
338
339         entry = archive_entry_new2(a);
340         /* Set an EUC-JP filename. */
341         archive_entry_set_pathname(entry, "\xC9\xBD.txt");
342         /* Check the Unicode version. */
343         archive_entry_set_filetype(entry, AE_IFREG);
344         archive_entry_set_size(entry, 0);
345         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
346         archive_entry_free(entry);
347         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
348
349         /* Check CP932 version. */
350         assertEqualMem(buff, "\x95\x5C.txt", 6);
351 }
352
353 DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
354 {
355         struct archive *a;
356         struct archive_entry *entry;
357         char buff[4096];
358         size_t used;
359
360         if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
361             NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
362                 skipping("CP932/SJIS locale not available on this system.");
363                 return;
364         }
365
366         /*
367          * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
368          */
369         a = archive_write_new();
370         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
371         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
372                 skipping("This system cannot convert character-set"
373                     " from CP932/SJIS to UTF-8.");
374                 archive_write_free(a);
375                 return;
376         }
377         assertEqualInt(ARCHIVE_OK,
378             archive_write_open_memory(a, buff, sizeof(buff), &used));
379
380         entry = archive_entry_new2(a);
381         /* Set a CP932/SJIS filename. */
382         archive_entry_set_pathname(entry, "\x95\x5C.txt");
383         /* Check the Unicode version. */
384         archive_entry_set_filetype(entry, AE_IFREG);
385         archive_entry_set_size(entry, 0);
386         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
387         archive_entry_free(entry);
388         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
389
390         /* Check UTF-8 version. */
391         assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
392 }
393