]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - contrib/libarchive/libarchive/test/test_ustar_filename_encoding.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / contrib / libarchive / libarchive / test / test_ustar_filename_encoding.c
1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27
28 #include <locale.h>
29
30 static void
31 test_ustar_filename_encoding_UTF8_CP866(void)
32 {
33         struct archive *a;
34         struct archive_entry *entry;
35         char buff[4096];
36         size_t used;
37
38         if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
39                 skipping("en_US.UTF-8 locale not available on this system.");
40                 return;
41         }
42
43         /*
44          * Verify that UTF-8 filenames are correctly translated into CP866
45          * and stored with hdrcharset=CP866 option.
46          */
47         a = archive_write_new();
48         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
49         if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
50                 skipping("This system cannot convert character-set"
51                     " from UTF-8 to CP866.");
52                 archive_write_free(a);
53                 return;
54         }
55         assertEqualInt(ARCHIVE_OK,
56             archive_write_open_memory(a, buff, sizeof(buff), &used));
57
58         entry = archive_entry_new2(a);
59         /* Set a UTF-8 filename. */
60         archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
61         archive_entry_set_filetype(entry, AE_IFREG);
62         archive_entry_set_size(entry, 0);
63         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
64         archive_entry_free(entry);
65         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
66
67         /* Above three characters in UTF-8 should translate to the following
68          * three characters in CP866. */
69         assertEqualMem(buff, "\xAF\xE0\xA8", 3);
70 }
71
72 static void
73 test_ustar_filename_encoding_KOI8R_UTF8(void)
74 {
75         struct archive *a;
76         struct archive_entry *entry;
77         char buff[4096];
78         size_t used;
79
80         if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
81                 skipping("KOI8-R locale not available on this system.");
82                 return;
83         }
84
85         /*
86          * Verify that KOI8-R filenames are correctly translated into UTF-8
87          * and stored with hdrcharset=UTF-8 option.
88          */
89         a = archive_write_new();
90         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
91         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
92                 skipping("This system cannot convert character-set"
93                     " from KOI8-R to UTF-8.");
94                 archive_write_free(a);
95                 return;
96         }
97         assertEqualInt(ARCHIVE_OK,
98             archive_write_open_memory(a, buff, sizeof(buff), &used));
99
100         entry = archive_entry_new2(a);
101         /* Set a KOI8-R filename. */
102         archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
103         archive_entry_set_filetype(entry, AE_IFREG);
104         archive_entry_set_size(entry, 0);
105         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
106         archive_entry_free(entry);
107         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
108
109         /* Above three characters in KOI8-R should translate to the following
110          * three characters (two bytes each) in UTF-8. */
111         assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
112 }
113
114 static void
115 test_ustar_filename_encoding_KOI8R_CP866(void)
116 {
117         struct archive *a;
118         struct archive_entry *entry;
119         char buff[4096];
120         size_t used;
121
122         if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
123                 skipping("KOI8-R locale not available on this system.");
124                 return;
125         }
126
127         /*
128          * Verify that KOI8-R filenames are correctly translated into CP866
129          * and stored with hdrcharset=CP866 option.
130          */
131         a = archive_write_new();
132         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
133         if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
134                 skipping("This system cannot convert character-set"
135                     " from KOI8-R to CP866.");
136                 archive_write_free(a);
137                 return;
138         }
139         assertEqualInt(ARCHIVE_OK,
140             archive_write_open_memory(a, buff, sizeof(buff), &used));
141
142         entry = archive_entry_new2(a);
143         /* Set a KOI8-R filename. */
144         archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
145         archive_entry_set_filetype(entry, AE_IFREG);
146         archive_entry_set_size(entry, 0);
147         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
148         archive_entry_free(entry);
149         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
150
151         /* Above three characters in KOI8-R should translate to the following
152          * three characters in CP866. */
153         assertEqualMem(buff, "\xAF\xE0\xA8", 3);
154 }
155
156 static void
157 test_ustar_filename_encoding_CP1251_UTF8(void)
158 {
159         struct archive *a;
160         struct archive_entry *entry;
161         char buff[4096];
162         size_t used;
163
164         if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
165             NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
166                 skipping("KOI8-R locale not available on this system.");
167                 return;
168         }
169
170         /*
171          * Verify that CP1251 filenames are correctly translated into UTF-8
172          * and stored with hdrcharset=UTF-8 option.
173          */
174         a = archive_write_new();
175         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
176         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
177                 skipping("This system cannot convert character-set"
178                     " from KOI8-R to UTF-8.");
179                 archive_write_free(a);
180                 return;
181         }
182         assertEqualInt(ARCHIVE_OK,
183             archive_write_open_memory(a, buff, sizeof(buff), &used));
184
185         entry = archive_entry_new2(a);
186         /* Set a KOI8-R filename. */
187         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
188         archive_entry_set_filetype(entry, AE_IFREG);
189         archive_entry_set_size(entry, 0);
190         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
191         archive_entry_free(entry);
192         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
193
194         /* Above three characters in CP1251 should translate to the following
195          * three characters (two bytes each) in UTF-8. */
196         assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
197 }
198
199 /*
200  * Do not translate CP1251 into CP866 if non Windows platform.
201  */
202 static void
203 test_ustar_filename_encoding_ru_RU_CP1251(void)
204 {
205         struct archive *a;
206         struct archive_entry *entry;
207         char buff[4096];
208         size_t used;
209
210         if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
211                 skipping("KOI8-R locale not available on this system.");
212                 return;
213         }
214
215         /*
216          * Verify that CP1251 filenames are not translated into any
217          * other character-set, in particular, CP866.
218          */
219         a = archive_write_new();
220         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
221         assertEqualInt(ARCHIVE_OK,
222             archive_write_open_memory(a, buff, sizeof(buff), &used));
223
224         entry = archive_entry_new2(a);
225         /* Set a KOI8-R filename. */
226         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
227         archive_entry_set_filetype(entry, AE_IFREG);
228         archive_entry_set_size(entry, 0);
229         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
230         archive_entry_free(entry);
231         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
232
233         /* Above three characters in CP1251 should not translate to
234          * any other character-set. */
235         assertEqualMem(buff, "\xEF\xF0\xE8", 3);
236 }
237
238 /*
239  * Other archiver applications on Windows translate CP1251 filenames
240  * into CP866 filenames and store it in the ustar file.
241  * Test above behavior works well.
242  */
243 static void
244 test_ustar_filename_encoding_Russian_Russia(void)
245 {
246         struct archive *a;
247         struct archive_entry *entry;
248         char buff[4096];
249         size_t used;
250
251         if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
252                 skipping("Russian_Russia locale not available on this system.");
253                 return;
254         }
255
256         /*
257          * Verify that Russian_Russia(CP1251) filenames are correctly translated
258          * to CP866.
259          */
260         a = archive_write_new();
261         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
262         assertEqualInt(ARCHIVE_OK,
263             archive_write_open_memory(a, buff, sizeof(buff), &used));
264
265         entry = archive_entry_new2(a);
266         /* Set a CP1251 filename. */
267         archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
268         archive_entry_set_filetype(entry, AE_IFREG);
269         archive_entry_set_size(entry, 0);
270         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
271         archive_entry_free(entry);
272         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
273
274         /* Above three characters in CP1251 should translate to the following
275          * three characters in CP866. */
276         assertEqualMem(buff, "\xAF\xE0\xA8", 3);
277 }
278
279 static void
280 test_ustar_filename_encoding_EUCJP_UTF8(void)
281 {
282         struct archive *a;
283         struct archive_entry *entry;
284         char buff[4096];
285         size_t used;
286
287         if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
288                 skipping("eucJP locale not available on this system.");
289                 return;
290         }
291
292         /*
293          * Verify that EUC-JP filenames are correctly translated to UTF-8.
294          */
295         a = archive_write_new();
296         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
297         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
298                 skipping("This system cannot convert character-set"
299                     " from eucJP to UTF-8.");
300                 archive_write_free(a);
301                 return;
302         }
303         assertEqualInt(ARCHIVE_OK,
304             archive_write_open_memory(a, buff, sizeof(buff), &used));
305
306         entry = archive_entry_new2(a);
307         /* Set an EUC-JP filename. */
308         archive_entry_set_pathname(entry, "\xC9\xBD.txt");
309         /* Check the Unicode version. */
310         archive_entry_set_filetype(entry, AE_IFREG);
311         archive_entry_set_size(entry, 0);
312         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
313         archive_entry_free(entry);
314         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
315
316         /* Check UTF-8 version. */
317         assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
318 }
319
320 static void
321 test_ustar_filename_encoding_EUCJP_CP932(void)
322 {
323         struct archive *a;
324         struct archive_entry *entry;
325         char buff[4096];
326         size_t used;
327
328         if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
329                 skipping("eucJP locale not available on this system.");
330                 return;
331         }
332
333         /*
334          * Verify that EUC-JP filenames are correctly translated to CP932.
335          */
336         a = archive_write_new();
337         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
338         if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
339                 skipping("This system cannot convert character-set"
340                     " from eucJP to CP932.");
341                 archive_write_free(a);
342                 return;
343         }
344         assertEqualInt(ARCHIVE_OK,
345             archive_write_open_memory(a, buff, sizeof(buff), &used));
346
347         entry = archive_entry_new2(a);
348         /* Set an EUC-JP filename. */
349         archive_entry_set_pathname(entry, "\xC9\xBD.txt");
350         /* Check the Unicode version. */
351         archive_entry_set_filetype(entry, AE_IFREG);
352         archive_entry_set_size(entry, 0);
353         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
354         archive_entry_free(entry);
355         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
356
357         /* Check CP932 version. */
358         assertEqualMem(buff, "\x95\x5C.txt", 6);
359 }
360
361 static void
362 test_ustar_filename_encoding_CP932_UTF8(void)
363 {
364         struct archive *a;
365         struct archive_entry *entry;
366         char buff[4096];
367         size_t used;
368
369         if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
370             NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
371                 skipping("CP932/SJIS locale not available on this system.");
372                 return;
373         }
374
375         /*
376          * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
377          */
378         a = archive_write_new();
379         assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
380         if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
381                 skipping("This system cannot convert character-set"
382                     " from CP932/SJIS to UTF-8.");
383                 archive_write_free(a);
384                 return;
385         }
386         assertEqualInt(ARCHIVE_OK,
387             archive_write_open_memory(a, buff, sizeof(buff), &used));
388
389         entry = archive_entry_new2(a);
390         /* Set a CP932/SJIS filename. */
391         archive_entry_set_pathname(entry, "\x95\x5C.txt");
392         /* Check the Unicode version. */
393         archive_entry_set_filetype(entry, AE_IFREG);
394         archive_entry_set_size(entry, 0);
395         assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
396         archive_entry_free(entry);
397         assertEqualInt(ARCHIVE_OK, archive_write_free(a));
398
399         /* Check UTF-8 version. */
400         assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
401 }
402
403 DEFINE_TEST(test_ustar_filename_encoding)
404 {
405         test_ustar_filename_encoding_UTF8_CP866();
406         test_ustar_filename_encoding_KOI8R_UTF8();
407         test_ustar_filename_encoding_KOI8R_CP866();
408         test_ustar_filename_encoding_CP1251_UTF8();
409         test_ustar_filename_encoding_ru_RU_CP1251();
410         test_ustar_filename_encoding_Russian_Russia();
411         test_ustar_filename_encoding_EUCJP_UTF8();
412         test_ustar_filename_encoding_EUCJP_CP932();
413         test_ustar_filename_encoding_CP932_UTF8();
414 }