2 * Copyright (c) 2011 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 __FBSDID("$FreeBSD$");
31 test_ustar_filename_encoding_UTF8_CP866(void)
34 struct archive_entry *entry;
38 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
39 skipping("en_US.UTF-8 locale not available on this system.");
44 * Verify that UTF-8 filenames are correctly translated into CP866
45 * and stored with hdrcharset=CP866 option.
47 a = archive_write_new();
48 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
49 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
50 skipping("This system cannot convert character-set"
51 " from UTF-8 to CP866.");
52 archive_write_free(a);
55 assertEqualInt(ARCHIVE_OK,
56 archive_write_open_memory(a, buff, sizeof(buff), &used));
58 entry = archive_entry_new2(a);
59 /* Set a UTF-8 filename. */
60 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
61 archive_entry_set_filetype(entry, AE_IFREG);
62 archive_entry_set_size(entry, 0);
63 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
64 archive_entry_free(entry);
65 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
67 /* Above three characters in UTF-8 should translate to the following
68 * three characters in CP866. */
69 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
73 test_ustar_filename_encoding_KOI8R_UTF8(void)
76 struct archive_entry *entry;
80 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
81 skipping("KOI8-R locale not available on this system.");
86 * Verify that KOI8-R filenames are correctly translated into UTF-8
87 * and stored with hdrcharset=UTF-8 option.
89 a = archive_write_new();
90 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
91 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
92 skipping("This system cannot convert character-set"
93 " from KOI8-R to UTF-8.");
94 archive_write_free(a);
97 assertEqualInt(ARCHIVE_OK,
98 archive_write_open_memory(a, buff, sizeof(buff), &used));
100 entry = archive_entry_new2(a);
101 /* Set a KOI8-R filename. */
102 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
103 archive_entry_set_filetype(entry, AE_IFREG);
104 archive_entry_set_size(entry, 0);
105 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
106 archive_entry_free(entry);
107 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
109 /* Above three characters in KOI8-R should translate to the following
110 * three characters (two bytes each) in UTF-8. */
111 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
115 test_ustar_filename_encoding_KOI8R_CP866(void)
118 struct archive_entry *entry;
122 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
123 skipping("KOI8-R locale not available on this system.");
128 * Verify that KOI8-R filenames are correctly translated into CP866
129 * and stored with hdrcharset=CP866 option.
131 a = archive_write_new();
132 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
133 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
134 skipping("This system cannot convert character-set"
135 " from KOI8-R to CP866.");
136 archive_write_free(a);
139 assertEqualInt(ARCHIVE_OK,
140 archive_write_open_memory(a, buff, sizeof(buff), &used));
142 entry = archive_entry_new2(a);
143 /* Set a KOI8-R filename. */
144 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
145 archive_entry_set_filetype(entry, AE_IFREG);
146 archive_entry_set_size(entry, 0);
147 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
148 archive_entry_free(entry);
149 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
151 /* Above three characters in KOI8-R should translate to the following
152 * three characters in CP866. */
153 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
157 test_ustar_filename_encoding_CP1251_UTF8(void)
160 struct archive_entry *entry;
164 if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
165 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
166 skipping("KOI8-R locale not available on this system.");
171 * Verify that CP1251 filenames are correctly translated into UTF-8
172 * and stored with hdrcharset=UTF-8 option.
174 a = archive_write_new();
175 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
176 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
177 skipping("This system cannot convert character-set"
178 " from KOI8-R to UTF-8.");
179 archive_write_free(a);
182 assertEqualInt(ARCHIVE_OK,
183 archive_write_open_memory(a, buff, sizeof(buff), &used));
185 entry = archive_entry_new2(a);
186 /* Set a KOI8-R filename. */
187 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
188 archive_entry_set_filetype(entry, AE_IFREG);
189 archive_entry_set_size(entry, 0);
190 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
191 archive_entry_free(entry);
192 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
194 /* Above three characters in CP1251 should translate to the following
195 * three characters (two bytes each) in UTF-8. */
196 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
200 * Do not translate CP1251 into CP866 if non Windows platform.
203 test_ustar_filename_encoding_ru_RU_CP1251(void)
206 struct archive_entry *entry;
210 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
211 skipping("KOI8-R locale not available on this system.");
216 * Verify that CP1251 filenames are not translated into any
217 * other character-set, in particular, CP866.
219 a = archive_write_new();
220 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
221 assertEqualInt(ARCHIVE_OK,
222 archive_write_open_memory(a, buff, sizeof(buff), &used));
224 entry = archive_entry_new2(a);
225 /* Set a KOI8-R filename. */
226 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
227 archive_entry_set_filetype(entry, AE_IFREG);
228 archive_entry_set_size(entry, 0);
229 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
230 archive_entry_free(entry);
231 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
233 /* Above three characters in CP1251 should not translate to
234 * any other character-set. */
235 assertEqualMem(buff, "\xEF\xF0\xE8", 3);
239 * Other archiver applications on Windows translate CP1251 filenames
240 * into CP866 filenames and store it in the ustar file.
241 * Test above behavior works well.
244 test_ustar_filename_encoding_Russian_Russia(void)
247 struct archive_entry *entry;
251 if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
252 skipping("Russian_Russia locale not available on this system.");
257 * Verify that Russian_Russia(CP1251) filenames are correctly translated
260 a = archive_write_new();
261 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
262 assertEqualInt(ARCHIVE_OK,
263 archive_write_open_memory(a, buff, sizeof(buff), &used));
265 entry = archive_entry_new2(a);
266 /* Set a CP1251 filename. */
267 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
268 archive_entry_set_filetype(entry, AE_IFREG);
269 archive_entry_set_size(entry, 0);
270 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
271 archive_entry_free(entry);
272 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
274 /* Above three characters in CP1251 should translate to the following
275 * three characters in CP866. */
276 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
280 test_ustar_filename_encoding_EUCJP_UTF8(void)
283 struct archive_entry *entry;
287 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
288 skipping("eucJP locale not available on this system.");
293 * Verify that EUC-JP filenames are correctly translated to UTF-8.
295 a = archive_write_new();
296 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
297 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
298 skipping("This system cannot convert character-set"
299 " from eucJP to UTF-8.");
300 archive_write_free(a);
303 assertEqualInt(ARCHIVE_OK,
304 archive_write_open_memory(a, buff, sizeof(buff), &used));
306 entry = archive_entry_new2(a);
307 /* Set an EUC-JP filename. */
308 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
309 /* Check the Unicode version. */
310 archive_entry_set_filetype(entry, AE_IFREG);
311 archive_entry_set_size(entry, 0);
312 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
313 archive_entry_free(entry);
314 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
316 /* Check UTF-8 version. */
317 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
321 test_ustar_filename_encoding_EUCJP_CP932(void)
324 struct archive_entry *entry;
328 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
329 skipping("eucJP locale not available on this system.");
334 * Verify that EUC-JP filenames are correctly translated to CP932.
336 a = archive_write_new();
337 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
338 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
339 skipping("This system cannot convert character-set"
340 " from eucJP to CP932.");
341 archive_write_free(a);
344 assertEqualInt(ARCHIVE_OK,
345 archive_write_open_memory(a, buff, sizeof(buff), &used));
347 entry = archive_entry_new2(a);
348 /* Set an EUC-JP filename. */
349 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
350 /* Check the Unicode version. */
351 archive_entry_set_filetype(entry, AE_IFREG);
352 archive_entry_set_size(entry, 0);
353 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
354 archive_entry_free(entry);
355 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
357 /* Check CP932 version. */
358 assertEqualMem(buff, "\x95\x5C.txt", 6);
362 test_ustar_filename_encoding_CP932_UTF8(void)
365 struct archive_entry *entry;
369 if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
370 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
371 skipping("CP932/SJIS locale not available on this system.");
376 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
378 a = archive_write_new();
379 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
380 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
381 skipping("This system cannot convert character-set"
382 " from CP932/SJIS to UTF-8.");
383 archive_write_free(a);
386 assertEqualInt(ARCHIVE_OK,
387 archive_write_open_memory(a, buff, sizeof(buff), &used));
389 entry = archive_entry_new2(a);
390 /* Set a CP932/SJIS filename. */
391 archive_entry_set_pathname(entry, "\x95\x5C.txt");
392 /* Check the Unicode version. */
393 archive_entry_set_filetype(entry, AE_IFREG);
394 archive_entry_set_size(entry, 0);
395 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
396 archive_entry_free(entry);
397 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
399 /* Check UTF-8 version. */
400 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
/*
 * Test entry point: runs every ustar filename-encoding case in sequence.
 * Each case selects its own locale via setlocale() before doing any work.
 */
DEFINE_TEST(test_ustar_filename_encoding)
{
	test_ustar_filename_encoding_UTF8_CP866();
	test_ustar_filename_encoding_KOI8R_UTF8();
	test_ustar_filename_encoding_KOI8R_CP866();
	test_ustar_filename_encoding_CP1251_UTF8();
	test_ustar_filename_encoding_ru_RU_CP1251();
	test_ustar_filename_encoding_Russian_Russia();
	test_ustar_filename_encoding_EUCJP_UTF8();
	test_ustar_filename_encoding_EUCJP_CP932();
	test_ustar_filename_encoding_CP932_UTF8();
}