2 * Copyright (c) 2011 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 __FBSDID("$FreeBSD$");
30 DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)
33 struct archive_entry *entry;
37 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
38 skipping("en_US.UTF-8 locale not available on this system.");
43 * Verify that UTF-8 filenames are correctly translated into CP866
44 * and stored with hdrcharset=CP866 option.
46 a = archive_write_new();
47 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
48 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
49 skipping("This system cannot convert character-set"
50 " from UTF-8 to CP866.");
51 archive_write_free(a);
54 assertEqualInt(ARCHIVE_OK,
55 archive_write_open_memory(a, buff, sizeof(buff), &used));
57 entry = archive_entry_new2(a);
58 /* Set a UTF-8 filename. */
59 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
60 archive_entry_set_filetype(entry, AE_IFREG);
61 archive_entry_set_size(entry, 0);
62 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
63 archive_entry_free(entry);
64 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
66 /* Above three characters in UTF-8 should translate to the following
67 * three characters in CP866. */
68 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
71 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)
74 struct archive_entry *entry;
78 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
79 skipping("KOI8-R locale not available on this system.");
84 * Verify that KOI8-R filenames are correctly translated into UTF-8
85 * and stored with hdrcharset=UTF-8 option.
87 a = archive_write_new();
88 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
89 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
90 skipping("This system cannot convert character-set"
91 " from KOI8-R to UTF-8.");
92 archive_write_free(a);
95 assertEqualInt(ARCHIVE_OK,
96 archive_write_open_memory(a, buff, sizeof(buff), &used));
98 entry = archive_entry_new2(a);
99 /* Set a KOI8-R filename. */
100 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
101 archive_entry_set_filetype(entry, AE_IFREG);
102 archive_entry_set_size(entry, 0);
103 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
104 archive_entry_free(entry);
105 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
107 /* Above three characters in KOI8-R should translate to the following
108 * three characters (two bytes each) in UTF-8. */
109 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
112 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)
115 struct archive_entry *entry;
119 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
120 skipping("KOI8-R locale not available on this system.");
125 * Verify that KOI8-R filenames are correctly translated into CP866
126 * and stored with hdrcharset=CP866 option.
128 a = archive_write_new();
129 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
130 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
131 skipping("This system cannot convert character-set"
132 " from KOI8-R to CP866.");
133 archive_write_free(a);
136 assertEqualInt(ARCHIVE_OK,
137 archive_write_open_memory(a, buff, sizeof(buff), &used));
139 entry = archive_entry_new2(a);
140 /* Set a KOI8-R filename. */
141 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
142 archive_entry_set_filetype(entry, AE_IFREG);
143 archive_entry_set_size(entry, 0);
144 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
145 archive_entry_free(entry);
146 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
148 /* Above three characters in KOI8-R should translate to the following
149 * three characters in CP866. */
150 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
153 DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)
156 struct archive_entry *entry;
160 if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
161 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
162 skipping("KOI8-R locale not available on this system.");
167 * Verify that CP1251 filenames are correctly translated into UTF-8
168 * and stored with hdrcharset=UTF-8 option.
170 a = archive_write_new();
171 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
172 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
173 skipping("This system cannot convert character-set"
174 " from KOI8-R to UTF-8.");
175 archive_write_free(a);
178 assertEqualInt(ARCHIVE_OK,
179 archive_write_open_memory(a, buff, sizeof(buff), &used));
181 entry = archive_entry_new2(a);
182 /* Set a KOI8-R filename. */
183 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
184 archive_entry_set_filetype(entry, AE_IFREG);
185 archive_entry_set_size(entry, 0);
186 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
187 archive_entry_free(entry);
188 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
190 /* Above three characters in CP1251 should translate to the following
191 * three characters (two bytes each) in UTF-8. */
192 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
196 * Do not translate CP1251 into CP866 if non Windows platform.
198 DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)
201 struct archive_entry *entry;
205 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
206 skipping("KOI8-R locale not available on this system.");
211 * Verify that CP1251 filenames are not translated into any
212 * other character-set, in particular, CP866.
214 a = archive_write_new();
215 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
216 assertEqualInt(ARCHIVE_OK,
217 archive_write_open_memory(a, buff, sizeof(buff), &used));
219 entry = archive_entry_new2(a);
220 /* Set a KOI8-R filename. */
221 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
222 archive_entry_set_filetype(entry, AE_IFREG);
223 archive_entry_set_size(entry, 0);
224 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
225 archive_entry_free(entry);
226 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
228 /* Above three characters in CP1251 should not translate to
229 * any other character-set. */
230 assertEqualMem(buff, "\xEF\xF0\xE8", 3);
234 * Other archiver applications on Windows translate CP1251 filenames
235 * into CP866 filenames and store it in the ustar file.
236 * Test above behavior works well.
238 DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)
241 struct archive_entry *entry;
245 if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
246 skipping("Russian_Russia locale not available on this system.");
251 * Verify that Russian_Russia(CP1251) filenames are correctly translated
254 a = archive_write_new();
255 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
256 assertEqualInt(ARCHIVE_OK,
257 archive_write_open_memory(a, buff, sizeof(buff), &used));
259 entry = archive_entry_new2(a);
260 /* Set a CP1251 filename. */
261 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
262 archive_entry_set_filetype(entry, AE_IFREG);
263 archive_entry_set_size(entry, 0);
264 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
265 archive_entry_free(entry);
266 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
268 /* Above three characters in CP1251 should translate to the following
269 * three characters in CP866. */
270 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
273 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)
276 struct archive_entry *entry;
280 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
281 skipping("eucJP locale not available on this system.");
286 * Verify that EUC-JP filenames are correctly translated to UTF-8.
288 a = archive_write_new();
289 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
290 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
291 skipping("This system cannot convert character-set"
292 " from eucJP to UTF-8.");
293 archive_write_free(a);
296 assertEqualInt(ARCHIVE_OK,
297 archive_write_open_memory(a, buff, sizeof(buff), &used));
299 entry = archive_entry_new2(a);
300 /* Set an EUC-JP filename. */
301 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
302 /* Check the Unicode version. */
303 archive_entry_set_filetype(entry, AE_IFREG);
304 archive_entry_set_size(entry, 0);
305 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
306 archive_entry_free(entry);
307 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
309 /* Check UTF-8 version. */
310 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
313 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)
316 struct archive_entry *entry;
320 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
321 skipping("eucJP locale not available on this system.");
326 * Verify that EUC-JP filenames are correctly translated to CP932.
328 a = archive_write_new();
329 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
330 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
331 skipping("This system cannot convert character-set"
332 " from eucJP to CP932.");
333 archive_write_free(a);
336 assertEqualInt(ARCHIVE_OK,
337 archive_write_open_memory(a, buff, sizeof(buff), &used));
339 entry = archive_entry_new2(a);
340 /* Set an EUC-JP filename. */
341 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
342 /* Check the Unicode version. */
343 archive_entry_set_filetype(entry, AE_IFREG);
344 archive_entry_set_size(entry, 0);
345 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
346 archive_entry_free(entry);
347 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
349 /* Check CP932 version. */
350 assertEqualMem(buff, "\x95\x5C.txt", 6);
353 DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
356 struct archive_entry *entry;
360 if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
361 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
362 skipping("CP932/SJIS locale not available on this system.");
367 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
369 a = archive_write_new();
370 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
371 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
372 skipping("This system cannot convert character-set"
373 " from CP932/SJIS to UTF-8.");
374 archive_write_free(a);
377 assertEqualInt(ARCHIVE_OK,
378 archive_write_open_memory(a, buff, sizeof(buff), &used));
380 entry = archive_entry_new2(a);
381 /* Set a CP932/SJIS filename. */
382 archive_entry_set_pathname(entry, "\x95\x5C.txt");
383 /* Check the Unicode version. */
384 archive_entry_set_filetype(entry, AE_IFREG);
385 archive_entry_set_size(entry, 0);
386 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
387 archive_entry_free(entry);
388 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
390 /* Check UTF-8 version. */
391 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);