/*- * Copyright (c) 2011 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "test.h" __FBSDID("$FreeBSD$"); #include DEFINE_TEST(test_zip_filename_encoding_UTF8) { struct archive *a; struct archive_entry *entry; char buff[4096]; size_t used; if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { skipping("en_US.UTF-8 locale not available on this system."); return; } /* * Verify that UTF-8 filenames are correctly stored with * hdrcharset=UTF-8 option. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { skipping("This system cannot convert character-set" " for UTF-8."); archive_write_free(a); return; } assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a UTF-8 filename. */ archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0x08, * which indicates the filename charset is UTF-8. */ assertEqualInt(0x08, buff[7]); assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); /* * Verify that UTF-8 filenames are correctly stored without * hdrcharset=UTF-8 option. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a UTF-8 filename. */ archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0x08, * which indicates the filename charset is UTF-8. */ assertEqualInt(0x08, buff[7]); assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); /* * Verify that A bit 11 of general purpose flag is not set * when ASCII filenames are stored. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set an ASCII filename. */ archive_entry_set_pathname(entry, "abcABC"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); assertEqualMem(buff + 30, "abcABC", 6); } DEFINE_TEST(test_zip_filename_encoding_KOI8R) { struct archive *a; struct archive_entry *entry; char buff[4096]; size_t used; if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { skipping("KOI8-R locale not available on this system."); return; } /* * Verify that KOI8-R filenames are correctly translated to UTF-8. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { skipping("This system cannot convert character-set" " from KOI8-R to UTF-8."); archive_write_free(a); return; } assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a KOI8-R filename. */ archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0x08, * which indicates the filename charset is UTF-8. */ assertEqualInt(0x08, buff[7]); /* Above three characters in KOI8-R should translate to the following * three characters (two bytes each) in UTF-8. */ assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); /* * Verify that KOI8-R filenames are not translated to UTF-8. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a KOI8-R filename. */ archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); /* Above three characters in KOI8-R should not translate to * any character-set. */ assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3); /* * Verify that A bit 11 of general purpose flag is not set * when ASCII filenames are stored even if hdrcharset=UTF-8 * is specified. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { skipping("This system cannot convert character-set" " from KOI8-R to UTF-8."); archive_write_free(a); return; } assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set an ASCII filename. */ archive_entry_set_pathname(entry, "abcABC"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); assertEqualMem(buff + 30, "abcABC", 6); } /* * Do not translate CP1251 into CP866 if non Windows platform. */ DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251) { struct archive *a; struct archive_entry *entry; char buff[4096]; size_t used; if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { skipping("Russian_Russia locale not available on this system."); return; } /* * Verify that CP1251 filenames are not translated into any * other character-set, in particular, CP866. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a CP1251 filename. */ archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); /* Above three characters in CP1251 should not translate into * any other character-set. */ assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3); } /* * Other archiver applications on Windows translate CP1251 filenames * into CP866 filenames and store it in the zip file. * Test above behavior works well. */ DEFINE_TEST(test_zip_filename_encoding_Russian_Russia) { struct archive *a; struct archive_entry *entry; char buff[4096]; size_t used; if (NULL == setlocale(LC_ALL, "Russian_Russia")) { skipping("Russian_Russia locale not available on this system."); return; } /* * Verify that Russian_Russia(CP1251) filenames are correctly translated * to UTF-8. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { skipping("This system cannot convert character-set" " from Russian_Russia.CP1251 to UTF-8."); archive_write_free(a); return; } assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a CP1251 filename. */ archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0x08, * which indicates the filename charset is UTF-8. */ assertEqualInt(0x08, buff[7]); /* Above three characters in CP1251 should translate to the following * three characters (two bytes each) in UTF-8. */ assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); /* * Verify that Russian_Russia(CP1251) filenames are correctly translated * to CP866. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a CP1251 filename. */ archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); /* Above three characters in CP1251 should translate to the following * three characters in CP866. */ assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3); } DEFINE_TEST(test_zip_filename_encoding_EUCJP) { struct archive *a; struct archive_entry *entry; char buff[4096]; size_t used; if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { skipping("eucJP locale not available on this system."); return; } /* * Verify that EUC-JP filenames are correctly translated to UTF-8. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { skipping("This system cannot convert character-set" " from eucJP to UTF-8."); archive_write_free(a); return; } assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set an EUC-JP filename. */ archive_entry_set_pathname(entry, "\xC9\xBD.txt"); /* Check the Unicode version. */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0x08, * which indicates the filename charset is UTF-8. */ assertEqualInt(0x08, buff[7]); /* Check UTF-8 version. */ assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7); /* * Verify that EUC-JP filenames are not translated to UTF-8. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set an EUC-JP filename. */ archive_entry_set_pathname(entry, "\xC9\xBD.txt"); /* Check the Unicode version. */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); /* Above three characters in EUC-JP should not translate to * any character-set. */ assertEqualMem(buff + 30, "\xC9\xBD.txt", 6); /* * Verify that A bit 11 of general purpose flag is not set * when ASCII filenames are stored even if hdrcharset=UTF-8 * is specified. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { skipping("This system cannot convert character-set" " from eucJP to UTF-8."); archive_write_free(a); return; } assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set an ASCII filename. */ archive_entry_set_pathname(entry, "abcABC"); /* Check the Unicode version. */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); assertEqualMem(buff + 30, "abcABC", 6); } DEFINE_TEST(test_zip_filename_encoding_CP932) { struct archive *a; struct archive_entry *entry; char buff[4096]; size_t used; if (NULL == setlocale(LC_ALL, "Japanese_Japan") && NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { skipping("CP932/SJIS locale not available on this system."); return; } /* * Verify that EUC-JP filenames are correctly translated to UTF-8. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { skipping("This system cannot convert character-set" " from CP932/SJIS to UTF-8."); archive_write_free(a); return; } assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a CP932/SJIS filename. */ archive_entry_set_pathname(entry, "\x95\x5C.txt"); /* Check the Unicode version. */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0x08, * which indicates the filename charset is UTF-8. */ assertEqualInt(0x08, buff[7]); /* Check UTF-8 version. */ assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7); /* * Verify that CP932/SJIS filenames are not translated to UTF-8. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set a CP932/SJIS filename. */ archive_entry_set_pathname(entry, "\x95\x5C.txt"); /* Check the Unicode version. */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); /* Above three characters in CP932/SJIS should not translate to * any character-set. */ assertEqualMem(buff + 30, "\x95\x5C.txt", 6); /* * Verify that A bit 11 of general purpose flag is not set * when ASCII filenames are stored even if hdrcharset=UTF-8 * is specified. */ a = archive_write_new(); assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { skipping("This system cannot convert character-set" " from CP932/SJIS to UTF-8."); archive_write_free(a); return; } assertEqualInt(ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); entry = archive_entry_new2(a); /* Set an ASCII filename. */ archive_entry_set_pathname(entry, "abcABC"); /* Check the Unicode version. */ archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_size(entry, 0); assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); archive_entry_free(entry); assertEqualInt(ARCHIVE_OK, archive_write_free(a)); /* A bit 11 of general purpose flag should be 0, * which indicates the filename charset is unknown. */ assertEqualInt(0, buff[7]); assertEqualMem(buff + 30, "abcABC", 6); }