summary refs log tree commit diff stats
path: root/contrib/libarchive/libarchive/test/test_ustar_filename_encoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/libarchive/libarchive/test/test_ustar_filename_encoding.c')
-rw-r--r-- contrib/libarchive/libarchive/test/test_ustar_filename_encoding.c 414
1 files changed, 414 insertions, 0 deletions
diff --git a/contrib/libarchive/libarchive/test/test_ustar_filename_encoding.c b/contrib/libarchive/libarchive/test/test_ustar_filename_encoding.c
new file mode 100644
index 0000000..17b7e4a
--- /dev/null
+++ b/contrib/libarchive/libarchive/test/test_ustar_filename_encoding.c
@@ -0,0 +1,414 @@
+/*-
+ * Copyright (c) 2011 Michihiro NAKAJIMA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+__FBSDID("$FreeBSD$");
+
+#include <locale.h>
+
+static void
+test_ustar_filename_encoding_UTF8_CP866(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
+ skipping("en_US.UTF-8 locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With a UTF-8 locale active, write a ustar header using the
+ * hdrcharset=CP866 option and verify the name is stored as CP866.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
+ skipping("This system cannot convert character-set"
+ " from UTF-8 to CP866.");
+ archive_write_free(a);
+ return;
+ }
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set a UTF-8 filename: three characters, two bytes each. */
+ archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The start of the archive buffer is expected to hold the name: the
+ * three UTF-8 characters above as these three CP866 bytes. */
+ assertEqualMem(buff, "\xAF\xE0\xA8", 3);
+}
+
+static void
+test_ustar_filename_encoding_KOI8R_UTF8(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
+ skipping("KOI8-R locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With a KOI8-R locale active, write a ustar header using the
+ * hdrcharset=UTF-8 option and verify the name is stored as UTF-8.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+ skipping("This system cannot convert character-set"
+ " from KOI8-R to UTF-8.");
+ archive_write_free(a);
+ return;
+ }
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set a KOI8-R filename: three single-byte characters. */
+ archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The start of the archive buffer is expected to hold the name: the
+ * three KOI8-R characters above as three two-byte UTF-8 sequences. */
+ assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
+}
+
+static void
+test_ustar_filename_encoding_KOI8R_CP866(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
+ skipping("KOI8-R locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With a KOI8-R locale active, write a ustar header using the
+ * hdrcharset=CP866 option and verify the name is stored as CP866.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
+ skipping("This system cannot convert character-set"
+ " from KOI8-R to CP866.");
+ archive_write_free(a);
+ return;
+ }
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set a KOI8-R filename: three single-byte characters. */
+ archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The start of the archive buffer is expected to hold the name: the
+ * three KOI8-R characters above as these three CP866 bytes. */
+ assertEqualMem(buff, "\xAF\xE0\xA8", 3);
+}
+
+static void
+test_ustar_filename_encoding_CP1251_UTF8(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
+ NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
+ skipping("CP1251 locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With a CP1251 locale active, write a ustar header using the
+ * hdrcharset=UTF-8 option and verify the name is stored as UTF-8.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+ skipping("This system cannot convert character-set"
+ " from CP1251 to UTF-8.");
+ archive_write_free(a);
+ return;
+ }
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set a CP1251 filename: three single-byte characters. */
+ archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The start of the archive buffer is expected to hold the name: the
+ * three CP1251 characters above as three two-byte UTF-8 sequences. */
+ assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
+}
+
+/*
+ * On non-Windows platforms, CP1251 must not be translated into CP866.
+ */
+static void
+test_ustar_filename_encoding_ru_RU_CP1251(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
+ skipping("CP1251 locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With no hdrcharset option set, verify that CP1251 filenames are
+ * not translated into any other character-set, in particular CP866.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set a CP1251 filename: three single-byte characters. */
+ archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The start of the archive buffer is expected to hold the same
+ * three CP1251 bytes that were set above, unconverted. */
+ assertEqualMem(buff, "\xEF\xF0\xE8", 3);
+}
+
+/*
+ * Other archiver applications on Windows translate CP1251 filenames
+ * into CP866 filenames before storing them in a ustar file.
+ * Verify that the same translation is applied here.
+ */
+static void
+test_ustar_filename_encoding_Russian_Russia(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
+ skipping("Russian_Russia locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With no hdrcharset option set, verify that Russian_Russia (CP1251)
+ * filenames are translated to CP866.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set a CP1251 filename: three single-byte characters. */
+ archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The start of the archive buffer is expected to hold the name: the
+ * three CP1251 characters above as these three CP866 bytes. */
+ assertEqualMem(buff, "\xAF\xE0\xA8", 3);
+}
+
+static void
+test_ustar_filename_encoding_EUCJP_UTF8(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
+ skipping("eucJP locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With an EUC-JP locale and hdrcharset=UTF-8, verify UTF-8 output.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+ skipping("This system cannot convert character-set"
+ " from eucJP to UTF-8.");
+ archive_write_free(a);
+ return;
+ }
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set an EUC-JP filename. */
+ archive_entry_set_pathname(entry, "\xC9\xBD.txt");
+ /* Describe the entry as an empty regular file. */
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The archive buffer is expected to start with the UTF-8 name. */
+ assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
+}
+
+static void
+test_ustar_filename_encoding_EUCJP_CP932(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
+ skipping("eucJP locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With an EUC-JP locale and hdrcharset=CP932, verify CP932 output.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
+ skipping("This system cannot convert character-set"
+ " from eucJP to CP932.");
+ archive_write_free(a);
+ return;
+ }
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set an EUC-JP filename. */
+ archive_entry_set_pathname(entry, "\xC9\xBD.txt");
+ /* Describe the entry as an empty regular file. */
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The archive buffer is expected to start with the CP932 name. */
+ assertEqualMem(buff, "\x95\x5C.txt", 6);
+}
+
+static void
+test_ustar_filename_encoding_CP932_UTF8(void)
+{
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[4096];
+ size_t used;
+
+ if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
+ NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
+ skipping("CP932/SJIS locale not available on this system.");
+ return;
+ }
+
+ /*
+ * With a CP932/SJIS locale and hdrcharset=UTF-8, verify UTF-8 output.
+ */
+ a = archive_write_new();
+ assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+ if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+ skipping("This system cannot convert character-set"
+ " from CP932/SJIS to UTF-8.");
+ archive_write_free(a);
+ return;
+ }
+ assertEqualInt(ARCHIVE_OK,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ entry = archive_entry_new2(a);
+ /* Set a CP932/SJIS filename. */
+ archive_entry_set_pathname(entry, "\x95\x5C.txt");
+ /* Describe the entry as an empty regular file. */
+ archive_entry_set_filetype(entry, AE_IFREG);
+ archive_entry_set_size(entry, 0);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+ /* The archive buffer is expected to start with the UTF-8 name. */
+ assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
+}
+
+DEFINE_TEST(test_ustar_filename_encoding)
+{ /* Each case sets its own locale and skips itself if it is unavailable. */
+ test_ustar_filename_encoding_UTF8_CP866();
+ test_ustar_filename_encoding_KOI8R_UTF8();
+ test_ustar_filename_encoding_KOI8R_CP866();
+ test_ustar_filename_encoding_CP1251_UTF8();
+ test_ustar_filename_encoding_ru_RU_CP1251();
+ test_ustar_filename_encoding_Russian_Russia();
+ test_ustar_filename_encoding_EUCJP_UTF8();
+ test_ustar_filename_encoding_EUCJP_CP932();
+ test_ustar_filename_encoding_CP932_UTF8();
+}
OpenPOWER on IntegriCloud