diff options
author | bapt <bapt@FreeBSD.org> | 2015-11-01 12:00:55 +0000 |
---|---|---|
committer | bapt <bapt@FreeBSD.org> | 2015-11-01 12:00:55 +0000 |
commit | 8abd49c1b547efe6b06146384ace2ea07f277e16 (patch) | |
tree | 69bb3318565f63e2df861b74b54bd33f51dbbe67 /lib/libc | |
parent | ffec40a708e5930cc1fa0297e69724f64101efa7 (diff) | |
download | FreeBSD-src-8abd49c1b547efe6b06146384ace2ea07f277e16.zip FreeBSD-src-8abd49c1b547efe6b06146384ace2ea07f277e16.tar.gz |
libc: Fix (and improve) nl_langinfo (CODESET)
The output of "locale charmap" is identical to the result of
nl_langinfo (CODESET) for any given locale. The logic for returning the
codeset was very simplistic. It just returned the portion of the locale name
after the period (e.g. fr_FR.ISO8859-1 returned "ISO8859-1").
When softlinks were added to locales, this broke. e.g.:
en_US returned ""
fr_FR.UTF8 returned "UTF8"
fr_FR.UTF-8 returned "UTF-8"
zh_Hant_HK.Big5HKSCS returned "Big5HKSCS"
zh_Hant_TW.Big5 returned "Big5"
es_ES@euro returned ""
In order to fix this properly, the named locale cannot be used to
determine the encoding. This information was almost available in the
rune data. Unfortunately, all the single byte encodings were listed
as "NONE" encoding.
So I adjusted localedef tool to provide more information about the
encoding. For example, instead of "NONE", the LC_CTYPE used by
fr_FR.ISO8859-15 is now encoded as "NONE:ISO8859-15". The locale
handlers now check whether the first four characters of the encoding are
"NONE" and, if so, treat it as a single-byte encoding.
The nl_langinfo handling of CODESET was adjusted accordingly. Now the
following is returned:
en_US returns "ISO8859-1"
fr_FR.UTF8 returns "UTF-8"
fr_FR.UTF-8 returns "UTF-8"
zh_Hant_HK.Big5HKSCS returns "Big5"
zh_Hant_TW.Big5 returns "Big5"
es_ES@euro returns "ISO8859-15"
As before, "C" and "POSIX" locales return "US-ASCII". This is a big
improvement. The result of nl_langinfo can never be a zero-length
string and it will always be exclusively one of the values of the
character maps of /usr/src/tools/tools/locale/etc/final-maps.
Submitted by: marino
Obtained from: DragonflyBSD
Diffstat (limited to 'lib/libc')
-rw-r--r-- | lib/libc/locale/nl_langinfo.c | 30 | ||||
-rw-r--r-- | lib/libc/locale/setrunelocale.c | 2 |
2 files changed, 23 insertions, 9 deletions
diff --git a/lib/libc/locale/nl_langinfo.c b/lib/libc/locale/nl_langinfo.c index 3e8fe7c..e3b370a 100644 --- a/lib/libc/locale/nl_langinfo.c +++ b/lib/libc/locale/nl_langinfo.c @@ -37,7 +37,10 @@ __FBSDID("$FreeBSD$"); #include <locale.h> #include <stdlib.h> #include <string.h> +#include <runetype.h> +#include <wchar.h> +#include "mblocal.h" #include "lnumeric.h" #include "lmessages.h" #include "lmonetary.h" @@ -54,14 +57,25 @@ nl_langinfo_l(nl_item item, locale_t loc) switch (item) { case CODESET: - ret = ""; - if ((s = querylocale(LC_CTYPE_MASK, loc)) != NULL) { - if ((cs = strchr(s, '.')) != NULL) - ret = cs + 1; - else if (strcmp(s, "C") == 0 || - strcmp(s, "POSIX") == 0) - ret = "US-ASCII"; - } + s = XLOCALE_CTYPE(loc)->runes->__encoding; + if (strcmp(s, "EUC-CN") == 0) + ret = "eucCN"; + else if (strcmp(s, "EUC-JP") == 0) + ret = "eucJP"; + else if (strcmp(s, "EUC-KR") == 0) + ret = "eucKR"; + else if (strcmp(s, "EUC-TW") == 0) + ret = "eucTW"; + else if (strcmp(s, "BIG5") == 0) + ret = "Big5"; + else if (strcmp(s, "MSKanji") == 0) + ret = "SJIS"; + else if (strcmp(s, "NONE") == 0) + ret = "US-ASCII"; + else if (strncmp(s, "NONE:", 5) == 0) + ret = (char *)(s + 5); + else + ret = (char *)s; break; case D_T_FMT: ret = (char *) __get_current_time_locale(loc)->c_fmt; diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index 67c632e..00e4d98 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -129,7 +129,7 @@ __setrunelocale(struct xlocale_ctype *l, const char *encoding) rl->__sputrune = NULL; rl->__sgetrune = NULL; - if (strcmp(rl->__encoding, "NONE") == 0) + if (strncmp(rl->__encoding, "NONE", 4) == 0) ret = _none_init(l, rl); else if (strcmp(rl->__encoding, "UTF-8") == 0) ret = _UTF8_init(l, rl); |