summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r-- tools/tools/locale/Makefile 69
-rw-r--r-- tools/tools/locale/etc/charmaps.xml 58
-rw-r--r-- tools/tools/locale/etc/charmaps/charmaps.txt 1
-rwxr-xr-x tools/tools/locale/tools/cldr2def.pl 14
-rwxr-xr-x tools/tools/locale/tools/convert_map.pl 3
-rw-r--r-- tools/tools/locale/tools/extract-colldef.awk 18
-rwxr-xr-x tools/tools/locale/tools/finalize 44
7 files changed, 160 insertions, 47 deletions
diff --git a/tools/tools/locale/Makefile b/tools/tools/locale/Makefile
index 2b5aa55..bac5c3e 100644
--- a/tools/tools/locale/Makefile
+++ b/tools/tools/locale/Makefile
@@ -22,6 +22,23 @@ KNOWN= monetdef numericdef msgdef timedef colldef ctypedef
TYPES?= ${KNOWN}
LOCALE_DESTDIR?= /tmp/generated-locales/
+COLLATION_SPECIAL?= \
+ cs_CZ ISO8859-2 \
+ da_DK ISO8859-1 \
+ da_DK ISO8859-15 \
+ hr_HR ISO8859-2 \
+ hu_HU ISO8859-2 \
+ nb_NO ISO8859-1 \
+ nb_NO ISO8859-15 \
+ sk_SK ISO8859-2 \
+ zh_Hans_CN GB2312 \
+ zh_Hans_CN eucCN \
+
+.for area enc in ${COLLATION_SPECIAL}
+COLLATIONS_SPECIAL_ENV+= ${area}.${enc}
+.endfor
+PASSON+= COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}"
+
.if defined(LC)
LC:= --lc=${LC}
.endif
@@ -55,17 +72,26 @@ post-install:
.endfor
.for t in ${TYPES}
-build-${t}:
+gen-${t}:
mkdir -p ${t} ${t}.draft
perl -I tools tools/cldr2def.pl \
--cldr=$$(realpath ${CLDRDIR}) \
--unidata=$$(realpath ${UNIDATADIR}) \
--etc=$$(realpath ${ETCDIR}) \
--type=${t} ${LC}
+
+build-${t}: gen-${t}
env ${PASSON} tools/finalize ${t}
.endfor
-build-ctypedef: transfer-rollup
+gen-ctypedef: transfer-rollup
+static-colldef: gen-colldef
+build-colldef: static-colldef
+
+static-colldef:
+.for area enc in ${COLLATION_SPECIAL}
+ awk -f tools/extract-colldef.awk ${CLDRDIR}/posix/${area}.${enc}.src > colldef/${area}.${enc}.src
+.endfor
transfer-rollup:
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src
@@ -93,12 +119,34 @@ BASE_LOCALES_OF_INTEREST?= \
uk_UA \
kk_Cyrl_KZ mn_Cyrl_MN sr_Cyrl_RS sr_Latn_RS \
zh_Hans_CN zh_Hant_HK zh_Hant_TW \
- \
- \
bn_IN gu_IN or_IN ta_IN te_IN kn_IN ml_IN si_LK \
th_TH lo_LA bo_IN my_MM pa_Guru_IN ka_GE chr_US \
km_KH shi_Tfng_MA ii_CN vai_Vaii_LR vi_VN
+ENCODINGS= Big5 \
+ CP1251 \
+ CP866 \
+ CP949 \
+ eucCN \
+ eucJP \
+ eucKR \
+ GB18030 \
+ GB2312 \
+ GBK \
+ ISO8859-1 \
+ ISO8859-13 \
+ ISO8859-15 \
+ ISO8859-2 \
+ ISO8859-5 \
+ ISO8859-7 \
+ ISO8859-9 \
+ KOI8-R \
+ KOI8-U \
+ SJIS \
+ US-ASCII \
+ UTF-8 \
+
+
POSIX:
.if exists (${CLDRDIR}/tools/java/cldr.jar)
mkdir -p ${CLDRDIR}/posix
@@ -109,11 +157,20 @@ POSIX:
-d ${CLDRDIR}/posix -m ${area} -c UTF-8
. endif
. endfor
-. if !exists(${CLDRDIR}/posix/UTF-8.cm)
+. for area encoding in ${COLLATION_SPECIAL}
+. if !exists(${CLDRDIR}/posix/${area}.${encoding}.src)
+ java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \
+ org.unicode.cldr.posix.GeneratePOSIX \
+ -d ${CLDRDIR}/posix -m ${area} -c ${encoding}
+. endif
+. endfor
+. for enc in ${ENCODINGS}
+. if !exists(${CLDRDIR}/posix/${enc}.cm)
java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \
org.unicode.cldr.posix.GenerateCharmap \
- -d ${CLDRDIR}/posix
+ -d ${CLDRDIR}/posix -c ${enc}
. endif
+. endfor
.else
@echo "Please install CLDR toolset for the desired release"
@echo "It should go at ${CLDRDIR}/tools"
diff --git a/tools/tools/locale/etc/charmaps.xml b/tools/tools/locale/etc/charmaps.xml
index 0b6551a..e0d39b2 100644
--- a/tools/tools/locale/etc/charmaps.xml
+++ b/tools/tools/locale/etc/charmaps.xml
@@ -187,10 +187,6 @@
countries="CN" />
<language name="zh"
family="Hant"
- encoding="Big5HKSCS"
- countries="HK" />
- <language name="zh"
- family="Hant"
encoding="Big5"
countries="TW" />
</languages>
@@ -444,69 +440,69 @@
unicode="FULLWIDTH HYPHEN-MINUS" />
<translation encoding="Big5" cldr="DOLLAR SIGN"
unicode="FULLWIDTH DOLLAR SIGN" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E00" ucc="4E00" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E03" ucc="4E03" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E09" ucc="4E09" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E0A" ucc="4E0A" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E0B" ucc="4E0B" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E0D" ucc="4E0D" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E5D" ucc="4E5D" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E8C" ucc="4E8C" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-4E94" ucc="4E94" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-516B" ucc="516B" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-516D" ucc="516D" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-5206" ucc="5206" />
<translation encoding="eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-524D" ucc="524D" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-5341" ucc="5341" />
<translation
- encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN eucJP SJIS"
+ encoding="GB2312 GB18030 GBK Big5 eucCN eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-5348" ucc="5348" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-5426" ucc="5426" />
<translation encoding="GB2312 GB18030 GBK eucCN"
cldr="CJK UNIFIED IDEOGRAPH-5468" ucc="5468" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-56DB" ucc="56DB" />
<translation encoding="eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-571F" ucc="571F" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-5B9A" ucc="5B9A" />
<translation
- encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN eucJP SJIS"
+ encoding="GB2312 GB18030 GBK Big5 eucCN eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-5E74" ucc="5E74" />
<translation encoding="eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-5F8C" ucc="5F8C" />
<translation
- encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN eucJP SJIS"
+ encoding="GB2312 GB18030 GBK Big5 eucCN eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-65E5" ucc="65E5" />
<translation encoding="GB2312 GB18030 GBK eucCN"
cldr="CJK UNIFIED IDEOGRAPH-65F6" ucc="65F6" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-661F" ucc="661F" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-662F" ucc="662F" />
- <translation encoding="Big5 Big5HKSCS"
+ <translation encoding="Big5 "
cldr="CJK UNIFIED IDEOGRAPH-6642" ucc="6642" />
<translation encoding="eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-66DC" ucc="66DC" />
<translation
- encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN eucJP SJIS"
+ encoding="GB2312 GB18030 GBK Big5 eucCN eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-6708" ucc="6708" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-671F" ucc="671F" />
<translation encoding="eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-6728" ucc="6728" />
@@ -516,11 +512,11 @@
cldr="CJK UNIFIED IDEOGRAPH-706B" ucc="706B" />
<translation encoding="GB2312 GB18030 GBK eucCN"
cldr="CJK UNIFIED IDEOGRAPH-786E" ucc="786E" />
- <translation encoding="Big5 Big5HKSCS"
+ <translation encoding="Big5 "
cldr="CJK UNIFIED IDEOGRAPH-78BA" ucc="78BA" />
- <translation encoding="GB2312 GB18030 GBK Big5 Big5HKSCS eucCN"
+ <translation encoding="GB2312 GB18030 GBK Big5 eucCN"
cldr="CJK UNIFIED IDEOGRAPH-79D2" ucc="79D2" />
- <translation encoding="Big5 Big5HKSCS"
+ <translation encoding="Big5 "
cldr="CJK UNIFIED IDEOGRAPH-9031" ucc="9031" />
<translation encoding="eucJP SJIS"
cldr="CJK UNIFIED IDEOGRAPH-91D1" ucc="91D1" />
diff --git a/tools/tools/locale/etc/charmaps/charmaps.txt b/tools/tools/locale/etc/charmaps/charmaps.txt
index a0791f7..d8f8bb8 100644
--- a/tools/tools/locale/etc/charmaps/charmaps.txt
+++ b/tools/tools/locale/etc/charmaps/charmaps.txt
@@ -8,7 +8,6 @@ haible.de: http://haible.de/bruno/charsets/conversion-tables/
ARMSCII-8 haible.de: Armenian.html
Big5 unicodeorg: OBSOLETE/EASTASIA/OTHER
- Big5HKSCS haible.de: BIG5-HKSCS.html /
CP1131 haible.de: CP1131.html / aix-4.3.2/IBM-1131.TXT
CP1251 unicode.org: VENDORS/MICSFT/WINDOWS
CP866 unicode.org: VENDORS/MICSFT/PC
diff --git a/tools/tools/locale/tools/cldr2def.pl b/tools/tools/locale/tools/cldr2def.pl
index fae7c91..3f61bb1 100755
--- a/tools/tools/locale/tools/cldr2def.pl
+++ b/tools/tools/locale/tools/cldr2def.pl
@@ -808,14 +808,24 @@ sub make_makefile {
my $SRCOUT;
my $SRCOUT2;
my $SRCOUT3 = "";
+ my $SRCOUT4 = "";
my $MAPLOC;
if ($TYPE eq "colldef") {
$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
- "\t-f \${MAPLOC}/map.UTF-8 " .
+ "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E} " .
"\${.OBJDIR}/\${.IMPSRC:T:R}";
$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
"locale/etc/final-maps\n";
$SRCOUT2 = "LC_COLLATE";
+ $SRCOUT3 = "" .
+ ".for f t in \${LOCALES_MAPPED}\n" .
+ "FILES+=\t\$t.LC_COLLATE\n" .
+ "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
+ "\tlocaledef -D -U -i \${.ALLSRC} \\\n" .
+ "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E} \\\n" .
+ "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
+ ".endfor\n\n";
+ $SRCOUT4 = "## LOCALES_MAPPED\n";
}
elsif ($TYPE eq "ctypedef") {
$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
@@ -855,6 +865,8 @@ ${MAPLOC}
## PLACEHOLDER
+${SRCOUT4}
+
EOF
foreach my $hash (keys(%hashtable)) {
diff --git a/tools/tools/locale/tools/convert_map.pl b/tools/tools/locale/tools/convert_map.pl
index e5381f3..8822253 100755
--- a/tools/tools/locale/tools/convert_map.pl
+++ b/tools/tools/locale/tools/convert_map.pl
@@ -1,5 +1,7 @@
#! /usr/local/bin/perl
#
+# $FreeBSD$
+#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
@@ -167,7 +169,6 @@ elsif ($codeset eq "eucKR") { $max_mb = 2 }
elsif ($codeset eq "GBK") { $max_mb = 2 }
elsif ($codeset eq "GB2312") { $max_mb = 2 }
elsif ($codeset eq "Big5") { $max_mb = 2 }
-elsif ($codeset eq "Big5HKSCS") { $max_mb = 2 }
else { $max_mb = 1 };
print("<code_set_name> \"$codeset\"\n");
print("<mb_cur_min> 1\n");
diff --git a/tools/tools/locale/tools/extract-colldef.awk b/tools/tools/locale/tools/extract-colldef.awk
new file mode 100644
index 0000000..3f29249
--- /dev/null
+++ b/tools/tools/locale/tools/extract-colldef.awk
@@ -0,0 +1,18 @@
+# $FreeBSD$
+# Print a banner, then pass through only the comment_char/escape_char lines and the LC_COLLATE section.
+BEGIN {
+	print "# Warning: Do not edit. This is automatically extracted"
+	print "# from CLDR project data, obtained from http://cldr.unicode.org/"
+	print "# -----------------------------------------------------------------------------"
+}
+$1 == "comment_char" { print $0 }
+$1 == "escape_char" { print $0 }
+$1 == "LC_COLLATE" {
+	print $0
+	while ((getline line) > 0) {	# getline yields -1 on read error; an untested value would loop forever
+		print line
+		if (line == "END LC_COLLATE") {	# whole-line match on the section terminator ends the copy
+			break
+		}
+	}
+}
diff --git a/tools/tools/locale/tools/finalize b/tools/tools/locale/tools/finalize
index 7ce3e74..b32c52c 100755
--- a/tools/tools/locale/tools/finalize
+++ b/tools/tools/locale/tools/finalize
@@ -26,12 +26,15 @@ new=${base}/../${1}
TEMP=/tmp/${1}.locales
TEMP2=/tmp/${1}.hashes
TEMP3=/tmp/${1}.symlinks
+TEMP4=/tmp/${1}.mapped
FULLMAP=/tmp/utf8-map
FULLEXTRACT=/tmp/extracted-names
AWKCMD="/## PLACEHOLDER/ { \
while ( getline line < \"${TEMP}\" ) {print line} } \
/## SYMPAIRS/ { \
while ( getline line < \"${TEMP3}\" ) {print line} } \
+ /## LOCALES_MAPPED/ { \
+ while ( getline line < \"${TEMP4}\" ) {print line} } \
!/## / { print \$0 }"
grep '^LOCALES+' ${old}/Makefile > ${TEMP}
@@ -51,21 +54,23 @@ then
/usr/bin/sed -E -e 's/[ ]+/ /g' \
${CLDRDIR}/posix/UTF-8.cm \
> ${base}/../etc/final-maps/map.UTF-8
- CHARMAPS="ARMSCII-8 Big5 Big5HKSCS CP1131 CP1251 \
+ /usr/bin/sed -E -e 's/[ ]+/ /g' \
+ ${CLDRDIR}/posix/eucCN.cm \
+ > ${base}/../etc/final-maps/map.eucCN
+ /usr/bin/sed -E -e 's/[ ]+/ /g' \
+ ${CLDRDIR}/posix/eucCN.cm \
+ > ${base}/../etc/final-maps/map.GB2312
+ CHARMAPS="ARMSCII-8 Big5 CP1131 CP1251 \
CP866 GB2312 GBK ISCII-DEV ISO8859-1 \
ISO8859-13 ISO8859-15 ISO8859-2 ISO8859-4 \
ISO8859-5 ISO8859-7 ISO8859-9 KOI8-R KOI8-U \
- PT154 SJIS US-ASCII eucCN eucJP eucKR"
+ PT154 SJIS US-ASCII eucJP eucKR"
# GB18030 blows up, use pre-generate Illumos version
for map in ${CHARMAPS}
do
encoding=${map}
- if [ ${map} = "Big5HKSCS" ]
- then
- encoding="Big5"
- fi
/usr/local/bin/perl ${base}/convert_map.pl \
${base}/../etc/charmaps/${map}.TXT ${encoding} \
| /usr/bin/sed -E -e 's/ +/ /g' \
@@ -73,6 +78,31 @@ then
echo map ${map} converted.
done
+elif [ $1 = "colldef" ]
+then
+ awk -v tmp4=${TEMP4} '$1 == "SAME+=" && $0 !~ /legacy/ {
+ orig=$2
+ dest=$3
+ gsub(/.*\./, "", orig)
+ gsub(/.*\./, "", dest)
+ if (orig != dest )
+ print "LOCALES_MAPPED+=\t"$2 " "$3 > tmp4
+ }' ${old}/Makefile
+
+ for line in $(awk '{ print $3 }' ${TEMP4}); do
+ sed -i '' "/^SAME.*$line$/d" ${old}/Makefile
+ done
+ echo "" >> ${TEMP4}
+ for enc in ${COLLATIONS_SPECIAL}; do
+ sed -i '' "/^.*${enc}$/d" ${TEMP4}
+ echo "LOCALES+= ${enc}" >> ${TEMP4}
+ done
+
+ keep=$(cat ${TEMP} | awk '{ print $2 }')
+ for original in ${keep}
+ do
+ cp ${old}/${original}.src ${new}/
+ done
else # below is everything but ctypedef
keep=$(cat ${TEMP} | awk '{ print $2 }')
@@ -85,4 +115,4 @@ fi
grep -v '^LOCALES+' ${old}/Makefile | awk "${AWKCMD}" > ${new}/Makefile
-rm -f ${TEMP} ${TEMP3}
+rm -f ${TEMP} ${TEMP3} ${TEMP4}
OpenPOWER on IntegriCloud