The conversion tools have been further improved and some erroneous conversions have been detected and fixed. It is now possible to add options after the encoding in the parameter list for convert-keymap.pl. This is currently used to selectively enable interpretation of the ISO8859-1 currency symbol as the Euro sign found in ISO8859-15, or to add a Yen symbol in place of '\' for specific Japanese keyboards. The options are appended to the parameter list, e.g. "convert-keymap.pl german.iso.kbd ISO8859-1 EURO". In KBDFILES.map, the options are appended to the encoding in the form "+EURO" or "+YEN", to keep the meaning of the columns intact. MFC after: 3 days
author: se <se@FreeBSD.org> 2014-08-20 17:07:41 +0000
committer: se <se@FreeBSD.org> 2014-08-20 17:07:41 +0000
commit: e28f5c3fbc2c8d89f41becf1ce551c7b41531320 (patch)
tree: 7d9177d107e1cce42525b44a85f040451e3fb01d /tools
parent: ab6d2528d2c28910838f5d9ea7728f114fa64c60 (diff)
download: FreeBSD-src-e28f5c3fbc2c8d89f41becf1ce551c7b41531320.zip
FreeBSD-src-e28f5c3fbc2c8d89f41becf1ce551c7b41531320.tar.gz
3 files changed, 94 insertions, 53 deletions
diff --git a/tools/tools/vt/keymaps/KBDFILES.map b/tools/tools/vt/keymaps/KBDFILES.map
index f4e5a66..7080b0b 100644
--- a/tools/tools/vt/keymaps/KBDFILES.map
+++ b/tools/tools/vt/keymaps/KBDFILES.map
@@ -1,7 +1,15 @@
 # $FreeBSD$
-
-ISO8859-15	be.iso.kbd			be.kbd
-ISO8859-15	be.iso.acc.kbd			be.acc.kbd
+#
+# The Files are converted by "convert-keymaps.pl" from the given encoding to UCS.
+#
+# An additional "+EURO" causes the translation of the generic currency symbol to 
+# an Euro symbol, even if the source locale does not support an Euro symbol.
+# This conversion is only performed for the "E" key (not e.g. on Shift-4, which 
+# still generates the currency symbol).
+#
+# Encoding      syscons file name               newcons (vt) file name
+ISO8859-1+EURO	be.iso.kbd			be.kbd
+ISO8859-1+EURO	be.iso.acc.kbd			be.acc.kbd
 
 ISO8859-5	bg.bds.ctrlcaps.kbd		bg.bds.kbd
 ISO8859-5	bg.phonetic.ctrlcaps.kbd	bg.bds.ctrlcaps.kbd
@@ -14,15 +22,15 @@ CP850		br275.cp850.kbd			br.noacc.kbd
 #CP1251		by.cp1251.kbd			by.kbd.from-cp1251	(result identical to CP1251)
 ISO8859-5	by.iso5.kbd			by.kbd
 
-ISO8859-2	ce.iso2.kbd			centraleuropean.kbd
+ISO8859-2	ce.iso2.kbd			centraleuropean.qwerty.kbd
 
 ISO8859-1	colemak.iso15.acc.kbd		colemak.kbd
 
 ISO8859-2	cs.latin2.qwertz.kbd		cz.kbd
-ISO8859-2	cz.iso2.kbd			cz.kbd.from-ce
+ISO8859-2	cz.iso2.kbd			cz.qwerty.kbd.from-ce
 
-ISO8859-15	danish.iso.kbd			dk.kbd
-ISO8859-15	danish.iso.acc.kbd		dk.acc.kbd
+ISO8859-1+EURO	danish.iso.kbd			dk.kbd
+ISO8859-1+EURO	danish.iso.acc.kbd		dk.acc.kbd
 CP865		danish.cp865.kbd		dk.kbd.from-cp865
 ISO8859-1	danish.iso.macbook.kbd		dk.macbook.kbd
 
@@ -36,19 +44,19 @@ ISO8859-1	estonian.iso.kbd		ee.kbd.from-iso1
 ISO8859-15	estonian.iso15.kbd		ee.kbd
 CP850		estonian.cp850.kbd		ee.kbd.from-cp850
 
-ISO8859-15	finnish.iso.kbd			fi.kbd
+ISO8859-1+EURO	finnish.iso.kbd			fi.kbd
 CP850		finnish.cp850.kbd		fi.kbd.from-cp850
 
-ISO8859-15	fr.iso.kbd			fr.kbd
-ISO8859-15	fr.iso.acc.kbd			fr.acc.kbd
-ISO8859-15	fr.macbook.acc.kbd		fr.macbook.kbd
-ISO8859-1	fr.dvorak.kbd			fr.dvorak.kbd
-ISO8859-15	fr.dvorak.acc.kbd		fr.dvorak.acc.kbd
+ISO8859-1+EURO	fr.iso.kbd			fr.kbd
+ISO8859-1+EURO	fr.iso.acc.kbd			fr.acc.kbd
+ISO8859-1+EURO	fr.macbook.acc.kbd		fr.macbook.kbd
+ISO8859-1+EURO	fr.dvorak.kbd			fr.dvorak.kbd
+ISO8859-1	fr.dvorak.acc.kbd		fr.dvorak.acc.kbd
 
-ISO8859-15	fr_CA.iso.acc.kbd		ca-fr.kbd
+ISO8859-1+EURO	fr_CA.iso.acc.kbd		ca-fr.kbd
 
-ISO8859-15	german.iso.kbd			de.kbd
-ISO8859-15	german.iso.acc.kbd		de.acc.kbd
+ISO8859-1+EURO	german.iso.kbd			de.noacc.kbd
+ISO8859-1+EURO	german.iso.acc.kbd		de.acc.kbd
 CP850		german.cp850.kbd		de.kbd.from-cp850
 
 ISO8859-7	gr.elot.acc.kbd			gr.elot.acc.kbd
@@ -66,12 +74,12 @@ ARMSCII-8	hy.armscii-8.kbd		am.kbd
 ISO8859-1	icelandic.iso.kbd		is.kbd
 ISO8859-1	icelandic.iso.acc.kbd		is.acc.kbd
 
-ISO8859-15	it.iso.kbd			it.kbd
+ISO8859-1+EURO	it.iso.kbd			it.kbd
 
-ISO8859-1	jp.106.kbd			jp.kbd
-ISO8859-1	jp.106x.kbd			jp.capsctrl.kbd
-ISO8859-1	jp.pc98.kbd			jp.pc98.kbd
-ISO8859-1	jp.pc98.iso.kbd			jp.pc98.iso.kbd
+ISO8859-1+YEN	jp.106.kbd			jp.kbd
+ISO8859-1+YEN	jp.106x.kbd			jp.capsctrl.kbd
+ISO8859-1+YEN	jp.pc98.kbd			jp.pc98.kbd
+ISO8859-1+YEN	jp.pc98.iso.kbd			jp.pc98.iso.kbd
 
 PT154		kk.pt154.kst.kbd		kz.kst.kbd
 PT154		kk.pt154.io.kbd			kz.io.kbd
@@ -87,8 +95,8 @@ ISO8859-1	norwegian.dvorak.kbd		no.dvorak.kbd
 ISO8859-2	pl_PL.ISO8859-2.kbd		pl.kbd
 ISO8859-2	pl_PL.dvorak.kbd		pl.dvorak.kbd
 
-ISO8859-15	pt.iso.kbd			pt.kbd
-ISO8859-15	pt.iso.acc.kbd			pt.acc.kbd
+ISO8859-1+EURO	pt.iso.kbd			pt.kbd
+ISO8859-1+EURO	pt.iso.acc.kbd			pt.acc.kbd
 
 CP866		ru.cp866.kbd			ru.kbd.from-cp866
 ISO8859-5	ru.iso5.kbd			ru.kbd.from-iso5
@@ -96,31 +104,31 @@ KOI8-R		ru.koi8-r.kbd			ru.kbd
 KOI8-R		ru.koi8-r.shift.kbd		ru.shift.kbd
 KOI8-R		ru.koi8-r.win.kbd		ru.win.kbd
 
-ISO8859-15	spanish.dvorak.kbd		es.dvorak.kbd
-ISO8859-1	spanish.iso.kbd			es.kbd.from-iso1
-ISO8859-1	spanish.iso.acc.kbd		es.acc.kbd
-ISO8859-15	spanish.iso15.acc.kbd		es.kbd
+ISO8859-1+EURO	spanish.dvorak.kbd		es.dvorak.kbd
+ISO8859-1+EURO	spanish.iso.kbd			es.kbd.from-iso1
+ISO8859-1+EURO	spanish.iso.acc.kbd		es.acc.kbd
+ISO8859-1+EURO	spanish.iso15.acc.kbd		es.kbd
 
 ISO8859-2	si.iso.kbd			si.kbd
 
 ISO8859-2	sk.iso2.kbd			sk.kbd
 
-ISO8859-1	swedish.iso.kbd			se.kbd
+ISO8859-1+EURO	swedish.iso.kbd			se.kbd
 CP850		swedish.cp850.kbd		se.kbd.from-cp850
 
-ISO8859-1	swissfrench.iso.kbd		ch-fr.kbd
-ISO8859-1	swissfrench.iso.acc.kbd		ch-fr.acc.kbd
+ISO8859-1+EURO	swissfrench.iso.kbd		ch-fr.kbd
+ISO8859-1+EURO	swissfrench.iso.acc.kbd		ch-fr.acc.kbd
 CP850		swissfrench.cp850.kbd		ch-fr.kbd.from-cp850
 
-ISO8859-1	swissgerman.iso.kbd		ch.kbd
-ISO8859-1	swissgerman.iso.acc.kbd		ch.acc.kbd
+ISO8859-1+EURO	swissgerman.iso.kbd		ch.kbd
+ISO8859-1+EURO	swissgerman.iso.acc.kbd		ch.acc.kbd
 CP850		swissgerman.cp850.kbd		ch.kbd.from-cp850
-ISO8859-1	swissgerman.macbook.acc.kbd	ch.macbook.acc.kbd
+ISO8859-1+EURO	swissgerman.macbook.acc.kbd	ch.macbook.acc.kbd
 
 ISO8859-9	tr.iso9.q.kbd			tr.kbd
 
-ISO8859-15	uk.iso.kbd			uk.kbd
-ISO8859-15	uk.iso-ctrl.kbd			uk.capsctrl.kbd
+ISO8859-1+EURO	uk.iso.kbd			uk.kbd
+ISO8859-1+EURO	uk.iso-ctrl.kbd			uk.capsctrl.kbd
 #CP850		uk.cp850.kbd			uk.kbd.from-cp850		(no � and different Alt/Alt-Shift encodings)
 #CP850		uk.cp850-ctrl.kbd		uk.capsctrl.kbd.from-cp850	(no � and different Alt/Alt-Shift encodings)
 ISO8859-15	uk.dvorak.kbd			uk.dvorak.kbd
diff --git a/tools/tools/vt/keymaps/convert-keymap.pl b/tools/tools/vt/keymaps/convert-keymap.pl
index f2a0799..778ae10 100755
--- a/tools/tools/vt/keymaps/convert-keymap.pl
+++ b/tools/tools/vt/keymaps/convert-keymap.pl
@@ -6,9 +6,26 @@ use Encode;
 use strict;
 use utf8;
 
-die "Usage: $0 filename.kbd CHARSET" unless ($ARGV[1]);
-my $converter = Text::Iconv->new($ARGV[1], "UTF-8");
+# command line parsing
+die "Usage: $0 filename.kbd CHARSET [EURO]"
+    unless ($ARGV[1]);
 
+my $inputfile = shift;					# first command argument
+my $converter = Text::Iconv->new(shift, "UTF-8");	# second argument
+my $use_euro;
+my $use_yen;
+my $current_char;
+my $current_scancode;
+
+while (my $arg = shift) {
+    $use_euro = 1, next
+	if $arg eq "EURO";
+    $use_yen = 1, next
+	if $arg eq "YEN";
+    die "Unknown encoding option '$arg'\n";
+}
+
+# converter functions
 sub local_to_UCS_string
 {
     my ($string) = @_;
@@ -18,21 +35,35 @@ sub local_to_UCS_string
 
 sub prettyprint_token
 {
-    my ($code) = @_;
-
-    return "'" . chr($code) . "'"
-        if 32 <= $code and $code <= 126; # print as ASCII if possible
-#    return sprintf "%d", $code; # <---- temporary decimal
-    return sprintf "0x%02x", $code
-        if $code <= 255;        # print as hex number, else
-    return sprintf "0x%04x", $code;
+    my ($ucs_char) = @_;
+
+    return "'" . chr($ucs_char) . "'"
+        if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
+#    return sprintf "%d", $ucs_char; # <---- temporary decimal
+    return sprintf "0x%02x", $ucs_char
+        if $ucs_char <= 255;        # print as hex number, else
+    return sprintf "0x%04x", $ucs_char;
 }
 
 sub local_to_UCS_code
 {
     my ($char) = @_;
 
-    return prettyprint_token(ord(Encode::decode("UTF-8", local_to_UCS_string($char))));
+    my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
+
+    $current_char = lc(chr($ucs_char)), print("SETCUR: $ucs_char\n")
+	if $current_char eq "";
+
+    $ucs_char = 0x20ac	# replace with Euro character
+	if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
+
+    $ucs_char = 0xa5	# replace with Jap. Yen character on PC kbd
+	if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
+
+    $ucs_char = 0xa5	# replace with Jap. Yen character on PC98x1 kbd
+	if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13;
+
+    return prettyprint_token($ucs_char);
 }
 
 sub malformed_to_UCS_code
@@ -62,7 +93,6 @@ sub convert_token
 sub tokenize { # split on white space and parentheses (but not within token)
     my ($line) = @_;
 
-#print "<< $line";
     $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
     $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
     $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
@@ -70,7 +100,6 @@ sub tokenize { # split on white space and parentheses (but not within token)
     my $matches;
     do {
 	$matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
-#	print "-> $line<> $matches: ('$1','$2')\n";
     } while $matches;
     $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
     my @KEYTOKEN = split (" ", $line);
@@ -78,12 +107,11 @@ sub tokenize { # split on white space and parentheses (but not within token)
     grep(s/_spc_/' '/, @KEYTOKEN);
     grep(s/_lpar_/'('/, @KEYTOKEN);
     grep(s/_rpar_/')'/, @KEYTOKEN);
-#printf ">> $line%s\n", join('|', @KEYTOKEN);
     return @KEYTOKEN;
 }
 
 # main program
-open FH, "<$ARGV[0]";
+open FH, "<$inputfile";
 while (<FH>) {
     if (m/^#/) {
 	print local_to_UCS_string($_);
@@ -95,7 +123,10 @@ while (<FH>) {
 	my $C;
 	foreach $C (@KEYTOKEN) {
 	    if ($at_bol) {
+		$current_char = "";
+		$current_scancode = -1;
 		if ($C =~ m/^\s*\d/) { # line begins with key code number
+		    $current_scancode = $C;
 		    printf "  %03d   ", $C;
 		} elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
 		    printf "  %-4s ", $C; # accent name starts accent definition
@@ -109,6 +140,7 @@ while (<FH>) {
 		if ($C =~ m/^([BCNO])$/) {
 		    print " $1"; # special case: effect of Caps Lock/Num Lock
 		} elsif ($C eq "(") {
+		    $current_char = "";
 		    print " ( ";
 		} elsif ($C eq ")") {
 		    print " )";
diff --git a/tools/tools/vt/keymaps/convert-keymaps.pl b/tools/tools/vt/keymaps/convert-keymaps.pl
index dd452af..ea8fbc2 100755
--- a/tools/tools/vt/keymaps/convert-keymaps.pl
+++ b/tools/tools/vt/keymaps/convert-keymaps.pl
@@ -83,12 +83,13 @@ my $kbdfile;
 foreach $kbdfile (glob("$dir_keymaps_syscons/*.kbd")) {
     my $basename;
     ($basename = $kbdfile) =~ s:.*/::;
-    my $encoding = $ENCODING{$basename};
+    my ($encoding) = $ENCODING{$basename};
+    $encoding =~ s/\+/ /g;		# e.g. "ISO8859-1+EURO" -> "ISO8859-1 EURO"
     my $outfile = $FILE_NEW{$basename};
     if ($encoding and $outfile) {
 	if (-r $kbdfile) {
-	    print "converting from '$basename' ($encoding) to '$outfile' (Unicode)\n";
-	    my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $ENCODING{$basename} > $dir_keymaps_output/$outfile";
+	    print "converting from '$basename' ($encoding) to '$outfile' (UCS)\n";
+	    my $cmdline = "$dir_convtool/convert-keymap.pl $kbdfile $encoding > $dir_keymaps_output/$outfile";
 	    system "$cmdline";
 	} else {
 	    print "$kbdfile not found\n";
author	se <se@FreeBSD.org>	2014-08-20 17:07:41 +0000
committer	se <se@FreeBSD.org>	2014-08-20 17:07:41 +0000
commit	e28f5c3fbc2c8d89f41becf1ce551c7b41531320 (patch)
tree	7d9177d107e1cce42525b44a85f040451e3fb01d /tools
parent	ab6d2528d2c28910838f5d9ea7728f114fa64c60 (diff)
download	FreeBSD-src-e28f5c3fbc2c8d89f41becf1ce551c7b41531320.zip FreeBSD-src-e28f5c3fbc2c8d89f41becf1ce551c7b41531320.tar.gz