summary | refs | log | tree | commit | diff | stats
path: root/contrib/texinfo/makeinfo/lang.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/texinfo/makeinfo/lang.c')
-rw-r--r-- contrib/texinfo/makeinfo/lang.c 158
1 files changed, 140 insertions, 18 deletions
diff --git a/contrib/texinfo/makeinfo/lang.c b/contrib/texinfo/makeinfo/lang.c
index a9cbfe1..2938196 100644
--- a/contrib/texinfo/makeinfo/lang.c
+++ b/contrib/texinfo/makeinfo/lang.c
@@ -1,7 +1,7 @@
/* lang.c -- language-dependent support.
- $Id: lang.c,v 1.5 2002/11/12 18:48:52 feloy Exp $
+ $Id: lang.c,v 1.8 2003/05/01 00:05:27 karl Exp $
- Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -31,12 +31,12 @@ encoding_code_type document_encoding_code = no_encoding;
/* Current language code; default is English. */
language_code_type language_code = en;
-iso_map_type us_ascii_map [] = {{NULL, 0, 0}}; /* ASCII map is trivial */
+static iso_map_type us_ascii_map [] = {{NULL, 0, 0}}; /* ASCII map is trivial */
/* Translation table between HTML and ISO Codes. The last item is
hopefully the Unicode. It might be possible that those Unicodes are
not correct, cause I didn't check them. kama */
-iso_map_type iso8859_1_map [] = {
+static iso_map_type iso8859_1_map [] = {
{ "nbsp", 0xA0, 0x00A0 },
{ "iexcl", 0xA1, 0x00A1 },
{ "cent", 0xA2, 0x00A2 },
@@ -86,7 +86,7 @@ iso_map_type iso8859_1_map [] = {
{ "Iacute", 0xCD, 0x00CD },
{ "Icirc", 0xCE, 0x00CE },
{ "Iuml", 0xCF, 0x00CF },
- { "ETH", 0xD0, 0x00D0 }, /* I don't know ;-( */
+ { "ETH", 0xD0, 0x00D0 },
{ "Ntilde", 0xD1, 0x00D1 },
{ "Ograve", 0xD2, 0x00D2 },
{ "Oacute", 0xD3, 0x00D3 },
@@ -137,11 +137,133 @@ iso_map_type iso8859_1_map [] = {
{ NULL, 0, 0 }
};
+
+
+/* Date: Mon, 31 Mar 2003 00:19:28 +0200
+ From: Wojciech Polak <polak@gnu.org>
+...
+ * Primary Polish site for ogonki is http://www.agh.edu.pl/ogonki/,
+ but it's only in Polish language (it has some interesting links).
+
+ * A general site about ISO 8859-2 at http://nl.ijs.si/gnusl/cee/iso8859-2.html
+
+ * ISO 8859-2 Character Set at http://nl.ijs.si/gnusl/cee/charset.html
+ This site provides almost all information about iso-8859-2,
+ including the character table!!! (must see!)
+
+ * ISO 8859-2 and even HTML entities !!! (must see!)
+ http://people.ssh.fi/mtr/genscript/88592.txt
+
+ * (minor) http://www.agh.edu.pl/ogonki/plchars.html
+ One more table, this time it includes even information about Polish
+ characters in Unicode.
+*/
+
+static iso_map_type iso8859_2_map [] = {
+ { "nbsp", 0xA0, 0x00A0 }, /* NO-BREAK SPACE */
+ { "", 0xA1, 0x0104 }, /* LATIN CAPITAL LETTER A WITH OGONEK */
+ { "", 0xA2, 0x02D8 }, /* BREVE */
+ { "", 0xA3, 0x0141 }, /* LATIN CAPITAL LETTER L WITH STROKE */
+ { "curren", 0xA4, 0x00A4 }, /* CURRENCY SIGN */
+ { "", 0xA5, 0x013D }, /* LATIN CAPITAL LETTER L WITH CARON */
+ { "", 0xA6, 0x015A }, /* LATIN CAPITAL LETTER S WITH ACUTE */
+ { "sect", 0xA7, 0x00A7 }, /* SECTION SIGN */
+ { "uml", 0xA8, 0x00A8 }, /* DIAERESIS */
+ { "", 0xA9, 0x0160 }, /* LATIN CAPITAL LETTER S WITH CARON */
+ { "", 0xAA, 0x015E }, /* LATIN CAPITAL LETTER S WITH CEDILLA */
+ { "", 0xAB, 0x0164 }, /* LATIN CAPITAL LETTER T WITH CARON */
+ { "", 0xAC, 0x0179 }, /* LATIN CAPITAL LETTER Z WITH ACUTE */
+ { "shy", 0xAD, 0x00AD }, /* SOFT HYPHEN */
+ { "", 0xAE, 0x017D }, /* LATIN CAPITAL LETTER Z WITH CARON */
+ { "", 0xAF, 0x017B }, /* LATIN CAPITAL LETTER Z WITH DOT ABOVE */
+ { "deg", 0xB0, 0x00B0 }, /* DEGREE SIGN */
+ { "", 0xB1, 0x0105 }, /* LATIN SMALL LETTER A WITH OGONEK */
+ { "", 0xB2, 0x02DB }, /* OGONEK */
+ { "", 0xB3, 0x0142 }, /* LATIN SMALL LETTER L WITH STROKE */
+ { "acute", 0xB4, 0x00B4 }, /* ACUTE ACCENT */
+ { "", 0xB5, 0x013E }, /* LATIN SMALL LETTER L WITH CARON */
+ { "", 0xB6, 0x015B }, /* LATIN SMALL LETTER S WITH ACUTE */
+ { "", 0xB7, 0x02C7 }, /* CARON (Mandarin Chinese third tone) */
+ { "cedil", 0xB8, 0x00B8 }, /* CEDILLA */
+ { "", 0xB9, 0x0161 }, /* LATIN SMALL LETTER S WITH CARON */
+ { "", 0xBA, 0x015F }, /* LATIN SMALL LETTER S WITH CEDILLA */
+ { "", 0xBB, 0x0165 }, /* LATIN SMALL LETTER T WITH CARON */
+ { "", 0xBC, 0x017A }, /* LATIN SMALL LETTER Z WITH ACUTE */
+ { "", 0xBD, 0x02DD }, /* DOUBLE ACUTE ACCENT */
+ { "", 0xBE, 0x017E }, /* LATIN SMALL LETTER Z WITH CARON */
+ { "", 0xBF, 0x017C }, /* LATIN SMALL LETTER Z WITH DOT ABOVE */
+ { "", 0xC0, 0x0154 }, /* LATIN CAPITAL LETTER R WITH ACUTE */
+ { "", 0xC1, 0x00C1 }, /* LATIN CAPITAL LETTER A WITH ACUTE */
+ { "", 0xC2, 0x00C2 }, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
+ { "", 0xC3, 0x0102 }, /* LATIN CAPITAL LETTER A WITH BREVE */
+ { "", 0xC4, 0x00C4 }, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
+ { "", 0xC5, 0x0139 }, /* LATIN CAPITAL LETTER L WITH ACUTE */
+ { "", 0xC6, 0x0106 }, /* LATIN CAPITAL LETTER C WITH ACUTE */
+ { "", 0xC7, 0x00C7 }, /* LATIN CAPITAL LETTER C WITH CEDILLA */
+ { "", 0xC8, 0x010C }, /* LATIN CAPITAL LETTER C WITH CARON */
+ { "", 0xC9, 0x00C9 }, /* LATIN CAPITAL LETTER E WITH ACUTE */
+ { "", 0xCA, 0x0118 }, /* LATIN CAPITAL LETTER E WITH OGONEK */
+ { "", 0xCB, 0x00CB }, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
+ { "", 0xCC, 0x011A }, /* LATIN CAPITAL LETTER E WITH CARON */
+ { "", 0xCD, 0x00CD }, /* LATIN CAPITAL LETTER I WITH ACUTE */
+ { "", 0xCE, 0x00CE }, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
+ { "", 0xCF, 0x010E }, /* LATIN CAPITAL LETTER D WITH CARON */
+ { "", 0xD0, 0x0110 }, /* LATIN CAPITAL LETTER D WITH STROKE */
+ { "", 0xD1, 0x0143 }, /* LATIN CAPITAL LETTER N WITH ACUTE */
+ { "", 0xD2, 0x0147 }, /* LATIN CAPITAL LETTER N WITH CARON */
+ { "", 0xD3, 0x00D3 }, /* LATIN CAPITAL LETTER O WITH ACUTE */
+ { "", 0xD4, 0x00D4 }, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
+ { "", 0xD5, 0x0150 }, /* LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */
+ { "", 0xD6, 0x00D6 }, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
+ { "times", 0xD7, 0x00D7 }, /* MULTIPLICATION SIGN */
+ { "", 0xD8, 0x0158 }, /* LATIN CAPITAL LETTER R WITH CARON */
+ { "", 0xD9, 0x016E }, /* LATIN CAPITAL LETTER U WITH RING ABOVE */
+ { "", 0xDA, 0x00DA }, /* LATIN CAPITAL LETTER U WITH ACUTE */
+ { "", 0xDB, 0x0170 }, /* LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */
+ { "", 0xDC, 0x00DC }, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
+ { "", 0xDD, 0x00DD }, /* LATIN CAPITAL LETTER Y WITH ACUTE */
+ { "", 0xDE, 0x0162 }, /* LATIN CAPITAL LETTER T WITH CEDILLA */
+ { "", 0xDF, 0x00DF }, /* LATIN SMALL LETTER SHARP S (German) */
+ { "", 0xE0, 0x0155 }, /* LATIN SMALL LETTER R WITH ACUTE */
+ { "", 0xE1, 0x00E1 }, /* LATIN SMALL LETTER A WITH ACUTE */
+ { "", 0xE2, 0x00E2 }, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
+ { "", 0xE3, 0x0103 }, /* LATIN SMALL LETTER A WITH BREVE */
+ { "", 0xE4, 0x00E4 }, /* LATIN SMALL LETTER A WITH DIAERESIS */
+ { "", 0xE5, 0x013A }, /* LATIN SMALL LETTER L WITH ACUTE */
+ { "", 0xE6, 0x0107 }, /* LATIN SMALL LETTER C WITH ACUTE */
+ { "", 0xE7, 0x00E7 }, /* LATIN SMALL LETTER C WITH CEDILLA */
+ { "", 0xE8, 0x010D }, /* LATIN SMALL LETTER C WITH CARON */
+ { "", 0xE9, 0x00E9 }, /* LATIN SMALL LETTER E WITH ACUTE */
+ { "", 0xEA, 0x0119 }, /* LATIN SMALL LETTER E WITH OGONEK */
+ { "", 0xEB, 0x00EB }, /* LATIN SMALL LETTER E WITH DIAERESIS */
+ { "", 0xEC, 0x011B }, /* LATIN SMALL LETTER E WITH CARON */
+ { "", 0xED, 0x00ED }, /* LATIN SMALL LETTER I WITH ACUTE */
+ { "", 0xEE, 0x00EE }, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
+ { "", 0xEF, 0x010F }, /* LATIN SMALL LETTER D WITH CARON */
+ { "", 0xF0, 0x0111 }, /* LATIN SMALL LETTER D WITH STROKE */
+ { "", 0xF1, 0x0144 }, /* LATIN SMALL LETTER N WITH ACUTE */
+ { "", 0xF2, 0x0148 }, /* LATIN SMALL LETTER N WITH CARON */
+ { "", 0xF3, 0x00F3 }, /* LATIN SMALL LETTER O WITH ACUTE */
+ { "", 0xF4, 0x00F4 }, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
+ { "", 0xF5, 0x0151 }, /* LATIN SMALL LETTER O WITH DOUBLE ACUTE */
+ { "", 0xF6, 0x00F6 }, /* LATIN SMALL LETTER O WITH DIAERESIS */
+ { "divide", 0xF7, 0x00F7 }, /* DIVISION SIGN */
+ { "", 0xF8, 0x0159 }, /* LATIN SMALL LETTER R WITH CARON */
+ { "", 0xF9, 0x016F }, /* LATIN SMALL LETTER U WITH RING ABOVE */
+ { "", 0xFA, 0x00FA }, /* LATIN SMALL LETTER U WITH ACUTE */
+ { "", 0xFB, 0x0171 }, /* LATIN SMALL LETTER U WITH DOUBLE ACUTE */
+ { "", 0xFC, 0x00FC }, /* LATIN SMALL LETTER U WITH DIAERESIS */
+ { "", 0xFD, 0x00FD }, /* LATIN SMALL LETTER Y WITH ACUTE */
+ { "", 0xFE, 0x0163 }, /* LATIN SMALL LETTER T WITH CEDILLA */
+ { "", 0xFF, 0x02D9 }, /* DOT ABOVE (Mandarin Chinese light tone) */
+ { NULL, 0, 0 }
+};
+
encoding_type encoding_table[] = {
{ no_encoding, "(no encoding)", NULL },
{ US_ASCII, "US-ASCII", us_ascii_map },
{ ISO_8859_1, "ISO-8859-1", (iso_map_type *) iso8859_1_map },
- { ISO_8859_2, "ISO-8859-2", NULL },
+ { ISO_8859_2, "ISO-8859-2", (iso_map_type *) iso8859_2_map },
{ ISO_8859_3, "ISO-8859-3", NULL },
{ ISO_8859_4, "ISO-8859-4", NULL },
{ ISO_8859_5, "ISO-8859-5", NULL },
@@ -310,7 +432,7 @@ void
cm_documentlanguage ()
{
language_code_type c;
- char *lang_arg;
+ char *lang_arg;
/* Read the line with the language code on it. */
get_rest_of_line (0, &lang_arg);
@@ -365,13 +487,13 @@ cm_documentencoding ()
{
encoding_code_type enc;
char *enc_arg;
-
+
get_rest_of_line (1, &enc_arg);
/* See if we have this encoding. */
for (enc = no_encoding+1; enc != last_encoding_code; enc++)
{
- if (strcasecmp (enc_arg, encoding_table[enc].ecname) == 0)
+ if (strcasecmp (enc_arg, encoding_table[enc].encname) == 0)
{
document_encoding_code = enc;
break;
@@ -380,7 +502,7 @@ cm_documentencoding ()
/* If we didn't find this code, complain. */
if (enc == last_encoding_code)
- warning (_("unrecogized encoding name `%s'"), enc_arg);
+ warning (_("unrecognized encoding name `%s'"), enc_arg);
else if (encoding_table[document_encoding_code].isotab == NULL)
warning (_("sorry, encoding `%s' not supported"), enc_arg);
@@ -434,7 +556,7 @@ cm_accent_generic_html (arg, start, end, html_supported, single,
char *html_solo;
{
static int valid_html_accent; /* yikes */
-
+
if (arg == START)
{ /* If HTML has good support for this character, use it. */
if (strchr (html_supported, curchar ()))
@@ -447,7 +569,7 @@ cm_accent_generic_html (arg, start, end, html_supported, single,
escape_html = saved_escape_html;
}
else
- {
+ {
valid_html_accent = 0;
if (html_solo_standalone)
{ /* No special HTML support, so produce standalone char. */
@@ -492,7 +614,7 @@ cm_accent_generic_no_headers (arg, start, end, single, html_solo)
buffer[0] = output_paragraph[end - 1];
buffer[1] = 0;
strcat (buffer, html_solo);
-
+
rc = cm_search_iso_map (buffer);
if (rc >= 0)
/* A little bit tricky ;-)
@@ -505,12 +627,12 @@ cm_accent_generic_no_headers (arg, start, end, single, html_solo)
else
{ /* If we didn't find a translation for this character,
put the single instead. E.g., &Xuml; does not exist so X&uml;
- should be produced. */
+ should be produced. */
warning (_("%s is an invalid ISO code, using %c"),
buffer, single);
add_char (single);
}
-
+
free (buffer);
}
}
@@ -579,7 +701,7 @@ cm_accent_generic (arg, start, end, html_supported, single,
else if (no_headers)
cm_accent_generic_no_headers (arg, start, end, single, html_solo);
else if (arg == END)
- {
+ {
if (enable_encoding)
/* use 8-bit if available */
cm_accent_generic_no_headers (arg, start, end, single, html_solo);
@@ -665,9 +787,9 @@ cm_special_char (arg)
else if (strcmp (command, "ae") == 0)
add_encoded_char ("aelig", command);
else if (strcmp (command, "OE") == 0)
- add_word ("&#140;", command);
+ add_encoded_char ("#140", command);
else if (strcmp (command, "oe") == 0)
- add_word ("&#156;", command);
+ add_encoded_char ("#156", command);
else if (strcmp (command, "AA") == 0)
add_encoded_char ("Aring", command);
else if (strcmp (command, "aa") == 0)
OpenPOWER on IntegriCloud