diff options
Diffstat (limited to 'contrib/texinfo/makeinfo/lang.c')
-rw-r--r-- | contrib/texinfo/makeinfo/lang.c | 158 |
1 files changed, 140 insertions, 18 deletions
diff --git a/contrib/texinfo/makeinfo/lang.c b/contrib/texinfo/makeinfo/lang.c index a9cbfe1..2938196 100644 --- a/contrib/texinfo/makeinfo/lang.c +++ b/contrib/texinfo/makeinfo/lang.c @@ -1,7 +1,7 @@ /* lang.c -- language-dependent support. - $Id: lang.c,v 1.5 2002/11/12 18:48:52 feloy Exp $ + $Id: lang.c,v 1.8 2003/05/01 00:05:27 karl Exp $ - Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,12 +31,12 @@ encoding_code_type document_encoding_code = no_encoding; /* Current language code; default is English. */ language_code_type language_code = en; -iso_map_type us_ascii_map [] = {{NULL, 0, 0}}; /* ASCII map is trivial */ +static iso_map_type us_ascii_map [] = {{NULL, 0, 0}}; /* ASCII map is trivial */ /* Translation table between HTML and ISO Codes. The last item is hopefully the Unicode. It might be possible that those Unicodes are not correct, cause I didn't check them. kama */ -iso_map_type iso8859_1_map [] = { +static iso_map_type iso8859_1_map [] = { { "nbsp", 0xA0, 0x00A0 }, { "iexcl", 0xA1, 0x00A1 }, { "cent", 0xA2, 0x00A2 }, @@ -86,7 +86,7 @@ iso_map_type iso8859_1_map [] = { { "Iacute", 0xCD, 0x00CD }, { "Icirc", 0xCE, 0x00CE }, { "Iuml", 0xCF, 0x00CF }, - { "ETH", 0xD0, 0x00D0 }, /* I don't know ;-( */ + { "ETH", 0xD0, 0x00D0 }, { "Ntilde", 0xD1, 0x00D1 }, { "Ograve", 0xD2, 0x00D2 }, { "Oacute", 0xD3, 0x00D3 }, @@ -137,11 +137,133 @@ iso_map_type iso8859_1_map [] = { { NULL, 0, 0 } }; + + +/* Date: Mon, 31 Mar 2003 00:19:28 +0200 + From: Wojciech Polak <polak@gnu.org> +... + * Primary Polish site for ogonki is http://www.agh.edu.pl/ogonki/, + but it's only in Polish language (it has some interesting links). 
+ + * A general site about ISO 8859-2 at http://nl.ijs.si/gnusl/cee/iso8859-2.html + + * ISO 8859-2 Character Set at http://nl.ijs.si/gnusl/cee/charset.html + This site provides almost all information about iso-8859-2, + including the character table!!! (must see!) + + * ISO 8859-2 and even HTML entities !!! (must see!) + http://people.ssh.fi/mtr/genscript/88592.txt + + * (minor) http://www.agh.edu.pl/ogonki/plchars.html + One more table, this time it includes even information about Polish + characters in Unicode. +*/ + +static iso_map_type iso8859_2_map [] = { + { "nbsp", 0xA0, 0x00A0 }, /* NO-BREAK SPACE */ + { "", 0xA1, 0x0104 }, /* LATIN CAPITAL LETTER A WITH OGONEK */ + { "", 0xA2, 0x02D8 }, /* BREVE */ + { "", 0xA3, 0x0141 }, /* LATIN CAPITAL LETTER L WITH STROKE */ + { "curren", 0xA4, 0x00A4 }, /* CURRENCY SIGN */ + { "", 0xA5, 0x013D }, /* LATIN CAPITAL LETTER L WITH CARON */ + { "", 0xA6, 0x015A }, /* LATIN CAPITAL LETTER S WITH ACUTE */ + { "sect", 0xA7, 0x00A7 }, /* SECTION SIGN */ + { "uml", 0xA8, 0x00A8 }, /* DIAERESIS */ + { "", 0xA9, 0x0160 }, /* LATIN CAPITAL LETTER S WITH CARON */ + { "", 0xAA, 0x015E }, /* LATIN CAPITAL LETTER S WITH CEDILLA */ + { "", 0xAB, 0x0164 }, /* LATIN CAPITAL LETTER T WITH CARON */ + { "", 0xAC, 0x0179 }, /* LATIN CAPITAL LETTER Z WITH ACUTE */ + { "shy", 0xAD, 0x00AD }, /* SOFT HYPHEN */ + { "", 0xAE, 0x017D }, /* LATIN CAPITAL LETTER Z WITH CARON */ + { "", 0xAF, 0x017B }, /* LATIN CAPITAL LETTER Z WITH DOT ABOVE */ + { "deg", 0xB0, 0x00B0 }, /* DEGREE SIGN */ + { "", 0xB1, 0x0105 }, /* LATIN SMALL LETTER A WITH OGONEK */ + { "", 0xB2, 0x02DB }, /* OGONEK */ + { "", 0xB3, 0x0142 }, /* LATIN SMALL LETTER L WITH STROKE */ + { "acute", 0xB4, 0x00B4 }, /* ACUTE ACCENT */ + { "", 0xB5, 0x013E }, /* LATIN SMALL LETTER L WITH CARON */ + { "", 0xB6, 0x015B }, /* LATIN SMALL LETTER S WITH ACUTE */ + { "", 0xB7, 0x02C7 }, /* CARON (Mandarin Chinese third tone) */ + { "cedil", 0xB8, 0x00B8 }, /* CEDILLA */ + { "", 0xB9, 0x0161 }, 
/* LATIN SMALL LETTER S WITH CARON */ + { "", 0xBA, 0x015F }, /* LATIN SMALL LETTER S WITH CEDILLA */ + { "", 0xBB, 0x0165 }, /* LATIN SMALL LETTER T WITH CARON */ + { "", 0xBC, 0x017A }, /* LATIN SMALL LETTER Z WITH ACUTE */ + { "", 0xBD, 0x02DD }, /* DOUBLE ACUTE ACCENT */ + { "", 0xBE, 0x017E }, /* LATIN SMALL LETTER Z WITH CARON */ + { "", 0xBF, 0x017C }, /* LATIN SMALL LETTER Z WITH DOT ABOVE */ + { "", 0xC0, 0x0154 }, /* LATIN CAPITAL LETTER R WITH ACUTE */ + { "", 0xC1, 0x00C1 }, /* LATIN CAPITAL LETTER A WITH ACUTE */ + { "", 0xC2, 0x00C2 }, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ + { "", 0xC3, 0x0102 }, /* LATIN CAPITAL LETTER A WITH BREVE */ + { "", 0xC4, 0x00C4 }, /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + { "", 0xC5, 0x0139 }, /* LATIN CAPITAL LETTER L WITH ACUTE */ + { "", 0xC6, 0x0106 }, /* LATIN CAPITAL LETTER C WITH ACUTE */ + { "", 0xC7, 0x00C7 }, /* LATIN CAPITAL LETTER C WITH CEDILLA */ + { "", 0xC8, 0x010C }, /* LATIN CAPITAL LETTER C WITH CARON */ + { "", 0xC9, 0x00C9 }, /* LATIN CAPITAL LETTER E WITH ACUTE */ + { "", 0xCA, 0x0118 }, /* LATIN CAPITAL LETTER E WITH OGONEK */ + { "", 0xCB, 0x00CB }, /* LATIN CAPITAL LETTER E WITH DIAERESIS */ + { "", 0xCC, 0x011A }, /* LATIN CAPITAL LETTER E WITH CARON */ + { "", 0xCD, 0x00CD }, /* LATIN CAPITAL LETTER I WITH ACUTE */ + { "", 0xCE, 0x00CE }, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */ + { "", 0xCF, 0x010E }, /* LATIN CAPITAL LETTER D WITH CARON */ + { "", 0xD0, 0x0110 }, /* LATIN CAPITAL LETTER D WITH STROKE */ + { "", 0xD1, 0x0143 }, /* LATIN CAPITAL LETTER N WITH ACUTE */ + { "", 0xD2, 0x0147 }, /* LATIN CAPITAL LETTER N WITH CARON */ + { "", 0xD3, 0x00D3 }, /* LATIN CAPITAL LETTER O WITH ACUTE */ + { "", 0xD4, 0x00D4 }, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ + { "", 0xD5, 0x0150 }, /* LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */ + { "", 0xD6, 0x00D6 }, /* LATIN CAPITAL LETTER O WITH DIAERESIS */ + { "times", 0xD7, 0x00D7 }, /* MULTIPLICATION SIGN */ + { "", 0xD8, 0x0158 }, /* 
LATIN CAPITAL LETTER R WITH CARON */ + { "", 0xD9, 0x016E }, /* LATIN CAPITAL LETTER U WITH RING ABOVE */ + { "", 0xDA, 0x00DA }, /* LATIN CAPITAL LETTER U WITH ACUTE */ + { "", 0xDB, 0x0170 }, /* LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */ + { "", 0xDC, 0x00DC }, /* LATIN CAPITAL LETTER U WITH DIAERESIS */ + { "", 0xDD, 0x00DD }, /* LATIN CAPITAL LETTER Y WITH ACUTE */ + { "", 0xDE, 0x0162 }, /* LATIN CAPITAL LETTER T WITH CEDILLA */ + { "", 0xDF, 0x00DF }, /* LATIN SMALL LETTER SHARP S (German) */ + { "", 0xE0, 0x0155 }, /* LATIN SMALL LETTER R WITH ACUTE */ + { "", 0xE1, 0x00E1 }, /* LATIN SMALL LETTER A WITH ACUTE */ + { "", 0xE2, 0x00E2 }, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */ + { "", 0xE3, 0x0103 }, /* LATIN SMALL LETTER A WITH BREVE */ + { "", 0xE4, 0x00E4 }, /* LATIN SMALL LETTER A WITH DIAERESIS */ + { "", 0xE5, 0x013A }, /* LATIN SMALL LETTER L WITH ACUTE */ + { "", 0xE6, 0x0107 }, /* LATIN SMALL LETTER C WITH ACUTE */ + { "", 0xE7, 0x00E7 }, /* LATIN SMALL LETTER C WITH CEDILLA */ + { "", 0xE8, 0x010D }, /* LATIN SMALL LETTER C WITH CARON */ + { "", 0xE9, 0x00E9 }, /* LATIN SMALL LETTER E WITH ACUTE */ + { "", 0xEA, 0x0119 }, /* LATIN SMALL LETTER E WITH OGONEK */ + { "", 0xEB, 0x00EB }, /* LATIN SMALL LETTER E WITH DIAERESIS */ + { "", 0xEC, 0x011B }, /* LATIN SMALL LETTER E WITH CARON */ + { "", 0xED, 0x00ED }, /* LATIN SMALL LETTER I WITH ACUTE */ + { "", 0xEE, 0x00EE }, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */ + { "", 0xEF, 0x010F }, /* LATIN SMALL LETTER D WITH CARON */ + { "", 0xF0, 0x0111 }, /* LATIN SMALL LETTER D WITH STROKE */ + { "", 0xF1, 0x0144 }, /* LATIN SMALL LETTER N WITH ACUTE */ + { "", 0xF2, 0x0148 }, /* LATIN SMALL LETTER N WITH CARON */ + { "", 0xF3, 0x00F3 }, /* LATIN SMALL LETTER O WITH ACUTE */ + { "", 0xF4, 0x00F4 }, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */ + { "", 0xF5, 0x0151 }, /* LATIN SMALL LETTER O WITH DOUBLE ACUTE */ + { "", 0xF6, 0x00F6 }, /* LATIN SMALL LETTER O WITH DIAERESIS */ + { "divide", 0xF7, 0x00F7 }, 
/* DIVISION SIGN */ + { "", 0xF8, 0x0159 }, /* LATIN SMALL LETTER R WITH CARON */ + { "", 0xF9, 0x016F }, /* LATIN SMALL LETTER U WITH RING ABOVE */ + { "", 0xFA, 0x00FA }, /* LATIN SMALL LETTER U WITH ACUTE */ + { "", 0xFB, 0x0171 }, /* LATIN SMALL LETTER U WITH DOUBLE ACUTE */ + { "", 0xFC, 0x00FC }, /* LATIN SMALL LETTER U WITH DIAERESIS */ + { "", 0xFD, 0x00FD }, /* LATIN SMALL LETTER Y WITH ACUTE */ + { "", 0xFE, 0x0163 }, /* LATIN SMALL LETTER T WITH CEDILLA */ + { "", 0xFF, 0x02D9 }, /* DOT ABOVE (Mandarin Chinese light tone) */ + { NULL, 0, 0 } +}; + encoding_type encoding_table[] = { { no_encoding, "(no encoding)", NULL }, { US_ASCII, "US-ASCII", us_ascii_map }, { ISO_8859_1, "ISO-8859-1", (iso_map_type *) iso8859_1_map }, - { ISO_8859_2, "ISO-8859-2", NULL }, + { ISO_8859_2, "ISO-8859-2", (iso_map_type *) iso8859_2_map }, { ISO_8859_3, "ISO-8859-3", NULL }, { ISO_8859_4, "ISO-8859-4", NULL }, { ISO_8859_5, "ISO-8859-5", NULL }, @@ -310,7 +432,7 @@ void cm_documentlanguage () { language_code_type c; - char *lang_arg; + char *lang_arg; /* Read the line with the language code on it. */ get_rest_of_line (0, &lang_arg); @@ -365,13 +487,13 @@ cm_documentencoding () { encoding_code_type enc; char *enc_arg; - + get_rest_of_line (1, &enc_arg); /* See if we have this encoding. */ for (enc = no_encoding+1; enc != last_encoding_code; enc++) { - if (strcasecmp (enc_arg, encoding_table[enc].ecname) == 0) + if (strcasecmp (enc_arg, encoding_table[enc].encname) == 0) { document_encoding_code = enc; break; @@ -380,7 +502,7 @@ cm_documentencoding () /* If we didn't find this code, complain. 
*/ if (enc == last_encoding_code) - warning (_("unrecogized encoding name `%s'"), enc_arg); + warning (_("unrecognized encoding name `%s'"), enc_arg); else if (encoding_table[document_encoding_code].isotab == NULL) warning (_("sorry, encoding `%s' not supported"), enc_arg); @@ -434,7 +556,7 @@ cm_accent_generic_html (arg, start, end, html_supported, single, char *html_solo; { static int valid_html_accent; /* yikes */ - + if (arg == START) { /* If HTML has good support for this character, use it. */ if (strchr (html_supported, curchar ())) @@ -447,7 +569,7 @@ cm_accent_generic_html (arg, start, end, html_supported, single, escape_html = saved_escape_html; } else - { + { valid_html_accent = 0; if (html_solo_standalone) { /* No special HTML support, so produce standalone char. */ @@ -492,7 +614,7 @@ cm_accent_generic_no_headers (arg, start, end, single, html_solo) buffer[0] = output_paragraph[end - 1]; buffer[1] = 0; strcat (buffer, html_solo); - + rc = cm_search_iso_map (buffer); if (rc >= 0) /* A little bit tricky ;-) @@ -505,12 +627,12 @@ cm_accent_generic_no_headers (arg, start, end, single, html_solo) else { /* If we didn't find a translation for this character, put the single instead. E.g., &Xuml; does not exist so X¨ - should be produced. */ + should be produced. 
*/ warning (_("%s is an invalid ISO code, using %c"), buffer, single); add_char (single); } - + free (buffer); } } @@ -579,7 +701,7 @@ cm_accent_generic (arg, start, end, html_supported, single, else if (no_headers) cm_accent_generic_no_headers (arg, start, end, single, html_solo); else if (arg == END) - { + { if (enable_encoding) /* use 8-bit if available */ cm_accent_generic_no_headers (arg, start, end, single, html_solo); @@ -665,9 +787,9 @@ cm_special_char (arg) else if (strcmp (command, "ae") == 0) add_encoded_char ("aelig", command); else if (strcmp (command, "OE") == 0) - add_word ("&#140;", command); + add_encoded_char ("#140", command); else if (strcmp (command, "oe") == 0) - add_word ("&#156;", command); + add_encoded_char ("#156", command); else if (strcmp (command, "AA") == 0) add_encoded_char ("Aring", command); else if (strcmp (command, "aa") == 0) |