1 files changed, 293 insertions, 100 deletions
diff --git a/contrib/texinfo/makeinfo/lang.c b/contrib/texinfo/makeinfo/lang.c
index 2938196..c72e8db 100644
--- a/contrib/texinfo/makeinfo/lang.c
+++ b/contrib/texinfo/makeinfo/lang.c
@@ -1,7 +1,8 @@
 /* lang.c -- language-dependent support.
-   $Id: lang.c,v 1.8 2003/05/01 00:05:27 karl Exp $
+   $Id: lang.c,v 1.14 2004/11/22 23:57:33 karl Exp $
 
-   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+   Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -21,6 +22,7 @@
 
 #include "system.h"
 #include "cmds.h"
+#include "files.h"
 #include "lang.h"
 #include "makeinfo.h"
 #include "xml.h"
@@ -31,6 +33,9 @@ encoding_code_type document_encoding_code = no_encoding;
 /* Current language code; default is English.  */
 language_code_type language_code = en;
 
+/* Last unrecognized @documentencoding name; NULL (reported as "") if none.  */
+char *unknown_encoding = NULL;
+
 static iso_map_type us_ascii_map [] = {{NULL, 0, 0}}; /* ASCII map is trivial */
 
 /* Translation table between HTML and ISO Codes.  The last item is
@@ -137,6 +142,126 @@ static iso_map_type iso8859_1_map [] = {
   { NULL, 0, 0 }
 };
 
+
+/* ISO 8859-15, also known as Latin 9, differs from Latin 1 in only a
+   few positions.  http://www.cs.tut.fi/~jkorpela/latin9.html has a good
+   explanation and listing, summarized here.  The names are abbreviated
+   from the official Unicode names, to fit in a decent line length.
+
+  code position
+  dec	oct   hex   latin1 latin1 name	      latin9 latin9 name
+
+  164  0244  0xA4   U+00A4 currency symbol    U+20AC euro sign
+  166  0246  0xA6   U+00A6 broken bar	      U+0160 S with caron
+  168  0250  0xA8   U+00A8 diaeresis	      U+0161 s with caron
+  180  0264  0xB4   U+00B4 acute accent	      U+017D Z with caron
+  184  0270  0xB8   U+00B8 cedilla	      U+017E z with caron
+  188  0274  0xBC   U+00BC fraction 1/4	      U+0152 ligature OE
+  189  0275  0xBD   U+00BD fraction 1/2	      U+0153 ligature oe
+  190  0276  0xBE   U+00BE fraction 3/4	      U+0178 Y with diaeresis
+*/
+
+static iso_map_type iso8859_15_map [] = {
+  { "nbsp",   0xA0, 0x00A0 },
+  { "iexcl",  0xA1, 0x00A1 },
+  { "cent",   0xA2, 0x00A2 },
+  { "pound",  0xA3, 0x00A3 },
+  { "euro",   0xA4, 0x20AC },
+  { "yen",    0xA5, 0x00A5 },
+  { "Scaron", 0xA6, 0x0160 },
+  { "sect",   0xA7, 0x00A7 },
+  { "scaron", 0xA8, 0x0161 },
+  { "copy",   0xA9, 0x00A9 },
+  { "ordf",   0xAA, 0x00AA },
+  { "laquo",  0xAB, 0x00AB },
+  { "not",    0xAC, 0x00AC },
+  { "shy",    0xAD, 0x00AD },
+  { "reg",    0xAE, 0x00AE },
+  { "hibar",  0xAF, 0x00AF },
+  { "deg",    0xB0, 0x00B0 },
+  { "plusmn", 0xB1, 0x00B1 },
+  { "sup2",   0xB2, 0x00B2 },
+  { "sup3",   0xB3, 0x00B3 },
+  { "Zcaron", 0xB4, 0x017D },
+  { "micro",  0xB5, 0x00B5 },
+  { "para",   0xB6, 0x00B6 },
+  { "middot", 0xB7, 0x00B7 },
+  { "zcaron", 0xB8, 0x017E },
+  { "sup1",   0xB9, 0x00B9 },
+  { "ordm",   0xBA, 0x00BA },
+  { "raquo",  0xBB, 0x00BB },
+  { "OElig",  0xBC, 0x0152 },
+  { "oelig",  0xBD, 0x0153 },
+  { "Yuml",   0xBE, 0x0178 },
+  { "iquest", 0xBF, 0x00BF },
+  { "Agrave", 0xC0, 0x00C0 },
+  { "Aacute", 0xC1, 0x00C1 },
+  { "Acirc",  0xC2, 0x00C2 },
+  { "Atilde", 0xC3, 0x00C3 },
+  { "Auml",   0xC4, 0x00C4 },
+  { "Aring",  0xC5, 0x00C5 },
+  { "AElig",  0xC6, 0x00C6 },
+  { "Ccedil", 0xC7, 0x00C7 },
+  { "Ccedil", 0xC7, 0x00C7 },
+  { "Egrave", 0xC8, 0x00C8 },
+  { "Eacute", 0xC9, 0x00C9 },
+  { "Ecirc",  0xCA, 0x00CA },
+  { "Euml",   0xCB, 0x00CB },
+  { "Igrave", 0xCC, 0x00CC },
+  { "Iacute", 0xCD, 0x00CD },
+  { "Icirc",  0xCE, 0x00CE },
+  { "Iuml",   0xCF, 0x00CF },
+  { "ETH",    0xD0, 0x00D0 },
+  { "Ntilde", 0xD1, 0x00D1 },
+  { "Ograve", 0xD2, 0x00D2 },
+  { "Oacute", 0xD3, 0x00D3 },
+  { "Ocirc",  0xD4, 0x00D4 },
+  { "Otilde", 0xD5, 0x00D5 },
+  { "Ouml",   0xD6, 0x00D6 },
+  { "times",  0xD7, 0x00D7 },
+  { "Oslash", 0xD8, 0x00D8 },
+  { "Ugrave", 0xD9, 0x00D9 },
+  { "Uacute", 0xDA, 0x00DA },
+  { "Ucirc",  0xDB, 0x00DB },
+  { "Uuml",   0xDC, 0x00DC },
+  { "Yacute", 0xDD, 0x00DD },
+  { "THORN",  0xDE, 0x00DE },
+  { "szlig",  0xDF, 0x00DF },
+  { "agrave", 0xE0, 0x00E0 },
+  { "aacute", 0xE1, 0x00E1 },
+  { "acirc",  0xE2, 0x00E2 },
+  { "atilde", 0xE3, 0x00E3 },
+  { "auml",   0xE4, 0x00E4 },
+  { "aring",  0xE5, 0x00E5 },
+  { "aelig",  0xE6, 0x00E6 },
+  { "ccedil", 0xE7, 0x00E7 },
+  { "egrave", 0xE8, 0x00E8 },
+  { "eacute", 0xE9, 0x00E9 },
+  { "ecirc",  0xEA, 0x00EA },
+  { "euml",   0xEB, 0x00EB },
+  { "igrave", 0xEC, 0x00EC },
+  { "iacute", 0xED, 0x00ED },
+  { "icirc",  0xEE, 0x00EE },
+  { "iuml",   0xEF, 0x00EF },
+  { "eth",    0xF0, 0x00F0 },
+  { "ntilde", 0xF1, 0x00F1 },
+  { "ograve", 0xF2, 0x00F2 },
+  { "oacute", 0xF3, 0x00F3 },
+  { "ocirc",  0xF4, 0x00F4 },
+  { "otilde", 0xF5, 0x00F5 },
+  { "ouml",   0xF6, 0x00F6 },
+  { "divide", 0xF7, 0x00F7 },
+  { "oslash", 0xF8, 0x00F8 },
+  { "ugrave", 0xF9, 0x00F9 },
+  { "uacute", 0xFA, 0x00FA },
+  { "ucirc",  0xFB, 0x00FB },
+  { "uuml",   0xFC, 0x00FC },
+  { "yacute", 0xFD, 0x00FD },
+  { "thorn",  0xFE, 0x00FE },
+  { "yuml",   0xFF, 0x00FF },
+  { NULL, 0, 0 }
+};
+
 
 
 /* Date: Mon, 31 Mar 2003 00:19:28 +0200
@@ -262,21 +387,21 @@ static iso_map_type iso8859_2_map [] = {
 encoding_type encoding_table[] = {
   { no_encoding, "(no encoding)", NULL },
   { US_ASCII,    "US-ASCII",    us_ascii_map },
-  { ISO_8859_1,  "ISO-8859-1",  (iso_map_type *) iso8859_1_map },
-  { ISO_8859_2,  "ISO-8859-2",  (iso_map_type *) iso8859_2_map },
-  { ISO_8859_3,  "ISO-8859-3",  NULL },
-  { ISO_8859_4,  "ISO-8859-4",  NULL },
-  { ISO_8859_5,  "ISO-8859-5",  NULL },
-  { ISO_8859_6,  "ISO-8859-6",  NULL },
-  { ISO_8859_7,  "ISO-8859-7",  NULL },
-  { ISO_8859_8,  "ISO-8859-8",  NULL },
-  { ISO_8859_9,  "ISO-8859-9",  NULL },
-  { ISO_8859_10, "ISO-8859-10", NULL },
-  { ISO_8859_11, "ISO-8859-11", NULL },
-  { ISO_8859_12, "ISO-8859-12", NULL },
-  { ISO_8859_13, "ISO-8859-13", NULL },
-  { ISO_8859_14, "ISO-8859-14", NULL },
-  { ISO_8859_15, "ISO-8859-15", NULL },
+  { ISO_8859_1,  "iso-8859-1",  (iso_map_type *) iso8859_1_map },
+  { ISO_8859_2,  "iso-8859-2",  (iso_map_type *) iso8859_2_map },
+  { ISO_8859_3,  "iso-8859-3",  NULL },
+  { ISO_8859_4,  "iso-8859-4",  NULL },
+  { ISO_8859_5,  "iso-8859-5",  NULL },
+  { ISO_8859_6,  "iso-8859-6",  NULL },
+  { ISO_8859_7,  "iso-8859-7",  NULL },
+  { ISO_8859_8,  "iso-8859-8",  NULL },
+  { ISO_8859_9,  "iso-8859-9",  NULL },
+  { ISO_8859_10, "iso-8859-10", NULL },
+  { ISO_8859_11, "iso-8859-11", NULL },
+  { ISO_8859_12, "iso-8859-12", NULL },
+  { ISO_8859_13, "iso-8859-13", NULL },
+  { ISO_8859_14, "iso-8859-14", NULL },
+  { ISO_8859_15, "iso-8859-15", (iso_map_type *) iso8859_15_map },
   { last_encoding_code, NULL, NULL }
 };
 
@@ -423,13 +548,16 @@ language_type language_table[] = {
   { zu, "zu", "Zulu" },
   { last_language_code, NULL, NULL }
 };
-
-
 
 /* @documentlanguage.  Maybe we'll do something useful with this in the
    future.  For now, we just recognize it.  */
+
+/* XML documents can make use of this data.  Unfortunately, it clashes with
+   the structure currently used.  So instead of enclosing content into
+   a language block, we just output an empty element.  Anyway, a stream-based
+   parser can make good use of it.  */
 void
-cm_documentlanguage ()
+cm_documentlanguage (void)
 {
   language_code_type c;
   char *lang_arg;
@@ -451,6 +579,12 @@ cm_documentlanguage ()
   if (c == last_language_code)
     warning (_("%s is not a valid ISO 639 language code"), lang_arg);
 
+  if (xml && !docbook)
+    {
+      xml_insert_element_with_attribute (DOCUMENTLANGUAGE, START, "xml:lang=\"%s\"", lang_arg);
+      xml_insert_element (DOCUMENTLANGUAGE, END);
+    }
+
   free (lang_arg);
 }
 
@@ -460,8 +594,7 @@ cm_documentlanguage ()
    its equivalent.  */
 
 static int
-cm_search_iso_map (html)
-      char *html;
+cm_search_iso_map (char *html)
 {
   int i;
   iso_map_type *iso = encoding_table[document_encoding_code].isotab;
@@ -483,43 +616,88 @@ cm_search_iso_map (html)
 /* @documentencoding.  Set the translation table.  */
 
 void
-cm_documentencoding ()
+cm_documentencoding (void)
 {
-  encoding_code_type enc;
-  char *enc_arg;
-
-  get_rest_of_line (1, &enc_arg);
-
-  /* See if we have this encoding.  */
-  for (enc = no_encoding+1; enc != last_encoding_code; enc++)
+  if (!handling_delayed_writes)
     {
-      if (strcasecmp (enc_arg, encoding_table[enc].encname) == 0)
+      encoding_code_type enc;
+      char *enc_arg;
+
+      /* This is ugly and probably needs to apply to other commands'
+         argument parsing as well.  When we're doing @documentencoding,
+         we're generally in the frontmatter of the document, and so the
+         expansion in html/xml/docbook would generally be the empty string.
+         (Because those modes wait until the first normal text of the
+         document to start outputting.)  The result would thus be a warning
+         "unrecognized encoding name `'".  Sigh.  */
+      int save_html = html;
+      int save_xml = xml;
+
+      html = 0;
+      xml = 0;
+      get_rest_of_line (1, &enc_arg);
+      html = save_html;
+      xml = save_xml;
+
+      /* See if we have this encoding.  */
+      for (enc = no_encoding+1; enc != last_encoding_code; enc++)
         {
-          document_encoding_code = enc;
-          break;
+          if (strcasecmp (enc_arg, encoding_table[enc].encname) == 0)
+            {
+              document_encoding_code = enc;
+              break;
+            }
+        }
+
+      /* If we didn't find this code, complain.  */
+      if (enc == last_encoding_code)
+        {
+          warning (_("unrecognized encoding name `%s'"), enc_arg);
+          /* Let the previous one go.  */
+          if (unknown_encoding && *unknown_encoding)
+            free (unknown_encoding);
+          unknown_encoding = xstrdup (enc_arg);
         }
+
+      else if (encoding_table[document_encoding_code].isotab == NULL)
+        warning (_("sorry, encoding `%s' not supported"), enc_arg);
+
+      free (enc_arg);
     }
+  else if (xml)
+    {
+      char *encoding = current_document_encoding ();
 
-  /* If we didn't find this code, complain.  */
-  if (enc == last_encoding_code)
-    warning (_("unrecognized encoding name `%s'"), enc_arg);
+      if (encoding && *encoding)
+        {
+          insert_string (" encoding=\"");
+          insert_string (encoding);
+          insert_string ("\"");
+        }
 
-  else if (encoding_table[document_encoding_code].isotab == NULL)
-    warning (_("sorry, encoding `%s' not supported"), enc_arg);
+      free (encoding);
+    }
+}
 
-  free (enc_arg);
+char *
+current_document_encoding (void)
+{
+  if (document_encoding_code != no_encoding)
+    return xstrdup (encoding_table[document_encoding_code].encname);
+  else if (unknown_encoding && *unknown_encoding)
+    return xstrdup (unknown_encoding);
+  else
+    return xstrdup ("");
 }
 
 
-/* If html or xml output, add HTML_STR to the output.  If not html and
+/* If html or xml output, add &HTML_STR; to the output.  If not html and
    the user requested encoded output, add the real 8-bit character
    corresponding to HTML_STR from the translation tables.  Otherwise,
    add INFO_STR.  */
 
-void
-add_encoded_char (html_str, info_str)
-      char *html_str;
-      char *info_str;
+static void
+add_encoded_char (char *html_str, char *info_str)
 {
   if (html)
     add_word_args ("&%s;", html_str);
@@ -547,13 +725,8 @@ add_encoded_char (html_str, info_str)
 /* Output an accent for HTML or XML. */
 
 static void
-cm_accent_generic_html (arg, start, end, html_supported, single,
-                        html_solo_standalone, html_solo)
-     int arg, start, end;
-     char *html_supported;
-     int single;
-     int html_solo_standalone;
-     char *html_solo;
+cm_accent_generic_html (int arg, int start, int end, char *html_supported,
+    int single, int html_solo_standalone, char *html_solo)
 {
   static int valid_html_accent; /* yikes */
 
@@ -569,20 +742,39 @@ cm_accent_generic_html (arg, start, end, html_supported, single,
 	  escape_html = saved_escape_html;
         }
       else
-        {
-          valid_html_accent = 0;
-          if (html_solo_standalone)
-            { /* No special HTML support, so produce standalone char.  */
-	      if (xml)
-		xml_insert_entity (html_solo);
+        { /* @dotless{i} is not listed in html_supported but HTML entities
+	     starting with `i' can be used, such as &icirc;.  */
+	  int save_input_text_offset = input_text_offset;
+	  char *accent_contents;
+
+	  get_until_in_braces ("\n", &accent_contents);
+	  canon_white (accent_contents);
+
+	  if (strstr (accent_contents, "@dotless{i"))
+	    {
+	      add_word_args ("&%c", accent_contents[9]);
+	      valid_html_accent = 1;
+	    }
+	  else
+	    {
+	      /* Search for @dotless{} wasn't successful, so rewind.  */
+	      input_text_offset = save_input_text_offset;
+	      valid_html_accent = 0;
+	      if (html_solo_standalone)
+		{ /* No special HTML support, so produce standalone char.  */
+		  if (xml)
+		    xml_insert_entity (html_solo);
+		  else
+		    add_word_args ("&%s;", html_solo);
+		}
 	      else
-		add_word_args ("&%s;", html_solo);
-            }
-          else
-            /* If the html_solo does not exist as standalone character
-               (namely &circ; &grave; &tilde;), then we use
-               the single character version instead.  */
-            add_char (single);
+		/* If the html_solo does not exist as standalone character
+		   (namely &circ; &grave; &tilde;), then we use
+		   the single character version instead.  */
+		add_char (single);
+	    }
+
+	  free (accent_contents);
         }
     }
   else if (arg == END)
@@ -598,10 +790,8 @@ cm_accent_generic_html (arg, start, end, html_supported, single,
 
 
 static void
-cm_accent_generic_no_headers (arg, start, end, single, html_solo)
-     int arg, start, end;
-     int single;
-     char *html_solo;
+cm_accent_generic_no_headers (int arg, int start, int end, int single,
+    char *html_solo)
 {
   if (arg == END)
     {
@@ -628,8 +818,11 @@ cm_accent_generic_no_headers (arg, start, end, single, html_solo)
             { /* If we didn't find a translation for this character,
                  put the single instead. E.g., &Xuml; does not exist so X&uml;
                  should be produced. */
-              warning (_("%s is an invalid ISO code, using %c"),
-                       buffer, single);
+              /* When the below warning is issued, an author has nothing
+                 wrong in their document, let alone anything ``fixable''
+                 on their side.  So it is commented out for now.  */
+              /* warning (_("%s is an invalid ISO code, using %c"),
+                       buffer, single); */
               add_char (single);
             }
 
@@ -644,8 +837,7 @@ cm_accent_generic_no_headers (arg, start, end, single, html_solo)
    special HTML support.  */
 
 void
-cm_accent (arg)
-    int arg;
+cm_accent (int arg)
 {
   int old_escape_html = escape_html;
   escape_html = 0;
@@ -687,14 +879,14 @@ cm_accent (arg)
    exists as valid standalone character in HTML, e.g., &uml;.  */
 
 static void
-cm_accent_generic (arg, start, end, html_supported, single,
-                   html_solo_standalone, html_solo)
-     int arg, start, end;
-     char *html_supported;
-     int single;
-     int html_solo_standalone;
-     char *html_solo;
+cm_accent_generic (int arg, int start, int end, char *html_supported,
+    int single, int html_solo_standalone, char *html_solo)
 {
+  /* Accentuating space characters makes no sense, so issue a warning.  */
+  if (arg == START && isspace (input_text[input_text_offset]))
+    warning ("Accent command `@%s' must not be followed by whitespace",
+        command);
+
   if (html || xml)
     cm_accent_generic_html (arg, start, end, html_supported,
                             single, html_solo_standalone, html_solo);
@@ -712,43 +904,37 @@ cm_accent_generic (arg, start, end, html_supported, single,
 }
 
 void
-cm_accent_umlaut (arg, start, end)
-     int arg, start, end;
+cm_accent_umlaut (int arg, int start, int end)
 {
   cm_accent_generic (arg, start, end, "aouAOUEeIiy", '"', 1, "uml");
 }
 
 void
-cm_accent_acute (arg, start, end)
-     int arg, start, end;
+cm_accent_acute (int arg, int start, int end)
 {
   cm_accent_generic (arg, start, end, "AEIOUYaeiouy", '\'', 1, "acute");
 }
 
 void
-cm_accent_cedilla (arg, start, end)
-     int arg, start, end;
+cm_accent_cedilla (int arg, int start, int end)
 {
   cm_accent_generic (arg, start, end, "Cc", ',', 1, "cedil");
 }
 
 void
-cm_accent_hat (arg, start, end)
-     int arg, start, end;
+cm_accent_hat (int arg, int start, int end)
 {
   cm_accent_generic (arg, start, end, "AEIOUaeiou", '^', 0, "circ");
 }
 
 void
-cm_accent_grave (arg, start, end)
-     int arg, start, end;
+cm_accent_grave (int arg, int start, int end)
 {
   cm_accent_generic (arg, start, end, "AEIOUaeiou", '`', 0, "grave");
 }
 
 void
-cm_accent_tilde (arg, start, end)
-     int arg, start, end;
+cm_accent_tilde (int arg, int start, int end)
 {
   cm_accent_generic (arg, start, end, "ANOano", '~', 0, "tilde");
 }
@@ -757,7 +943,7 @@ cm_accent_tilde (arg, start, end)
 
 /* Non-English letters/characters that don't insert themselves.  */
 void
-cm_special_char (arg)
+cm_special_char (int arg)
 {
   int old_escape_html = escape_html;
   escape_html = 0;
@@ -769,27 +955,35 @@ cm_special_char (arg)
           && command[1] == 0)
         { /* Lslash lslash Oslash oslash.
              Lslash and lslash aren't supported in HTML.  */
-          if ((html || xml) && command[0] == 'O')
+          if (command[0] == 'O')
             add_encoded_char ("Oslash", "/O");
-          else if ((html || xml) && command[0] == 'o')
+          else if (command[0] == 'o')
             add_encoded_char ("oslash", "/o");
           else
             add_word_args ("/%c", command[0]);
         }
       else if (strcmp (command, "exclamdown") == 0)
         add_encoded_char ("iexcl", "!");
-      else if (strcmp (command, "pounds") == 0)
-        add_encoded_char ("pound" , "#");
       else if (strcmp (command, "questiondown") == 0)
         add_encoded_char ("iquest", "?");
+      else if (strcmp (command, "euro") == 0)
+        /* http://www.cs.tut.fi/~jkorpela/html/euro.html suggests that
+           &euro; degrades best in old browsers.  */
+        add_encoded_char ("euro", "Euro ");
+      else if (strcmp (command, "pounds") == 0)
+        add_encoded_char ("pound" , "#");
+      else if (strcmp (command, "ordf") == 0)
+        add_encoded_char ("ordf" , "a");
+      else if (strcmp (command, "ordm") == 0)
+        add_encoded_char ("ordm" , "o");
       else if (strcmp (command, "AE") == 0)
         add_encoded_char ("AElig", command);
       else if (strcmp (command, "ae") == 0)
         add_encoded_char ("aelig",  command);
       else if (strcmp (command, "OE") == 0)
-        add_encoded_char ("#140", command);
+        add_encoded_char ("OElig", command);
       else if (strcmp (command, "oe") == 0)
-        add_encoded_char ("#156", command);
+        add_encoded_char ("oelig", command);
       else if (strcmp (command, "AA") == 0)
         add_encoded_char ("Aring", command);
       else if (strcmp (command, "aa") == 0)
@@ -804,8 +998,7 @@ cm_special_char (arg)
 
 /* Dotless i or j.  */
 void
-cm_dotless (arg, start, end)
-    int arg, start, end;
+cm_dotless (int arg, int start, int end)
 {
   if (arg == END)
     {