/* index.c -- indexing for Texinfo.
   $Id: index.c,v 1.8 2003/05/16 23:52:40 karl Exp $

   Copyright (C) 1998, 1999, 2002, 2003 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* Each preprocessor directive must sit on its own line; anything that
   follows a directive on the same line is not compiled.  */
#include "system.h"
#include "index.h"
#include "lang.h"
#include "macro.h"
#include "toc.h"
#include "xml.h"

/* An index element: one entry recorded by an index command.  The
   elements of one index form a singly linked list through NEXT.  */
typedef struct index_elt
{
  struct index_elt *next;       /* Next element of the same index.  */
  char *entry;                  /* The index entry itself, after expansion. */
  char *entry_text;             /* The original, non-expanded entry text. */
  char *node;                   /* The node from whence it came. */
  int code;                     /* Nonzero means add `@code{...}' when
                                   printing this element. */
  int defining_line;            /* Line number where this entry was written. */
  char *defining_file;          /* Source file for defining_line. */
} INDEX_ELT;

/* A list of short-names for each index.

   There are two indices into the the_indices array.

   * read_index is the index that points to the list of index
     entries that we will find if we ask for the list of entries for
     this name.

   * write_index is the index that points to the list of index entries
     that we will add new entries to.

   Initially, read_index and write_index are the same, but the
   @syncodeindex and @synindex commands can change the list we add
   entries to.
For example, after the commands @cindex foo @defindex ii @synindex cp ii @cindex bar the cp index will contain the entry `foo', and the new ii index will contain the entry `bar'. This is consistent with the way texinfo.tex handles the same situation. In addition, for each index, it is remembered whether that index is a code index or not. Code indices have @code{} inserted around the first word when they are printed with printindex. */ typedef struct { char *name; int read_index; /* index entries for `name' */ int write_index; /* store index entries here, @synindex can change it */ int code; } INDEX_ALIST; INDEX_ALIST **name_index_alist = NULL; /* An array of pointers. Each one is for a different index. The "synindex" command changes which array slot is pointed to by a given "index". */ INDEX_ELT **the_indices = NULL; /* The number of defined indices. */ int defined_indices = 0; /* Stuff for defining commands on the fly. */ COMMAND **user_command_array = NULL; int user_command_array_len = 0; /* How to compare index entries for sorting. May be set to strcoll. */ int (*index_compare_fn) () = strcasecmp; /* Find which element in the known list of indices has this name. Returns -1 if NAME isn't found. */ static int find_index_offset (name) char *name; { int i; for (i = 0; i < defined_indices; i++) if (name_index_alist[i] && STREQ (name, name_index_alist[i]->name)) return i; return -1; } /* Return a pointer to the entry of (name . index) for this name. Return NULL if the index doesn't exist. */ INDEX_ALIST * find_index (name) char *name; { int offset = find_index_offset (name); if (offset > -1) return name_index_alist[offset]; else return NULL; } /* User-defined commands, which happens only from user-defined indexes. Used to initialize the builtin indices, too. 
*/ void define_user_command (name, proc, needs_braces_p) char *name; COMMAND_FUNCTION *proc; int needs_braces_p; { int slot = user_command_array_len; user_command_array_len++; if (!user_command_array) user_command_array = xmalloc (1 * sizeof (COMMAND *)); user_command_array = xrealloc (user_command_array, (1 + user_command_array_len) * sizeof (COMMAND *)); user_command_array[slot] = xmalloc (sizeof (COMMAND)); user_command_array[slot]->name = xstrdup (name); user_command_array[slot]->proc = proc; user_command_array[slot]->argument_in_braces = needs_braces_p; } /* Please release me, let me go... */ static void free_index (index) INDEX_ELT *index; { INDEX_ELT *temp; while ((temp = index)) { free (temp->entry); free (temp->entry_text); /* Do not free the node, because we already freed the tag table, which freed all the node names. */ /* free (temp->node); */ index = index->next; free (temp); } } /* Flush an index by name. This will delete the list of entries that would be written by a @printindex command for this index. */ static void undefindex (name) char *name; { int i; int which = find_index_offset (name); /* The index might have already been freed if this was the target of an @synindex. */ if (which < 0 || !name_index_alist[which]) return; i = name_index_alist[which]->read_index; free_index (the_indices[i]); the_indices[i] = NULL; free (name_index_alist[which]->name); free (name_index_alist[which]); name_index_alist[which] = NULL; } /* Add the arguments to the current index command to the index NAME. html fixxme generate specific html anchor */ static void index_add_arg (name) char *name; { int which; char *index_entry; INDEX_ALIST *tem; tem = find_index (name); which = tem ? 
tem->write_index : -1; if (macro_expansion_output_stream && !executing_string) append_to_expansion_output (input_text_offset + 1); get_rest_of_line (0, &index_entry); ignore_blank_line (); if (macro_expansion_output_stream && !executing_string) { char *index_line = xmalloc (strlen (index_entry) + 2); sprintf (index_line, "%s\n", index_entry); me_execute_string_keep_state (index_line, NULL); free (index_line); } if (which < 0) { line_error (_("Unknown index `%s'"), name); free (index_entry); } else { INDEX_ELT *new = xmalloc (sizeof (INDEX_ELT)); new->next = the_indices[which]; new->entry_text = index_entry; new->entry = NULL; new->node = current_node ? current_node : xstrdup (""); new->code = tem->code; new->defining_line = line_number - 1; /* We need to make a copy since input_filename may point to something that goes away, for example, inside a macro. (see the findexerr test). */ new->defining_file = xstrdup (input_filename); the_indices[which] = new; #if 0 /* The index breaks if there are colons in the entry. -- This is true, but it's too painful to force changing index entries to use `colon', and too confusing for users. The real fix is to change Info support to support arbitrary characters in node names, and we're not ready to do that. --karl, 19mar02. */ if (strchr (new->entry_text, ':')) warning (_("Info cannot handle `:' in index entry `%s'"), new->entry_text); #endif } if (xml) xml_insert_indexterm (index_entry, name); } /* The function which user defined index commands call. */ static void gen_index () { char *name = xstrdup (command); if (strlen (name) >= strlen ("index")) name[strlen (name) - strlen ("index")] = 0; index_add_arg (name); free (name); } /* Define an index known as NAME. We assign the slot number. If CODE is nonzero, make this a code index. */ static void defindex (name, code) char *name; int code; { int i, slot; /* If it already exists, flush it. */ undefindex (name); /* Try to find an empty slot. 
*/ slot = -1; for (i = 0; i < defined_indices; i++) if (!name_index_alist[i]) { slot = i; break; } if (slot < 0) { /* No such luck. Make space for another index. */ slot = defined_indices; defined_indices++; name_index_alist = (INDEX_ALIST **) xrealloc (name_index_alist, (1 + defined_indices) * sizeof (INDEX_ALIST *)); the_indices = (INDEX_ELT **) xrealloc (the_indices, (1 + defined_indices) * sizeof (INDEX_ELT *)); } /* We have a slot. Start assigning. */ name_index_alist[slot] = xmalloc (sizeof (INDEX_ALIST)); name_index_alist[slot]->name = xstrdup (name); name_index_alist[slot]->read_index = slot; name_index_alist[slot]->write_index = slot; name_index_alist[slot]->code = code; the_indices[slot] = NULL; } /* Define an index NAME, implicitly @code if CODE is nonzero. */ static void top_defindex (name, code) char *name; int code; { char *temp; temp = xmalloc (1 + strlen (name) + strlen ("index")); sprintf (temp, "%sindex", name); define_user_command (temp, gen_index, 0); defindex (name, code); free (temp); } /* Set up predefined indices. */ void init_indices () { int i; /* Create the default data structures. */ /* Initialize data space. */ if (!the_indices) { the_indices = xmalloc ((1 + defined_indices) * sizeof (INDEX_ELT *)); the_indices[defined_indices] = NULL; name_index_alist = xmalloc ((1 + defined_indices) * sizeof (INDEX_ALIST *)); name_index_alist[defined_indices] = NULL; } /* If there were existing indices, get rid of them now. */ for (i = 0; i < defined_indices; i++) { if (name_index_alist[i]) { /* Suppose we're called with two input files, and the first does a @synindex pg cp. Then, when we get here to start the second file, the "pg" element won't get freed by undefindex (because it's pointing to "cp"). So free it here; otherwise, when we try to define the pg index again just below, it will still point to cp. */ undefindex (name_index_alist[i]->name); /* undefindex sets all this to null in some cases. 
*/ if (name_index_alist[i]) { free (name_index_alist[i]->name); free (name_index_alist[i]); name_index_alist[i] = NULL; } } } /* Add the default indices. */ top_defindex ("cp", 0); /* cp is the only non-code index. */ top_defindex ("fn", 1); top_defindex ("ky", 1); top_defindex ("pg", 1); top_defindex ("tp", 1); top_defindex ("vr", 1); } /* Given an index name, return the offset in the_indices of this index, or -1 if there is no such index. */ int translate_index (name) char *name; { INDEX_ALIST *which = find_index (name); if (which) return which->read_index; else return -1; } /* Return the index list which belongs to NAME. */ INDEX_ELT * index_list (name) char *name; { int which = translate_index (name); if (which < 0) return (INDEX_ELT *) -1; else return the_indices[which]; } /* Define a new index command. Arg is name of index. */ static void gen_defindex (code) int code; { char *name; get_rest_of_line (0, &name); if (find_index (name)) { line_error (_("Index `%s' already exists"), name); } else { char *temp = xmalloc (strlen (name) + sizeof ("index")); sprintf (temp, "%sindex", name); define_user_command (temp, gen_index, 0); defindex (name, code); free (temp); } free (name); } void cm_defindex () { gen_defindex (0); } void cm_defcodeindex () { gen_defindex (1); } /* Expects 2 args, on the same line. Both are index abbreviations. Make the first one be a synonym for the second one, i.e. make the first one have the same index as the second one. 
*/ void cm_synindex () { int source, target; char *abbrev1, *abbrev2; skip_whitespace (); get_until_in_line (0, " ", &abbrev1); target = find_index_offset (abbrev1); skip_whitespace (); get_until_in_line (0, " ", &abbrev2); source = find_index_offset (abbrev2); if (source < 0 || target < 0) { line_error (_("Unknown index `%s' and/or `%s' in @synindex"), abbrev1, abbrev2); } else { name_index_alist[target]->write_index = name_index_alist[source]->write_index; } free (abbrev1); free (abbrev2); } void cm_pindex () /* Pinhead index. */ { index_add_arg ("pg"); } void cm_vindex () /* Variable index. */ { index_add_arg ("vr"); } void cm_kindex () /* Key index. */ { index_add_arg ("ky"); } void cm_cindex () /* Concept index. */ { index_add_arg ("cp"); } void cm_findex () /* Function index. */ { index_add_arg ("fn"); } void cm_tindex () /* Data Type index. */ { index_add_arg ("tp"); } int index_element_compare (element1, element2) INDEX_ELT **element1, **element2; { return index_compare_fn ((*element1)->entry, (*element2)->entry); } /* Force all index entries to be unique. */ void make_index_entries_unique (array, count) INDEX_ELT **array; int count; { int i, j; INDEX_ELT **copy; int counter = 1; copy = xmalloc ((1 + count) * sizeof (INDEX_ELT *)); for (i = 0, j = 0; i < count; i++) { if (i == (count - 1) || array[i]->node != array[i + 1]->node || !STREQ (array[i]->entry, array[i + 1]->entry)) copy[j++] = array[i]; else { free (array[i]->entry); free (array[i]->entry_text); free (array[i]); } } copy[j] = NULL; /* Now COPY contains only unique entries. Duplicated entries in the original array have been freed. Replace the current array with the copy, fixing the NEXT pointers. */ for (i = 0; copy[i]; i++) { copy[i]->next = copy[i + 1]; /* Fix entry names which are the same. They point to different nodes, so we make the entry name unique. 
*/ if (copy[i+1] && STREQ (copy[i]->entry, copy[i + 1]->entry) && !html) { char *new_entry_name; new_entry_name = xmalloc (10 + strlen (copy[i]->entry)); sprintf (new_entry_name, "%s <%d>", copy[i]->entry, counter); free (copy[i]->entry); copy[i]->entry = new_entry_name; counter++; } else counter = 1; array[i] = copy[i]; } array[i] = NULL; /* Free the storage used only by COPY. */ free (copy); } /* Sort the index passed in INDEX, returning an array of pointers to elements. The array is terminated with a NULL pointer. */ INDEX_ELT ** sort_index (index) INDEX_ELT *index; { INDEX_ELT **array; INDEX_ELT *temp; int count = 0; int save_line_number = line_number; char *save_input_filename = input_filename; int save_html = html; /* Pretend we are in non-HTML mode, for the purpose of getting the expanded index entry that lacks any markup and other HTML escape characters which could produce a wrong sort order. */ /* fixme: html: this still causes some markup, such as non-ASCII characters @AE{} etc., to sort incorrectly. */ html = 0; for (temp = index, count = 0; temp; temp = temp->next, count++) ; /* We have the length, now we can allocate an array. */ array = xmalloc ((count + 1) * sizeof (INDEX_ELT *)); for (temp = index, count = 0; temp; temp = temp->next, count++) { /* Allocate new memory for the return array, since parts of the original INDEX get freed. Otherwise, if the document calls @printindex twice on the same index, with duplicate entries, we'll have garbage the second time. There are cleaner ways to deal, but this will suffice for now. */ array[count] = xmalloc (sizeof (INDEX_ELT)); *(array[count]) = *(temp); /* struct assignment, hope it's ok */ /* Adjust next pointers to use the new memory. */ if (count > 0) array[count-1]->next = array[count]; /* Set line number and input filename to the source line for this index entry, as this expansion finds any errors. 
*/ line_number = array[count]->defining_line; input_filename = array[count]->defining_file; /* If this particular entry should be printed as a "code" index, then expand it as @code{entry}, i.e., as in fixed-width font. */ array[count]->entry = expansion (temp->entry_text, array[count]->code); } array[count] = NULL; /* terminate the array. */ line_number = save_line_number; input_filename = save_input_filename; html = save_html; #ifdef HAVE_STRCOLL /* This is not perfect. We should set (then restore) the locale to the documentlanguage, so strcoll operates according to the document's locale, not the user's. For now, I'm just going to assume that those few new documents which use @documentlanguage will be processed in the appropriate locale. In any case, don't use strcoll in the C (aka POSIX) locale, that is the ASCII ordering. */ if (language_code != en) { char *lang_env = getenv ("LANG"); if (lang_env && !STREQ (lang_env, "C") && !STREQ (lang_env, "POSIX")) index_compare_fn = strcoll; } #endif /* HAVE_STRCOLL */ /* Sort the array. */ qsort (array, count, sizeof (INDEX_ELT *), index_element_compare); /* Remove duplicate entries. */ make_index_entries_unique (array, count); /* Replace the original index with the sorted one, in case the document wants to print it again. If the index wasn't empty. */ if (index) *index = **array; return array; } /* Nonzero means that we are in the middle of printing an index. */ int printing_index = 0; /* Takes one arg, a short name of an index to print. Outputs a menu of the sorted elements of the index. 
*/ void cm_printindex () { if (xml && !docbook) { char *index_name; get_rest_of_line (0, &index_name); xml_insert_element (PRINTINDEX, START); insert_string (index_name); xml_insert_element (PRINTINDEX, END); } else { int item; INDEX_ELT *index; INDEX_ELT *last_index = 0; INDEX_ELT **array; char *index_name; unsigned line_length; char *line; int saved_inhibit_paragraph_indentation = inhibit_paragraph_indentation; int saved_filling_enabled = filling_enabled; int saved_line_number = line_number; char *saved_input_filename = input_filename; close_paragraph (); get_rest_of_line (0, &index_name); index = index_list (index_name); if (index == (INDEX_ELT *)-1) { line_error (_("Unknown index `%s' in @printindex"), index_name); free (index_name); return; } /* Do this before sorting, so execute_string is in the good environment */ if (xml && docbook) xml_begin_index (); /* Do this before sorting, so execute_string in index_element_compare will give the same results as when we actually print. */ printing_index = 1; filling_enabled = 0; inhibit_paragraph_indentation = 1; xml_sort_index = 1; array = sort_index (index); xml_sort_index = 0; close_paragraph (); if (html) add_word_args ("