diff options
Diffstat (limited to 'contrib/gnu-sort/src/sort.c')
-rw-r--r-- | contrib/gnu-sort/src/sort.c | 3237 |
1 files changed, 0 insertions, 3237 deletions
diff --git a/contrib/gnu-sort/src/sort.c b/contrib/gnu-sort/src/sort.c deleted file mode 100644 index 0b9d33f..0000000 --- a/contrib/gnu-sort/src/sort.c +++ /dev/null @@ -1,3237 +0,0 @@ -/* $FreeBSD$ */ -/* sort - sort lines of text (with all kinds of options). - Copyright (C) 88, 1991-2004 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - Written December 1988 by Mike Haertel. - The author may be reached (Email) at the address mike@gnu.ai.mit.edu, - or (US mail) as Mike Haertel c/o Free Software Foundation. - - Ørn E. Hansen added NLS support in 1997. */ - -#include <config.h> - -#include <assert.h> -#include <getopt.h> -#include <sys/types.h> -#include <signal.h> -#include <stdio.h> - -/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ -/* Get mbstate_t, mbrtowc(), wcwidth(). */ -#if HAVE_WCHAR_H -# include <wchar.h> -#endif - -/* Get isw* functions. */ -#if HAVE_WCTYPE_H -# include <wctype.h> -#endif - -/* Get nl_langinfo(). */ -#if HAVE_LANGINFO_CODESET -# include <langinfo.h> -#endif - -/* Include this after wctype.h so that we `#undef' ISPRINT - (from Solaris's euc.h, from widec.h, from wctype.h) before - redefining and using it. */ -#include "system.h" -#include "error.h" -#include "hard-locale.h" -#include "inttostr.h" -#include "long-options.h" -#include "physmem.h" -#include "posixver.h" -#include "quote.h" -#include "stdio-safer.h" -#include "xmemcoll.h" -#include "xstrtol.h" - -#if HAVE_SYS_RESOURCE_H -# include <sys/resource.h> -#endif -#ifndef RLIMIT_DATA -struct rlimit { size_t rlim_cur; }; -# define getrlimit(Resource, Rlp) (-1) -#endif - -/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC - installation; work around this configuration error. */ -#if !defined MB_LEN_MAX || MB_LEN_MAX == 1 -# define MB_LEN_MAX 16 -#endif - -/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -#if HAVE_MBRTOWC && defined mbstate_t -# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -#endif - -/* The official name of this program (e.g., no `g' prefix). */ -#define PROGRAM_NAME "sort" - -#define AUTHORS "Mike Haertel", "Paul Eggert" - -#if HAVE_LANGINFO_CODESET -# include <langinfo.h> -#endif - -#ifndef SA_NOCLDSTOP -# define sigprocmask(How, Set, Oset) /* empty */ -# define sigset_t int -#endif - -#ifndef STDC_HEADERS -double strtod (); -#endif - -#define UCHAR_LIM (UCHAR_MAX + 1) - -#ifndef DEFAULT_TMPDIR -# define DEFAULT_TMPDIR "/tmp" -#endif - -/* Exit statuses. */ -enum - { - /* POSIX says to exit with status 1 if invoked with -c and the - input is not properly sorted. */ - SORT_OUT_OF_ORDER = 1, - - /* POSIX says any other irregular exit must exit with a status - code greater than 1. */ - SORT_FAILURE = 2 - }; - -#define C_DECIMAL_POINT '.' -#define NEGATION_SIGN '-' -#define NUMERIC_ZERO '0' - -#if HAVE_SETLOCALE - -static char decimal_point; -static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */ -static int force_general_numcompare = 0; - -/* Nonzero if the corresponding locales are hard. */ -static bool hard_LC_COLLATE; -# if HAVE_NL_LANGINFO -static bool hard_LC_TIME; -# endif - -# define IS_THOUSANDS_SEP(x) ((x) == th_sep) - -#else - -# define decimal_point C_DECIMAL_POINT -# define IS_THOUSANDS_SEP(x) false - -#endif - -#define NONZERO(x) (x != 0) - -/* get a multibyte character's byte length. */ -#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ - do \ - { \ - wchar_t wc; \ - mbstate_t state_bak; \ - \ - state_bak = STATE; \ - mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ - \ - switch (MBLENGTH) \ - { \ - case (size_t)-1: \ - case (size_t)-2: \ - STATE = state_bak; \ - /* Fall through. */ \ - case 0: \ - MBLENGTH = 1; \ - } \ - } \ - while (0) - -/* The kind of blanks for '-b' to skip in various options. */ -enum blanktype { bl_start, bl_end, bl_both }; - -/* The character marking end of line. Default to \n. */ -static char eolchar = '\n'; - -/* Lines are held in core as counted strings. */ -struct line -{ - char *text; /* Text of the line. */ - size_t length; /* Length including final newline. */ - char *keybeg; /* Start of first key. */ - char *keylim; /* Limit of first key. */ -}; - -/* Input buffers. */ -struct buffer -{ - char *buf; /* Dynamically allocated buffer, - partitioned into 3 regions: - - input data; - - unused area; - - an array of lines, in reverse order. */ - size_t used; /* Number of bytes used for input data. */ - size_t nlines; /* Number of lines in the line array. */ - size_t alloc; /* Number of bytes allocated. */ - size_t left; /* Number of bytes left from previous reads. */ - size_t line_bytes; /* Number of bytes to reserve for each line. */ - bool eof; /* An EOF has been read. */ -}; - -struct keyfield -{ - size_t sword; /* Zero-origin 'word' to start at. */ - size_t schar; /* Additional characters to skip. */ - size_t eword; /* Zero-origin first word after field. */ - size_t echar; /* Additional characters in field. */ - bool const *ignore; /* Boolean array of characters to ignore. */ - char const *translate; /* Translation applied to characters. */ - bool skipsblanks; /* Skip leading blanks when finding start. */ - bool skipeblanks; /* Skip leading blanks when finding end. */ - bool numeric; /* Flag for numeric comparison. Handle - strings of digits with optional decimal - point, but no exponential notation. */ - bool general_numeric; /* Flag for general, numeric comparison. - Handle numbers in exponential notation. */ - bool month; /* Flag for comparison by month name. */ - bool reverse; /* Reverse the sense of comparison. */ - struct keyfield *next; /* Next keyfield to try. */ -}; - -struct month -{ - char const *name; - int val; -}; - -/* The name this program was run with. */ -char *program_name; - -/* FIXME: None of these tables work with multibyte character sets. - Also, there are many other bugs when handling multibyte characters. - One way to fix this is to rewrite `sort' to use wide characters - internally, but doing this with good performance is a bit - tricky. */ - -/* Table of blanks. */ -static bool blanks[UCHAR_LIM]; - -/* Table of non-printing characters. */ -static bool nonprinting[UCHAR_LIM]; - -/* Table of non-dictionary characters (not letters, digits, or blanks). */ -static bool nondictionary[UCHAR_LIM]; - -/* Translation table folding lower case to upper. */ -static char fold_toupper[UCHAR_LIM]; - -#define MONTHS_PER_YEAR 12 - -/* Table mapping month names to integers. - Alphabetic order allows binary search. */ -static struct month monthtab[] = -{ - {"APR", 4}, - {"AUG", 8}, - {"DEC", 12}, - {"FEB", 2}, - {"JAN", 1}, - {"JUL", 7}, - {"JUN", 6}, - {"MAR", 3}, - {"MAY", 5}, - {"NOV", 11}, - {"OCT", 10}, - {"SEP", 9} -}; - -/* During the merge phase, the number of files to merge at once. */ -#define NMERGE 16 - -/* Minimum size for a merge or check buffer. */ -#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) - -/* Minimum sort size; the code might not work with smaller sizes. */ -#define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE) - -/* The number of bytes needed for a merge or check buffer, which can - function relatively efficiently even if it holds only one line. If - a longer line is seen, this value is increased. */ -static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024); - -/* The approximate maximum number of bytes of main memory to use, as - specified by the user. Zero if the user has not specified a size. */ -static size_t sort_size; - -/* The guessed size for non-regular files. */ -#define INPUT_FILE_SIZE_GUESS (1024 * 1024) - -/* Array of directory names in which any temporary files are to be created. */ -static char const **temp_dirs; - -/* Number of temporary directory names used. */ -static size_t temp_dir_count; - -/* Number of allocated slots in temp_dirs. */ -static size_t temp_dir_alloc; - -/* Flag to reverse the order of all comparisons. */ -static bool reverse; - -/* Flag for stable sort. This turns off the last ditch bytewise - comparison of lines, and instead leaves lines in the same order - they were read if all keys compare equal. */ -static bool stable; - -/* Tab character separating fields. If tab_default, then fields are - separated by the empty string between a non-blank character and a blank - character. */ -static bool tab_default = true; -static unsigned char tab[MB_LEN_MAX + 1]; -static size_t tab_length = 1; - -/* Flag to remove consecutive duplicate lines from the output. - Only the last of a sequence of equal lines will be output. */ -static bool unique; - -/* Nonzero if any of the input files are the standard input. */ -static bool have_read_stdin; - -/* List of key field comparisons to be tried. */ -static struct keyfield *keylist; - -static void sortlines_temp (struct line *, size_t, struct line *); - -void -usage (int status) -{ - if (status != EXIT_SUCCESS) - fprintf (stderr, _("Try `%s --help' for more information.\n"), - program_name); - else - { - printf (_("\ -Usage: %s [OPTION]... [FILE]...\n\ -"), - program_name); - fputs (_("\ -Write sorted concatenation of all FILE(s) to standard output.\n\ -\n\ -Ordering options:\n\ -\n\ -"), stdout); - fputs (_("\ -Mandatory arguments to long options are mandatory for short options too.\n\ -"), stdout); - fputs (_("\ - -b, --ignore-leading-blanks ignore leading blanks\n\ - -d, --dictionary-order consider only blanks and alphanumeric characters\n\ - -f, --ignore-case fold lower case to upper case characters\n\ -"), stdout); - fputs (_("\ - -g, --general-numeric-sort compare according to general numerical value\n\ - -i, --ignore-nonprinting consider only printable characters\n\ - -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\ - -n, --numeric-sort compare according to string numerical value\n\ - -r, --reverse reverse the result of comparisons\n\ -\n\ -"), stdout); - fputs (_("\ -Other options:\n\ -\n\ - -c, --check check whether input is sorted; do not sort\n\ - -k, --key=POS1[,POS2] start a key at POS1, end it at POS 2 (origin 1)\n\ - -m, --merge merge already sorted files; do not sort\n\ - -o, --output=FILE write result to FILE instead of standard output\n\ - -s, --stable stabilize sort by disabling last-resort comparison\n\ - -S, --buffer-size=SIZE use SIZE for main memory buffer\n\ -"), stdout); - printf (_("\ - -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\ - -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\ - multiple options specify multiple directories\n\ - -u, --unique with -c, check for strict ordering;\n\ - without -c, output only the first of an equal run\n\ -"), DEFAULT_TMPDIR); - fputs (_("\ - -z, --zero-terminated end lines with 0 byte, not newline\n\ -"), stdout); - fputs (HELP_OPTION_DESCRIPTION, stdout); - fputs (VERSION_OPTION_DESCRIPTION, stdout); - fputs (_("\ -\n\ -POS is F[.C][OPTS], where F is the field number and C the character position\n\ -in the field. OPTS is one or more single-letter ordering options, which\n\ -override global ordering options for that key. If no key is given, use the\n\ -entire line as the key.\n\ -\n\ -SIZE may be followed by the following multiplicative suffixes:\n\ -"), stdout); - fputs (_("\ -% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\ -\n\ -With no FILE, or when FILE is -, read standard input.\n\ -\n\ -*** WARNING ***\n\ -The locale specified by the environment affects sort order.\n\ -Set LC_ALL=C to get the traditional sort order that uses\n\ -native byte values.\n\ -"), stdout ); - printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); - } - - exit (status); -} - -#define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz" - -static struct option const long_options[] = -{ - {"ignore-leading-blanks", no_argument, NULL, 'b'}, - {"check", no_argument, NULL, 'c'}, - {"dictionary-order", no_argument, NULL, 'd'}, - {"ignore-case", no_argument, NULL, 'f'}, - {"general-numeric-sort", no_argument, NULL, 'g'}, - {"ignore-nonprinting", no_argument, NULL, 'i'}, - {"key", required_argument, NULL, 'k'}, - {"merge", no_argument, NULL, 'm'}, - {"month-sort", no_argument, NULL, 'M'}, - {"numeric-sort", no_argument, NULL, 'n'}, - {"output", required_argument, NULL, 'o'}, - {"reverse", no_argument, NULL, 'r'}, - {"stable", no_argument, NULL, 's'}, - {"buffer-size", required_argument, NULL, 'S'}, - {"field-separator", required_argument, NULL, 't'}, - {"temporary-directory", required_argument, NULL, 'T'}, - {"unique", no_argument, NULL, 'u'}, - {"zero-terminated", no_argument, NULL, 'z'}, - {GETOPT_HELP_OPTION_DECL}, - {GETOPT_VERSION_OPTION_DECL}, - {0, 0, 0, 0}, -}; - -/* The set of signals that are caught. */ -static sigset_t caught_signals; - -/* The list of temporary files. */ -struct tempnode -{ - struct tempnode *volatile next; - char name[1]; /* Actual size is 1 + file name length. */ -}; -static struct tempnode *volatile temphead; - -/* Fucntion pointers. */ -static void -(*inittables) (void); - -static char * -(* begfield) (const struct line *line, const struct keyfield *key); - -static char * -(* limfield) (const struct line *line, const struct keyfield *key); - -static int -(*getmonth) (const char *s, size_t len); - -static int -(* keycompare) (const struct line *a, const struct line *b); - -/* Test for white space multibyte character. - Set LENGTH the byte length of investigated multibyte character. */ -#if HAVE_MBRTOWC -static int -ismbblank (const char *str, size_t len, size_t *length) -{ - size_t mblength; - wchar_t wc; - mbstate_t state; - - memset (&state, '\0', sizeof(mbstate_t)); - mblength = mbrtowc (&wc, str, len, &state); - - if (mblength == (size_t)-1 || mblength == (size_t)-2) - { - *length = 1; - return 0; - } - - *length = (mblength < 1) ? 1 : mblength; - return iswblank (wc); -} -#endif - -/* Clean up any remaining temporary files. */ - -static void -cleanup (void) -{ - struct tempnode const *node; - - for (node = temphead; node; node = node->next) - unlink (node->name); -} - -/* Report MESSAGE for FILE, then clean up and exit. - If FILE is null, it represents standard output. */ - -static void die (char const *, char const *) ATTRIBUTE_NORETURN; -static void -die (char const *message, char const *file) -{ - error (0, errno, "%s: %s", message, file ? file : _("standard output")); - exit (SORT_FAILURE); -} - -/* Create a new temporary file, returning its newly allocated name. - Store into *PFP a stream open for writing. */ - -static char * -create_temp_file (FILE **pfp) -{ - static char const slashbase[] = "/sortXXXXXX"; - static size_t temp_dir_index; - sigset_t oldset; - int fd; - int saved_errno; - char const *temp_dir = temp_dirs[temp_dir_index]; - size_t len = strlen (temp_dir); - struct tempnode *node = - xmalloc (sizeof node->next + len + sizeof slashbase); - char *file = node->name; - - memcpy (file, temp_dir, len); - memcpy (file + len, slashbase, sizeof slashbase); - node->next = temphead; - if (++temp_dir_index == temp_dir_count) - temp_dir_index = 0; - - /* Create the temporary file in a critical section, to avoid races. */ - sigprocmask (SIG_BLOCK, &caught_signals, &oldset); - fd = mkstemp (file); - if (0 <= fd) - temphead = node; - saved_errno = errno; - sigprocmask (SIG_SETMASK, &oldset, NULL); - errno = saved_errno; - - if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL) - die (_("cannot create temporary file"), file); - - return file; -} - -/* Return a stream for FILE, opened with mode HOW. A null FILE means - standard output; HOW should be "w". When opening for input, "-" - means standard input. To avoid confusion, do not return file - descriptors 0, 1, or 2. */ - -static FILE * -xfopen (const char *file, const char *how) -{ - FILE *fp; - - if (!file) - fp = stdout; - else if (STREQ (file, "-") && *how == 'r') - { - have_read_stdin = true; - fp = stdin; - } - else - { - if ((fp = fopen_safer (file, how)) == NULL) - die (_("open failed"), file); - } - - return fp; -} - -/* Close FP, whose name is FILE, and report any errors. */ - -static void -xfclose (FILE *fp, char const *file) -{ - if (fp == stdin) - { - /* Allow reading stdin from tty more than once. */ - if (feof (fp)) - clearerr (fp); - } - else - { - if (fclose (fp) != 0) - die (_("close failed"), file); - } -} - -static void -write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file) -{ - if (fwrite (buf, 1, n_bytes, fp) != n_bytes) - die (_("write failed"), output_file); -} - -/* Append DIR to the array of temporary directory names. */ -static void -add_temp_dir (char const *dir) -{ - if (temp_dir_count == temp_dir_alloc) - temp_dirs = x2nrealloc (temp_dirs, &temp_dir_alloc, sizeof *temp_dirs); - - temp_dirs[temp_dir_count++] = dir; -} - -/* Search through the list of temporary files for NAME; - remove it if it is found on the list. */ - -static void -zaptemp (const char *name) -{ - struct tempnode *volatile *pnode; - struct tempnode *node; - - for (pnode = &temphead; (node = *pnode); pnode = &node->next) - if (node->name == name) - { - unlink (name); - *pnode = node->next; - free (node); - break; - } -} - -#if HAVE_LANGINFO_CODESET - -static int -struct_month_cmp (const void *m1, const void *m2) -{ - struct month const *month1 = m1; - struct month const *month2 = m2; - return strcmp (month1->name, month2->name); -} - -#endif - -/* Initialize the character class tables. */ - -static void -inittables_uni (void) -{ - int i; - - for (i = 0; i < UCHAR_LIM; ++i) - { - blanks[i] = !!ISBLANK (i); - nonprinting[i] = !ISPRINT (i); - nondictionary[i] = !ISALNUM (i) && !ISBLANK (i); - fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i); - } - -#if HAVE_NL_LANGINFO - /* If we're not in the "C" locale, read different names for months. */ - if (hard_LC_TIME) - { - for (i = 0; i < MONTHS_PER_YEAR; i++) - { - char const *s; - size_t s_len; - size_t j; - char *name; - - s = (char *) nl_langinfo (ABMON_1 + i); - s_len = strlen (s); - monthtab[i].name = name = xmalloc (s_len + 1); - monthtab[i].val = i + 1; - - for (j = 0; j < s_len; j++) - name[j] = fold_toupper[to_uchar (s[j])]; - name[j] = '\0'; - } - qsort ((void *) monthtab, MONTHS_PER_YEAR, - sizeof *monthtab, struct_month_cmp); - } -#endif -} - -#if HAVE_MBRTOWC -static void -inittables_mb (void) -{ - int i, j, k, l; - char *name, *s; - size_t s_len, mblength; - char mbc[MB_LEN_MAX]; - wchar_t wc, pwc; - mbstate_t state_mb, state_wc; - - for (i = 0; i < MONTHS_PER_YEAR; i++) - { - s = (char *) nl_langinfo (ABMON_1 + i); - s_len = strlen (s); - monthtab[i].name = name = (char *) xmalloc (s_len + 1); - monthtab[i].val = i + 1; - - memset (&state_mb, '\0', sizeof (mbstate_t)); - memset (&state_wc, '\0', sizeof (mbstate_t)); - - for (j = 0; j < s_len;) - { - if (!ismbblank (s + j, s_len - j, &mblength)) - break; - j += mblength; - } - - for (k = 0; j < s_len;) - { - mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); - assert (mblength != (size_t)-1 && mblength != (size_t)-2); - if (mblength == 0) - break; - - pwc = towupper (wc); - if (pwc == wc) - { - memcpy (mbc, s + j, mblength); - j += mblength; - } - else - { - j += mblength; - mblength = wcrtomb (mbc, pwc, &state_wc); - assert (mblength != (size_t)0 && mblength != (size_t)-1); - } - - for (l = 0; l < mblength; l++) - name[k++] = mbc[l]; - } - name[k] = '\0'; - } - qsort ((void *) monthtab, MONTHS_PER_YEAR, - sizeof (struct month), struct_month_cmp); -} -#endif - -/* Specify the amount of main memory to use when sorting. */ -static void -specify_sort_size (char const *s) -{ - uintmax_t n; - char *suffix; - enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ"); - - /* The default unit is KiB. */ - if (e == LONGINT_OK && ISDIGIT (suffix[-1])) - { - if (n <= UINTMAX_MAX / 1024) - n *= 1024; - else - e = LONGINT_OVERFLOW; - } - - /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */ - if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1]) - switch (suffix[0]) - { - case 'b': - e = LONGINT_OK; - break; - - case '%': - { - double mem = physmem_total () * n / 100; - - /* Use "<", not "<=", to avoid problems with rounding. */ - if (mem < UINTMAX_MAX) - { - n = mem; - e = LONGINT_OK; - } - else - e = LONGINT_OVERFLOW; - } - break; - } - - if (e == LONGINT_OK) - { - /* If multiple sort sizes are specified, take the maximum, so - that option order does not matter. */ - if (n < sort_size) - return; - - sort_size = n; - if (sort_size == n) - { - sort_size = MAX (sort_size, MIN_SORT_SIZE); - return; - } - - e = LONGINT_OVERFLOW; - } - - STRTOL_FATAL_ERROR (s, _("sort size"), e); -} - -/* Return the default sort size. */ -static size_t -default_sort_size (void) -{ - /* Let MEM be available memory or 1/8 of total memory, whichever - is greater. */ - double avail = physmem_available (); - double total = physmem_total (); - double mem = MAX (avail, total / 8); - struct rlimit rlimit; - - /* Let SIZE be MEM, but no more than the maximum object size or - system resource limits. Avoid the MIN macro here, as it is not - quite right when only one argument is floating point. Don't - bother to check for values like RLIM_INFINITY since in practice - they are not much less than SIZE_MAX. */ - size_t size = SIZE_MAX; - if (mem < size) - size = mem; - if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) - size = rlimit.rlim_cur; -#ifdef RLIMIT_AS - if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size) - size = rlimit.rlim_cur; -#endif - - /* Leave a large safety margin for the above limits, as failure can - occur when they are exceeded. */ - size /= 2; - -#ifdef RLIMIT_RSS - /* Leave a 1/16 margin for RSS to leave room for code, stack, etc. - Exceeding RSS is not fatal, but can be quite slow. */ - if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size) - size = rlimit.rlim_cur / 16 * 15; -#endif - - /* Use no less than the minimum. */ - return MAX (size, MIN_SORT_SIZE); -} - -/* Return the sort buffer size to use with the input files identified - by FPS and FILES, which are alternate paths to the same files. - NFILES gives the number of input files; NFPS may be less. Assume - that each input line requires LINE_BYTES extra bytes' worth of line - information. Do not exceed a bound on the size: if the bound is - not specified by the user, use a default. */ - -static size_t -sort_buffer_size (FILE *const *fps, int nfps, - char *const *files, int nfiles, - size_t line_bytes) -{ - /* A bound on the input size. If zero, the bound hasn't been - determined yet. */ - static size_t size_bound; - - /* In the worst case, each input byte is a newline. */ - size_t worst_case_per_input_byte = line_bytes + 1; - - /* Keep enough room for one extra input line and an extra byte. - This extra room might be needed when preparing to read EOF. */ - size_t size = worst_case_per_input_byte + 1; - - int i; - - for (i = 0; i < nfiles; i++) - { - struct stat st; - off_t file_size; - size_t worst_case; - - if ((i < nfps ? fstat (fileno (fps[i]), &st) - : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st) - : stat (files[i], &st)) - != 0) - die (_("stat failed"), files[i]); - - if (S_ISREG (st.st_mode)) - file_size = st.st_size; - else - { - /* The file has unknown size. If the user specified a sort - buffer size, use that; otherwise, guess the size. */ - if (sort_size) - return sort_size; - file_size = INPUT_FILE_SIZE_GUESS; - } - - if (! size_bound) - { - size_bound = sort_size; - if (! size_bound) - size_bound = default_sort_size (); - } - - /* Add the amount of memory needed to represent the worst case - where the input consists entirely of newlines followed by a - single non-newline. Check for overflow. */ - worst_case = file_size * worst_case_per_input_byte + 1; - if (file_size != worst_case / worst_case_per_input_byte - || size_bound - size <= worst_case) - return size_bound; - size += worst_case; - } - - return size; -} - -/* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES - must be at least sizeof (struct line). Allocate ALLOC bytes - initially. */ - -static void -initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) -{ - /* Ensure that the line array is properly aligned. If the desired - size cannot be allocated, repeatedly halve it until allocation - succeeds. The smaller allocation may hurt overall performance, - but that's better than failing. */ - for (;;) - { - alloc += sizeof (struct line) - alloc % sizeof (struct line); - buf->buf = malloc (alloc); - if (buf->buf) - break; - alloc /= 2; - if (alloc <= line_bytes + 1) - xalloc_die (); - } - - buf->line_bytes = line_bytes; - buf->alloc = alloc; - buf->used = buf->left = buf->nlines = 0; - buf->eof = false; -} - -/* Return one past the limit of the line array. */ - -static inline struct line * -buffer_linelim (struct buffer const *buf) -{ - return (struct line *) (buf->buf + buf->alloc); -} - -/* Return a pointer to the first character of the field specified - by KEY in LINE. */ - -static char * -begfield_uni (const struct line *line, const struct keyfield *key) -{ - register char *ptr = line->text, *lim = ptr + line->length - 1; - register size_t sword = key->sword; - register size_t schar = key->schar; - register size_t remaining_bytes; - - /* The leading field separator itself is included in a field when -t - is absent. */ - - if (!tab_default) - while (ptr < lim && sword--) - { - while (ptr < lim && *ptr != tab[0]) - ++ptr; - if (ptr < lim) - ++ptr; - } - else - while (ptr < lim && sword--) - { - while (ptr < lim && blanks[to_uchar (*ptr)]) - ++ptr; - while (ptr < lim && !blanks[to_uchar (*ptr)]) - ++ptr; - } - - if (key->skipsblanks) - while (ptr < lim && blanks[to_uchar (*ptr)]) - ++ptr; - - /* Advance PTR by SCHAR (if possible), but no further than LIM. */ - remaining_bytes = lim - ptr; - if (schar < remaining_bytes) - ptr += schar; - else - ptr = lim; - - return ptr; -} - -#if HAVE_MBRTOWC -static char * -begfield_mb (const struct line *line, const struct keyfield *key) -{ - int i; - char *ptr = line->text, *lim = ptr + line->length - 1; - size_t sword = key->sword; - size_t schar = key->schar; - size_t mblength; - mbstate_t state; - - memset (&state, '\0', sizeof(mbstate_t)); - - if (!tab_default) - while (ptr < lim && sword--) - { - while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - ptr += mblength; - } - if (ptr < lim) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - ptr += mblength; - } - } - else - while (ptr < lim && sword--) - { - while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) - ptr += mblength; - if (ptr < lim) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - ptr += mblength; - } - while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) - ptr += mblength; - } - - if (key->skipsblanks) - while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) - ptr += mblength; - - for (i = 0; i < schar; i++) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - - if (ptr + mblength > lim) - break; - else - ptr += mblength; - } - - return ptr; -} -#endif - -/* Return the limit of (a pointer to the first character after) the field - in LINE specified by KEY. */ - -static char * -limfield_uni (const struct line *line, const struct keyfield *key) -{ - register char *ptr = line->text, *lim = ptr + line->length - 1; - register size_t eword = key->eword, echar = key->echar; - register size_t remaining_bytes; - - /* Move PTR past EWORD fields or to one past the last byte on LINE, - whichever comes first. If there are more than EWORD fields, leave - PTR pointing at the beginning of the field having zero-based index, - EWORD. If a delimiter character was specified (via -t), then that - `beginning' is the first character following the delimiting TAB. - Otherwise, leave PTR pointing at the first `blank' character after - the preceding field. */ - if (!tab_default) - while (ptr < lim && eword--) - { - while (ptr < lim && *ptr != tab[0]) - ++ptr; - if (ptr < lim && (eword | echar)) - ++ptr; - } - else - while (ptr < lim && eword--) - { - while (ptr < lim && blanks[to_uchar (*ptr)]) - ++ptr; - while (ptr < lim && !blanks[to_uchar (*ptr)]) - ++ptr; - } - -#ifdef POSIX_UNSPECIFIED - /* The following block of code makes GNU sort incompatible with - standard Unix sort, so it's ifdef'd out for now. - The POSIX spec isn't clear on how to interpret this. - FIXME: request clarification. - - From: kwzh@gnu.ai.mit.edu (Karl Heuer) - Date: Thu, 30 May 96 12:20:41 -0400 - [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.] - - [...]I believe I've found another bug in `sort'. - - $ cat /tmp/sort.in - a b c 2 d - pq rs 1 t - $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in - a b c 2 d - pq rs 1 t - $ /bin/sort -k1.7,1.7 </tmp/sort.in - pq rs 1 t - a b c 2 d - - Unix sort produced the answer I expected: sort on the single character - in column 7. GNU sort produced different results, because it disagrees - on the interpretation of the key-end spec "M.N". Unix sort reads this - as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean - "skip M-1 fields, then either N-1 characters or the rest of the current - field, whichever comes first". This extra clause applies only to - key-ends, not key-starts. - */ - - /* Make LIM point to the end of (one byte past) the current field. */ - if (!tab_default) - { - char *newlim; - newlim = memchr (ptr, tab[0], lim - ptr); - if (newlim) - lim = newlim; - } - else - { - char *newlim; - newlim = ptr; - while (newlim < lim && blanks[to_uchar (*newlim)]) - ++newlim; - while (newlim < lim && !blanks[to_uchar (*newlim)]) - ++newlim; - lim = newlim; - } -#endif - - /* If we're ignoring leading blanks when computing the End - of the field, don't start counting bytes until after skipping - past any leading blanks. */ - if (key->skipeblanks) - while (ptr < lim && blanks[to_uchar (*ptr)]) - ++ptr; - - /* Advance PTR by ECHAR (if possible), but no further than LIM. */ - remaining_bytes = lim - ptr; - if (echar < remaining_bytes) - ptr += echar; - else - ptr = lim; - - return ptr; -} - -#if HAVE_MBRTOWC -static char * -limfield_mb (const struct line *line, const struct keyfield *key) -{ - char *ptr = line->text, *lim = ptr + line->length - 1; - size_t eword = key->eword, echar = key->echar; - int i; - size_t mblength; - mbstate_t state; - - memset (&state, '\0', sizeof(mbstate_t)); - - if (!tab_default) - while (ptr < lim && eword--) - { - while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - ptr += mblength; - } - if (ptr < lim && (eword | echar)) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - ptr += mblength; - } - } - else - while (ptr < lim && eword--) - { - while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) - ptr += mblength; - if (ptr < lim) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - ptr += mblength; - } - while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) - ptr += mblength; - } - - -# ifdef POSIX_UNSPECIFIED - /* Make LIM point to the end of (one byte past) the current field. */ - if (!tab_default) - { - char *newlim, *p; - - newlim = NULL; - for (p = ptr; p < lim;) - { - if (memcmp (p, tab, tab_length) == 0) - { - newlim = p; - break; - } - - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - p += mblength; - } - } - else - { - char *newlim; - newlim = ptr; - - while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) - newlim += mblength; - if (ptr < lim) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - ptr += mblength; - } - while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) - newlim += mblength; - lim = newlim; - } -# endif - - /* If we're skipping leading blanks, don't start counting characters - * until after skipping past any leading blanks. */ - if (key->skipeblanks) - while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) - ptr += mblength; - - memset (&state, '\0', sizeof(mbstate_t)); - - /* Advance PTR by ECHAR (if possible), but no further than LIM. */ - for (i = 0; i < echar; i++) - { - GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); - - if (ptr + mblength > lim) - break; - else - ptr += mblength; - } - - return ptr; -} -#endif - -/* Fill BUF reading from FP, moving buf->left bytes from the end - of buf->buf to the beginning first. If EOF is reached and the - file wasn't terminated by a newline, supply one. Set up BUF's line - table too. FILE is the name of the file corresponding to FP. - Return true if some input was read. */ - -static bool -fillbuf (struct buffer *buf, register FILE *fp, char const *file) -{ - struct keyfield const *key = keylist; - char eol = eolchar; - size_t line_bytes = buf->line_bytes; - size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; - - if (buf->eof) - return false; - - if (buf->used != buf->left) - { - memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); - buf->used = buf->left; - buf->nlines = 0; - } - - for (;;) - { - char *ptr = buf->buf + buf->used; - struct line *linelim = buffer_linelim (buf); - struct line *line = linelim - buf->nlines; - size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; - char *line_start = buf->nlines ? line->text + line->length : buf->buf; - - while (line_bytes + 1 < avail) - { - /* Read as many bytes as possible, but do not read so many - bytes that there might not be enough room for the - corresponding line array. The worst case is when the - rest of the input file consists entirely of newlines, - except that the last byte is not a newline. */ - size_t readsize = (avail - 1) / (line_bytes + 1); - size_t bytes_read = fread (ptr, 1, readsize, fp); - char *ptrlim = ptr + bytes_read; - char *p; - avail -= bytes_read; - - if (bytes_read != readsize) - { - if (ferror (fp)) - die (_("read failed"), file); - if (feof (fp)) - { - buf->eof = true; - if (buf->buf == ptrlim) - return false; - if (ptrlim[-1] != eol) - *ptrlim++ = eol; - } - } - - /* Find and record each line in the just-read input. */ - while ((p = memchr (ptr, eol, ptrlim - ptr))) - { - ptr = p + 1; - line--; - line->text = line_start; - line->length = ptr - line_start; - mergesize = MAX (mergesize, line->length); - avail -= line_bytes; - - if (key) - { - /* Precompute the position of the first key for - efficiency. */ - line->keylim = (key->eword == SIZE_MAX - ? p - : limfield (line, key)); - - if (key->sword != SIZE_MAX) - line->keybeg = begfield (line, key); - else - { - if (key->skipsblanks) -#if HAVE_MBRTOWC - { - if (MB_CUR_MAX > 1) - { - size_t mblength; - - while (ismbblank (line_start, ptr - line_start, &mblength)) - line_start += mblength; - } - else -#endif - { - while (blanks[to_uchar (*line_start)]) - line_start++; - } - } - line->keybeg = line_start; - } - } - - line_start = ptr; - } - - ptr = ptrlim; - if (buf->eof) - break; - } - - buf->used = ptr - buf->buf; - buf->nlines = buffer_linelim (buf) - line; - if (buf->nlines != 0) - { - buf->left = ptr - line_start; - merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE; - return true; - } - - /* The current input line is too long to fit in the buffer. - Double the buffer size and try again. */ - buf->buf = x2nrealloc (buf->buf, &buf->alloc, sizeof *(buf->buf)); - } -} - -/* Compare strings A and B containing decimal fractions < 1. Each string - should begin with a decimal point followed immediately by the digits - of the fraction. Strings not of this form are considered to be zero. */ - -/* The goal here, is to take two numbers a and b... compare these - in parallel. Instead of converting each, and then comparing the - outcome. Most likely stopping the comparison before the conversion - is complete. The algorithm used, in the old sort: - - Algorithm: fraccompare - Action : compare two decimal fractions - accepts : char *a, char *b - returns : -1 if a<b, 0 if a=b, 1 if a>b. - implement: - - if *a == decimal_point AND *b == decimal_point - find first character different in a and b. - if both are digits, return the difference *a - *b. - if *a is a digit - skip past zeros - if digit return 1, else 0 - if *b is a digit - skip past zeros - if digit return -1, else 0 - if *a is a decimal_point - skip past decimal_point and zeros - if digit return 1, else 0 - if *b is a decimal_point - skip past decimal_point and zeros - if digit return -1, else 0 - return 0 */ - -static int -fraccompare (register const char *a, register const char *b) -{ - if (*a == decimal_point && *b == decimal_point) - { - while (*++a == *++b) - if (! ISDIGIT (*a)) - return 0; - if (ISDIGIT (*a) && ISDIGIT (*b)) - return *a - *b; - if (ISDIGIT (*a)) - goto a_trailing_nonzero; - if (ISDIGIT (*b)) - goto b_trailing_nonzero; - return 0; - } - else if (*a++ == decimal_point) - { - a_trailing_nonzero: - while (*a == NUMERIC_ZERO) - a++; - return ISDIGIT (*a); - } - else if (*b++ == decimal_point) - { - b_trailing_nonzero: - while (*b == NUMERIC_ZERO) - b++; - return - ISDIGIT (*b); - } - return 0; -} - -/* Compare strings A and B as numbers without explicitly converting them to - machine numbers. Comparatively slow for short strings, but asymptotically - hideously fast. */ - -static int -numcompare (register const char *a, register const char *b) -{ - char tmpa; - char tmpb; - int tmp; - size_t log_a; - size_t log_b; - -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) - { - size_t mblength; - size_t alen = strnlen (a, MB_LEN_MAX); - size_t blen = strnlen (b, MB_LEN_MAX); - - while (ismbblank (a, alen, &mblength)) - a += mblength, alen -= mblength; - while (ismbblank (b, blen, &mblength)) - b += mblength, blen -= mblength; - - tmpa = *a; - tmpb = *b; - } - else -#endif - { - tmpa = *a; - tmpb = *b; - - while (blanks[to_uchar (tmpa)]) - tmpa = *++a; - while (blanks[to_uchar (tmpb)]) - tmpb = *++b; - } - - if (tmpa == NEGATION_SIGN) - { - do - tmpa = *++a; - while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa)); - if (tmpb != NEGATION_SIGN) - { - if (tmpa == decimal_point) - do - tmpa = *++a; - while (tmpa == NUMERIC_ZERO); - if (ISDIGIT (tmpa)) - return -1; - while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb)) - tmpb = *++b; - if (tmpb == decimal_point) - do - tmpb = *++b; - while (tmpb == NUMERIC_ZERO); - if (ISDIGIT (tmpb)) - return -1; - return 0; - } - do - tmpb = *++b; - while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb)); - - while (tmpa == tmpb && ISDIGIT (tmpa)) - { - do - tmpa = *++a; - while (IS_THOUSANDS_SEP (tmpa)); - do - tmpb = *++b; - while (IS_THOUSANDS_SEP (tmpb)); - } - - if ((tmpa == decimal_point && !ISDIGIT (tmpb)) - || (tmpb == decimal_point && !ISDIGIT (tmpa))) - return -fraccompare (a, b); - - tmp = tmpb - tmpa; - - for (log_a = 0; ISDIGIT (tmpa); ++log_a) - do - tmpa = *++a; - while (IS_THOUSANDS_SEP (tmpa)); - - for (log_b = 0; ISDIGIT (tmpb); ++log_b) - do - tmpb = *++b; - while (IS_THOUSANDS_SEP (tmpb)); - - if (log_a != log_b) - return log_a < log_b ? 1 : -1; - - if (!log_a) - return 0; - - return tmp; - } - else if (tmpb == NEGATION_SIGN) - { - do - tmpb = *++b; - while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb)); - if (tmpb == decimal_point) - do - tmpb = *++b; - while (tmpb == NUMERIC_ZERO); - if (ISDIGIT (tmpb)) - return 1; - while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa)) - tmpa = *++a; - if (tmpa == decimal_point) - do - tmpa = *++a; - while (tmpa == NUMERIC_ZERO); - if (ISDIGIT (tmpa)) - return 1; - return 0; - } - else - { - while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa)) - tmpa = *++a; - while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb)) - tmpb = *++b; - - while (tmpa == tmpb && ISDIGIT (tmpa)) - { - do - tmpa = *++a; - while (IS_THOUSANDS_SEP (tmpa)); - do - tmpb = *++b; - while (IS_THOUSANDS_SEP (tmpb)); - } - - if ((tmpa == decimal_point && !ISDIGIT (tmpb)) - || (tmpb == decimal_point && !ISDIGIT (tmpa))) - return fraccompare (a, b); - - tmp = tmpa - tmpb; - - for (log_a = 0; ISDIGIT (tmpa); ++log_a) - do - tmpa = *++a; - while (IS_THOUSANDS_SEP (tmpa)); - - for (log_b = 0; ISDIGIT (tmpb); ++log_b) - do - tmpb = *++b; - while (IS_THOUSANDS_SEP (tmpb)); - - if (log_a != log_b) - return log_a < log_b ? -1 : 1; - - if (!log_a) - return 0; - - return tmp; - } -} - -static int -general_numcompare (const char *sa, const char *sb) -{ - /* FIXME: add option to warn about failed conversions. */ - /* FIXME: maybe add option to try expensive FP conversion - only if A and B can't be compared more cheaply/accurately. */ - - char *bufa, *ea; - char *bufb, *eb; - double a; - double b; - - char *p; - struct lconv *lconvp = localeconv (); - size_t thousands_sep_len = strlen (lconvp->thousands_sep); - - bufa = (char *) xmalloc (strlen (sa) + 1); - bufb = (char *) xmalloc (strlen (sb) + 1); - strcpy (bufa, sa); - strcpy (bufb, sb); - - if (force_general_numcompare) - { - while (1) - { - a = strtod (bufa, &ea); - if (memcmp (ea, lconvp->thousands_sep, thousands_sep_len) == 0) - { - for (p = ea; *(p + thousands_sep_len) != '\0'; p++) - *p = *(p + thousands_sep_len); - *p = '\0'; - continue; - } - break; - } - - while (1) - { - b = strtod (bufb, &eb); - if (memcmp (eb, lconvp->thousands_sep, thousands_sep_len) == 0) - { - for (p = eb; *(p + thousands_sep_len) != '\0'; p++) - *p = *(p + thousands_sep_len); - *p = '\0'; - continue; - } - break; - } - } - else - { - a = strtod (bufa, &ea); - b = strtod (bufb, &eb); - } - - /* Put conversion errors at the start of the collating sequence. */ - free (bufa); - free (bufb); - if (bufa == ea) - return bufb == eb ? 0 : -1; - if (bufb == eb) - return 1; - - /* Sort numbers in the usual way, where -0 == +0. Put NaNs after - conversion errors but before numbers; sort them by internal - bit-pattern, for lack of a more portable alternative. */ - return (a < b ? -1 - : a > b ? 1 - : a == b ? 0 - : b == b ? -1 - : a == a ? 1 - : memcmp ((char *) &a, (char *) &b, sizeof a)); -} - -/* Return an integer in 1..12 of the month name S with length LEN. - Return 0 if the name in S is not recognized. */ - -static int -getmonth_uni (const char *s, size_t len) -{ - char *month; - register size_t i; - register int lo = 0, hi = MONTHS_PER_YEAR, result; - - while (len > 0 && blanks[to_uchar (*s)]) - { - ++s; - --len; - } - - if (len == 0) - return 0; - - month = alloca (len + 1); - for (i = 0; i < len; ++i) - month[i] = fold_toupper[to_uchar (s[i])]; - month[len] = '\0'; - - do - { - int ix = (lo + hi) / 2; - - if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) - hi = ix; - else - lo = ix; - } - while (hi - lo > 1); - - result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) - ? monthtab[lo].val : 0); - - return result; -} - -#if HAVE_MBRTOWC -static int -getmonth_mb (const char *s, size_t len) -{ - char *month; - register size_t i; - register int lo = 0, hi = MONTHS_PER_YEAR, result; - char *tmp; - size_t wclength, mblength; - const char **pp; - const wchar_t **wpp; - wchar_t *month_wcs; - mbstate_t state; - - while (len > 0 && ismbblank (s, len, &mblength)) - { - s += mblength; - len -= mblength; - } - - if (len == 0) - return 0; - - month = (char *) alloca (len + 1); - - tmp = (char *) alloca (len + 1); - memcpy (tmp, s, len); - tmp[len] = '\0'; - pp = (const char **)&tmp; - month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t)); - memset (&state, '\0', sizeof(mbstate_t)); - - wclength = mbsrtowcs (month_wcs, pp, len + 1, &state); - assert (wclength != (size_t)-1 && *pp == NULL); - - for (i = 0; i < wclength; i++) - month_wcs[i] = towupper(month_wcs[i]); - month_wcs[i] = L'\0'; - - wpp = (const wchar_t **)&month_wcs; - - mblength = wcsrtombs (month, wpp, len + 1, &state); - assert (mblength != (-1) && *wpp == NULL); - - do - { - int ix = (lo + hi) / 2; - - if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) - hi = ix; - else - lo = ix; - } - while (hi - lo > 1); - - result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) - ? monthtab[lo].val : 0); - - return result; -} -#endif - -/* Compare two lines A and B trying every key in sequence until there - are no more keys or a difference is found. */ - -static int -keycompare_uni (const struct line *a, const struct line *b) -{ - struct keyfield const *key = keylist; - - /* For the first iteration only, the key positions have been - precomputed for us. */ - register char *texta = a->keybeg; - register char *textb = b->keybeg; - register char *lima = a->keylim; - register char *limb = b->keylim; - - int diff; - - for (;;) - { - register char const *translate = key->translate; - register bool const *ignore = key->ignore; - - /* Find the lengths. */ - size_t lena = lima <= texta ? 0 : lima - texta; - size_t lenb = limb <= textb ? 0 : limb - textb; - - /* Actually compare the fields. */ - if (key->numeric | key->general_numeric) - { - char savea = *lima, saveb = *limb; - - *lima = *limb = '\0'; - diff = ((key->numeric ? numcompare : general_numcompare) - (texta, textb)); - *lima = savea, *limb = saveb; - } - else if (key->month) - diff = getmonth (texta, lena) - getmonth (textb, lenb); - /* Sorting like this may become slow, so in a simple locale the user - can select a faster sort that is similar to ascii sort */ - else if (HAVE_SETLOCALE && hard_LC_COLLATE) - { - if (ignore || translate) - { - char *copy_a = alloca (lena + 1 + lenb + 1); - char *copy_b = copy_a + lena + 1; - size_t new_len_a, new_len_b, i; - - /* Ignore and/or translate chars before comparing. */ - for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++) - { - if (i < lena) - { - copy_a[new_len_a] = (translate - ? translate[to_uchar (texta[i])] - : texta[i]); - if (!ignore || !ignore[to_uchar (texta[i])]) - ++new_len_a; - } - if (i < lenb) - { - copy_b[new_len_b] = (translate - ? translate[to_uchar (textb[i])] - : textb [i]); - if (!ignore || !ignore[to_uchar (textb[i])]) - ++new_len_b; - } - } - - diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); - } - else if (lena == 0) - diff = - NONZERO (lenb); - else if (lenb == 0) - goto greater; - else - diff = xmemcoll (texta, lena, textb, lenb); - } - else if (ignore) - { -#define CMP_WITH_IGNORE(A, B) \ - do \ - { \ - for (;;) \ - { \ - while (texta < lima && ignore[to_uchar (*texta)]) \ - ++texta; \ - while (textb < limb && ignore[to_uchar (*textb)]) \ - ++textb; \ - if (! (texta < lima && textb < limb)) \ - break; \ - diff = to_uchar (A) - to_uchar (B); \ - if (diff) \ - goto not_equal; \ - ++texta; \ - ++textb; \ - } \ - \ - diff = (texta < lima) - (textb < limb); \ - } \ - while (0) - - if (translate) - CMP_WITH_IGNORE (translate[to_uchar (*texta)], - translate[to_uchar (*textb)]); - else - CMP_WITH_IGNORE (*texta, *textb); - } - else if (lena == 0) - diff = - NONZERO (lenb); - else if (lenb == 0) - goto greater; - else - { - if (translate) - { - while (texta < lima && textb < limb) - { - diff = (to_uchar (translate[to_uchar (*texta++)]) - - to_uchar (translate[to_uchar (*textb++)])); - if (diff) - goto not_equal; - } - } - else - { - diff = memcmp (texta, textb, MIN (lena, lenb)); - if (diff) - goto not_equal; - } - diff = lena < lenb ? -1 : lena != lenb; - } - - if (diff) - goto not_equal; - - key = key->next; - if (! key) - break; - - /* Find the beginning and limit of the next field. */ - if (key->eword != SIZE_MAX) - lima = limfield (a, key), limb = limfield (b, key); - else - lima = a->text + a->length - 1, limb = b->text + b->length - 1; - - if (key->sword != SIZE_MAX) - texta = begfield (a, key), textb = begfield (b, key); - else - { - texta = a->text, textb = b->text; - if (key->skipsblanks) - { - while (texta < lima && blanks[to_uchar (*texta)]) - ++texta; - while (textb < limb && blanks[to_uchar (*textb)]) - ++textb; - } - } - } - - return 0; - - greater: - diff = 1; - not_equal: - return key->reverse ? -diff : diff; -} - -#if HAVE_MBRTOWC -static int -keycompare_mb (const struct line *a, const struct line *b) -{ - struct keyfield *key = keylist; - - /* For the first iteration only, the key positions have been - precomputed for us. */ - char *texta = a->keybeg; - char *textb = b->keybeg; - char *lima = a->keylim; - char *limb = b->keylim; - - size_t mblength_a, mblength_b; - wchar_t wc_a, wc_b; - mbstate_t state_a, state_b; - - int diff; - - memset (&state_a, '\0', sizeof(mbstate_t)); - memset (&state_b, '\0', sizeof(mbstate_t)); - - for (;;) - { - unsigned char *translate = (unsigned char *) key->translate; - bool const *ignore = key->ignore; - - /* Find the lengths. */ - size_t lena = lima <= texta ? 0 : lima - texta; - size_t lenb = limb <= textb ? 0 : limb - textb; - - /* Actually compare the fields. */ - if (key->numeric | key->general_numeric) - { - char savea = *lima, saveb = *limb; - - *lima = *limb = '\0'; - if (force_general_numcompare) - diff = general_numcompare (texta, textb); - else - diff = ((key->numeric ? numcompare : general_numcompare) - (texta, textb)); - *lima = savea, *limb = saveb; - } - else if (key->month) - diff = getmonth (texta, lena) - getmonth (textb, lenb); - else - { - if (ignore || translate) - { - char *copy_a = (char *) alloca (lena + 1 + lenb + 1); - char *copy_b = copy_a + lena + 1; - size_t new_len_a, new_len_b; - size_t i, j; - - /* Ignore and/or translate chars before comparing. */ -# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ - do \ - { \ - wchar_t uwc; \ - char mbc[MB_LEN_MAX]; \ - mbstate_t state_wc; \ - \ - for (NEW_LEN = i = 0; i < LEN;) \ - { \ - mbstate_t state_bak; \ - \ - state_bak = STATE; \ - MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ - \ - if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ - || MBLENGTH == 0) \ - { \ - if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ - STATE = state_bak; \ - if (!ignore) \ - COPY[NEW_LEN++] = TEXT[i++]; \ - continue; \ - } \ - \ - if (ignore) \ - { \ - if ((ignore == nonprinting && !iswprint (WC)) \ - || (ignore == nondictionary \ - && !iswalnum (WC) && !iswblank (WC))) \ - { \ - i += MBLENGTH; \ - continue; \ - } \ - } \ - \ - if (translate) \ - { \ - \ - uwc = toupper(WC); \ - if (WC == uwc) \ - { \ - memcpy (mbc, TEXT + i, MBLENGTH); \ - i += MBLENGTH; \ - } \ - else \ - { \ - i += MBLENGTH; \ - WC = uwc; \ - memset (&state_wc, '\0', sizeof (mbstate_t)); \ - \ - MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ - assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ - } \ - \ - for (j = 0; j < MBLENGTH; j++) \ - COPY[NEW_LEN++] = mbc[j]; \ - } \ - else \ - for (j = 0; j < MBLENGTH; j++) \ - COPY[NEW_LEN++] = TEXT[i++]; \ - } \ - COPY[NEW_LEN] = '\0'; \ - } \ - while (0) - IGNORE_CHARS (new_len_a, lena, texta, copy_a, - wc_a, mblength_a, state_a); - IGNORE_CHARS (new_len_b, lenb, textb, copy_b, - wc_b, mblength_b, state_b); - diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); - } - else if (lena == 0) - diff = - NONZERO (lenb); - else if (lenb == 0) - goto greater; - else - diff = xmemcoll (texta, lena, textb, lenb); - } - - if (diff) - goto not_equal; - - key = key->next; - if (! key) - break; - - /* Find the beginning and limit of the next field. */ - if (key->eword != -1) - lima = limfield (a, key), limb = limfield (b, key); - else - lima = a->text + a->length - 1, limb = b->text + b->length - 1; - - if (key->sword != -1) - texta = begfield (a, key), textb = begfield (b, key); - else - { - texta = a->text, textb = b->text; - if (key->skipsblanks) - { - while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) - texta += mblength_a; - while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) - textb += mblength_b; - } - } - } - - return 0; - -greater: - diff = 1; -not_equal: - return key->reverse ? -diff : diff; -} -#endif - -/* Compare two lines A and B, returning negative, zero, or positive - depending on whether A compares less than, equal to, or greater than B. */ - -static int -compare (register const struct line *a, register const struct line *b) -{ - int diff; - size_t alen, blen; - - /* First try to compare on the specified keys (if any). - The only two cases with no key at all are unadorned sort, - and unadorned sort -r. */ - if (keylist) - { - diff = keycompare (a, b); - alloca (0); - if (diff | unique | stable) - return diff; - } - - /* If the keys all compare equal (or no keys were specified) - fall through to the default comparison. */ - alen = a->length - 1, blen = b->length - 1; - - if (alen == 0) - diff = - NONZERO (blen); - else if (blen == 0) - diff = 1; - else if (HAVE_SETLOCALE && hard_LC_COLLATE) - diff = xmemcoll (a->text, alen, b->text, blen); - else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen)))) - diff = alen < blen ? -1 : alen != blen; - - return reverse ? -diff : diff; -} - -/* Check that the lines read from FILE_NAME come in order. Print a - diagnostic (FILE_NAME, line number, contents of line) to stderr and return - false if they are not in order. Otherwise, print no diagnostic - and return true. */ - -static bool -check (char const *file_name) -{ - FILE *fp = xfopen (file_name, "r"); - struct buffer buf; /* Input buffer. */ - struct line temp; /* Copy of previous line. */ - size_t alloc = 0; - uintmax_t line_number = 0; - struct keyfield const *key = keylist; - bool nonunique = ! unique; - bool ordered = true; - - initbuf (&buf, sizeof (struct line), - MAX (merge_buffer_size, sort_size)); - temp.text = NULL; - - while (fillbuf (&buf, fp, file_name)) - { - struct line const *line = buffer_linelim (&buf); - struct line const *linebase = line - buf.nlines; - - /* Make sure the line saved from the old buffer contents is - less than or equal to the first line of the new buffer. */ - if (alloc && nonunique <= compare (&temp, line - 1)) - { - found_disorder: - { - struct line const *disorder_line = line - 1; - uintmax_t disorder_line_number = - buffer_linelim (&buf) - disorder_line + line_number; - char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)]; - fprintf (stderr, _("%s: %s:%s: disorder: "), - program_name, file_name, - umaxtostr (disorder_line_number, hr_buf)); - write_bytes (disorder_line->text, disorder_line->length, stderr, - _("standard error")); - ordered = false; - break; - } - } - - /* Compare each line in the buffer with its successor. */ - while (linebase < --line) - if (nonunique <= compare (line, line - 1)) - goto found_disorder; - - line_number += buf.nlines; - - /* Save the last line of the buffer. */ - if (alloc < line->length) - { - do - { - alloc *= 2; - if (! alloc) - { - alloc = line->length; - break; - } - } - while (alloc < line->length); - - temp.text = xrealloc (temp.text, alloc); - } - memcpy (temp.text, line->text, line->length); - temp.length = line->length; - if (key) - { - temp.keybeg = temp.text + (line->keybeg - line->text); - temp.keylim = temp.text + (line->keylim - line->text); - } - } - - xfclose (fp, file_name); - free (buf.buf); - if (temp.text) - free (temp.text); - return ordered; -} - -/* Merge lines from FILES onto OFP. NFILES cannot be greater than - NMERGE. Close input and output files before returning. - OUTPUT_FILE gives the name of the output file. If it is NULL, - the output file is standard output. If OFP is NULL, the output - file has not been opened yet (or written to, if standard output). */ - -static void -mergefps (char **files, register int nfiles, - FILE *ofp, const char *output_file) -{ - FILE *fps[NMERGE]; /* Input streams for each file. */ - struct buffer buffer[NMERGE]; /* Input buffers for each file. */ - struct line saved; /* Saved line storage for unique check. */ - struct line const *savedline = NULL; - /* &saved if there is a saved line. */ - size_t savealloc = 0; /* Size allocated for the saved line. */ - struct line const *cur[NMERGE]; /* Current line in each line table. */ - struct line const *base[NMERGE]; /* Base of each line table. */ - int ord[NMERGE]; /* Table representing a permutation of fps, - such that cur[ord[0]] is the smallest line - and will be next output. */ - register int i, j, t; - struct keyfield const *key = keylist; - saved.text = NULL; - - /* Read initial lines from each input file. */ - for (i = 0; i < nfiles; ) - { - fps[i] = xfopen (files[i], "r"); - initbuf (&buffer[i], sizeof (struct line), - MAX (merge_buffer_size, sort_size / nfiles)); - if (fillbuf (&buffer[i], fps[i], files[i])) - { - struct line const *linelim = buffer_linelim (&buffer[i]); - cur[i] = linelim - 1; - base[i] = linelim - buffer[i].nlines; - i++; - } - else - { - /* fps[i] is empty; eliminate it from future consideration. */ - xfclose (fps[i], files[i]); - zaptemp (files[i]); - free (buffer[i].buf); - --nfiles; - for (j = i; j < nfiles; ++j) - files[j] = files[j + 1]; - } - } - - if (! ofp) - ofp = xfopen (output_file, "w"); - - /* Set up the ord table according to comparisons among input lines. - Since this only reorders two items if one is strictly greater than - the other, it is stable. */ - for (i = 0; i < nfiles; ++i) - ord[i] = i; - for (i = 1; i < nfiles; ++i) - if (0 < compare (cur[ord[i - 1]], cur[ord[i]])) - t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; - - /* Repeatedly output the smallest line until no input remains. */ - while (nfiles) - { - struct line const *smallest = cur[ord[0]]; - - /* If uniquified output is turned on, output only the first of - an identical series of lines. */ - if (unique) - { - if (savedline && compare (savedline, smallest)) - { - savedline = 0; - write_bytes (saved.text, saved.length, ofp, output_file); - } - if (!savedline) - { - savedline = &saved; - if (savealloc < smallest->length) - { - do - if (! savealloc) - { - savealloc = smallest->length; - break; - } - while ((savealloc *= 2) < smallest->length); - - saved.text = xrealloc (saved.text, savealloc); - } - saved.length = smallest->length; - memcpy (saved.text, smallest->text, saved.length); - if (key) - { - saved.keybeg = - saved.text + (smallest->keybeg - smallest->text); - saved.keylim = - saved.text + (smallest->keylim - smallest->text); - } - } - } - else - write_bytes (smallest->text, smallest->length, ofp, output_file); - - /* Check if we need to read more lines into core. */ - if (base[ord[0]] < smallest) - cur[ord[0]] = smallest - 1; - else - { - if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]])) - { - struct line const *linelim = buffer_linelim (&buffer[ord[0]]); - cur[ord[0]] = linelim - 1; - base[ord[0]] = linelim - buffer[ord[0]].nlines; - } - else - { - /* We reached EOF on fps[ord[0]]. */ - for (i = 1; i < nfiles; ++i) - if (ord[i] > ord[0]) - --ord[i]; - --nfiles; - xfclose (fps[ord[0]], files[ord[0]]); - zaptemp (files[ord[0]]); - free (buffer[ord[0]].buf); - for (i = ord[0]; i < nfiles; ++i) - { - fps[i] = fps[i + 1]; - files[i] = files[i + 1]; - buffer[i] = buffer[i + 1]; - cur[i] = cur[i + 1]; - base[i] = base[i + 1]; - } - for (i = 0; i < nfiles; ++i) - ord[i] = ord[i + 1]; - continue; - } - } - - /* The new line just read in may be larger than other lines - already in core; push it back in the queue until we encounter - a line larger than it. */ - for (i = 1; i < nfiles; ++i) - { - t = compare (cur[ord[0]], cur[ord[i]]); - if (!t) - t = ord[0] - ord[i]; - if (t < 0) - break; - } - t = ord[0]; - for (j = 1; j < i; ++j) - ord[j - 1] = ord[j]; - ord[i - 1] = t; - } - - if (unique && savedline) - { - write_bytes (saved.text, saved.length, ofp, output_file); - free (saved.text); - } - - xfclose (ofp, output_file); -} - -/* Merge into T the two sorted arrays of lines LO (with NLO members) - and HI (with NHI members). T, LO, and HI point just past their - respective arrays, and the arrays are in reverse order. NLO and - NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */ - -static inline void -mergelines (struct line *t, - struct line const *lo, size_t nlo, - struct line const *hi, size_t nhi) -{ - for (;;) - if (compare (lo - 1, hi - 1) <= 0) - { - *--t = *--lo; - if (! --nlo) - { - /* HI - NHI equalled T - (NLO + NHI) when this function - began. Therefore HI must equal T now, and there is no - need to copy from HI to T. */ - return; - } - } - else - { - *--t = *--hi; - if (! --nhi) - { - do - *--t = *--lo; - while (--nlo); - - return; - } - } -} - -/* Sort the array LINES with NLINES members, using TEMP for temporary space. - NLINES must be at least 2. - The input and output arrays are in reverse order, and LINES and - TEMP point just past the end of their respective arrays. - - Use a recursive divide-and-conquer algorithm, in the style - suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use - the optimization suggested by exercise 5.2.4-10; this requires room - for only 1.5*N lines, rather than the usual 2*N lines. Knuth - writes that this memory optimization was originally published by - D. A. Bell, Comp J. 1 (1958), 75. */ - -static void -sortlines (struct line *lines, size_t nlines, struct line *temp) -{ - if (nlines == 2) - { - if (0 < compare (&lines[-1], &lines[-2])) - { - struct line tmp = lines[-1]; - lines[-1] = lines[-2]; - lines[-2] = tmp; - } - } - else - { - size_t nlo = nlines / 2; - size_t nhi = nlines - nlo; - struct line *lo = lines; - struct line *hi = lines - nlo; - struct line *sorted_lo = temp; - - sortlines (hi, nhi, temp); - if (1 < nlo) - sortlines_temp (lo, nlo, sorted_lo); - else - sorted_lo[-1] = lo[-1]; - - mergelines (lines, sorted_lo, nlo, hi, nhi); - } -} - -/* Like sortlines (LINES, NLINES, TEMP), except output into TEMP - rather than sorting in place. */ - -static void -sortlines_temp (struct line *lines, size_t nlines, struct line *temp) -{ - if (nlines == 2) - { - bool swap = (0 < compare (&lines[-1], &lines[-2])); - temp[-1] = lines[-1 - swap]; - temp[-2] = lines[-2 + swap]; - } - else - { - size_t nlo = nlines / 2; - size_t nhi = nlines - nlo; - struct line *lo = lines; - struct line *hi = lines - nlo; - struct line *sorted_hi = temp - nlo; - - sortlines_temp (hi, nhi, sorted_hi); - if (1 < nlo) - sortlines (lo, nlo, temp); - - mergelines (temp, lo, nlo, sorted_hi, nhi); - } -} - -/* Return the index of the first of NFILES FILES that is the same file - as OUTFILE. If none can be the same, return NFILES. - - This test ensures that an otherwise-erroneous use like - "sort -m -o FILE ... FILE ..." copies FILE before writing to it. - It's not clear that POSIX requires this nicety. - Detect common error cases, but don't try to catch obscure cases like - "cat ... FILE ... | sort -m -o FILE" - where traditional "sort" doesn't copy the input and where - people should know that they're getting into trouble anyway. - Catching these obscure cases would slow down performance in - common cases. */ - -static int -first_same_file (char * const *files, int nfiles, char const *outfile) -{ - int i; - bool got_outstat = false; - struct stat instat, outstat; - - for (i = 0; i < nfiles; i++) - { - bool standard_input = STREQ (files[i], "-"); - - if (outfile && STREQ (outfile, files[i]) && ! standard_input) - return i; - - if (! got_outstat) - { - got_outstat = true; - if ((outfile - ? stat (outfile, &outstat) - : fstat (STDOUT_FILENO, &outstat)) - != 0) - return nfiles; - } - - if (((standard_input - ? fstat (STDIN_FILENO, &instat) - : stat (files[i], &instat)) - == 0) - && SAME_INODE (instat, outstat)) - return i; - } - - return nfiles; -} - -/* Merge NFILES FILES onto OUTPUT_FILE. However, merge at most - MAX_MERGE input files directly onto OUTPUT_FILE. MAX_MERGE cannot - exceed NMERGE. A null OUTPUT_FILE stands for standard output. */ - -static void -merge (char **files, int nfiles, int max_merge, char const *output_file) -{ - while (max_merge < nfiles) - { - FILE *tfp; - int i, t = 0; - char *temp; - for (i = 0; i < nfiles / NMERGE; ++i) - { - temp = create_temp_file (&tfp); - mergefps (&files[i * NMERGE], NMERGE, tfp, temp); - files[t++] = temp; - } - temp = create_temp_file (&tfp); - mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp); - files[t++] = temp; - nfiles = t; - if (nfiles == 1) - break; - } - - mergefps (files, nfiles, NULL, output_file); -} - -/* Sort NFILES FILES onto OUTPUT_FILE. */ - -static void -sort (char * const *files, int nfiles, char const *output_file) -{ - struct buffer buf; - int n_temp_files = 0; - bool output_file_created = false; - - buf.alloc = 0; - - while (nfiles) - { - char const *temp_output; - char const *file = *files; - FILE *fp = xfopen (file, "r"); - FILE *tfp; - size_t bytes_per_line = (2 * sizeof (struct line) - - sizeof (struct line) / 2); - - if (! buf.alloc) - initbuf (&buf, bytes_per_line, - sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line)); - buf.eof = false; - files++; - nfiles--; - - while (fillbuf (&buf, fp, file)) - { - struct line *line; - struct line *linebase; - - if (buf.eof && nfiles - && (bytes_per_line + 1 - < (buf.alloc - buf.used - bytes_per_line * buf.nlines))) - { - /* End of file, but there is more input and buffer room. - Concatenate the next input file; this is faster in - the usual case. */ - buf.left = buf.used; - break; - } - - line = buffer_linelim (&buf); - linebase = line - buf.nlines; - if (1 < buf.nlines) - sortlines (line, buf.nlines, linebase); - if (buf.eof && !nfiles && !n_temp_files && !buf.left) - { - xfclose (fp, file); - tfp = xfopen (output_file, "w"); - temp_output = output_file; - output_file_created = true; - } - else - { - ++n_temp_files; - temp_output = create_temp_file (&tfp); - } - - do - { - line--; - write_bytes (line->text, line->length, tfp, temp_output); - if (unique) - while (linebase < line && compare (line, line - 1) == 0) - line--; - } - while (linebase < line); - - xfclose (tfp, temp_output); - - if (output_file_created) - goto finish; - } - xfclose (fp, file); - } - - finish: - free (buf.buf); - - if (! output_file_created) - { - int i = n_temp_files; - struct tempnode *node; - char **tempfiles = xnmalloc (n_temp_files, sizeof *tempfiles); - for (node = temphead; i > 0; node = node->next) - tempfiles[--i] = node->name; - merge (tempfiles, n_temp_files, NMERGE, output_file); - free (tempfiles); - } -} - -/* Insert key KEY at the end of the key list. */ - -static void -insertkey (struct keyfield *key) -{ - struct keyfield **p; - - for (p = &keylist; *p; p = &(*p)->next) - continue; - *p = key; - key->next = NULL; -} - -/* Report a bad field specification SPEC, with extra info MSGID. */ - -static void badfieldspec (char const *, char const *) - ATTRIBUTE_NORETURN; -static void -badfieldspec (char const *spec, char const *msgid) -{ - error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"), - _(msgid), spec); - abort (); -} - -/* Parse the leading integer in STRING and store the resulting value - (which must fit into size_t) into *VAL. Return the address of the - suffix after the integer. If MSGID is NULL, return NULL after - failure; otherwise, report MSGID and exit on failure. */ - -static char const * -parse_field_count (char const *string, size_t *val, char const *msgid) -{ - char *suffix; - uintmax_t n; - - switch (xstrtoumax (string, &suffix, 10, &n, "")) - { - case LONGINT_OK: - case LONGINT_INVALID_SUFFIX_CHAR: - *val = n; - if (*val == n) - break; - /* Fall through. */ - case LONGINT_OVERFLOW: - case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR: - if (msgid) - error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"), - _(msgid), (int) (suffix - string), string); - return NULL; - - case LONGINT_INVALID: - if (msgid) - error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"), - _(msgid), string); - return NULL; - } - - return suffix; -} - -/* Handle interrupts and hangups. */ - -static void -sighandler (int sig) -{ -#ifndef SA_NOCLDSTOP - signal (sig, SIG_IGN); -#endif - - cleanup (); - - signal (sig, SIG_DFL); - raise (sig); -} - -/* Set the ordering options for KEY specified in S. - Return the address of the first character in S that - is not a valid ordering option. - BLANKTYPE is the kind of blanks that 'b' should skip. */ - -static char * -set_ordering (register const char *s, struct keyfield *key, - enum blanktype blanktype) -{ - while (*s) - { - switch (*s) - { - case 'b': - if (blanktype == bl_start || blanktype == bl_both) - key->skipsblanks = true; - if (blanktype == bl_end || blanktype == bl_both) - key->skipeblanks = true; - break; - case 'd': - key->ignore = nondictionary; - break; - case 'f': - key->translate = fold_toupper; - break; - case 'g': - key->general_numeric = true; - break; - case 'i': - /* Option order should not matter, so don't let -i override - -d. -d implies -i, but -i does not imply -d. */ - if (! key->ignore) - key->ignore = nonprinting; - break; - case 'M': - key->month = true; - break; - case 'n': - key->numeric = true; - break; - case 'r': - key->reverse = true; - break; - default: - return (char *) s; - } - ++s; - } - return (char *) s; -} - -static struct keyfield * -new_key (void) -{ - struct keyfield *key = xzalloc (sizeof *key); - key->eword = SIZE_MAX; - return key; -} - -int -main (int argc, char **argv) -{ - struct keyfield *key; - struct keyfield gkey; - char const *s; - int c = 0; - bool checkonly = false; - bool mergeonly = false; - int nfiles = 0; - bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); - bool obsolete_usage = (posix2_version () < 200112); - char const *short_options = (obsolete_usage - ? COMMON_SHORT_OPTIONS "y::" - : COMMON_SHORT_OPTIONS "y:"); - char *minus = "-", **files; - char const *outfile = NULL; - - initialize_main (&argc, &argv); - program_name = argv[0]; - setlocale (LC_ALL, ""); - bindtextdomain (PACKAGE, LOCALEDIR); - textdomain (PACKAGE); - - atexit (cleanup); - - initialize_exit_failure (SORT_FAILURE); - atexit (close_stdout); - - hard_LC_COLLATE = hard_locale (LC_COLLATE); -#if HAVE_NL_LANGINFO - hard_LC_TIME = hard_locale (LC_TIME); -#endif - -#if HAVE_SETLOCALE - /* Let's get locale's representation of the decimal point */ - { - struct lconv const *lconvp = localeconv (); - - decimal_point = *lconvp->decimal_point; - if (! decimal_point || lconvp->decimal_point[1]) - { - decimal_point = C_DECIMAL_POINT; - if (lconvp->decimal_point[0] && lconvp->decimal_point[1]) - force_general_numcompare = 1; - } - - /* We don't support multibyte thousands separators yet. */ - th_sep = *lconvp->thousands_sep; - if (! th_sep || lconvp->thousands_sep[1]) - { - th_sep = CHAR_MAX + 1; - if (lconvp->thousands_sep[0] && lconvp->thousands_sep[1]) - force_general_numcompare = 1; - } - } -#endif - -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) - { - inittables = inittables_mb; - begfield = begfield_mb; - limfield = limfield_mb; - getmonth = getmonth_mb; - keycompare = keycompare_mb; - } - else -#endif - { - inittables = inittables_uni; - begfield = begfield_uni; - limfield = limfield_uni; - keycompare = keycompare_uni; - getmonth = getmonth_uni; - } - - have_read_stdin = false; - inittables (); - - { - int i; - static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM }; - enum { nsigs = sizeof sig / sizeof sig[0] }; - -#ifdef SA_NOCLDSTOP - struct sigaction act; - - sigemptyset (&caught_signals); - for (i = 0; i < nsigs; i++) - { - sigaction (sig[i], NULL, &act); - if (act.sa_handler != SIG_IGN) - sigaddset (&caught_signals, sig[i]); - } - - act.sa_handler = sighandler; - act.sa_mask = caught_signals; - act.sa_flags = 0; - - for (i = 0; i < nsigs; i++) - if (sigismember (&caught_signals, sig[i])) - sigaction (sig[i], &act, NULL); -#else - for (i = 0; i < nsigs; i++) - if (signal (sig[i], SIG_IGN) != SIG_IGN) - signal (sig[i], sighandler); -#endif - } - - gkey.sword = gkey.eword = SIZE_MAX; - gkey.ignore = NULL; - gkey.translate = NULL; - gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false; - gkey.skipsblanks = gkey.skipeblanks = false; - - files = xnmalloc (argc, sizeof *files); - - for (;;) - { - /* Parse an operand as a file after "--" was seen; or if - pedantic and a file was seen, unless the POSIX version - predates 1003.1-2001 and -c was not seen and the operand is - "-o FILE" or "-oFILE". */ - - if (c == -1 - || (posixly_correct && nfiles != 0 - && ! (obsolete_usage - && ! checkonly - && optind != argc - && argv[optind][0] == '-' && argv[optind][1] == 'o' - && (argv[optind][2] || optind + 1 != argc))) - || ((c = getopt_long (argc, argv, short_options, - long_options, NULL)) - == -1)) - { - if (argc <= optind) - break; - files[nfiles++] = argv[optind++]; - } - else switch (c) - { - case 1: - key = NULL; - if (obsolete_usage && optarg[0] == '+') - { - /* Treat +POS1 [-POS2] as a key if possible; but silently - treat an operand as a file if it is not a valid +POS1. */ - key = new_key (); - s = parse_field_count (optarg + 1, &key->sword, NULL); - if (s && *s == '.') - s = parse_field_count (s + 1, &key->schar, NULL); - if (! (key->sword | key->schar)) - key->sword = SIZE_MAX; - if (! s || *set_ordering (s, key, bl_start)) - { - free (key); - key = NULL; - } - else - { - if (optind != argc && argv[optind][0] == '-' - && ISDIGIT (argv[optind][1])) - { - char const *optarg1 = argv[optind++]; - s = parse_field_count (optarg1 + 1, &key->eword, - N_("invalid number after `-'")); - if (*s == '.') - s = parse_field_count (s + 1, &key->echar, - N_("invalid number after `.'")); - if (*set_ordering (s, key, bl_end)) - badfieldspec (optarg1, - N_("stray character in field spec")); - } - insertkey (key); - } - } - if (! key) - files[nfiles++] = optarg; - break; - - case 'b': - case 'd': - case 'f': - case 'g': - case 'i': - case 'M': - case 'n': - case 'r': - { - char str[2]; - str[0] = c; - str[1] = '\0'; - set_ordering (str, &gkey, bl_both); - } - break; - - case 'c': - checkonly = true; - break; - - case 'k': - key = new_key (); - - /* Get POS1. */ - s = parse_field_count (optarg, &key->sword, - N_("invalid number at field start")); - if (! key->sword--) - { - /* Provoke with `sort -k0' */ - badfieldspec (optarg, N_("field number is zero")); - } - if (*s == '.') - { - s = parse_field_count (s + 1, &key->schar, - N_("invalid number after `.'")); - if (! key->schar--) - { - /* Provoke with `sort -k1.0' */ - badfieldspec (optarg, N_("character offset is zero")); - } - } - if (! (key->sword | key->schar)) - key->sword = SIZE_MAX; - s = set_ordering (s, key, bl_start); - if (*s != ',') - { - key->eword = SIZE_MAX; - key->echar = 0; - } - else - { - /* Get POS2. */ - s = parse_field_count (s + 1, &key->eword, - N_("invalid number after `,'")); - if (! key->eword--) - { - /* Provoke with `sort -k1,0' */ - badfieldspec (optarg, N_("field number is zero")); - } - if (*s == '.') - s = parse_field_count (s + 1, &key->echar, - N_("invalid number after `.'")); - else - { - /* `-k 2,3' is equivalent to `+1 -3'. */ - key->eword++; - } - s = set_ordering (s, key, bl_end); - } - if (*s) - badfieldspec (optarg, N_("stray character in field spec")); - insertkey (key); - break; - - case 'm': - mergeonly = true; - break; - - case 'o': - if (outfile && !STREQ (outfile, optarg)) - error (SORT_FAILURE, 0, _("multiple output files specified")); - outfile = optarg; - break; - - case 's': - stable = true; - break; - - case 'S': - specify_sort_size (optarg); - break; - - case 't': - { - char newtab[MB_LEN_MAX + 1]; - size_t newtab_length = 1; - strncpy (newtab, optarg, MB_LEN_MAX); - if (! newtab[0]) - error (SORT_FAILURE, 0, _("empty tab")); -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) - { - wchar_t wc; - mbstate_t state; - size_t i; - - memset (&state, '\0', sizeof (mbstate_t)); - newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, MB_LEN_MAX), &state); - switch (newtab_length) - { - case (size_t) -1: - case (size_t) -2: - case 0: - newtab_length = 1; - } - - if (optarg[newtab_length]) - { - /* Provoke with `sort -txx'. Complain about - "multi-character tab" instead of "multibyte tab", so - that the diagnostic's wording does not need to be - changed once multibyte characters are supported. */ - error (SORT_FAILURE, 0, _("multi-character tab `%s'"), - optarg); - } - } - else -#endif - - if (optarg[1]) - { - if (STREQ (optarg, "\\0")) - newtab[0] = '\0'; - else - { - /* Provoke with `sort -txx'. Complain about - "multi-character tab" instead of "multibyte tab", so - that the diagnostic's wording does not need to be - changed once multibyte characters are supported. */ - error (SORT_FAILURE, 0, _("multi-character tab `%s'"), - optarg); - } - } - if (!tab_default && (tab_length != newtab_length - || memcmp(tab, newtab, tab_length) != 0)) - error (SORT_FAILURE, 0, _("incompatible tabs")); - memcpy(tab, newtab, newtab_length); - tab_length = newtab_length; - tab_default = false; - } - break; - - case 'T': - add_temp_dir (optarg); - break; - - case 'u': - unique = true; - break; - - case 'y': - /* Accept and ignore e.g. -y0 for compatibility with Solaris - 2.x through Solaris 7. -y is marked as obsolete starting - with Solaris 8. */ - break; - - case 'z': - eolchar = 0; - break; - - case_GETOPT_HELP_CHAR; - - case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); - - default: - usage (SORT_FAILURE); - } - } - - /* Inheritance of global options to individual keys. */ - for (key = keylist; key; key = key->next) - if (! (key->ignore || key->translate - || (key->skipsblanks | key->reverse - | key->skipeblanks | key->month | key->numeric - | key->general_numeric))) - { - key->ignore = gkey.ignore; - key->translate = gkey.translate; - key->skipsblanks = gkey.skipsblanks; - key->skipeblanks = gkey.skipeblanks; - key->month = gkey.month; - key->numeric = gkey.numeric; - key->general_numeric = gkey.general_numeric; - key->reverse = gkey.reverse; - } - - if (!keylist && (gkey.ignore || gkey.translate - || (gkey.skipsblanks | gkey.skipeblanks | gkey.month - | gkey.numeric | gkey.general_numeric))) - insertkey (&gkey); - reverse = gkey.reverse; - - if (temp_dir_count == 0) - { - char const *tmp_dir = getenv ("TMPDIR"); - add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR); - } - - if (nfiles == 0) - { - nfiles = 1; - files = − - } - - if (checkonly) - { - if (nfiles > 1) - { - error (0, 0, _("extra operand %s not allowed with -c"), - quote (files[1])); - usage (SORT_FAILURE); - } - - /* POSIX requires that sort return 1 IFF invoked with -c and the - input is not properly sorted. */ - exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); - } - - if (mergeonly) - { - int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile); - merge (files, nfiles, max_merge, outfile); - } - else - sort (files, nfiles, outfile); - - if (have_read_stdin && fclose (stdin) == EOF) - die (_("close failed"), "-"); - - exit (EXIT_SUCCESS); -} |