summaryrefslogtreecommitdiffstats
path: root/usr.bin/sort/fields.c
diff options
context:
space:
mode:
authormarkm <markm@FreeBSD.org>2002-03-22 13:54:24 +0000
committermarkm <markm@FreeBSD.org>2002-03-22 13:54:24 +0000
commit7406319bb4183a3a96ea4f4529e1d1faffb27313 (patch)
tree175f87c0ef5f84f7b79127cd284dbd553e76549f /usr.bin/sort/fields.c
parent5f2fdd1aa5cb8f249ef7ceac16b22990263464ef (diff)
downloadFreeBSD-src-7406319bb4183a3a96ea4f4529e1d1faffb27313.zip
FreeBSD-src-7406319bb4183a3a96ea4f4529e1d1faffb27313.tar.gz
Vendor import NETBSD's sort(1). This will be a replacement for
our GNU sort, as discussed 6 months or more ago.
Diffstat (limited to 'usr.bin/sort/fields.c')
-rw-r--r--usr.bin/sort/fields.c339
1 files changed, 339 insertions, 0 deletions
diff --git a/usr.bin/sort/fields.c b/usr.bin/sort/fields.c
new file mode 100644
index 0000000..175b87f
--- /dev/null
+++ b/usr.bin/sort/fields.c
@@ -0,0 +1,339 @@
+/* $NetBSD: fields.c,v 1.9 2001/02/19 19:52:27 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Subroutines to generate sort keys. */
+
+#include "sort.h"
+
+#ifndef lint
+__RCSID("$NetBSD: fields.c,v 1.9 2001/02/19 19:52:27 jdolecek Exp $");
+__SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#define blancmange(ptr) { \
+ if (BLANK & d_mask[*(ptr)]) \
+ while (BLANK & d_mask[*(++(ptr))]); \
+}
+
+#define NEXTCOL(pos) { \
+ if (!SEP_FLAG) \
+ while (BLANK & l_d_mask[*(++pos)]); \
+ while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
+}
+
+static u_char *enterfield __P((u_char *, u_char *, struct field *, int));
+static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
+
+extern struct coldesc clist[(ND+1)*2];
+extern int ncols;
+
+#define DECIMAL '.'
+#define OFFSET 128
+
+u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
+u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
+u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
+u_char fnum[NBINS], rnum[NBINS];
+
+/*
+ * constructs sort key with leading recheader, followed by the key,
+ * followed by the original line.
+ */
+length_t
+enterkey(keybuf, line, size, fieldtable)
+ RECHEADER *keybuf; /* pointer to start of key */
+ DBT *line;
+ int size;
+ struct field fieldtable[];
+{
+ int i;
+ u_char *l_d_mask;
+ u_char *lineend, *pos;
+ u_char *endkey, *keypos;
+ struct coldesc *clpos;
+ int col = 1;
+ struct field *ftpos;
+ l_d_mask = d_mask;
+ pos = (u_char *) line->data - 1;
+ lineend = (u_char *) line->data + line->size-1;
+ /* don't include rec_delimiter */
+
+ for (i = 0; i < ncols; i++) {
+ clpos = clist + i;
+ for (; (col < clpos->num) && (pos < lineend); col++) {
+ NEXTCOL(pos);
+ }
+ if (pos >= lineend)
+ break;
+ clpos->start = SEP_FLAG ? pos + 1 : pos;
+ NEXTCOL(pos);
+ clpos->end = pos;
+ col++;
+ if (pos >= lineend) {
+ clpos->end = lineend;
+ i++;
+ break;
+ }
+ }
+ for (; i <= ncols; i++)
+ clist[i].start = clist[i].end = lineend;
+ if (clist[0].start < (u_char *) line->data)
+ clist[0].start++;
+
+ keypos = keybuf->data;
+ endkey = (u_char *) keybuf + size - line->size;
+ for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
+ if ((keypos = enterfield(keypos, endkey, ftpos,
+ fieldtable->flags)) == NULL)
+ return (1);
+
+ keybuf->offset = keypos - keybuf->data;
+ keybuf->length = keybuf->offset + line->size;
+ if (keybuf->length + sizeof(TRECHEADER) > size) {
+ /* line too long for buffer */
+ return (1);
+ }
+
+ /*
+ * Make [s]radixsort() only sort by relevant part of key if:
+ * 1. we want to choose unique items by relevant field[s]
+ * 2. we want stable sort and so the items should be sorted only by
+ * the relevant field[s]
+ */
+ if (UNIQUE || (stable_sort && keybuf->offset < line->size))
+ keypos[-1] = REC_D;
+
+ memcpy(keybuf->data + keybuf->offset, line->data, line->size);
+ return (0);
+}
+
+/*
+ * constructs a field (as defined by -k) within a key
+ */
+static u_char *
+enterfield(tablepos, endkey, cur_fld, gflags)
+ struct field *cur_fld;
+ u_char *tablepos, *endkey;
+ int gflags;
+{
+ u_char *start, *end, *lineend, *mask, *lweight;
+ struct column icol, tcol;
+ u_int flags;
+ u_int Rflag;
+
+ icol = cur_fld->icol;
+ tcol = cur_fld->tcol;
+ flags = cur_fld->flags;
+ start = icol.p->start;
+ lineend = clist[ncols].end;
+ if (flags & BI)
+ blancmange(start);
+ start += icol.indent;
+ start = min(start, lineend);
+
+ if (!tcol.num)
+ end = lineend;
+ else {
+ if (tcol.indent) {
+ end = tcol.p->start;
+ if (flags & BT)
+ blancmange(end);
+ end += tcol.indent;
+ end = min(end, lineend);
+ } else
+ end = tcol.p->end;
+ }
+
+ if (flags & N) {
+ Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
+ return number(tablepos, endkey, start, end, Rflag);
+ }
+
+ mask = cur_fld->mask;
+ lweight = cur_fld->weights;
+ for (; start < end; start++)
+ if (mask[*start]) {
+ if (*start <= 1) {
+ if (tablepos+2 >= endkey)
+ return (NULL);
+ *tablepos++ = lweight[1];
+ *tablepos++ = lweight[*start ? 2 : 1];
+ } else {
+ if (tablepos+1 >= endkey)
+ return (NULL);
+ *tablepos++ = lweight[*start];
+ }
+ }
+ *tablepos++ = lweight[0];
+ return (tablepos == endkey ? NULL : tablepos);
+}
+
+/* Uses the first bin to assign sign, expsign, 0, and the first
+ * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
+ * When sorting in forward order:
+ * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
+ * else use (0-99)->(2-102).
+ * If the exponent is >=61, use another byte for each additional 253
+ * in the exponent. Cutoff is at 567.
+ * To avoid confusing the exponent and the mantissa, use a field delimiter
+ * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
+ * only time a field delimiter can come in that position.
+ * Reverse order is done analagously.
+ */
+
+static u_char *
+number(pos, bufend, line, lineend, Rflag)
+ u_char *line, *pos, *bufend, *lineend;
+ int Rflag;
+{
+ int or_sign, parity = 0;
+ int expincr = 1, exponent = -1;
+ int bite, expsign = 1, sign = 1;
+ u_char lastvalue, *nonzero, *tline, *C_TENS;
+ u_char *nweights;
+
+ if (Rflag)
+ nweights = rnum;
+ else
+ nweights = fnum;
+ if (pos > bufend - 8)
+ return (NULL);
+ /*
+ * or_sign sets the sort direction:
+ * (-r: +/-)(sign: +/-)(expsign: +/-)
+ */
+ or_sign = sign ^ expsign ^ Rflag;
+ blancmange(line);
+ if (*line == '-') { /* set the sign */
+ or_sign ^= 1;
+ sign = 0;
+ line++;
+ }
+ /* eat initial zeroes */
+ for (; *line == '0' && line < lineend; line++)
+ ;
+ /* calculate exponents < 0 */
+ if (*line == DECIMAL) {
+ exponent = 1;
+ while (*++line == '0' && line < lineend)
+ exponent++;
+ expincr = 0;
+ expsign = 0;
+ }
+ /* next character better be a digit */
+ if (*line < '1' || *line > '9' || line >= lineend) {
+ *pos++ = nweights[127];
+ return (pos);
+ }
+ if (expincr) {
+ for (tline = line-1; *++tline >= '0' &&
+ *tline <= '9' && tline < lineend;)
+ exponent++;
+ }
+ if (exponent > 567) {
+ *pos++ = nweights[sign ? (expsign ? 254 : 128)
+ : (expsign ? 0 : 126)];
+ warnx("exponent out of bounds");
+ return (pos);
+ }
+ bite = min(exponent, 61);
+ *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
+ : (expsign ? 64-bite : 64+bite)];
+ if (bite >= 61) {
+ do {
+ exponent -= bite;
+ bite = min(exponent, 254);
+ *pos++ = nweights[or_sign ? 254-bite : bite];
+ } while (bite == 254);
+ }
+ C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
+ for (; line < lineend; line++) {
+ if (*line >= '0' && *line <= '9') {
+ if (parity) {
+ *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
+ : *line);
+ if (pos == bufend)
+ return (NULL);
+ if (*line != '0' || lastvalue != '0')
+ nonzero = pos;
+ } else
+ lastvalue = *line;
+ parity ^= 1;
+ } else if(*line == DECIMAL) {
+ if(!expincr) /* a decimal already occurred once */
+ break;
+ expincr = 0;
+ } else
+ break;
+ }
+ if (parity && lastvalue != '0') {
+ *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
+ OFF_TENS[lastvalue] + '0';
+ } else
+ pos = nonzero;
+ if (pos > bufend-1)
+ return (NULL);
+ *pos++ = or_sign ? nweights[254] : nweights[0];
+ return (pos);
+}
+
+/* This forces a gap around the record delimiter
+ * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
+ * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
+ */
+void
+num_init()
+{
+ int i;
+ TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
+ NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
+ OFF_TENS = TENS - '0';
+ OFF_NTENS = NEGTENS - '0';
+ for (i = 1; i < 10; i++) {
+ TENS[i] = TENS[i - 1] + 10;
+ NEGTENS[i] = NEGTENS[i - 1] - 10;
+ }
+ for (i = 0; i < REC_D; i++) {
+ fnum[i] = i;
+ rnum[255 - i] = i;
+ }
+ for (i = REC_D; i <255; i++) {
+ fnum[i] = i + 1;
+ rnum[255 - i] = i - 1;
+ }
+}
OpenPOWER on IntegriCloud