summaryrefslogtreecommitdiffstats
path: root/contrib/less/charset.c
diff options
context:
space:
mode:
authorps <ps@FreeBSD.org>2000-05-22 09:53:22 +0000
committerps <ps@FreeBSD.org>2000-05-22 09:53:22 +0000
commit1b28029810e9c377087ea5a45acc8767cf0196b3 (patch)
tree27b16fc210b9a302c9e74f90e36a9b5ed21e6300 /contrib/less/charset.c
downloadFreeBSD-src-1b28029810e9c377087ea5a45acc8767cf0196b3.zip
FreeBSD-src-1b28029810e9c377087ea5a45acc8767cf0196b3.tar.gz
Import the [now] dual-licensed version 3.5.4 of less. It is
distributed under your choice of the GPL or a BSD-style license.
Reviewed by: peter
Obtained from: http://home.flash.net/~marknu/less/
Diffstat (limited to 'contrib/less/charset.c')
-rw-r--r--contrib/less/charset.c294
1 files changed, 294 insertions, 0 deletions
diff --git a/contrib/less/charset.c b/contrib/less/charset.c
new file mode 100644
index 0000000..efb26a8
--- /dev/null
+++ b/contrib/less/charset.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 1984-2000 Mark Nudelman
+ *
+ * You may distribute under the terms of either the GNU General Public
+ * License or the Less License, as specified in the README file.
+ *
+ * For more information about less, or for information on how to
+ * contact the author, see the README file.
+ */
+
+
+/*
+ * Functions to define the character set
+ * and do things specific to the character set.
+ */
+
+#include "less.h"
+#if HAVE_LOCALE
+#include <locale.h>
+#include <ctype.h>
+#endif
+
+public int utf_mode = 0; /* set to 1 by icharset() when the "utf-8" charset is selected */
+
+/*
+ * Predefined character sets, selected by name through the LESSCHARSET
+ * environment variable; the table ends with an all-NULL sentinel entry.
+ */
+struct charset {
+ char *name; /* charset name compared by icharset() */
+ int *p_flag; /* if non-NULL, int set to 1 when this charset is chosen */
+ char *desc; /* description string in the format parsed by ichardef() */
+} charsets[] = {
+ { "ascii", NULL, "8bcccbcc18b95.b" },
+ { "dos", NULL, "8bcccbcc12bc5b95.b." },
+ { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." },
+ { "iso8859", NULL, "8bcccbcc18b95.33b." },
+ { "koi8-r", NULL, "8bcccbcc18b95.b128." },
+ { "latin1", NULL, "8bcccbcc18b95.33b." },
+ { "next", NULL, "8bcccbcc18b95.bb125.bb" },
+ { "utf-8", &utf_mode, "8bcccbcc18b." },
+ { NULL, NULL, NULL }
+};
+
+#define IS_BINARY_CHAR 01 /* chardef[] bit tested by binary_char() */
+#define IS_CONTROL_CHAR 02 /* chardef[] bit tested by control_char() */
+
+static char chardef[256]; /* flag bits for each character, indexed by character code */
+static char *binfmt = NULL; /* format prchar() falls back to; set by setbinfmt() */
+public int binattr = AT_STANDOUT; /* attribute selected by setbinfmt() */
+
+
+/*
+ * Define a charset, given a description string.
+ * The string holds up to 256 letters, one per character of the charset.
+ * If the string is shorter than 256 letters, missing letters
+ * are taken to be identical to the last one.
+ * A decimal number followed by a letter is a repeat count for that letter.
+ * Any other character, or a description of more than 256 letters,
+ * reports an error and quits.
+ *
+ * Each letter is one of:
+ * . normal character
+ * b binary character
+ * c control character
+ */
+ static void
+ichardef(s)
+ char *s;
+{
+ register char *cp;
+ register int n;
+ register char v;
+
+ n = 0;
+ v = 0;
+ cp = chardef;
+ while (*s != '\0')
+ {
+ switch (*s++)
+ {
+ case '.':
+ v = 0;
+ break;
+ case 'c':
+ v = IS_CONTROL_CHAR;
+ break;
+ case 'b':
+ v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ /* Stop growing n past 256 (rejected below anyway) so it cannot overflow. */
+ if (n <= (int) sizeof(chardef))
+ n = (10 * n) + (s[-1] - '0');
+ continue;
+ default:
+ error("invalid chardef", NULL_PARG);
+ quit(QUIT_ERROR);
+ /*NOTREACHED*/
+ }
+
+ do
+ {
+ if (cp >= chardef + sizeof(chardef))
+ {
+ error("chardef longer than 256", NULL_PARG);
+ quit(QUIT_ERROR);
+ /*NOTREACHED*/
+ }
+ *cp++ = v;
+ } while (--n > 0);
+ n = 0;
+ }
+
+ while (cp < chardef + sizeof(chardef))
+ *cp++ = v;
+}
+
+/*
+ * Select one of the predefined charsets by name.
+ * Returns 0 for a NULL or empty name, 1 once a matching "charsets" entry
+ * has been installed; an unknown name reports an error and quits.
+ */
+ static int
+icharset(name)
+ register char *name;
+{
+ register struct charset *entry;
+
+ if (name == NULL || *name == '\0')
+ return (0);
+
+ for (entry = charsets; entry->name != NULL; entry++)
+ {
+ if (strcmp(name, entry->name) != 0)
+ continue;
+ ichardef(entry->desc);
+ if (entry->p_flag != NULL)
+ *(entry->p_flag) = 1;
+ return (1);
+ }
+
+ /* No table entry matched. */
+ error("invalid charset name", NULL_PARG);
+ quit(QUIT_ERROR);
+ /*NOTREACHED*/
+}
+
+#if HAVE_LOCALE
+/*
+ * Define the charset from the locale selected by the environment.
+ */
+ static void
+ilocale()
+{
+ register int ch;
+
+ setlocale(LC_ALL, "");
+ for (ch = 0; ch < (int) sizeof(chardef); ch++)
+ {
+ /* Printable first, then control; anything else is binary. */
+ if (isprint(ch))
+ chardef[ch] = 0;
+ else
+ chardef[ch] = iscntrl(ch) ? IS_CONTROL_CHAR :
+ (IS_BINARY_CHAR|IS_CONTROL_CHAR);
+ }
+}
+#endif
+
+/*
+ * Define the printing format for control chars; NULL or empty means "*s<%X>".
+ */
+ public void
+setbinfmt(s)
+ char *s;
+{
+ if (s == NULL || *s == '\0')
+ s = "*s<%X>";
+ /*
+ * Select the attribute from a leading "*x"; a lone "*" has no x to skip.
+ */
+ if (*s == '*' && s[1] != '\0')
+ {
+ switch (s[1])
+ {
+ case 'd': binattr = AT_BOLD; break;
+ case 'k': binattr = AT_BLINK; break;
+ case 's': binattr = AT_STANDOUT; break;
+ case 'u': binattr = AT_UNDERLINE; break;
+ default: binattr = AT_NORMAL; break;
+ }
+ s += 2;
+ }
+ binfmt = s;
+}
+
+/*
+ * Initialize the binary-char format and the charset from the environment.
+ */
+ public void
+init_charset()
+{
+ register char *s;
+
+ s = lgetenv("LESSBINFMT");
+ setbinfmt(s);
+
+ /*
+ * LESSCHARSET names a predefined charset; a bad name makes icharset quit.
+ */
+ s = lgetenv("LESSCHARSET");
+ if (icharset(s))
+ return;
+ /*
+ * LESSCHARSET is NULL or empty: try a LESSCHARDEF description string.
+ */
+ s = lgetenv("LESSCHARDEF");
+ if (s != NULL && *s != '\0')
+ {
+ ichardef(s);
+ return;
+ }
+
+#if HAVE_STRSTR
+ /*
+ * Check whether LC_ALL, LC_CTYPE or LANG (first non-NULL wins) names UTF-8.
+ */
+ if ((s = lgetenv("LC_ALL")) != NULL ||
+ (s = lgetenv("LC_CTYPE")) != NULL ||
+ (s = lgetenv("LANG")) != NULL)
+ {
+ if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL)
+ if (icharset("utf-8"))
+ return;
+ }
+#endif
+
+#if HAVE_LOCALE
+ /*
+ * Otherwise let ilocale() classify characters through setlocale/ctype.
+ */
+ ilocale();
+#else
+ /*
+ * Default to "latin1".
+ */
+ (void) icharset("latin1");
+#endif
+}
+
+/*
+ * Return nonzero if c is flagged as a "binary" character in chardef.
+ */
+ public int
+binary_char(c)
+ unsigned char c;
+{
+ register int flags = chardef[c & 0377];
+ return (flags & IS_BINARY_CHAR);
+}
+
+/*
+ * Return nonzero if c is flagged as a "control" character in chardef.
+ */
+ public int
+control_char(c)
+ int c;
+{
+ register int flags = chardef[c & 0377];
+ return (flags & IS_CONTROL_CHAR);
+}
+
+/*
+ * Return the printable form of a character, in a static buffer that
+ * the next call overwrites ('\3' is "^C" in the "ascii" charset).
+ */
+ public char *
+prchar(c)
+ int c;
+{
+ static char buf[8];
+
+ c &= 0377;
+ if (!control_char(c))
+ sprintf(buf, "%c", c);
+ else if (c == ESC)
+ sprintf(buf, "ESC");
+ else if (c < 128 && !control_char(c ^ 0100))
+ sprintf(buf, "^%c", c ^ 0100);
+ else /* binfmt is user-supplied (LESSBINFMT): never write past buf */
+ snprintf(buf, sizeof(buf), binfmt, c);
+ return (buf);
+}
OpenPOWER on IntegriCloud