diff options
author | ps <ps@FreeBSD.org> | 2000-05-22 09:53:22 +0000 |
---|---|---|
committer | ps <ps@FreeBSD.org> | 2000-05-22 09:53:22 +0000 |
commit | 1b28029810e9c377087ea5a45acc8767cf0196b3 (patch) | |
tree | 27b16fc210b9a302c9e74f90e36a9b5ed21e6300 /contrib/less/charset.c | |
download | FreeBSD-src-1b28029810e9c377087ea5a45acc8767cf0196b3.zip FreeBSD-src-1b28029810e9c377087ea5a45acc8767cf0196b3.tar.gz |
Import the [now] dual licensed version 3.5.4 of less. It is
distributed under your choice of the GPL or a BSD style license.
Reviewed by: peter
Obtained from: http://home.flash.net/~marknu/less/
Diffstat (limited to 'contrib/less/charset.c')
-rw-r--r-- | contrib/less/charset.c | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/contrib/less/charset.c b/contrib/less/charset.c new file mode 100644 index 0000000..efb26a8 --- /dev/null +++ b/contrib/less/charset.c @@ -0,0 +1,294 @@ +/* + * Copyright (C) 1984-2000 Mark Nudelman + * + * You may distribute under the terms of either the GNU General Public + * License or the Less License, as specified in the README file. + * + * For more information about less, or for information on how to + * contact the author, see the README file. + */ + + +/* + * Functions to define the character set + * and do things specific to the character set. + */ + +#include "less.h" +#if HAVE_LOCALE +#include <locale.h> +#include <ctype.h> +#endif + +public int utf_mode = 0; + +/* + * Predefined character sets, + * selected by the LESSCHARSET environment variable. + */ +struct charset { + char *name; + int *p_flag; + char *desc; +} charsets[] = { + { "ascii", NULL, "8bcccbcc18b95.b" }, + { "dos", NULL, "8bcccbcc12bc5b95.b." }, + { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, + { "iso8859", NULL, "8bcccbcc18b95.33b." }, + { "koi8-r", NULL, "8bcccbcc18b95.b128." }, + { "latin1", NULL, "8bcccbcc18b95.33b." }, + { "next", NULL, "8bcccbcc18b95.bb125.bb" }, + { "utf-8", &utf_mode, "8bcccbcc18b." }, + { NULL, NULL, NULL } +}; + +#define IS_BINARY_CHAR 01 +#define IS_CONTROL_CHAR 02 + +static char chardef[256]; +static char *binfmt = NULL; +public int binattr = AT_STANDOUT; + + +/* + * Define a charset, given a description string. + * The string consists of 256 letters, + * one for each character in the charset. + * If the string is shorter than 256 letters, missing letters + * are taken to be identical to the last one. + * A decimal number followed by a letter is taken to be a + * repetition of the letter. + * + * Each letter is one of: + * . normal character + * b binary character + * c control character + */ + static void +ichardef(s) + char *s; +{ + register char *cp; + register int n; + register char v; + + n = 0; + v = 0; + cp = chardef; + while (*s != '\0') + { + switch (*s++) + { + case '.': + v = 0; + break; + case 'c': + v = IS_CONTROL_CHAR; + break; + case 'b': + v = IS_BINARY_CHAR|IS_CONTROL_CHAR; + break; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = (10 * n) + (s[-1] - '0'); + continue; + + default: + error("invalid chardef", NULL_PARG); + quit(QUIT_ERROR); + /*NOTREACHED*/ + } + + do + { + if (cp >= chardef + sizeof(chardef)) + { + error("chardef longer than 256", NULL_PARG); + quit(QUIT_ERROR); + /*NOTREACHED*/ + } + *cp++ = v; + } while (--n > 0); + n = 0; + } + + while (cp < chardef + sizeof(chardef)) + *cp++ = v; +} + +/* + * Define a charset, given a charset name. + * The valid charset names are listed in the "charsets" array. + */ + static int +icharset(name) + register char *name; +{ + register struct charset *p; + + if (name == NULL || *name == '\0') + return (0); + + for (p = charsets; p->name != NULL; p++) + { + if (strcmp(name, p->name) == 0) + { + ichardef(p->desc); + if (p->p_flag != NULL) + *(p->p_flag) = 1; + return (1); + } + } + + error("invalid charset name", NULL_PARG); + quit(QUIT_ERROR); + /*NOTREACHED*/ +} + +#if HAVE_LOCALE +/* + * Define a charset, given a locale name. + */ + static void +ilocale() +{ + register int c; + + setlocale(LC_ALL, ""); + for (c = 0; c < (int) sizeof(chardef); c++) + { + if (isprint(c)) + chardef[c] = 0; + else if (iscntrl(c)) + chardef[c] = IS_CONTROL_CHAR; + else + chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; + } +} +#endif + +/* + * Define the printing format for control chars. + */ + public void +setbinfmt(s) + char *s; +{ + if (s == NULL || *s == '\0') + s = "*s<%X>"; + /* + * Select the attributes if it starts with "*". + */ + if (*s == '*') + { + switch (s[1]) + { + case 'd': binattr = AT_BOLD; break; + case 'k': binattr = AT_BLINK; break; + case 's': binattr = AT_STANDOUT; break; + case 'u': binattr = AT_UNDERLINE; break; + default: binattr = AT_NORMAL; break; + } + s += 2; + } + binfmt = s; +} + +/* + * Initialize charset data structures. + */ + public void +init_charset() +{ + register char *s; + + s = lgetenv("LESSBINFMT"); + setbinfmt(s); + + /* + * See if environment variable LESSCHARSET is defined. + */ + s = lgetenv("LESSCHARSET"); + if (icharset(s)) + return; + /* + * LESSCHARSET is not defined: try LESSCHARDEF. + */ + s = lgetenv("LESSCHARDEF"); + if (s != NULL && *s != '\0') + { + ichardef(s); + return; + } + +#if HAVE_STRSTR + /* + * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. + */ + if ((s = lgetenv("LC_ALL")) != NULL || + (s = lgetenv("LC_CTYPE")) != NULL || + (s = lgetenv("LANG")) != NULL) + { + if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL) + if (icharset("utf-8")) + return; + } +#endif + +#if HAVE_LOCALE + /* + * Use setlocale. + */ + ilocale(); +#else + /* + * Default to "latin1". + */ + (void) icharset("latin1"); +#endif +} + +/* + * Is a given character a "binary" character? + */ + public int +binary_char(c) + unsigned char c; +{ + c &= 0377; + return (chardef[c] & IS_BINARY_CHAR); +} + +/* + * Is a given character a "control" character? + */ + public int +control_char(c) + int c; +{ + c &= 0377; + return (chardef[c] & IS_CONTROL_CHAR); +} + +/* + * Return the printable form of a character. + * For example, in the "ascii" charset '\3' is printed as "^C". + */ + public char * +prchar(c) + int c; +{ + static char buf[8]; + + c &= 0377; + if (!control_char(c)) + sprintf(buf, "%c", c); + else if (c == ESC) + sprintf(buf, "ESC"); + else if (c < 128 && !control_char(c ^ 0100)) + sprintf(buf, "^%c", c ^ 0100); + else + sprintf(buf, binfmt, c); + return (buf); +} |