summary | refs | log | tree | commit | diff | stats
path: root/usr.bin/look
diff options
context:
space:
mode:
author tjr <tjr@FreeBSD.org> 2004-07-19 11:12:02 +0000
committer tjr <tjr@FreeBSD.org> 2004-07-19 11:12:02 +0000
commit 9e984856c23b7c44c439ac5abc77b3532540fbbc (patch)
tree 068630c855e15274c97885da37c49d09a9f2a174 /usr.bin/look
parent 532da3de2f39c6f425d704c1f2e4d1a2cf65e864 (diff)
download FreeBSD-src-9e984856c23b7c44c439ac5abc77b3532540fbbc.zip
FreeBSD-src-9e984856c23b7c44c439ac5abc77b3532540fbbc.tar.gz
Add support for multibyte characters. While here, fix a longstanding bug in
the implementation of the -d option: we were skipping too many characters when a non-alphanumeric character was encountered.
Diffstat (limited to 'usr.bin/look')
-rw-r--r-- usr.bin/look/look.1 6
-rw-r--r-- usr.bin/look/look.c 121
2 files changed, 68 insertions, 59 deletions
diff --git a/usr.bin/look/look.1 b/usr.bin/look/look.1
index 4c75787..c3d0db2 100644
--- a/usr.bin/look/look.1
+++ b/usr.bin/look/look.1
@@ -106,12 +106,6 @@ A
utility appeared in
.At v7 .
.Sh BUGS
-The
-.Nm
-utility does not recognize multibyte characters.
-.Pp
-Lines are not compared according to the current locale's collating
-order.
Input files must be sorted with
.Ev LC_COLLATE
set to
diff --git a/usr.bin/look/look.c b/usr.bin/look/look.c
index 33a1295..68d6755 100644
--- a/usr.bin/look/look.c
+++ b/usr.bin/look/look.c
@@ -60,7 +60,6 @@ __FBSDID("$FreeBSD$");
#include <sys/mman.h>
#include <sys/stat.h>
-#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
@@ -70,33 +69,25 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
#include "pathnames.h"
static char _path_words[] = _PATH_WORDS;
-/*
- * FOLD and DICT convert characters to a normal form for comparison,
- * according to the user specified flags.
- *
- * DICT expects integers because it uses a non-character value to
- * indicate a character which should not participate in comparisons.
- */
#define EQUAL 0
#define GREATER 1
#define LESS (-1)
-#define NO_COMPARE (-2)
-
-#define FOLD(c) (isupper(c) ? tolower(c) : (unsigned char) (c))
-#define DICT(c) (isalnum(c) ? (c) & 0xFF /* int */ : NO_COMPARE)
int dflag, fflag;
-char *binary_search(unsigned char *, unsigned char *, unsigned char *);
-int compare(unsigned char *, unsigned char *, unsigned char *);
-char *linear_search(unsigned char *, unsigned char *, unsigned char *);
-int look(unsigned char *, unsigned char *, unsigned char *);
-void print_from(unsigned char *, unsigned char *, unsigned char *);
+char *binary_search(wchar_t *, unsigned char *, unsigned char *);
+int compare(wchar_t *, unsigned char *, unsigned char *);
+char *linear_search(wchar_t *, unsigned char *, unsigned char *);
+int look(wchar_t *, unsigned char *, unsigned char *);
+wchar_t *prepkey(const char *, wchar_t);
+void print_from(wchar_t *, unsigned char *, unsigned char *);
static void usage(void);
@@ -104,14 +95,16 @@ int
main(int argc, char *argv[])
{
struct stat sb;
- int ch, fd, termchar, match;
- unsigned char *back, *front, *string, *p;
+ int ch, fd, match;
+ wchar_t termchar;
+ unsigned char *back, *front;
unsigned const char *file;
+ wchar_t *key;
(void) setlocale(LC_CTYPE, "");
file = _path_words;
- termchar = '\0';
+ termchar = L'\0';
while ((ch = getopt(argc, argv, "dft:")) != -1)
switch(ch) {
case 'd':
@@ -121,7 +114,9 @@ main(int argc, char *argv[])
fflag = 1;
break;
case 't':
- termchar = *optarg;
+ if (mbrtowc(&termchar, optarg, MB_LEN_MAX, NULL) !=
+ strlen(optarg))
+ errx(2, "invalid termination character");
break;
case '?':
default:
@@ -134,12 +129,10 @@ main(int argc, char *argv[])
usage();
if (argc == 1) /* But set -df by default. */
dflag = fflag = 1;
- string = *argv++;
+ key = prepkey(*argv++, termchar);
if (argc >= 2)
file = *argv++;
- if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
- *++p = '\0';
match = 1;
do {
@@ -150,29 +143,47 @@ main(int argc, char *argv[])
if ((front = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_SHARED, fd, (off_t)0)) == MAP_FAILED)
err(2, "%s", file);
back = front + sb.st_size;
- match *= (look(string, front, back));
+ match *= (look(key, front, back));
close(fd);
} while (argc-- > 2 && (file = *argv++));
exit(match);
}
-int
-look(unsigned char *string, unsigned char *front, unsigned char *back)
+wchar_t *
+prepkey(const char *string, wchar_t termchar)
{
- int ch;
- unsigned char *readp, *writep;
+ const char *readp;
+ wchar_t *key, *writep;
+ wchar_t ch;
+ size_t clen;
- /* Reformat string string to avoid doing it multiple times later. */
- for (readp = writep = string; (ch = *readp++);) {
+ /*
+ * Reformat search string and convert to wide character representation
+ * to avoid doing it multiple times later.
+ */
+ if ((key = malloc(sizeof(wchar_t) * (strlen(string) + 1))) == NULL)
+ err(2, NULL);
+ readp = string;
+ writep = key;
+ while ((clen = mbrtowc(&ch, readp, MB_LEN_MAX, NULL)) != 0) {
+ if (clen == (size_t)-1 || clen == (size_t)-2)
+ errc(2, EILSEQ, NULL);
if (fflag)
- ch = FOLD(ch);
- if (dflag)
- ch = DICT(ch);
- if (ch != NO_COMPARE)
- *(writep++) = ch;
+ ch = towlower(ch);
+ if (!dflag || iswalnum(ch))
+ *writep++ = ch;
+ readp += clen;
}
- *writep = '\0';
+ *writep = L'\0';
+ if (termchar != L'\0' && (writep = wcschr(key, termchar)) != NULL)
+ *++writep = L'\0';
+ return (key);
+}
+
+int
+look(wchar_t *string, unsigned char *front, unsigned char *back)
+{
front = binary_search(string, front, back);
front = linear_search(string, front, back);
@@ -225,7 +236,7 @@ look(unsigned char *string, unsigned char *front, unsigned char *back)
while (p < back && *p++ != '\n');
char *
-binary_search(unsigned char *string, unsigned char *front, unsigned char *back)
+binary_search(wchar_t *string, unsigned char *front, unsigned char *back)
{
unsigned char *p;
@@ -259,7 +270,7 @@ binary_search(unsigned char *string, unsigned char *front, unsigned char *back)
* o front is before or at the first line to be printed.
*/
char *
-linear_search(unsigned char *string, unsigned char *front, unsigned char *back)
+linear_search(wchar_t *string, unsigned char *front, unsigned char *back)
{
while (front < back) {
switch (compare(string, front, back)) {
@@ -281,7 +292,7 @@ linear_search(unsigned char *string, unsigned char *front, unsigned char *back)
* Print as many lines as match string, starting at front.
*/
void
-print_from(unsigned char *string, unsigned char *front, unsigned char *back)
+print_from(wchar_t *string, unsigned char *front, unsigned char *back)
{
for (; front < back && compare(string, front, back) == EQUAL; ++front) {
for (; front < back && *front != '\n'; ++front)
@@ -306,23 +317,27 @@ print_from(unsigned char *string, unsigned char *front, unsigned char *back)
* "back" terminated).
*/
int
-compare(unsigned char *s1, unsigned char *s2, unsigned char *back)
+compare(wchar_t *s1, unsigned char *s2, unsigned char *back)
{
- int ch;
-
- for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
- ch = *s2;
+ wchar_t ch1, ch2;
+ size_t len2;
+
+ for (; *s1 && s2 < back && *s2 != '\n'; ++s1, s2 += len2) {
+ ch1 = *s1;
+ len2 = mbrtowc(&ch2, s2, back - s2, NULL);
+ if (len2 == (size_t)-1 || len2 == (size_t)-2) {
+ ch2 = *s2;
+ len2 = 1;
+ }
if (fflag)
- ch = FOLD(ch);
- if (dflag)
- ch = DICT(ch);
-
- if (ch == NO_COMPARE) {
- ++s2; /* Ignore character in comparison. */
+ ch2 = towlower(ch2);
+ if (dflag && !iswalnum(ch2)) {
+ /* Ignore character in comparison. */
+ --s1;
continue;
}
- if (*s1 != ch)
- return (*s1 < ch ? LESS : GREATER);
+ if (ch1 != ch2)
+ return (ch1 < ch2 ? LESS : GREATER);
}
return (*s1 ? GREATER : EQUAL);
}
OpenPOWER on IntegriCloud