diff options
author | knu <knu@FreeBSD.org> | 2001-02-23 16:20:55 +0000 |
---|---|---|
committer | knu <knu@FreeBSD.org> | 2001-02-23 16:20:55 +0000 |
commit | caa8a14382c376f9c55e5e5b70bd6bb997d4bd38 (patch) | |
tree | 84e1c36e31858c52eb3942df16eedd3e8f496d7e /usr.bin/find | |
parent | c6eb25402d7b17b05db469fb5b404765335ce916 (diff) | |
download | FreeBSD-src-caa8a14382c376f9c55e5e5b70bd6bb997d4bd38.zip FreeBSD-src-caa8a14382c376f9c55e5e5b70bd6bb997d4bd38.tar.gz |
Implement the following options and primaries:
-E Interpret regular expressions followed by -regex and -iregex
options as extended (modern) regular expressions rather than basic
regular expressions (BRE's). The re_format(7) manual page fully
describes both formats.
-iname pattern
Like -name, but the match is case insensitive.
-ipath pattern
Like -path, but the match is case insensitive.
-regex pattern
True if the whole path of the file matches pattern using a regular
expression. To match a file named ``./foo/xyzzy'', you can use
the regular expression ``.*/[xyz]*'' or ``.*/foo/.*'', but not
``xyzzy'' or ``/foo/''.
-iregex pattern
Like -regex, but the match is case insensitive.
These are meant to be compatible with other find(1) implementations
such as GNU's or NetBSD's, except for regexp library differences.
Reviewed by: sobomax, dcs, and some other people on -current
Diffstat (limited to 'usr.bin/find')
-rw-r--r-- | usr.bin/find/extern.h | 5 | ||||
-rw-r--r-- | usr.bin/find/find.1 | 42 | ||||
-rw-r--r-- | usr.bin/find/find.c | 1 | ||||
-rw-r--r-- | usr.bin/find/find.h | 8 | ||||
-rw-r--r-- | usr.bin/find/function.c | 156 | ||||
-rw-r--r-- | usr.bin/find/main.c | 9 | ||||
-rw-r--r-- | usr.bin/find/option.c | 5 |
7 files changed, 219 insertions, 7 deletions
diff --git a/usr.bin/find/extern.h b/usr.bin/find/extern.h index 00c8db2..442a950 100644 --- a/usr.bin/find/extern.h +++ b/usr.bin/find/extern.h @@ -63,7 +63,10 @@ PLAN *c_follow __P((void)); PLAN *c_fstype __P((char *)); #endif PLAN *c_group __P((char *)); +PLAN *c_iname __P((char *)); PLAN *c_inum __P((char *)); +PLAN *c_ipath __P((char *)); +PLAN *c_iregex __P((char *)); PLAN *c_links __P((char *)); PLAN *c_ls __P((void)); PLAN *c_name __P((char *)); @@ -75,6 +78,7 @@ PLAN *c_perm __P((char *)); PLAN *c_print __P((void)); PLAN *c_print0 __P((void)); PLAN *c_prune __P((void)); +PLAN *c_regex __P((char *)); PLAN *c_size __P((char *)); PLAN *c_type __P((char *)); PLAN *c_user __P((char *)); @@ -90,3 +94,4 @@ PLAN *c_or __P((void)); extern int ftsoptions, isdeprecated, isdepth, isoutput, issort, isxargs; extern int mindepth, maxdepth; +extern int regexp_flags; diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1 index ddd2666..95387e6 100644 --- a/usr.bin/find/find.1 +++ b/usr.bin/find/find.1 @@ -43,7 +43,7 @@ .Sh SYNOPSIS .Nm .Op Fl H | Fl L | Fl P -.Op Fl Xdsx +.Op Fl EXdsx .Op Fl f Ar pathname .Op Ar pathname ...\& .Ar expression @@ -59,6 +59,15 @@ of each file in the tree. The options are as follows: .Pp .Bl -tag -width Ds +.It Fl E +Interpret regular expressions followed by +.Ic -regex +and +.Ic -iregex +options as extended (modern) regular expressions rather than basic +regular expressions (BRE's). The +.Xr re_format 7 +manual page fully describes both formats. .It Fl H The .Fl H @@ -286,6 +295,20 @@ may be used as part of .Ar pattern . These characters may be matched explicitly by escaping them with a backslash (``\e''). +.It Ic -iname Ar pattern +Like +.Ic -name , +but the match is case insensitive. +.It Ic -regex Ar pattern +True if the whole path of the file matches +.Ar pattern +using regular expression. To match a file named ``./foo/xyzzy'', you +can use the regular expression ``.*/[xyz]*'' or ``.*/foo/.*'', but not +``xyzzy'' or ``/foo/''. 
+.It Ic -iregex Ar pattern +Like +.Ic -regex , +but the match is case insensitive. .It Ic -newer Ar file True if the current file has a more recent last modification time than .Ar file . @@ -303,6 +326,10 @@ These characters may be matched explicitly by escaping them with a backslash (``\e''). Slashes (``/'') are treated as normal characters and do not have to be matched explicitly. +.It Ic -ipath Ar pattern +Like +.Ic -path , +but the match is case insensitive. .It Xo .Ic -perm .Oo Fl Oc Ns Ar mode @@ -487,6 +514,7 @@ Print out a list of all the files that are either owned by ``wnj'' or that are newer than ``ttt''. .El .Sh SEE ALSO +.Xr re_format 7, .Xr chflags 1 , .Xr chmod 1 , .Xr locate 1 , @@ -506,11 +534,14 @@ utility syntax is a superset of the syntax specified by the standard. .Pp All the single character options as well as the +.Ic -iname , .Ic -inum , +.Ic -iregex , .Ic -print0 , .Ic -delete , +.Ic -ls , and -.Ic -ls +.Ic -regex primaries are extensions to .St -p1003.2 . .Pp @@ -540,6 +571,13 @@ primaries did not replace the string ``{}'' in the utility name or the utility arguments if it had preceding or following non-whitespace characters. This version replaces it no matter where in the utility name or arguments it appears. +.Pp +The +.Fl E +option was implemented on the analogy of +.Xr grep 1 +and +.Xr sed 1 . 
.Sh BUGS The special characters used by .Nm diff --git a/usr.bin/find/find.c b/usr.bin/find/find.c index 159ae2a..ce2595e 100644 --- a/usr.bin/find/find.c +++ b/usr.bin/find/find.c @@ -49,6 +49,7 @@ static const char rcsid[] = #include <err.h> #include <errno.h> #include <fts.h> +#include <regex.h> #include <stdio.h> #include <string.h> #include <stdlib.h> diff --git a/usr.bin/find/find.h b/usr.bin/find/find.h index e126086..f240fc1 100644 --- a/usr.bin/find/find.h +++ b/usr.bin/find/find.h @@ -37,14 +37,16 @@ * $FreeBSD$ */ +#include <regex.h> + /* node type */ enum ntype { N_AND = 1, /* must start > 0 */ N_AMIN, N_ATIME, N_CLOSEPAREN, N_CMIN, N_CTIME, N_DEPTH, N_EMPTY, N_EXEC, N_EXECDIR, N_EXPR, N_FLAGS, N_FOLLOW, N_FSTYPE, N_GROUP, N_INUM, N_LINKS, N_LS, N_MMIN, - N_MTIME, N_NAME, - N_NEWER, N_NOGROUP, N_NOT, N_NOUSER, N_OK, N_OPENPAREN, N_OR, N_PATH, + N_MTIME, N_NAME, N_INAME, N_PATH, N_IPATH, N_REGEX, N_IREGEX, + N_NEWER, N_NOGROUP, N_NOT, N_NOUSER, N_OK, N_OPENPAREN, N_OR, N_PERM, N_PRINT, N_PRUNE, N_SIZE, N_TYPE, N_USER, N_XDEV, N_PRINT0, N_DELETE, N_MAXDEPTH, N_MINDEPTH }; @@ -85,6 +87,7 @@ typedef struct _plandata { } ex; char *_a_data[2]; /* array of char pointers */ char *_c_data; /* char pointer */ + regex_t *_re_data; /* regex */ } p_un; } PLAN; #define a_data p_un._a_data @@ -100,6 +103,7 @@ typedef struct _plandata { #define p_data p_un._p_data #define t_data p_un._t_data #define u_data p_un._u_data +#define re_data p_un._re_data #define e_argv p_un.ex._e_argv #define e_orig p_un.ex._e_orig #define e_len p_un.ex._e_len diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c index 18cd83c..d448946 100644 --- a/usr.bin/find/function.c +++ b/usr.bin/find/function.c @@ -56,6 +56,7 @@ static const char rcsid[] = #include <fts.h> #include <grp.h> #include <pwd.h> +#include <regex.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -76,6 +77,8 @@ static const char rcsid[] = } \ } +static int do_f_regex __P((PLAN *, FTSENT *, int)); 
+static PLAN *do_c_regex __P((char *, int)); static PLAN *palloc __P((enum ntype, int (*) __P((PLAN *, FTSENT *)))); /* @@ -924,6 +927,133 @@ c_name(pattern) return (new); } + +/* + * -iname functions -- + * + * Like -iname, but the match is case insensitive. + */ +int +f_iname(plan, entry) + PLAN *plan; + FTSENT *entry; +{ + return (!fnmatch(plan->c_data, entry->fts_name, FNM_CASEFOLD)); +} + +PLAN * +c_iname(pattern) + char *pattern; +{ + PLAN *new; + + new = palloc(N_INAME, f_iname); + new->c_data = pattern; + return (new); +} + + +/* + * -regex functions -- + * + * True if the whole path of the file matches pattern using + * regular expression. + */ +int +f_regex(plan, entry) + PLAN *plan; + FTSENT *entry; +{ + return (do_f_regex(plan, entry, 0)); +} + +PLAN * +c_regex(pattern) + char *pattern; +{ + return (do_c_regex(pattern, 0)); +} + +/* + * -iregex functions -- + * + * Like -regex, but the match is case insensitive. + */ +int +f_iregex(plan, entry) + PLAN *plan; + FTSENT *entry; +{ + return (do_f_regex(plan, entry, REG_ICASE)); +} + +PLAN * +c_iregex(pattern) + char *pattern; +{ + return (do_c_regex(pattern, REG_ICASE)); +} + +static int +do_f_regex(plan, entry, icase) + PLAN *plan; + FTSENT *entry; + int icase; +{ + char *str; + size_t len; + regex_t *pre; + regmatch_t pmatch; + int errcode; + char errbuf[LINE_MAX]; + int matched; + + pre = plan->re_data; + str = entry->fts_path; + len = strlen(str); + matched = 0; + + pmatch.rm_so = 0; + pmatch.rm_eo = len; + + errcode = regexec(pre, str, 1, &pmatch, REG_STARTEND); + + if (errcode != 0 && errcode != REG_NOMATCH) { + regerror(errcode, pre, errbuf, sizeof errbuf); + errx(1, "%s: %s", + icase == 0 ? 
"-regex" : "-iregex", errbuf); + } + + if (errcode == 0 && pmatch.rm_so == 0 && pmatch.rm_eo == len) + matched = 1; + + return (matched); +} + +PLAN * +do_c_regex(pattern, icase) + char *pattern; + int icase; +{ + PLAN *new; + regex_t *pre; + int errcode; + char errbuf[LINE_MAX]; + + if ((pre = malloc(sizeof(regex_t))) == NULL) + err(1, NULL); + + if ((errcode = regcomp(pre, pattern, regexp_flags | icase)) != 0) { + regerror(errcode, pre, errbuf, sizeof errbuf); + errx(1, "%s: %s: %s", + icase == 0 ? "-regex" : "-iregex", pattern, errbuf); + } + + new = icase == 0 ? palloc(N_REGEX, f_regex) : palloc(N_IREGEX, f_iregex); + new->re_data = pre; + return (new); +} + /* * -newer file functions -- * @@ -1019,7 +1149,31 @@ c_path(pattern) { PLAN *new; - new = palloc(N_NAME, f_path); + new = palloc(N_PATH, f_path); + new->c_data = pattern; + return (new); +} + +/* + * -ipath functions -- + * + * Like -path, but the match is case insensitive. + */ +int +f_ipath(plan, entry) + PLAN *plan; + FTSENT *entry; +{ + return (!fnmatch(plan->c_data, entry->fts_path, FNM_CASEFOLD)); +} + +PLAN * +c_ipath(pattern) + char *pattern; +{ + PLAN *new; + + new = palloc(N_IPATH, f_ipath); new->c_data = pattern; return (new); } diff --git a/usr.bin/find/main.c b/usr.bin/find/main.c index 2979026..ed5377c 100644 --- a/usr.bin/find/main.c +++ b/usr.bin/find/main.c @@ -57,6 +57,7 @@ static const char rcsid[] = #include <fcntl.h> #include <fts.h> #include <locale.h> +#include <regex.h> #include <stdio.h> #include <stdlib.h> #include <time.h> @@ -73,6 +74,7 @@ int isoutput; /* user specified output operator */ int issort; /* do hierarchies in lexicographical order */ int isxargs; /* don't permit xargs delimiting chars */ int mindepth = -1, maxdepth = -1; /* minimum and maximum depth */ +int regexp_flags = REG_BASIC; /* use the "basic" regexp by default*/ static void usage __P((void)); @@ -91,8 +93,11 @@ main(argc, argv) p = start = argv; Hflag = Lflag = 0; ftsoptions = FTS_NOSTAT | FTS_PHYSICAL; - 
while ((ch = getopt(argc, argv, "HLPXdf:sx")) != -1) + while ((ch = getopt(argc, argv, "EHLPXdf:sx")) != -1) switch (ch) { + case 'E': + regexp_flags |= REG_EXTENDED; + break; case 'H': Hflag = 1; Lflag = 0; @@ -161,6 +166,6 @@ static void usage() { (void)fprintf(stderr, -"usage: find [-H | -L | -P] [-Xdsx] [-f file] [file ...] [expression]\n"); +"usage: find [-H | -L | -P] [-EXdsx] [-f file] [file ...] [expression]\n"); exit(1); } diff --git a/usr.bin/find/option.c b/usr.bin/find/option.c index 3bb6226..b5911f2 100644 --- a/usr.bin/find/option.c +++ b/usr.bin/find/option.c @@ -47,6 +47,7 @@ static const char rcsid[] = #include <err.h> #include <fts.h> +#include <regex.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -82,7 +83,10 @@ static OPTION const options[] = { { "-fstype", N_FSTYPE, c_fstype, O_ARGV }, #endif { "-group", N_GROUP, c_group, O_ARGV }, + { "-iname", N_INAME, c_iname, O_ARGV }, { "-inum", N_INUM, c_inum, O_ARGV }, + { "-ipath", N_IPATH, c_ipath, O_ARGV }, + { "-iregex", N_IREGEX, c_iregex, O_ARGV }, { "-links", N_LINKS, c_links, O_ARGV }, { "-ls", N_LS, c_ls, O_ZERO }, { "-maxdepth", N_MAXDEPTH, c_maxdepth, O_ARGV }, @@ -101,6 +105,7 @@ static OPTION const options[] = { { "-print", N_PRINT, c_print, O_ZERO }, { "-print0", N_PRINT0, c_print0, O_ZERO }, { "-prune", N_PRUNE, c_prune, O_ZERO }, + { "-regex", N_REGEX, c_regex, O_ARGV }, { "-size", N_SIZE, c_size, O_ARGV }, { "-type", N_TYPE, c_type, O_ARGV }, { "-user", N_USER, c_user, O_ARGV }, |