From 0f9b087524425d34260b48dfe452fe59123a8921 Mon Sep 17 00:00:00 2001 From: seanc Date: Tue, 11 Feb 2003 19:32:18 +0000 Subject: Update random(6) to have the ability to randomize a file/stdin based off of lines or words. See the man page for details. Reviewed by: markm MFC after: 3 days --- games/random/Makefile | 2 + games/random/random.6 | 67 ++++++++++++-- games/random/random.c | 65 ++++++++++--- games/random/randomize_fd.c | 219 ++++++++++++++++++++++++++++++++++++++++++++ games/random/randomize_fd.h | 56 +++++++++++ 5 files changed, 389 insertions(+), 20 deletions(-) create mode 100644 games/random/randomize_fd.c create mode 100644 games/random/randomize_fd.h (limited to 'games') diff --git a/games/random/Makefile b/games/random/Makefile index 666023c..5f482df 100644 --- a/games/random/Makefile +++ b/games/random/Makefile @@ -3,5 +3,7 @@ PROG= random MAN= random.6 +SRCS= random.c randomize_fd.c +WARNS= 5 .include diff --git a/games/random/random.6 b/games/random/random.6 index 16a6e22..9706d74 100644 --- a/games/random/random.6 +++ b/games/random/random.6 @@ -10,7 +10,7 @@ .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: +.\" must display the following acknowledgment: .\" This product includes software developed by the University of .\" California, Berkeley and its contributors. .\" 4. Neither the name of the University nor the names of its contributors @@ -32,7 +32,7 @@ .\" @(#)random.6 8.2 (Berkeley) 3/31/94 .\" $FreeBSD$ .\" -.Dd March 31, 1994 +.Dd February 8, 2003 .Dt RANDOM 6 .Os .Sh NAME @@ -41,14 +41,31 @@ .Sh SYNOPSIS .Nm .Op Fl er +.Op Fl f Ar filename .Op Ar denominator .Sh DESCRIPTION .Nm Random -reads lines from the standard input and copies them to the standard -output with a probability of 1/denominator. 
-The default value for +has two distinct modes of operation. The default is to read in lines +from stdin and randomly write them out to stdout with a probability of +1 / +.Ar denominator . +The default .Ar denominator -is 2. +for this mode of operation is 2, giving each line a 50/50 chance of +being displayed. +.Pp +The second mode of operation is to read in a file from +.Ar filename +and randomize the contents of the file and send it back out to stdout. +The contents can be randomized based on newlines or based on +space characters as determined by +.Xr isspace 3 . +The default +.Ar denominator +for this mode of operation is 1, which gives each line a chance to be +displayed, but in a +.Xr random 3 +order. .Pp The options are as follows: .Bl -tag -width Ds @@ -61,10 +78,46 @@ does not read or write anything, and simply exits with a random exit value of 0 to .Ar denominator \&- 1, inclusive. +.It Fl f Ar filename +The +.Fl f +option is used to specify the +.Ar filename +to read from. stdin is used if the filename is set to "-". +.It Fl l +Randomize the input via newlines (the default). .It Fl r The .Fl r option guarantees that the output is unbuffered. +.It Fl u +Tells +.Xr random 6 +not to select the same line or word from a file more than once (the +default). This does not guarantee uniqueness if there are two of the +same tokens from the input, but it does prevent selecting the same +token more than once. +.It Fl U +Tells +.Xr random 6 +that it is okay for it to reuse any given line or word when creating a +randomized output. +.It Fl w +Randomize words separated by +.Xr isspace 3 +instead of newlines. .El .Sh SEE ALSO -.Xr fortune 6 +.Xr fortune 6 , +.Xr random 3 +.Sh BUGS +There is no index used when printing out tokens from the list, which +makes it rather slow for large files (10MB+). If this were used in +performance sensitive areas, I'd do something about it. For smaller +files, however, it should still be quite fast and efficient. 
+.Sh HISTORY +The original +.Xr random 6 +game was brought in from BSD 4.4 Lite by jkh in 1994. The +functionality to randomize lines and words was added in 2003 by +seanc. diff --git a/games/random/random.c b/games/random/random.c index 9c66da4..f0d5df3 100644 --- a/games/random/random.c +++ b/games/random/random.c @@ -52,33 +52,60 @@ static const char rcsid[] = #include #include +#include #include #include #include #include #include +#include "randomize_fd.h" void usage(void); int -main(argc, argv) - int argc; - char *argv[]; +main(int argc, char **argv) { double denom; - int ch, random_exit, selected, unbuffer_output; - char *ep; + int ch, fd, random_exit, randomize_lines, random_type, ret, + selected, unique_output, unbuffer_output; + char *ep, *filename; - random_exit = unbuffer_output = 0; denom = 0; - while ((ch = getopt(argc, argv, "er")) != -1) + filename = NULL; + random_type = RANDOM_TYPE_UNSET; + random_exit = randomize_lines = random_type = unbuffer_output = 0; + unique_output = 1; + while ((ch = getopt(argc, argv, "ef:hlruUw")) != -1) switch (ch) { case 'e': random_exit = 1; break; + case 'f': + randomize_lines = 1; + if (!strcmp(optarg, "-")) + filename = strdup("/dev/fd/0"); + else + filename = optarg; + break; + case 'l': + randomize_lines = 1; + random_type = RANDOM_TYPE_LINES; + break; case 'r': unbuffer_output = 1; break; + case 'u': + randomize_lines = 1; + unique_output = 1; + break; + case 'U': + randomize_lines = 1; + unique_output = 0; + break; + case 'w': + randomize_lines = 1; + random_type = RANDOM_TYPE_WORDS; + break; default: case '?': usage(); @@ -90,7 +117,7 @@ main(argc, argv) switch (argc) { case 0: - denom = 2; + denom = (randomize_lines ? 1 : 2); break; case 1: errno = 0; @@ -109,10 +136,6 @@ main(argc, argv) srandomdev(); - /* Compute a random exit status between 0 and denom - 1. 
*/ - if (random_exit) - return ((denom * random()) / LONG_MAX); - /* * Act as a filter, randomly choosing lines of the standard input * to write to the standard output. @@ -121,6 +144,22 @@ main(argc, argv) setbuf(stdout, NULL); /* + * Act as a filter, randomizing lines read in from a given file + * descriptor and write the output to standard output. + */ + if (randomize_lines) { + if ((fd = open(filename, O_RDONLY, 0)) < 0) + err(1, "%s", optarg); + ret = randomize_fd(fd, random_type, unique_output, denom); + if (!random_exit) + return(ret); + } + + /* Compute a random exit status between 0 and denom - 1. */ + if (random_exit) + return ((denom * random()) / LONG_MAX); + + /* * Select whether to print the first line. (Prime the pump.) * We find a random number between 0 and denom - 1 and, if it's * 0 (which has a 1 / denom chance of being true), we select the @@ -148,6 +187,6 @@ void usage() { - (void)fprintf(stderr, "usage: random [-er] [denominator]\n"); + (void)fprintf(stderr, "usage: random [-elruUw] [-f filename] [denominator]\n"); exit(1); } diff --git a/games/random/randomize_fd.c b/games/random/randomize_fd.c new file mode 100644 index 0000000..447663cf --- /dev/null +++ b/games/random/randomize_fd.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2003 Sean Chittenden + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "randomize_fd.h" +/* Head and tail of the global token list; reset and filled by randomize_fd() */ +struct rand_node *rand_root; +struct rand_node *rand_tail; + +/* Allocate a list node with len 0 and NULL cp and next; exits through err(3) if malloc fails */ +static +struct rand_node *rand_node_allocate(void) +{ + struct rand_node *n; + + n = (struct rand_node *)malloc(sizeof(struct rand_node)); + if (n == NULL) + err(1, "malloc"); + + n->len = 0; + n->cp = NULL; + n->next = NULL; + return(n); +} + +/* Free one node together with the token buffer it points to; a NULL node is ignored */ +static +void rand_node_free(struct rand_node *n) +{ + if (n != NULL) { + if (n->cp != NULL) + free(n->cp); + + free(n); + } +} + +/* Recursively free every node reachable through next, then n itself */ +static +void rand_node_free_rec(struct rand_node *n) +{ + if (n != NULL) { + if (n->next != NULL) + rand_node_free_rec(n->next); + + rand_node_free(n); + } +} + +/* Append n to the global list (it becomes rand_root too when the list is empty) and return n */ +static +struct rand_node *rand_node_append(struct rand_node *n) +{ + if (rand_root == NULL) { + rand_root = rand_tail = n; + return(n); + } else { + rand_tail->next = n; + rand_tail = n; + + return(n); + } +} + +/* Read fd until EOF, split the data into lines or isspace(3)-delimited words according to type, print randomly selected tokens with printf(3), close fd and return 0. Read, allocation and printf failures exit through err(3). */ +int randomize_fd(int fd, int type, int unique, double denom) +{ + u_char *buf, *p; + u_int numnode, j, selected, slen; + struct rand_node *n, *prev; + int bufc, bufleft, buflen, eof, fndstr, i, len, ret; + + rand_root = rand_tail = NULL; + bufc = bufleft = eof = fndstr = numnode = 0; + /* An unset type falls back to line mode */ + if (type == RANDOM_TYPE_UNSET) + type = 
RANDOM_TYPE_LINES; + + buflen = sizeof(u_char) * MAXBSIZE; + buf = (u_char *)malloc(buflen); + if (buf == NULL) + err(1, "malloc"); + + while (!eof) { + /* Check to see if we have bits in the buffer */ + if (bufleft == 0) { + len = read(fd, buf, buflen); + if (len == -1) + err(1, "read"); + else if (len == 0) + break; + else if (len < buflen) { + buflen = len; + eof++; + } + + bufleft = len; + } + + /* Scan for a token delimiter: a newline, an isspace(3) character, or the last byte at EOF */ + for (i = bufc; i <= buflen; i++, bufleft--) { + if (i == buflen) { /* scanned past the end of the buffered data */ + if (fndstr) { /* a token was emitted since the last refill: move the unconsumed tail to the front and read more */ + if (!eof) { + memmove(buf, &buf[bufc], i - bufc); + i = i - bufc; + bufc = 0; + len = read(fd, &buf[i], buflen - i); + if (len == -1) + err(1, "read"); + else if (len == 0) { + eof++; + break; + } else if (len < buflen -i ) + buflen = i + len; + + bufleft = len; + fndstr = 0; + } + } else { /* no delimiter found in the whole buffer: double its size and keep reading */ + p = (u_char *)realloc(buf, buflen * 2); + if (p == NULL) + err(1, "realloc"); + + buf = p; + if (!eof) { + len = read(fd, &buf[i], buflen); + if (len == -1) + err(1, "read"); + else if (len == 0) { + eof++; + break; + } else if (len < buflen -i ) + buflen = len; + + bufleft = len; + } + + buflen *= 2; + } + } + /* Delimiter found: copy the token plus its delimiter byte, NUL-terminated, into a new list node */ + if ((type == RANDOM_TYPE_LINES && buf[i] == '\n') || + (type == RANDOM_TYPE_WORDS && isspace((int)buf[i])) || + (eof && i == buflen - 1)) { + n = rand_node_allocate(); + slen = i - bufc; + n->len = slen + 2; + n->cp = (u_char *)malloc(slen + 2); + if (n->cp == NULL) + err(1, "malloc"); + + memmove(n->cp, &buf[bufc], slen); + n->cp[slen] = buf[i]; + n->cp[slen + 1] = '\0'; + bufc = i + 1; + fndstr = 1; + rand_node_append(n); + numnode++; + } + } + } + + (void)close(fd); + /* One pass per token read: compute a random index and print the node found at that index, if the list is that long */ + for (i = numnode; i > 0; i--) { + selected = ((int)denom * random())/(((double)RAND_MAX + 1) / numnode); + + for (j = 0, prev = n = rand_root; n != NULL; j++, prev = n, n = n->next) { + if (j == selected) { + ret = printf("%.*s", n->len - 1, n->cp); + if (ret < 0) + err(1, "printf"); + if (unique) { /* unlink and free the printed node so it cannot be selected again */ + if (n == rand_root) + rand_root = n->next; + if (n == rand_tail) + rand_tail = prev; + + 
prev->next = n->next; + rand_node_free(n); + numnode--; + break; + } + } + } + } + + fflush(stdout); + /* Unique mode already freed each node as it was printed; otherwise free the list here */ + if (!unique) + rand_node_free_rec(rand_root); + + return(0); +} diff --git a/games/random/randomize_fd.h b/games/random/randomize_fd.h new file mode 100644 index 0000000..f3e99a8 --- /dev/null +++ b/games/random/randomize_fd.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2003 Sean Chittenden + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef __RANDOMIZE_FD__ +#define __RANDOMIZE_FD__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* Tokenizing modes accepted as the type argument of randomize_fd(); UNSET is treated as LINES there */ +#define RANDOM_TYPE_UNSET 0 +#define RANDOM_TYPE_LINES 1 +#define RANDOM_TYPE_WORDS 2 + +/* One input token (a line or a word) kept as a node of a singly linked list: cp is the NUL-terminated token text, len its allocated size, next the following node */ +struct rand_node { + u_char *cp; + u_int len; + struct rand_node *next; +}; +/* Read tokens from fd, print randomly selected ones to stdout, close fd; returns 0 */ +int randomize_fd(int fd, int type, int unique, double denom); + +#endif -- cgit v1.1