Here is my long-threatened revamping of fetch. Jean-Marc probably won't

recognize it any more. This makes the following significant changes: - The main body of the program doesn't know a thing about URIs, HTTP, or FTP. This makes it possible to easily plug in other protocols. (The next revision will probably be able to dynamically add new recognizers.) - There are no longer arbitrary timeouts for the protocols. If you want to set one for yourself, use the environment variables. - FTP proxies are now supported (if I implemented it right). - The HTTP implementation is much more complete, and can now do restarts, preserve modtimes, and mrun in mirror mode. It's not yet up to 1.1, but it's getting there. - Transaction TCP is now used for sending HTTP requests. The HTTP/1.1 syntax for requesting that the connection be closed after one request is implemented. In all of this, I have doubtless broken somebody. Please test it and tell me about the bugs.
author: wollman <wollman@FreeBSD.org> 1997-01-30 21:43:44 +0000
committer: wollman <wollman@FreeBSD.org> 1997-01-30 21:43:44 +0000
commit: 444cbb04eefcb9a4da4e2a366541c4d02d74938d (patch)
tree: 8697935e80816a6c94b292ebeb8af3ef6994b10b /usr.bin/fetch/util.c
parent: 067667ae976ea0d2540166d4637720d70a012cf7 (diff)
download: FreeBSD-src-444cbb04eefcb9a4da4e2a366541c4d02d74938d.zip
FreeBSD-src-444cbb04eefcb9a4da4e2a366541c4d02d74938d.tar.gz
1 files changed, 322 insertions, 0 deletions
diff --git a/usr.bin/fetch/util.c b/usr.bin/fetch/util.c
new file mode 100644
index 0000000..08103a0
--- /dev/null
+++ b/usr.bin/fetch/util.c
@@ -0,0 +1,322 @@
+/*-
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission.  M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied
+ * warranty.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	$Id$
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <time.h>		/* for time() */
+#include <unistd.h>
+
+#include <sys/time.h>		/* for struct timeval */
+
+#include "fetch.h"
+
+
+/* Signal handling functions */
+
+/*
+ * If this were Scheme we could make this variable private to just these two
+ * functions...
+ */
+static struct sigaction oldalrm;
+
+void
+setup_sigalrm(void)
+{
+	struct sigaction catch;
+
+	sigemptyset(&catch.sa_mask);
+	sigaddset(&catch.sa_mask, SIGHUP);
+	sigaddset(&catch.sa_mask, SIGINT);
+	sigaddset(&catch.sa_mask, SIGQUIT);
+	sigaddset(&catch.sa_mask, SIGTERM);
+	sigaddset(&catch.sa_mask, SIGALRM);
+	catch.sa_handler = catchsig;
+	catch.sa_flags = 0;
+
+	sigaction(SIGALRM, &catch, &oldalrm);
+}
+
+void
+unsetup_sigalrm(void)
+{
+	sigaction(SIGALRM, &oldalrm, 0);
+}
+
+
+/* File-handling functions */
+
+/*
+ * Set the last-modified time of the output file to be that returned by
+ * the server.
+ */
+void
+adjmodtime(struct fetch_state *fs)
+{
+    struct timeval tv[2];
+    
+    /* XXX - not strictly correct, since (time_t)-1 does not have to be
+       > 0.  This also catches some of the other routines which erroneously
+       return 0 for invalid times rather than -1. */
+    if (!fs->fs_newtime && fs->fs_modtime > 0) {
+	tv[0].tv_usec = tv[1].tv_usec = 0;
+	time(&tv[0].tv_sec);
+	tv[1].tv_sec = fs->fs_modtime;
+	utimes(fs->fs_outputfile, tv);
+    }
+}
+
+/*
+ * Delete the file when exiting on error, if it is not `precious'.
+ */
+void
+rm(struct fetch_state *fs)
+{
+	if (!(fs->fs_outputfile[0] == '-' && fs->fs_outputfile[1] == '\0')) {
+		if (!fs->fs_restart && !fs->fs_mirror && !fs->fs_precious)
+			unlink(fs->fs_outputfile);
+		else
+			adjmodtime(fs);
+	}
+}
+
+
+/* String-handling and -parsing functions */
+
+/*
+ * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/').  This
+ * must be done after the URI is parsed, since the principal purpose of
+ * the encoding is to hide characters which would otherwise be significant
+ * to the parser (like `/').
+ */
+char *
+percent_decode(const char *uri)
+{
+	char *rv, *s;
+
+	rv = s = malloc(strlen(uri) + 1);
+	if (rv == 0)
+		err(EX_OSERR, "malloc");
+
+	while (*uri) {
+		if (*uri == '%' && uri[1] 
+		    && isxdigit(uri[1]) && isxdigit(uri[2])) {
+			int c;
+			static char buf[] = "xx";
+
+			buf[0] = uri[1];
+			buf[1] = uri[2];
+			sscanf(buf, "%x", &c);
+			uri += 3;
+			*s++ = c;
+		} else {
+			*s++ = *uri++;
+		}
+	}
+	return rv;
+}
+
+/*
+ * Decode a standard host:port string into its constituents, allocating
+ * memory for a new copy of the host part.
+ */
+int
+parse_host_port(const char *s, char **hostname, int *port)
+{
+	const char *colon;
+	char *ep;
+	unsigned long ul;
+
+	colon = strchr(s, ':');
+	if (colon != 0) {
+		colon++;
+		errno = 0;
+		ul = strtoul(colon + 1, &ep, 10);
+		if (*ep != '\0' || colon[1] == '\0' || errno != 0
+		    || ul < 1 || ul > 65534) {
+			warnx("`%s': invalid port number", s);
+			return EX_USAGE;
+		}
+
+		*hostname = safe_strndup(s, colon - s);
+		*port = ul;
+	} else {
+		*hostname = safe_strdup(s);
+	}
+	return 0;
+}
+
+/*
+ * safe_strdup is like strdup, but aborts on error.
+ */
+char *
+safe_strdup(const char *orig)
+{
+	char *s;
+
+	s = malloc(strlen(orig) + 1);
+	if (s == 0)
+		err(EX_OSERR, "malloc");
+	strcpy(s, orig);
+	return s;
+}
+
+/*
+ * safe_strndup is like safe_strdup, but copies at most `len'
+ * characters from `orig'.
+ */
+char *
+safe_strndup(const char *orig, size_t len)
+{
+	char *s;
+
+	s = malloc(len + 1);
+	if (s == 0)
+		err(EX_OSERR, "malloc");
+	s[0] = '\0';
+	strncat(s, orig, len);
+	return s;
+}
+
+/*
+ * Implement the `base64' encoding as described in RFC 1521.
+ */
+static const char base64[] = 
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+char *
+to_base64(const unsigned char *buf, size_t len)
+{
+	char *s = malloc((4 * (len + 1)) / 3 + 1), *rv;
+	unsigned tmp;
+
+	if (s == 0)
+		err(EX_OSERR, "malloc");
+
+	rv = s;
+	while (len >= 3) {
+		tmp = buf[0] << 16 | buf[1] << 8 || buf[2];
+		s[0] = base64[tmp >> 18];
+		s[1] = base64[(tmp >> 12) & 077];
+		s[2] = base64[(tmp >> 6) & 077];
+		s[3] = base64[tmp & 077];
+		len -= 3;
+		buf += 3;
+		s += 4;
+	}
+
+	/* RFC 1521 enumerates these three possibilities... */
+	switch(len) {
+	case 2:
+		tmp = buf[0] << 16 | buf[1] << 8;
+		s[0] = base64[(tmp >> 18) & 077];
+		s[1] = base64[(tmp >> 12) & 077];
+		s[2] = base64[(tmp >> 6) & 077];
+		s[3] = '=';
+		break;
+	case 1:
+		tmp = buf[0] << 16;
+		s[0] = base64[(tmp >> 18) & 077];
+		s[1] = base64[(tmp >> 12) & 077];
+		s[2] = s[3] = '=';
+		break;
+	case 0:
+		break;
+	}
+
+	return rv;
+}
+
+int
+from_base64(const char *orig, unsigned char *buf, size_t *lenp)
+{
+	int len, len2;
+	const char *equals;
+	unsigned tmp;
+
+	len = strlen(orig);
+	while (isspace(orig[len - 1]))
+		len--;
+
+	if (len % 4)
+		return -1;
+
+	len2 = 3 * (len / 4);
+	equals = strchr(orig, '=');
+	if (equals != 0) {
+		if (equals[1] == '=')
+			len2 -= 2;
+		else
+			len2 -= 1;
+	}
+
+	/* Now the length is len2 is the actual length of the original. */
+	if (len2 > *lenp)
+		return -1;
+	*lenp = len2;
+
+	while (len > 0) {
+		int i;
+		const char *off;
+		int forget;
+
+		tmp = 0;
+		forget = 0;
+		for (i = 0; i < 4; i++) {
+			if (orig[i] == '=') {
+				off = base64;
+				forget++;
+			} else {
+				off = strchr(base64, orig[i]);
+			}
+			if (off == 0)
+				return -1;
+			tmp = (tmp << 6) | (off - base64);
+		}
+
+		buf[0] = (tmp >> 16) & 0xff;
+		if (forget < 2)
+			buf[1] = (tmp >> 8) & 0xff;
+		if (forget < 1)
+			buf[2] = (tmp >> 8) & 0xff;
+		len -= 4;
+		orig += 4;
+		buf += 3 - forget;
+	}
+	return 0;
+}
author	wollman <wollman@FreeBSD.org>	1997-01-30 21:43:44 +0000
committer	wollman <wollman@FreeBSD.org>	1997-01-30 21:43:44 +0000
commit	444cbb04eefcb9a4da4e2a366541c4d02d74938d (patch)
tree	8697935e80816a6c94b292ebeb8af3ef6994b10b /usr.bin/fetch/util.c
parent	067667ae976ea0d2540166d4637720d70a012cf7 (diff)
download	FreeBSD-src-444cbb04eefcb9a4da4e2a366541c4d02d74938d.zip FreeBSD-src-444cbb04eefcb9a4da4e2a366541c4d02d74938d.tar.gz