diff options
author | wollman <wollman@FreeBSD.org> | 1997-01-31 19:55:51 +0000 |
---|---|---|
committer | wollman <wollman@FreeBSD.org> | 1997-01-31 19:55:51 +0000 |
commit | 827e39354ad8751e6eacb18ae55c82e09dc9ab47 (patch) | |
tree | f21d8b942d4bd85dae792f19dcba12aed3a72100 /usr.bin/fetch | |
parent | 2a7214e7ab593d04e5bd8e6e73af6a760985a070 (diff) | |
download | FreeBSD-src-827e39354ad8751e6eacb18ae55c82e09dc9ab47.zip FreeBSD-src-827e39354ad8751e6eacb18ae55c82e09dc9ab47.tar.gz |
Some fixes for HTTP:
1) Implement redirects (or try to, at least).
2) Implement automatic retry after 503 errors when Retry-After is given.
3) Implement a -a flag to enable both of these behaviors.
4) Recognize Transfer-Encoding headers and emit a warning that the file
is likely to be damaged.
5) Bug fix: only write the amount of data we read.
6) Actually document some of these.
7) Fix the usage message to display flags in semi-alphabetical order.
Diffstat (limited to 'usr.bin/fetch')
-rw-r--r-- | usr.bin/fetch/fetch.1 | 17 | ||||
-rw-r--r-- | usr.bin/fetch/fetch.h | 3 | ||||
-rw-r--r-- | usr.bin/fetch/http.c | 277 | ||||
-rw-r--r-- | usr.bin/fetch/main.c | 42 |
4 files changed, 293 insertions, 46 deletions
diff --git a/usr.bin/fetch/fetch.1 b/usr.bin/fetch/fetch.1 index 6955250..4fa89d3 100644 --- a/usr.bin/fetch/fetch.1 +++ b/usr.bin/fetch/fetch.1 @@ -7,7 +7,7 @@ .Nd retrieve a file by Uniform Resource Locator .Sh SYNOPSIS .Nm fetch -.Op Fl MPmnpqr +.Op Fl MPamnpqr .Op Fl o Ar file .Ar URL .Nm fetch @@ -45,6 +45,8 @@ flags. .Pp The following options are available: .Bl -tag -width Fl -compact +.It Fl a +Automatically retry the transfer upon soft failures. .It Fl c Ar dir The file to retrieve is in directory .Ar dir @@ -196,3 +198,16 @@ The original implementation of was done by Jean-Marc Zucconi. It was extensively re-worked for .Fx 3.0 by Garrett Wollman. +.Sh BUGS +There are too many environment variables and command-line options. +.Pp +The +.Fl a +option is only implemented for certain kinds of +.Tn HTTP +failures, and no +.Tn FTP +failures. +.Pp +.Tn HTTP +authentication is not yet implememnted. diff --git a/usr.bin/fetch/fetch.h b/usr.bin/fetch/fetch.h index daa010e..89ddbea 100644 --- a/usr.bin/fetch/fetch.h +++ b/usr.bin/fetch/fetch.h @@ -26,7 +26,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id$ + * $Id: fetch.h,v 1.1 1997/01/30 21:43:38 wollman Exp $ */ #ifndef fetch_h @@ -48,6 +48,7 @@ struct fetch_state { int fs_passive_mode; /* -p option */ int fs_linkfile; /* -l option */ int fs_precious; /* -R option */ + int fs_auto_retry; /* -a option */ time_t fs_modtime; void *fs_proto; int (*fs_retrieve)(struct fetch_state *); diff --git a/usr.bin/fetch/http.c b/usr.bin/fetch/http.c index 425476d..d92d82c 100644 --- a/usr.bin/fetch/http.c +++ b/usr.bin/fetch/http.c @@ -26,7 +26,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id$ + * $Id: http.c,v 1.1 1997/01/30 21:43:41 wollman Exp $ */ #include <sys/types.h> @@ -67,13 +67,22 @@ struct http_state { char *http_hostname; char *http_remote_request; char *http_decoded_file; + char *http_host_header; + char *http_authentication; + char *http_proxy_authentication; unsigned http_port; + int http_redirected; }; /* We are only concerned with headers we might receive. */ enum http_header { - ht_content_length, ht_last_modified, ht_content_md5, ht_content_type, - ht_transfer_encoding, ht_content_range, ht_warning, + ht_accept_ranges, ht_age, ht_allow, ht_cache_control, ht_connection, + ht_content_base, ht_content_encoding, ht_content_language, + ht_content_length, ht_content_location, ht_content_md5, + ht_content_range, ht_content_type, ht_date, ht_etag, ht_expires, + ht_last_modified, ht_location, ht_pragma, ht_proxy_authenticate, + ht_public, ht_retry_after, ht_server, ht_transfer_encoding, + ht_upgrade, ht_vary, ht_via, ht_www_authenticate, ht_warning, /* unusual cases */ ht_syntax_error, ht_unknown, ht_end_of_header }; @@ -90,7 +99,7 @@ static int http_parse(struct fetch_state *fs, const char *uri) { const char *p, *colon, *slash, *ques, *q; - char *hostname; + char *hostname, *hosthdr, *trimmed_name; unsigned port; struct http_state *https; @@ -148,14 +157,20 @@ http_parse(struct fetch_state *fs, const char *uri) */ https->http_hostname = safe_strdup(hostname); https->http_port = port; + hosthdr = alloca(sizeof("Host: :\r\n") + 5 + strlen(hostname)); + sprintf(hosthdr, "Host: %s:%d\r\n", hostname, port); + https->http_host_header = safe_strdup(hosthdr); + https->http_remote_request = safe_strdup(p); ques = strpbrk(p, "?#"); if (ques) { - https->http_remote_request = safe_strndup(p, ques - p); + trimmed_name = safe_strndup(p, ques - p); } else { - https->http_remote_request = safe_strdup(p); + trimmed_name = safe_strdup(p); } - p = https->http_decoded_file = percent_decode(p); + https->http_decoded_file = percent_decode(trimmed_name); + free(trimmed_name); + p = https->http_decoded_file; /* now p is the decoded version, so we can extract the basename */ if (fs->fs_outputfile == 0) { @@ -165,6 +180,7 @@ http_parse(struct fetch_state *fs, const char *uri) else fs->fs_outputfile = p; } + https->http_redirected = 0; fs->fs_proto = https; fs->fs_close = http_close; @@ -199,6 +215,7 @@ out: } if (strncmp(uri, "http://", 7) == 0) { + char *hosthdr; slash = strchr(uri + 7, '/'); if (slash == 0) { warnx("`%s': malformed `http' URL", uri); @@ -211,6 +228,11 @@ out: file = safe_strdup(slash); else file = safe_strndup(slash, ques - slash); + hosthdr = alloca(sizeof("Host: \r\n") + slash - uri - 7); + strcpy(hosthdr, "Host: "); + strncat(hosthdr, uri + 7, slash - uri - 7); + strcat(hosthdr, "\r\n"); + https->http_host_header = safe_strdup(hosthdr); } else { slash = uri; while (*slash && *slash != ':') @@ -223,8 +245,10 @@ out: slash++; } file = safe_strdup(slash); + https->http_host_header = safe_strdup(""); } https->http_decoded_file = percent_decode(file); + https->http_redirected = 0; free(file); if (fs->fs_outputfile == 0) { slash = strrchr(https->http_decoded_file, '/'); @@ -247,11 +271,57 @@ http_close(struct fetch_state *fs) free(https->http_hostname); free(https->http_remote_request); free(https->http_decoded_file); + free(https->http_host_header); free(https); fs->fs_outputfile = 0; return 0; } +static int +nullclose(struct fetch_state *fs) +{ + return 0; +} + +/* + * Process a redirection. This has a small memory leak. + */ +static int +http_redirect(struct fetch_state *fs, char *new, int permanent) +{ + struct http_state *https = fs->fs_proto; + int num_redirects = https->http_redirected + 1; + char *out = safe_strdup(fs->fs_outputfile); + int rv; + + if (num_redirects > 5) { + warnx("%s: HTTP redirection limit exceeded"); + return EX_PROTOCOL; + } + + free(https->http_hostname); + free(https->http_remote_request); + free(https->http_decoded_file); + free(https); + warnx("%s: resource has moved %s to `%s'", out, + permanent ? "permanently" : "temporarily", new); + rv = http_parse(fs, new); + if (rv != 0) { + fs->fs_close = nullclose; /* XXX rethink interface? */ + return rv; + } + https = fs->fs_proto; + https->http_redirected = num_redirects; + /* + * This ensures that the output file name doesn't suddenly change + * under the user's feet. Unfortunately, this results in a small + * memory leak. I wish C had garbage collection... + */ + fs->fs_outputfile = out; + rv = http_retrieve(fs); + return rv; +} + /* * Get a file using HTTP. We will try to implement HTTP/1.1 eventually. * This subroutine makes heavy use of the 4.4-Lite standard I/O library, @@ -269,22 +339,30 @@ http_retrieve(struct fetch_state *fs) int s; struct sockaddr_in sin; struct msghdr msg; - struct iovec iov[16]; /* XXX count precisely */ +#define NIOV 16 /* max is currently 12 */ + struct iovec iov[NIOV]; int n, status; const char *env; int timo; - char *line; + char *line, *new_location; size_t linelen, readresult, writeresult; off_t total_length, restart_from; - time_t last_modified; + time_t last_modified, when_to_retry; char *base64ofmd5; static char buf[BUFFER_SIZE]; - int to_stdout; + int to_stdout, restarting, redirection, retrying; char rangebuf[sizeof("Range: bytes=18446744073709551616-\r\n")]; https = fs->fs_proto; to_stdout = (strcmp(fs->fs_outputfile, "-") == 0); + restarting = fs->fs_restart; + redirection = 0; + retrying = 0; + /* + * Figure out the timeout. Prefer the -T command-line value, + * otherwise the HTTP_TIMEOUT envar, or else don't time out at all. + */ if (fs->fs_timeout) { timo = fs->fs_timeout; } else if ((env = getenv("HTTP_TIMEOUT")) != 0) { @@ -339,10 +417,22 @@ http_retrieve(struct fetch_state *fs) retry: addstr(iov, n, "GET /"); addstr(iov, n, https->http_remote_request); - addstr(iov, n, " HTTP/1.0\r\n"); + addstr(iov, n, " HTTP/1.1\r\n"); + /* + * The choice of HTTP/1.1 may be a bit controversial. The + * specification says that implementations which are not at + * least conditionally compliant MUST NOT call themselves + * HTTP/1.1. We choose not to comply with that requirement. + * (Eventually we will support the full HTTP/1.1, at which + * time this comment will not apply. But it's amusing how + * specifications attempt to define behavior for implementations + * which aren't obeying the spec in the first place...) + */ addstr(iov, n, format_http_user_agent()); /* do content negotiation here */ addstr(iov, n, "Accept: */*\r\n"); + addstr(iov, n, https->http_host_header); + addstr(iov, n, "Connection: close\r\n"); if (fs->fs_mirror) { struct stat stab; @@ -358,7 +448,7 @@ retry: fs->fs_outputfile); } } - if (fs->fs_restart) { + if (restarting) { struct stat stab; errno = 0; @@ -374,12 +464,19 @@ retry: } else if (errno != 0) { warn("%s: cannot restart; will retrieve anew", fs->fs_outputfile); + restarting = 0; + } else { + warnx("%s: cannot restart; will retrieve anew", + fs->fs_outputfile); + restarting = 0; } } - addstr(iov, n, "Connection: close\r\n"); addstr(iov, n, "\r\n"); msg.msg_iovlen = n; - + + if (n >= NIOV) + err(EX_SOFTWARE, "request vector length exceeded: %d", n); + s = socket(PF_INET, SOCK_STREAM, 0); if (s < 0) { warn("socket"); @@ -401,6 +498,7 @@ retry: return EX_OSERR; } +got100reply: alarm(timo); line = fgetln(remote, &linelen); alarm(0); @@ -475,18 +573,47 @@ retry: /* In the future, we might handle redirection and other responses. */ switch(status) { + case 100: /* Continue */ + goto got100reply; case 200: /* Here come results */ + case 203: /* Non-Authoritative Information */ + restarting = 0; + break; case 206: /* Here come partial results */ + /* can only happen when restarting */ + break; + case 301: /* Resource has moved permanently */ + if (!fs->fs_auto_retry) + goto spewerror; + redirection = 301; + break; + case 302: /* Resource has moved temporarily */ + /* + * We don't test fs->fs_auto_retry here so that this + * sort of redirection is transparent to the user. + */ + redirection = 302; break; - case 304: /* Object is unmodified */ if (fs->fs_mirror) { fclose(remote); unsetup_sigalrm(); return 0; } - /* otherwise, fall through */ + goto spewerror; + case 401: /* Unauthorized */ + case 407: /* Proxy Authentication Required */ + /* XXX implement authentication */ + + case 503: /* Service Unavailable */ + if (!fs->fs_auto_retry) + goto spewerror; + + retrying = 503; + break; + default: +spewerror: warnx("%s: %s: HTTP server returned error code %d", fs->fs_outputfile, https->http_hostname, status); if (fs->fs_verbose > 1) { @@ -501,8 +628,9 @@ retry: } total_length = -1; /* -1 means ``don't know'' */ - last_modified = -1; + last_modified = when_to_retry = -1; base64ofmd5 = 0; + new_location = 0; restart_from = 0; while((line = fgetln(remote, &linelen)) != 0) { @@ -520,9 +648,9 @@ retry: case ht_content_length: errno = 0; ul = strtoul(value, &ep, 10); - if (errno != 0 || *ep != '\r') + if (errno != 0 || *ep) warnx("invalid Content-Length: `%s'", value); - if (!fs->fs_restart) + if (!restarting) total_length = ul; break; @@ -537,6 +665,9 @@ retry: break; case ht_content_range: + if (!restarting) /* XXX protocol error */ + break; + /* NB: we might have to restart from farther back than we asked. */ status = parse_http_content_range(value, &restart_from, @@ -544,22 +675,92 @@ retry: /* If we couldn't understand the reply, get the whole thing. */ if (status) { - fs->fs_restart = 0; -/*doretry:*/ + restarting = 0; +doretry: fclose(remote); if (base64ofmd5) free(base64ofmd5); + if (new_location) + free(new_location); restart_from = 0; n = 0; goto retry; } break; + case ht_location: + if (redirection) { + char *s = value; + while (*s && !isspace(*s)) + s++; + new_location = safe_strndup(value, s - value); + } + break; + + case ht_transfer_encoding: + warnx("%s: %s specified a Transfer-Encoding: %s", + fs->fs_outputfile, https->http_hostname, + value); + warnx("%s: output file may be uninterpretable", + fs->fs_outputfile); + break; + + case ht_retry_after: + if (!retrying) + break; + + errno = 0; + ul = strtoul(value, &ep, 10); + if (errno != 0 || (*ep && !isspace(*ep))) { + time_t when; + when = parse_http_date(value); + if (when == -1) + break; + when_to_retry = when; + } else { + when_to_retry = time(0) + ul; + } + break; + default: break; } } + if (retrying) { + int howlong; + + if (when_to_retry == -1) { + /* This assignment is OK because all we do is print. */ + line = (char *)"HTTP/1.1 503 Service Unavailable"; + goto spewerror; + } + howlong = when_to_retry - time(0); + if (howlong < 30) + howlong = 30; + + warnx("%s: service unavailable; retrying in %d seconds", + https->http_hostname, howlong); + sleep(howlong); + goto doretry; + } + + if (redirection && new_location) { + fclose(remote); + if (base64ofmd5) + free(base64ofmd5); + status = http_redirect(fs, new_location, redirection == 301); + free(new_location); + return status; + } else if (redirection) { + warnx("%s: redirection but no new location", + fs->fs_outputfile); + fclose(remote); + if (base64ofmd5) + free(base64ofmd5); + return EX_PROTOCOL; + } + /* * OK, if we got here, then we have finished parsing the header * and have read the `\r\n' line which denotes the end of same. @@ -595,7 +796,7 @@ retry: break; display(fs, total_length, readresult); - writeresult = fwrite(buf, 1, sizeof buf, local); + writeresult = fwrite(buf, 1, readresult, local); } while (writeresult == readresult); status = errno; /* save errno for warn(), below, if needed */ @@ -691,16 +892,42 @@ http_parse_header(char *line, char **valuep) for (value = colon + 1; *value && isspace(*value); value++) ; /* do nothing */ - /* XXX - strip comments? */ + /* Trim trailing whitespace (including \r). */ *valuep = value; + value += strlen(value) - 1; + while (value > *valuep && isspace(*value)) + value--; + *++value = '\0'; #define cmp(name, num) do { if (!strcasecmp(line, name)) return num; } while(0) + cmp("Accept-Ranges", ht_accept_ranges); + cmp("Age", ht_age); + cmp("Allow", ht_allow); + cmp("Cache-Control", ht_cache_control); + cmp("Connection", ht_connection); + cmp("Content-Base", ht_content_base); + cmp("Content-Encoding", ht_content_encoding); + cmp("Content-Language", ht_content_language); cmp("Content-Length", ht_content_length); - cmp("Last-Modified", ht_last_modified); + cmp("Content-Location", ht_content_location); cmp("Content-MD5", ht_content_md5); cmp("Content-Range", ht_content_range); cmp("Content-Type", ht_content_type); + cmp("Date", ht_date); + cmp("ETag", ht_etag); + cmp("Expires", ht_expires); + cmp("Last-Modified", ht_last_modified); + cmp("Location", ht_location); + cmp("Pragma", ht_pragma); + cmp("Proxy-Authenticate", ht_proxy_authenticate); + cmp("Public", ht_public); + cmp("Retry-After", ht_retry_after); + cmp("Server", ht_server); cmp("Transfer-Encoding", ht_transfer_encoding); + cmp("Upgrade", ht_upgrade); + cmp("Vary", ht_vary); + cmp("Via", ht_via); + cmp("WWW-Authenticate", ht_www_authenticate); cmp("Warning", ht_warning); #undef cmp return ht_unknown; diff --git a/usr.bin/fetch/main.c b/usr.bin/fetch/main.c index 50845c7..09d0ae3 100644 --- a/usr.bin/fetch/main.c +++ b/usr.bin/fetch/main.c @@ -52,7 +52,7 @@ static void usage(const char *argv0) { fprintf(stderr, - "%s: usage:\n\t%s [-DHINPMTVLqlmnprv] [-o outputfile] " + "%s: usage:\n\t%s [-DHILMNPRTValmnpqrv] [-o outputfile] " "[-f file -h host [-c dir] | URL]\n", argv0, argv0); exit(EX_USAGE); } @@ -73,14 +73,14 @@ main(int argc, char *const *argv) fs.fs_verbose = 1; change_to_dir = file_to_get = hostname = 0; - while ((c = getopt(argc, argv, "D:HINPMT:V:Lqc:f:h:o:plmnrv")) != -1) { + while ((c = getopt(argc, argv, "ac:D:f:h:HilLmMnNo:pPqrT:vV:")) != -1) { switch (c) { case 'D': case 'H': case 'I': case 'N': case 'L': case 'V': break; /* ncftp compatibility */ - case 'q': - fs.fs_verbose = 0; - + case 'a': + fs.fs_auto_retry = 1; + break; case 'c': change_to_dir = optarg; break; @@ -97,6 +97,14 @@ main(int argc, char *const *argv) fs.fs_linkfile = 1; break; + case 'm': case 'M': + fs.fs_mirror = 1; + break; + + case 'n': + fs.fs_newtime = 1; + break; + case 'o': fs.fs_outputfile = optarg; break; @@ -105,14 +113,10 @@ main(int argc, char *const *argv) fs.fs_passive_mode = 1; break; - case 'm': case 'M': - fs.fs_mirror = 1; - break; - - case 'n': - fs.fs_newtime = 1; + case 'q': + fs.fs_verbose = 0; break; - + case 'r': fs.fs_restart = 1; break; @@ -121,13 +125,6 @@ main(int argc, char *const *argv) fs.fs_precious = 1; break; - case 'v': - if (fs.fs_verbose < 2) - fs.fs_verbose = 2; - else - fs.fs_verbose++; - break; - case 'T': /* strtol sets errno to ERANGE in the case of overflow */ errno = 0; @@ -138,6 +135,13 @@ main(int argc, char *const *argv) fs.fs_timeout = l; break; + case 'v': + if (fs.fs_verbose < 2) + fs.fs_verbose = 2; + else + fs.fs_verbose++; + break; + default: case '?': usage(argv[0]); |