summaryrefslogtreecommitdiffstats
path: root/tools/tools/kttcp
diff options
context:
space:
mode:
authorsam <sam@FreeBSD.org>2003-11-13 00:30:27 +0000
committersam <sam@FreeBSD.org>2003-11-13 00:30:27 +0000
commita3489f51701b77256f3b144c6c47a4b2568e5836 (patch)
tree504f7f98134fe595a935cccf9d9962252e6faff5 /tools/tools/kttcp
parent6cc414b2dd327b08b0dd7542d8687f4ba126ebab (diff)
downloadFreeBSD-src-a3489f51701b77256f3b144c6c47a4b2568e5836.zip
FreeBSD-src-a3489f51701b77256f3b144c6c47a4b2568e5836.tar.gz
add in-kernel ttcp performance tool
Diffstat (limited to 'tools/tools/kttcp')
-rw-r--r--tools/tools/kttcp/Makefile23
-rw-r--r--tools/tools/kttcp/README24
-rw-r--r--tools/tools/kttcp/kttcp.c309
-rw-r--r--tools/tools/kttcp/sys/Makefile8
-rw-r--r--tools/tools/kttcp/sys/kttcp.c772
-rw-r--r--tools/tools/kttcp/sys/kttcpio.h59
6 files changed, 1195 insertions, 0 deletions
diff --git a/tools/tools/kttcp/Makefile b/tools/tools/kttcp/Makefile
new file mode 100644
index 0000000..781e077
--- /dev/null
+++ b/tools/tools/kttcp/Makefile
@@ -0,0 +1,23 @@
+# $FreeBSD$
+
+SHELL= /bin/sh
+
+PROG= kttcp
+SRCS= kttcp.c
+BINDIR= /usr/local/bin
+SYSDIR= /usr/src/sys
+
+CFLAGS += -I${SYSDIR} -Isys
+
+all: kttcp module
+
+module:
+ cd sys; SYSDIR=${SYSDIR} make
+
+install:
+ install kttcp ${DESTDIR}/${BINDIR}
+ cd sys; SYSDIR=${SYSDIR} make install
+
+clean:
+ rm -f ${PROG}
+ cd sys; SYSDIR=${SYSDIR} make clean
diff --git a/tools/tools/kttcp/README b/tools/tools/kttcp/README
new file mode 100644
index 0000000..f78c8d6
--- /dev/null
+++ b/tools/tools/kttcp/README
@@ -0,0 +1,24 @@
+$FreeBSD$
+
+This is a port of Jason Thorpe's kttcp tool for testing network
+performance for in-kernel applications (like NFS). The tool consists
+of a loadable module and a small user-mode application. Beware
+that you should match the kernel module to the kernel it is to be
+used with. By default SYSDIR is set to /usr/src/sys in Makefile.
+You may want to change that.
+
+To use the tool do something like on each of two machines:
+
+1. make
+2. su; make install (installs module and kttcp in /usr/local/bin)
+3. kldload kttcp
+
+Then:
+
+4. kttcp -r on one machine
+5. kttcp -t foo on the other machine, where foo is the
+ machine where #4 was done.
+
+kttcp w/o arguments gives usage. Otherwise the source is your
+friend. Beware that the kernel code must mimic soreceive and sosend
+for results to be meaningful.
diff --git a/tools/tools/kttcp/kttcp.c b/tools/tools/kttcp/kttcp.c
new file mode 100644
index 0000000..45fa818
--- /dev/null
+++ b/tools/tools/kttcp/kttcp.c
@@ -0,0 +1,309 @@
+/* $FreeBSD$ */
+/* $NetBSD: kttcp.c,v 1.5 2002/07/11 23:32:35 simonb Exp $ */
+
+/*
+ * Copyright (c) 2002 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Frank van der Linden and Jason R. Thorpe
+ * for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <err.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+#include "dev/kttcp/kttcpio.h"
+
+#define KTTCP_PORT "22222"
+#define KTTCP_XMITSIZE (10*1024*1024)
+#define KTTCP_SOCKBUF_DEFAULT 65536
+
+#define KTTCP_DEVICE "/dev/kttcp"
+
+static void
+usage(void)
+{
+ fprintf(stderr,
+ "usage: kttcp -r [-b sockbufsize] [-p port] [-q] [-v]\n"
+ " [-4] [-6]\n"
+ " kttcp -t [-b sockbufsize] [-n bytes] [-q] [-v] [-p port]\n"
+ " [-4] [-6] host\n"
+ );
+ exit(1);
+}
+
+static unsigned long long
+get_bytes(const char *str)
+{
+ unsigned long long bytes;
+ char *cp;
+
+ bytes = strtoull(str, &cp, 10);
+ if (bytes == ULLONG_MAX && errno == ERANGE)
+ err(1, "%s", str);
+
+ if (cp[0] != '\0') {
+ if (cp[1] != '\0')
+ errx(1, "invalid byte count: %s", str);
+ if (cp[0] == 'k' || cp[0] == 'K')
+ bytes *= 1024;
+ else if (cp[0] == 'm' || cp[0] == 'M')
+ bytes *= 1024 * 1024;
+ else if (cp[0] == 'g' || cp[0] == 'G')
+ bytes *= 1024 * 1024 * 1024;
+ else
+ errx(1, "invalid byte count modifier: %s", str);
+ }
+
+ return (bytes);
+}
+
+int
+main(int argc, char *argv[])
+{
+ int c, error, s, verbose, s2, kfd;
+ int xmitset, family;
+ int bufsize;
+ int ai_flag;
+ char *host;
+ char *portstr;
+ struct kttcp_io_args kio;
+ struct addrinfo hints, *addr, *res;
+ struct sockaddr_storage ss;
+ struct rusage rustart, ruend;
+ struct timeval tvtmp;
+ unsigned long long ull, usecs, bytespersec, bitspersec, xmitsize;
+ char connecthost[NI_MAXHOST];
+ socklen_t slen;
+ const int one = 1;
+ u_long cmd;
+
+ cmd = 0;
+ portstr = KTTCP_PORT;
+ verbose = 1;
+ xmitset = 0;
+ bufsize = KTTCP_SOCKBUF_DEFAULT;
+ xmitsize = KTTCP_XMITSIZE;
+ family = PF_UNSPEC;
+ while ((c = getopt(argc, argv, "46b:n:p:qrtvw:")) != -1) {
+ switch (c) {
+ case '4':
+ if (family != PF_UNSPEC)
+ usage();
+ family = PF_INET;
+ break;
+ case '6':
+ if (family != PF_UNSPEC)
+ usage();
+ family = PF_INET6;
+ break;
+ case 'b':
+ ull = get_bytes(optarg);
+ if (ull > INT_MAX)
+ errx(1,
+ "invalid socket buffer size: %s\n", optarg);
+ bufsize = ull;
+ break;
+ case 'n':
+ xmitsize = get_bytes(optarg);
+ if (xmitsize > KTTCP_MAX_XMIT)
+ xmitsize = KTTCP_MAX_XMIT;
+ xmitset = 1;
+ break;
+ case 'p':
+ portstr = optarg;
+ break;
+ case 'q':
+ verbose = 0;
+ break;
+ case 'r':
+ if (cmd != 0)
+ usage();
+ cmd = KTTCP_IO_RECV;
+ break;
+ case 't':
+ if (cmd != 0)
+ usage();
+ cmd = KTTCP_IO_SEND;
+ break;
+ case 'v':
+ verbose = 2;
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ }
+ if (cmd == 0)
+ usage();
+
+ argc -= optind;
+ argv += optind;
+
+ if (cmd == KTTCP_IO_SEND) {
+ if (xmitsize <= 0 || argc < 1)
+ usage();
+ host = argv[0];
+ ai_flag = 0;
+ } else {
+ if (xmitset == 0)
+ xmitsize = KTTCP_MAX_XMIT;
+ host = NULL;
+ ai_flag = AI_PASSIVE;
+ }
+
+ if ((kfd = open(KTTCP_DEVICE, O_RDWR, 666)) == -1)
+ err(2, "open %s", KTTCP_DEVICE);
+
+ memset(&hints, 0, sizeof hints);
+ hints.ai_flags = ai_flag;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_family = family;
+ error = getaddrinfo(host, portstr, &hints, &addr);
+
+ if (error != 0)
+ errx(2, "%s", gai_strerror(error));
+
+ s = -1;
+ for (res = addr; res != NULL; res = res->ai_next) {
+ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
+ if (s >= 0)
+ break;
+ }
+ if (res == NULL)
+ err(2, "can't create socket");
+
+ printf("kttcp: socket buffer size: %d\n", bufsize);
+
+ if (cmd == KTTCP_IO_SEND) {
+ if (connect(s, res->ai_addr, res->ai_addrlen) < 0)
+ err(2, "connect");
+ if (verbose) {
+ getnameinfo(res->ai_addr, res->ai_addrlen,
+ connecthost, sizeof connecthost, NULL, 0,
+ NI_NUMERICHOST);
+ printf("kttcp: connected to %s\n", connecthost);
+ }
+ if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof (int))
+ < 0)
+ err(2, "setsockopt sndbuf");
+ kio.kio_socket = s;
+ } else {
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof (int)) < 0)
+ err(2, "setsockopt reuseaddr");
+ if (bind(s, res->ai_addr, res->ai_addrlen) < 0)
+ err(2, "bind");
+ if (listen(s, 1) < 0)
+ err(2, "listen");
+ if (verbose)
+ printf("kttcp: listening on port %s\n", portstr);
+ slen = sizeof ss;
+ s2 = accept(s, (struct sockaddr *)&ss, &slen);
+ if (s2 < 0)
+ err(2, "accept");
+ if (verbose) {
+ getnameinfo((struct sockaddr *)&ss, ss.ss_len,
+ connecthost, sizeof connecthost, NULL, 0,
+ NI_NUMERICHOST);
+ printf("kttcp: connect from %s\n", connecthost);
+ }
+ if (setsockopt(s2, SOL_SOCKET, SO_RCVBUF, &bufsize,
+ sizeof (int)) < 0)
+ err(2, "setsockopt rcvbuf");
+ kio.kio_socket = s2;
+ }
+
+ kio.kio_totalsize = xmitsize;
+
+ getrusage(RUSAGE_SELF, &rustart);
+ if (ioctl(kfd, cmd, &kio) == -1)
+ err(2, "kttcp i/o command");
+ getrusage(RUSAGE_SELF, &ruend);
+
+ usecs = (unsigned long long)kio.kio_elapsed.tv_sec * 1000000;
+ usecs += kio.kio_elapsed.tv_usec;
+
+ bytespersec = kio.kio_bytesdone * 1000000LL / usecs;
+ bitspersec = bytespersec * NBBY;
+ printf("kttcp: %llu bytes in %ld.%03ld real seconds ==> %llu bytes/sec\n",
+ kio.kio_bytesdone, kio.kio_elapsed.tv_sec,
+ kio.kio_elapsed.tv_usec / 1000, bytespersec);
+ if (verbose > 1) {
+ timersub(&ruend.ru_stime, &rustart.ru_stime, &tvtmp);
+ bytespersec = kio.kio_bytesdone * 1000000LL /
+ (tvtmp.tv_sec * 1000000ULL + tvtmp.tv_usec);
+ printf("kttcp: %llu bytes in %ld.%03ld CPU seconds ==> %llu bytes/CPU sec\n",
+ kio.kio_bytesdone, tvtmp.tv_sec, tvtmp.tv_usec / 1000, bytespersec);
+ }
+ printf(" %g (%g) Megabits/sec\n",
+ ((double) bitspersec / 1024.0) / 1024.0,
+ ((double) bitspersec / 1000.0) / 1000.0);
+
+ timersub(&ruend.ru_utime, &rustart.ru_utime, &tvtmp);
+ /* XXX
+ * sometimes, this ends up as -1 * hz!?
+ */
+ if (tvtmp.tv_sec < 0)
+ tvtmp.tv_sec = tvtmp.tv_usec = 0;
+ printf(" %ld.%02lduser", tvtmp.tv_sec, tvtmp.tv_usec / 10000);
+ ull = tvtmp.tv_sec * 1000000ULL + tvtmp.tv_usec;
+
+ timersub(&ruend.ru_stime, &rustart.ru_stime, &tvtmp);
+ printf(" %ld.%02ldsys", tvtmp.tv_sec, tvtmp.tv_usec / 10000);
+ ull += tvtmp.tv_sec * 1000000ULL + tvtmp.tv_usec;
+
+ printf(" %lld.%lldreal", usecs / 1000000, (usecs % 1000000) / 10000);
+ printf(" %lld%%", ull * 100 / usecs);
+ printf("\n");
+
+
+ close(kio.kio_socket);
+ if (cmd == KTTCP_IO_RECV)
+ close(s);
+ close(kfd);
+ freeaddrinfo(addr);
+
+ return 0;
+}
diff --git a/tools/tools/kttcp/sys/Makefile b/tools/tools/kttcp/sys/Makefile
new file mode 100644
index 0000000..d781530
--- /dev/null
+++ b/tools/tools/kttcp/sys/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+KMOD = kttcp
+SRCS = kttcp.c
+SRCS += device_if.h
+MFILES = kern/device_if.m
+
+.include <bsd.kmod.mk>
diff --git a/tools/tools/kttcp/sys/kttcp.c b/tools/tools/kttcp/sys/kttcp.c
new file mode 100644
index 0000000..adaed5c
--- /dev/null
+++ b/tools/tools/kttcp/sys/kttcp.c
@@ -0,0 +1,772 @@
+/* $FreeBSD$ */
+/* $NetBSD: kttcp.c,v 1.3 2002/07/03 19:36:52 thorpej Exp $ */
+
+/*
+ * Copyright (c) 2002 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Frank van der Linden and Jason R. Thorpe for
+ * Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * kttcp.c --
+ *
+ * This module provides kernel support for testing network
+ * throughput from the perspective of the kernel. It is
+ * similar in spirit to the classic ttcp network benchmark
+ * program, the main difference being that with kttcp, the
+ * kernel is the source and sink of the data.
+ *
+ * Testing like this is useful for a few reasons:
+ *
+ * 1. This allows us to know what kind of performance we can
+ * expect from network applications that run in the kernel
+ * space, such as the NFS server or the NFS client. These
+ * applications don't have to move the data to/from userspace,
+ * and so benchmark programs which run in userspace don't
+ * give us an accurate model.
+ *
+ * 2. Since data received is just thrown away, the receiver
+ * is very fast. This can provide better exercise for the
+ * sender at the other end.
+ *
+ * 3. Since the NetBSD kernel currently uses a run-to-completion
+ * scheduling model, kttcp provides a benchmark model where
+ * preemption of the benchmark program is not an issue.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/sysctl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/fcntl.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/socket.h>
+#include <sys/mbuf.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+
+#include <dev/kttcp/kttcpio.h>
+
+#ifndef timersub
+#define timersub(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
+ if ((vvp)->tv_usec < 0) { \
+ (vvp)->tv_sec--; \
+ (vvp)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif
+
+static int kttcp_send(struct thread *p, struct kttcp_io_args *);
+static int kttcp_recv(struct thread *p, struct kttcp_io_args *);
+static int kttcp_sosend(struct socket *, unsigned long long,
+ unsigned long long *, struct thread *, int);
+static int kttcp_soreceive(struct socket *, unsigned long long,
+ unsigned long long *, struct thread *, int *);
+
+static d_open_t kttcpopen;
+static d_ioctl_t kttcpioctl;
+
+static struct cdevsw kttcp_cdevsw = {
+ .d_open = kttcpopen,
+ .d_ioctl = kttcpioctl,
+ .d_name = "kttcp",
+ .d_maj = MAJOR_AUTO,
+};
+
+static int
+kttcpopen(dev_t dev, int flag, int mode, struct thread *td)
+{
+ /* Always succeeds. */
+ return (0);
+}
+
+static int
+kttcpioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+{
+ int error;
+
+ if ((flag & FWRITE) == 0)
+ return EPERM;
+
+ switch (cmd) {
+ case KTTCP_IO_SEND:
+ error = kttcp_send(td, (struct kttcp_io_args *) data);
+ break;
+
+ case KTTCP_IO_RECV:
+ error = kttcp_recv(td, (struct kttcp_io_args *) data);
+ break;
+
+ default:
+ return EINVAL;
+ }
+
+ return error;
+}
+
+static int
+kttcp_send(struct thread *td, struct kttcp_io_args *kio)
+{
+ struct file *fp;
+ int error;
+ struct timeval t0, t1;
+ unsigned long long len = 0;
+ unsigned long long done;
+
+ if (kio->kio_totalsize >= KTTCP_MAX_XMIT)
+ return EINVAL;
+
+ error = fget(td, kio->kio_socket, &fp);
+ if (error != 0)
+ return error;
+ mtx_lock(&Giant);
+ if ((fp->f_flag & FWRITE) == 0) {
+ fdrop(fp, td);
+ mtx_unlock(&Giant);
+ return EBADF;
+ }
+ if (fp->f_type == DTYPE_SOCKET) {
+ len = kio->kio_totalsize;
+ microtime(&t0);
+ do {
+ error = kttcp_sosend((struct socket *)fp->f_data, len,
+ &done, td, 0);
+ len -= done;
+ } while (error == 0 && len > 0);
+ microtime(&t1);
+ } else
+ error = EFTYPE;
+ fdrop(fp, td);
+ mtx_unlock(&Giant);
+ if (error != 0)
+ return error;
+ timersub(&t1, &t0, &kio->kio_elapsed);
+
+ kio->kio_bytesdone = kio->kio_totalsize - len;
+
+ return 0;
+}
+
+static int
+kttcp_recv(struct thread *td, struct kttcp_io_args *kio)
+{
+ struct file *fp;
+ int error;
+ struct timeval t0, t1;
+ unsigned long long len = 0;
+ unsigned long long done;
+
+ if (kio->kio_totalsize > KTTCP_MAX_XMIT)
+ return EINVAL;
+
+ error = fget(td, kio->kio_socket, &fp);
+ if (error != 0)
+ return error;
+ mtx_lock(&Giant);
+ if ((fp->f_flag & FWRITE) == 0) {
+ fdrop(fp, td);
+ mtx_unlock(&Giant);
+ return EBADF;
+ }
+ if (fp->f_type == DTYPE_SOCKET) {
+ len = kio->kio_totalsize;
+ microtime(&t0);
+ do {
+ error = kttcp_soreceive((struct socket *)fp->f_data,
+ len, &done, td, NULL);
+ len -= done;
+ } while (error == 0 && len > 0 && done > 0);
+ microtime(&t1);
+ if (error == EPIPE)
+ error = 0;
+ } else
+ error = EFTYPE;
+ fdrop(fp, td);
+ mtx_unlock(&Giant);
+ if (error != 0)
+ return error;
+ timersub(&t1, &t0, &kio->kio_elapsed);
+
+ kio->kio_bytesdone = kio->kio_totalsize - len;
+
+ return 0;
+}
+
+#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
+
+/*
+ * Slightly changed version of sosend()
+ */
+int
+kttcp_sosend(struct socket *so, unsigned long long slen,
+ unsigned long long *done, struct thread *td, int flags)
+{
+ struct mbuf **mp, *m, *top;
+ long space, len, mlen;
+ int error, s, dontroute, atomic;
+ long long resid;
+
+ atomic = sosendallatonce(so);
+ resid = slen;
+ top = NULL;
+ /*
+ * In theory resid should be unsigned.
+ * However, space must be signed, as it might be less than 0
+ * if we over-committed, and we must use a signed comparison
+ * of space and resid. On the other hand, a negative resid
+ * causes us to loop sending 0-length segments to the protocol.
+ *
+ * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
+ * type sockets since that's an error.
+ */
+ if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
+ error = EINVAL;
+ goto out;
+ }
+
+ dontroute =
+ (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
+ (so->so_proto->pr_flags & PR_ATOMIC);
+ if (td)
+ td->td_proc->p_stats->p_ru.ru_msgsnd++;
+#define snderr(errno) { error = errno; splx(s); goto release; }
+
+restart:
+ error = sblock(&so->so_snd, SBLOCKWAIT(flags));
+ if (error)
+ goto out;
+ do {
+ s = splnet();
+ if (so->so_state & SS_CANTSENDMORE)
+ snderr(EPIPE);
+ if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ splx(s);
+ goto release;
+ }
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ /*
+ * `sendto' and `sendmsg' is allowed on a connection-
+ * based socket if it supports implied connect.
+ * Return ENOTCONN if not connected and no address is
+ * supplied.
+ */
+ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
+ (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
+ if ((so->so_state & SS_ISCONFIRMING) == 0 &&
+ !(resid == 0))
+ snderr(ENOTCONN);
+ } else
+ snderr(EDESTADDRREQ);
+ }
+ space = sbspace(&so->so_snd);
+ if (flags & MSG_OOB)
+ space += 1024;
+ if (atomic && resid > so->so_snd.sb_hiwat)
+ snderr(EMSGSIZE);
+ if (space < resid && (atomic || space < so->so_snd.sb_lowat)) {
+ if (so->so_state & SS_NBIO)
+ snderr(EWOULDBLOCK);
+ sbunlock(&so->so_snd);
+ error = sbwait(&so->so_snd);
+ splx(s);
+ if (error)
+ goto out;
+ goto restart;
+ }
+ splx(s);
+ mp = &top;
+ do {
+ do {
+ if (top == 0) {
+ MGETHDR(m, M_WAIT, MT_DATA);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto release;
+ }
+ mlen = MHLEN;
+ m->m_pkthdr.len = 0;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ } else {
+ MGET(m, M_WAIT, MT_DATA);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto release;
+ }
+ mlen = MLEN;
+ }
+ if (resid >= MINCLSIZE) {
+ MCLGET(m, M_WAIT);
+ if ((m->m_flags & M_EXT) == 0)
+ goto nopages;
+ mlen = MCLBYTES;
+ len = min(min(mlen, resid), space);
+ } else {
+ nopages:
+ len = min(min(mlen, resid), space);
+ /*
+ * For datagram protocols, leave room
+ * for protocol headers in first mbuf.
+ */
+ if (atomic && top == 0 && len < mlen)
+ MH_ALIGN(m, len);
+ }
+ space -= len;
+ resid -= len;
+ m->m_len = len;
+ *mp = m;
+ top->m_pkthdr.len += len;
+ if (error)
+ goto release;
+ mp = &m->m_next;
+ if (resid <= 0) {
+ if (flags & MSG_EOR)
+ top->m_flags |= M_EOR;
+ break;
+ }
+ } while (space > 0 && atomic);
+ if (dontroute)
+ so->so_options |= SO_DONTROUTE;
+ s = splnet(); /* XXX */
+ /*
+ * XXX all the SS_CANTSENDMORE checks previously
+ * done could be out of date. We could have recieved
+ * a reset packet in an interrupt or maybe we slept
+ * while doing page faults in uiomove() etc. We could
+ * probably recheck again inside the splnet() protection
+ * here, but there are probably other places that this
+ * also happens. We must rethink this.
+ */
+ error = (*so->so_proto->pr_usrreqs->pru_send)(so,
+ (flags & MSG_OOB) ? PRUS_OOB :
+ /*
+ * If the user set MSG_EOF, the protocol
+ * understands this flag and nothing left to
+ * send then use PRU_SEND_EOF instead of PRU_SEND.
+ */
+ ((flags & MSG_EOF) &&
+ (so->so_proto->pr_flags & PR_IMPLOPCL) &&
+ (resid <= 0)) ?
+ PRUS_EOF :
+ /* If there is more to send set PRUS_MORETOCOME */
+ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
+ top, NULL, NULL, td);
+ splx(s);
+ if (dontroute)
+ so->so_options &= ~SO_DONTROUTE;
+ top = 0;
+ mp = &top;
+ if (error)
+ goto release;
+ } while (resid && space > 0);
+ } while (resid);
+
+release:
+ sbunlock(&so->so_snd);
+out:
+ if (top)
+ m_freem(top);
+ *done = slen - resid;
+ return (error);
+}
+
+int
+kttcp_soreceive(struct socket *so, unsigned long long slen,
+ unsigned long long *done, struct thread *td, int *flagsp)
+{
+ struct mbuf *m, **mp;
+ int flags, len, error, s, offset;
+ struct protosw *pr;
+ struct mbuf *nextrecord;
+ int moff, type;
+ long long orig_resid, resid;
+
+ pr = so->so_proto;
+ mp = NULL;
+ type = 0;
+ resid = orig_resid = slen;
+ if (flagsp)
+ flags = *flagsp &~ MSG_EOR;
+ else
+ flags = 0;
+ if (flags & MSG_OOB) {
+ m = m_get(M_WAIT, MT_DATA);
+ if (m == NULL)
+ return (ENOBUFS);
+ error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
+ if (error)
+ goto bad;
+ do {
+ resid -= min(resid, m->m_len);
+ m = m_free(m);
+ } while (resid && error == 0 && m);
+bad:
+ if (m)
+ m_freem(m);
+ return (error);
+ }
+ if (mp)
+ *mp = (struct mbuf *)0;
+ if (so->so_state & SS_ISCONFIRMING && resid)
+ (*pr->pr_usrreqs->pru_rcvd)(so, 0);
+
+restart:
+ error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
+ if (error)
+ return (error);
+ s = splnet();
+
+ m = so->so_rcv.sb_mb;
+ /*
+ * If we have less data than requested, block awaiting more
+ * (subject to any timeout) if:
+ * 1. the current count is less than the low water mark, or
+ * 2. MSG_WAITALL is set, and it is possible to do the entire
+ * receive operation at once if we block (resid <= hiwat).
+ * 3. MSG_DONTWAIT is not set
+ * If MSG_WAITALL is set but resid is larger than the receive buffer,
+ * we have to do the receive in sections, and thus risk returning
+ * a short count if a timeout or signal occurs after we start.
+ */
+ if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
+ so->so_rcv.sb_cc < resid) &&
+ (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
+ ((flags & MSG_WAITALL) && resid <= so->so_rcv.sb_hiwat)) &&
+ m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
+ KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
+ if (so->so_error) {
+ if (m)
+ goto dontblock;
+ error = so->so_error;
+ if ((flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ goto release;
+ }
+ if (so->so_state & SS_CANTRCVMORE) {
+ if (m)
+ goto dontblock;
+ else
+ goto release;
+ }
+ for (; m; m = m->m_next)
+ if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
+ m = so->so_rcv.sb_mb;
+ goto dontblock;
+ }
+ if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
+ (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+ error = ENOTCONN;
+ goto release;
+ }
+ if (resid == 0)
+ goto release;
+ if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
+ error = EWOULDBLOCK;
+ goto release;
+ }
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
+ sbunlock(&so->so_rcv);
+ error = sbwait(&so->so_rcv);
+ splx(s);
+ if (error)
+ return (error);
+ goto restart;
+ }
+dontblock:
+ /*
+ * On entry here, m points to the first record of the socket buffer.
+ * While we process the initial mbufs containing address and control
+ * info, we save a copy of m->m_nextpkt into nextrecord.
+ */
+ if (td)
+ td->td_proc->p_stats->p_ru.ru_msgrcv++;
+ KASSERT(m == so->so_rcv.sb_mb, ("receive 1b"));
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
+ nextrecord = m->m_nextpkt;
+ if (pr->pr_flags & PR_ADDR) {
+ KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
+ orig_resid = 0;
+ if (flags & MSG_PEEK) {
+ m = m->m_next;
+ } else {
+ sbfree(&so->so_rcv, m);
+ so->so_rcv.sb_mb = m_free(m);
+ m = so->so_rcv.sb_mb;
+ }
+ }
+ while (m && m->m_type == MT_CONTROL && error == 0) {
+ if (flags & MSG_PEEK) {
+ m = m->m_next;
+ } else {
+ sbfree(&so->so_rcv, m);
+ so->so_rcv.sb_mb = m_free(m);
+ m = so->so_rcv.sb_mb;
+ }
+ }
+
+ /*
+ * If m is non-NULL, we have some data to read. From now on,
+ * make sure to keep sb_lastrecord consistent when working on
+ * the last packet on the chain (nextrecord == NULL) and we
+ * change m->m_nextpkt.
+ */
+ if (m) {
+ if ((flags & MSG_PEEK) == 0) {
+ m->m_nextpkt = nextrecord;
+ /*
+ * If nextrecord == NULL (this is a single chain),
+ * then sb_lastrecord may not be valid here if m
+ * was changed earlier.
+ */
+ if (nextrecord == NULL) {
+ KASSERT(so->so_rcv.sb_mb == m, ("receive 1c"));
+ so->so_rcv.sb_lastrecord = m;
+ }
+ }
+ type = m->m_type;
+ if (type == MT_OOBDATA)
+ flags |= MSG_OOB;
+ } else {
+ if ((flags & MSG_PEEK) == 0) {
+ KASSERT(so->so_rcv.sb_mb == m, ("receive 1d"));
+ so->so_rcv.sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ }
+ }
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
+
+ moff = 0;
+ offset = 0;
+ while (m && resid > 0 && error == 0) {
+ if (m->m_type == MT_OOBDATA) {
+ if (type != MT_OOBDATA)
+ break;
+ } else if (type == MT_OOBDATA)
+ break;
+ else
+ KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
+ ("receive 3"));
+ so->so_state &= ~SS_RCVATMARK;
+ len = resid;
+ if (so->so_oobmark && len > so->so_oobmark - offset)
+ len = so->so_oobmark - offset;
+ if (len > m->m_len - moff)
+ len = m->m_len - moff;
+ /*
+ * If mp is set, just pass back the mbufs.
+ * Otherwise copy them out via the uio, then free.
+ * Sockbuf must be consistent here (points to current mbuf,
+ * it points to next record) when we drop priority;
+ * we must note any additions to the sockbuf when we
+ * block interrupts again.
+ */
+ resid -= len;
+ if (len == m->m_len - moff) {
+ if (m->m_flags & M_EOR)
+ flags |= MSG_EOR;
+ if (flags & MSG_PEEK) {
+ m = m->m_next;
+ moff = 0;
+ } else {
+ nextrecord = m->m_nextpkt;
+ sbfree(&so->so_rcv, m);
+ if (mp) {
+ *mp = m;
+ mp = &m->m_next;
+ so->so_rcv.sb_mb = m = m->m_next;
+ *mp = (struct mbuf *)0;
+ } else {
+ so->so_rcv.sb_mb = m = m_free(m);
+ }
+ /*
+ * If m != NULL, we also know that
+ * so->so_rcv.sb_mb != NULL.
+ */
+ KASSERT(so->so_rcv.sb_mb == m, ("receive 3a"));
+ if (m) {
+ m->m_nextpkt = nextrecord;
+ if (nextrecord == NULL)
+ so->so_rcv.sb_lastrecord = m;
+ } else {
+ so->so_rcv.sb_mb = nextrecord;
+ SB_EMPTY_FIXUP(&so->so_rcv);
+ }
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
+ }
+ } else {
+ if (flags & MSG_PEEK)
+ moff += len;
+ else {
+ if (mp)
+ *mp = m_copym(m, 0, len, M_WAIT);
+ m->m_data += len;
+ m->m_len -= len;
+ so->so_rcv.sb_cc -= len;
+ }
+ }
+ if (so->so_oobmark) {
+ if ((flags & MSG_PEEK) == 0) {
+ so->so_oobmark -= len;
+ if (so->so_oobmark == 0) {
+ so->so_state |= SS_RCVATMARK;
+ break;
+ }
+ } else {
+ offset += len;
+ if (offset == so->so_oobmark)
+ break;
+ }
+ }
+ if (flags & MSG_EOR)
+ break;
+ /*
+ * If the MSG_WAITALL flag is set (for non-atomic socket),
+ * we must not quit until "uio->uio_resid == 0" or an error
+ * termination. If a signal/timeout occurs, return
+ * with a short count but without error.
+ * Keep sockbuf locked against other readers.
+ */
+ while (flags & MSG_WAITALL && m == 0 && resid > 0 &&
+ !sosendallatonce(so) && !nextrecord) {
+ if (so->so_error || so->so_state & SS_CANTRCVMORE)
+ break;
+ /*
+ * The window might have closed to zero, make
+ * sure we send an ack now that we've drained
+ * the buffer or we might end up blocking until
+ * the idle takes over (5 seconds).
+ */
+ if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+ (*pr->pr_usrreqs->pru_rcvd)(so, flags);
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ sbunlock(&so->so_rcv);
+ splx(s);
+ return (0);
+ }
+ m = so->so_rcv.sb_mb;
+ if (m)
+ nextrecord = m->m_nextpkt;
+ }
+ }
+
+ if (m && pr->pr_flags & PR_ATOMIC) {
+ flags |= MSG_TRUNC;
+ if ((flags & MSG_PEEK) == 0)
+ (void) sbdroprecord(&so->so_rcv);
+ }
+ if ((flags & MSG_PEEK) == 0) {
+ if (m == 0) {
+ /*
+ * First part is an inline SB_EMPTY_FIXUP(). Second
+ * part makes sure sb_lastrecord is up-to-date if
+ * there is still data in the socket buffer.
+ */
+ so->so_rcv.sb_mb = nextrecord;
+ if (so->so_rcv.sb_mb == NULL) {
+ so->so_rcv.sb_mbtail = NULL;
+ so->so_rcv.sb_lastrecord = NULL;
+ } else if (nextrecord->m_nextpkt == NULL)
+ so->so_rcv.sb_lastrecord = nextrecord;
+ }
+ SBLASTRECORDCHK(&so->so_rcv);
+ SBLASTMBUFCHK(&so->so_rcv);
+ if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+ (*pr->pr_usrreqs->pru_rcvd)(so, flags);
+ }
+ if (orig_resid == resid && orig_resid &&
+ (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
+ sbunlock(&so->so_rcv);
+ splx(s);
+ goto restart;
+ }
+
+ if (flagsp)
+ *flagsp |= flags;
+release:
+ sbunlock(&so->so_rcv);
+ splx(s);
+ *done = slen - resid;
+#if 0
+ printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid);
+#endif
+ return (error);
+}
+
+static dev_t kttcp_dev;
+
+/*
+ * Initialization code, both for static and dynamic loading.
+ */
+static int
+kttcpdev_modevent(module_t mod, int type, void *unused)
+{
+ switch (type) {
+ case MOD_LOAD:
+ kttcp_dev = make_dev(&kttcp_cdevsw, 0,
+ UID_ROOT, GID_WHEEL, 0666,
+ "kttcp");
+ return 0;
+ case MOD_UNLOAD:
+ /*XXX disallow if active sessions */
+ destroy_dev(kttcp_dev);
+ return 0;
+ }
+ return EINVAL;
+}
+
+static moduledata_t kttcpdev_mod = {
+ "kttcpdev",
+ kttcpdev_modevent,
+ 0
+};
+MODULE_VERSION(kttcpdev, 1);
+DECLARE_MODULE(kttcpdev, kttcpdev_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/tools/tools/kttcp/sys/kttcpio.h b/tools/tools/kttcp/sys/kttcpio.h
new file mode 100644
index 0000000..1375f6e
--- /dev/null
+++ b/tools/tools/kttcp/sys/kttcpio.h
@@ -0,0 +1,59 @@
+/* $FreeBSD$ */
+/* $NetBSD$ */
+
+/*
+ * Copyright (c) 2002 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Frank van der Linden and Jason R. Thorpe for
+ * Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _DEV_KTTCPIO_H_
+#define _DEV_KTTCPIO_H_
+
+#include <sys/ioccom.h>
+#include <sys/time.h>
+
+struct kttcp_io_args {
+ unsigned long long kio_totalsize;/* i/o total size (IN) */
+ unsigned long long kio_bytesdone;/* i/o actually completed (OUT) */
+ struct timeval kio_elapsed; /* elapsed time (OUT) */
+ int kio_socket; /* socket to use for i/o (IN) */
+ int kio_protovers; /* KTTCP protocol version */
+};
+
+#define KTTCP_IO_SEND _IOWR('K', 0, struct kttcp_io_args)
+#define KTTCP_IO_RECV _IOWR('K', 1, struct kttcp_io_args)
+
+#define KTTCP_MAX_XMIT 0x7fffffffLL /* XXX can't handle > 31 bits */
+
+#endif /* _DEV_KTTCPIO_H_ */
OpenPOWER on IntegriCloud