author     ken <ken@FreeBSD.org>    2012-01-26 16:35:09 +0000
committer  ken <ken@FreeBSD.org>    2012-01-26 16:35:09 +0000
commit     7f685c218aab7dcffad6119b6e3741a0b71a2e97 (patch)
tree       d7ac881e15cd62a22e0e8f7d9e34f5708762e482
parent     709d732d543664ea423f0de76539e97df592cf5c (diff)
download   FreeBSD-src-7f685c218aab7dcffad6119b6e3741a0b71a2e97.zip
           FreeBSD-src-7f685c218aab7dcffad6119b6e3741a0b71a2e97.tar.gz
Xen netback driver rewrite.
share/man/man4/Makefile,
share/man/man4/xnb.4,
sys/dev/xen/netback/netback.c,
sys/dev/xen/netback/netback_unit_tests.c:
Rewrote the netback driver for xen to attach properly via newbus
and work properly in both HVM and PVM mode (only HVM is tested).
Works with the in-tree FreeBSD netfront driver or the Windows
netfront driver from SuSE. Has not been extensively tested with
a Linux netfront driver. Does not implement LRO, TSO, or
polling. Includes unit tests that may be run through sysctl
after compiling with XNB_DEBUG defined.
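
The unit-test hook mentioned above is exposed as a read-only string sysctl (dev.xnb.%d.unit_test_results, per the xnb.4 page added below). A minimal userland sketch for reading it with sysctlbyname(3) follows; the unit number 0, and the assumption that the kernel was built with XNB_DEBUG so the node exists, are illustrative only.

```c
/*
 * Sketch: read the dev.xnb.0.unit_test_results sysctl from userland.
 * Unit 0 is an assumption; the node is only present when the driver
 * was compiled with XNB_DEBUG defined.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	const char *node = "dev.xnb.0.unit_test_results";
	size_t len = 0;
	char *buf;

	/* First call with a NULL buffer to learn the result size. */
	if (sysctlbyname(node, NULL, &len, NULL, 0) != 0) {
		perror(node);
		return (1);
	}
	buf = malloc(len + 1);
	if (buf == NULL || sysctlbyname(node, buf, &len, NULL, 0) != 0) {
		perror(node);
		return (1);
	}
	buf[len] = '\0';
	printf("%s\n", buf);	/* human-readable pass/fail report */
	free(buf);
	return (0);
}
```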
sys/dev/xen/blkback/blkback.c,
sys/xen/interface/io/netif.h:
Comment elaboration.
sys/kern/uipc_mbuf.c:
Fix page fault in kernel mode when calling m_print() on a
null mbuf. Since m_print() is only used for debugging, there
are no performance concerns for extra error checking code.
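
As a rough illustration of the fix described above (not the committed hunk), the guard amounts to an early NULL check in m_print() so that a debugging call on a null mbuf reports the pointer instead of dereferencing it and faulting:

```c
/*
 * Sketch only -- a defensive guard of this shape at the top of
 * m_print() in sys/kern/uipc_mbuf.c.  Kernel headers shown for
 * context; the rest of the function is unchanged.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

void
m_print(const struct mbuf *m, int maxlen)
{

	if (m == NULL) {
		printf("mbuf: %p\n", m);	/* report and bail instead of faulting */
		return;
	}
	/* ... existing dump of the packet header and data follows ... */
}
```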
sys/kern/subr_scanf.c:
Add the "hh" and "ll" width specifiers from C99 to scanf().
A few callers were already using "ll" even though scanf()
was handling it as "l".
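
For illustration, the C99 semantics the kernel's scanf() gains here match userland sscanf(3): "hh" stores into a char-sized object and "ll" into a long long, rather than both being treated as "l". A minimal standalone example:

```c
/* Demonstrates the C99 "hh" and "ll" length modifiers with sscanf(3). */
#include <stdio.h>

int
main(void)
{
	signed char small;
	long long big;

	sscanf("-7 123456789012345", "%hhd %lld", &small, &big);
	printf("small=%d big=%lld\n", (int)small, big);
	return (0);
}
```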
Submitted by: Alan Somers <alans@spectralogic.com>
Submitted by: John Suykerbuyk <johns@spectralogic.com>
Sponsored by: Spectra Logic
MFC after: 1 week
Reviewed by: ken
-rw-r--r--  share/man/man4/Makefile                      2
-rw-r--r--  share/man/man4/xnb.4                       134
-rw-r--r--  sys/dev/xen/blkback/blkback.c                4
-rw-r--r--  sys/dev/xen/netback/netback.c             3438
-rw-r--r--  sys/dev/xen/netback/netback_unit_tests.c  2530
-rw-r--r--  sys/kern/subr_scanf.c                       21
-rw-r--r--  sys/kern/uipc_mbuf.c                         5
-rw-r--r--  sys/xen/interface/io/netif.h                 8
8 files changed, 4887 insertions, 1255 deletions
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index e935472..90dcdeb 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -531,6 +531,7 @@ MAN= aac.4 \ ${_xen.4} \ xhci.4 \ xl.4 \ + ${_xnb.4} \ xpt.4 \ zero.4 \ zyd.4 @@ -731,6 +732,7 @@ _urtw.4= urtw.4 _viawd.4= viawd.4 _wpi.4= wpi.4 _xen.4= xen.4 +_xnb.4= xnb.4 MLINKS+=lindev.4 full.4 .endif diff --git a/share/man/man4/xnb.4 b/share/man/man4/xnb.4 new file mode 100644 index 0000000..f2d8f8a --- /dev/null +++ b/share/man/man4/xnb.4 @@ -0,0 +1,134 @@ +.\" Copyright (c) 2012 Spectra Logic Corporation +.\" All rights reserved. +.\" +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions, and the following disclaimer, +.\" without modification. +.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer +.\" substantially similar to the "NO WARRANTY" disclaimer below +.\" ("Disclaimer") and any redistribution must be conditioned upon +.\" including a substantially similar Disclaimer requirement for further +.\" binary redistribution. +.\" +.\" NO WARRANTY +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR +.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGES. +.\" +.\" Authors: Alan Somers (Spectra Logic Corporation) +.\" +.\" $FreeBSD$ +.\" + +.Dd January 6, 2012 +.Dt XNB 4 +.Os +.Sh NAME +.Nm xnb +.Nd "Xen Paravirtualized Backend Ethernet Driver" +.Sh SYNOPSIS +To compile this driver into the kernel, place the following lines in your +kernel configuration file: +.Bd -ragged -offset indent +.Cd "options XENHVM" +.Cd "device xenpci" +.Ed +.Sh DESCRIPTION +The +.Nm +driver provides the back half of a paravirtualized +.Xr xen 4 +network connection. The netback and netfront drivers appear to their +respective operating systems as Ethernet devices linked by a crossover cable. +Typically, +.Nm +will run on Domain 0 and the netfront driver will run on a guest domain. +However, it is also possible to run +.Nm +on a guest domain. It may be bridged or routed to provide the netfront's +domain access to other guest domains or to a physical network. +.Pp +In most respects, the +.Nm +device appears to the OS as an other Ethernet device. It can be configured at +runtime entirely with +.Xr ifconfig 8 +\&. In particular, it supports MAC changing, arbitrary MTU sizes, checksum +offload for IP, UDP, and TCP for both receive and transmit, and TSO. However, +see +.Sx CAVEATS +before enabling txcsum, rxcsum, or tso. 
+.Sh SYSCTL VARIABLES +The following read-only variables are available via +.Xr sysctl 8 : +.Bl -tag -width indent +.It Va dev.xnb.%d.dump_rings +Displays information about the ring buffers used to pass requests between the +netfront and netback. Mostly useful for debugging, but can also be used to +get traffic statistics. +.It Va dev.xnb.%d.unit_test_results +Runs a builtin suite of unit tests and displays the results. Does not affect +the operation of the driver in any way. Note that the test suite simulates +error conditions; this will result in error messages being printed to the +system system log. +.Sh CAVEATS +Packets sent through Xennet pass over shared memory, so the protocol includes +no form of link-layer checksum or CRC. Furthermore, Xennet drivers always +report to their hosts that they support receive and transmit checksum +offloading. They "offload" the checksum calculation by simply skipping it. +That works fine for packets that are exchanged between two domains on the same +machine. However, when a Xennet interface is bridged to a physical interface, +a correct checksum must be attached to any packets bound for that physical +interface. Currently, FreeBSD lacks any mechanism for an ethernet device to +inform the OS that newly received packets are valid even though their checksums +are not. So if the netfront driver is configured to offload checksum +calculations, it will pass non-checksumed packets to +.Nm +, which must then calculate the checksum in software before passing the packet +to the OS. +.Pp +For this reason, it is recommended that if +.Nm +is bridged to a physcal interface, then transmit checksum offloading should be +disabled on the netfront. The Xennet protocol does not have any mechanism for +the netback to request the netfront to do this; the operator must do it +manually. +.Sh SEE ALSO +.Xr arp 4 , +.Xr netintro 4 , +.Xr ng_ether 4 , +.Xr ifconfig 8 , +.Xr xen 4 +.Sh HISTORY +The +.Nm +device driver first appeared in +.Fx 10.0 +. +.Sh AUTHORS +The +.Nm +driver was written by +.An Alan Somers +.Aq alans@spectralogic.com +and +.An John Suykerbuyk +.Aq johns@spectralogic.com +.Sh BUGS +The +.Nm +driver does not properly checksum UDP datagrams that span more than one +Ethernet frame. Nor does it correctly checksum IPv6 packets. To workaround +that bug, disable transmit checksum offloading on the netfront driver. diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c index c42bfd9..c6ab562 100644 --- a/sys/dev/xen/blkback/blkback.c +++ b/sys/dev/xen/blkback/blkback.c @@ -3434,6 +3434,10 @@ xbb_shutdown(struct xbb_softc *xbb) DPRINTF("\n"); + /* + * Before unlocking mutex, set this flag to prevent other threads from + * getting into this function + */ xbb->flags |= XBBF_IN_SHUTDOWN; mtx_unlock(&xbb->lock); diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c index b2be6e4..ef7b074 100644 --- a/sys/dev/xen/netback/netback.c +++ b/sys/dev/xen/netback/netback.c @@ -1,1595 +1,2535 @@ -/* - * Copyright (c) 2006, Cisco Systems, Inc. +/*- + * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * Alan Somers (Spectra Logic Corporation) + * John Suykerbuyk (Spectra Logic Corporation) */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); + +/** + * \file netback.c + * + * \brief Device driver supporting the vending of network access + * from this FreeBSD domain to other domains. 
+ */ +#include "opt_inet.h" +#include "opt_global.h" + #include "opt_sctp.h" #include <sys/param.h> -#include <sys/systm.h> -#include <sys/sockio.h> -#include <sys/mbuf.h> -#include <sys/malloc.h> #include <sys/kernel.h> -#include <sys/socket.h> -#include <sys/queue.h> -#include <sys/taskqueue.h> -#include <sys/module.h> #include <sys/bus.h> +#include <sys/module.h> +#include <sys/rman.h> +#include <sys/socket.h> +#include <sys/sockio.h> #include <sys/sysctl.h> #include <net/if.h> #include <net/if_arp.h> -#include <net/if_types.h> #include <net/ethernet.h> -#include <net/if_bridgevar.h> +#include <net/if_dl.h> +#include <net/if_media.h> +#include <net/if_types.h> -#include <netinet/in_systm.h> #include <netinet/in.h> -#include <netinet/in_var.h> #include <netinet/ip.h> +#include <netinet/if_ether.h> +#if __FreeBSD_version >= 700000 #include <netinet/tcp.h> -#include <netinet/udp.h> -#ifdef SCTP -#include <netinet/sctp.h> -#include <netinet/sctp_crc32.h> #endif +#include <netinet/ip_icmp.h> +#include <netinet/udp.h> +#include <machine/in_cksum.h> -#include <vm/vm_extern.h> -#include <vm/vm_kern.h> +#include <vm/vm.h> +#include <vm/pmap.h> -#include <machine/in_cksum.h> -#include <machine/xen-os.h> -#include <machine/hypervisor.h> -#include <machine/hypervisor-ifs.h> -#include <machine/xen_intr.h> -#include <machine/evtchn.h> -#include <machine/xenbus.h> -#include <machine/gnttab.h> -#include <machine/xen-public/memory.h> -#include <dev/xen/xenbus/xenbus_comms.h> - - -#ifdef XEN_NETBACK_DEBUG -#define DPRINTF(fmt, args...) \ - printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) -#else -#define DPRINTF(fmt, args...) ((void)0) -#endif +#include <machine/_inttypes.h> +#include <machine/xen/xen-os.h> +#include <machine/xen/xenvar.h> -#ifdef XEN_NETBACK_DEBUG_LOTS -#define DDPRINTF(fmt, args...) \ - printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) -#define DPRINTF_MBUF(_m) print_mbuf(_m, 0) -#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len) -#else -#define DDPRINTF(fmt, args...) ((void)0) -#define DPRINTF_MBUF(_m) ((void)0) -#define DPRINTF_MBUF_LEN(_m, _len) ((void)0) -#endif +#include <xen/evtchn.h> +#include <xen/xen_intr.h> +#include <xen/interface/io/netif.h> +#include <xen/xenbus/xenbusvar.h> -#define WPRINTF(fmt, args...) \ - printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +/*--------------------------- Compile-time Tunables --------------------------*/ -#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) -#define BUG_ON PANIC_IF +/*---------------------------------- Macros ----------------------------------*/ +/** + * Custom malloc type for all driver allocations. + */ +static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data"); -#define IFNAME(_np) (_np)->ifp->if_xname +#define XNB_SG 1 /* netback driver supports feature-sg */ +#define XNB_GSO_TCPV4 1 /* netback driver supports feature-gso-tcpv4 */ +#define XNB_RX_COPY 1 /* netback driver supports feature-rx-copy */ +#define XNB_RX_FLIP 0 /* netback driver does not support feature-rx-flip */ -#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) -#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) +#undef XNB_DEBUG +#define XNB_DEBUG /* hardcode on during development */ -struct ring_ref { - vm_offset_t va; - grant_handle_t handle; - uint64_t bus_addr; -}; +#ifdef XNB_DEBUG +#define DPRINTF(fmt, args...) \ + printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTF(fmt, args...) 
do {} while (0) +#endif -typedef struct netback_info { +/* Default length for stack-allocated grant tables */ +#define GNTTAB_LEN (64) - /* Schedule lists */ - STAILQ_ENTRY(netback_info) next_tx; - STAILQ_ENTRY(netback_info) next_rx; - int on_tx_sched_list; - int on_rx_sched_list; +/* Features supported by all backends. TSO and LRO can be negotiated */ +#define XNB_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) - struct xenbus_device *xdev; - XenbusState frontend_state; +#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) - domid_t domid; - int handle; - char *bridge; +/** + * Two argument version of the standard macro. Second argument is a tentative + * value of req_cons + */ +#define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({ \ + unsigned int req = (_r)->sring->req_prod - cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + (cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) - int rings_connected; - struct ring_ref tx_ring_ref; - struct ring_ref rx_ring_ref; - netif_tx_back_ring_t tx; - netif_rx_back_ring_t rx; - evtchn_port_t evtchn; - int irq; - void *irq_cookie; +#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) +#define virt_to_offset(x) ((x) & (PAGE_SIZE - 1)) - struct ifnet *ifp; - int ref_cnt; +/** + * Predefined array type of grant table copy descriptors. Used to pass around + * statically allocated memory structures. + */ +typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN]; + +/*--------------------------- Forward Declarations ---------------------------*/ +struct xnb_softc; +struct xnb_pkt; + +static void xnb_attach_failed(struct xnb_softc *xnb, + int err, const char *fmt, ...) + __printflike(3,4); +static int xnb_shutdown(struct xnb_softc *xnb); +static int create_netdev(device_t dev); +static int xnb_detach(device_t dev); +static int xen_net_read_mac(device_t dev, uint8_t mac[]); +static int xnb_ifmedia_upd(struct ifnet *ifp); +static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); +static void xnb_intr(void *arg); +static int xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend, + const struct mbuf *mbufc, gnttab_copy_table gnttab); +static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, + struct mbuf **mbufc, struct ifnet *ifnet, + gnttab_copy_table gnttab); +static int xnb_ring2pkt(struct xnb_pkt *pkt, + const netif_tx_back_ring_t *tx_ring, + RING_IDX start); +static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, + netif_tx_back_ring_t *ring, int error); +static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp); +static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, + const struct mbuf *mbufc, + gnttab_copy_table gnttab, + const netif_tx_back_ring_t *txb, + domid_t otherend_id); +static void xnb_update_mbufc(struct mbuf *mbufc, + const gnttab_copy_table gnttab, int n_entries); +static int xnb_mbufc2pkt(const struct mbuf *mbufc, + struct xnb_pkt *pkt, + RING_IDX start, int space); +static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, + const struct mbuf *mbufc, + gnttab_copy_table gnttab, + const netif_rx_back_ring_t *rxb, + domid_t otherend_id); +static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, + const gnttab_copy_table gnttab, int n_entries, + netif_rx_back_ring_t *ring); +static void xnb_add_mbuf_cksum(struct mbuf *mbufc); +static void xnb_stop(struct xnb_softc*); +static int xnb_ioctl(struct ifnet*, u_long, caddr_t); +static void xnb_start_locked(struct ifnet*); +static void xnb_start(struct ifnet*); +static void 
xnb_ifinit_locked(struct xnb_softc*); +static void xnb_ifinit(void*); +#ifdef XNB_DEBUG +static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS); +static int xnb_dump_rings(SYSCTL_HANDLER_ARGS); +#endif +/*------------------------------ Data Structures -----------------------------*/ - device_t ndev; - int attached; -} netif_t; +/** + * Representation of a xennet packet. Simplified version of a packet as + * stored in the Xen tx ring. Applicable to both RX and TX packets + */ +struct xnb_pkt{ + /** + * Array index of the first data-bearing (eg, not extra info) entry + * for this packet + */ + RING_IDX car; -#define MAX_PENDING_REQS 256 -#define PKT_PROT_LEN 64 + /** + * Array index of the second data-bearing entry for this packet. + * Invalid if the packet has only one data-bearing entry. If the + * packet has more than two data-bearing entries, then the second + * through the last will be sequential modulo the ring size + */ + RING_IDX cdr; -static struct { - netif_tx_request_t req; - netif_t *netif; -} pending_tx_info[MAX_PENDING_REQS]; -static uint16_t pending_ring[MAX_PENDING_REQS]; -typedef unsigned int PEND_RING_IDX; -#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) -static PEND_RING_IDX pending_prod, pending_cons; -#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + /** + * Optional extra info. Only valid if flags contains + * NETTXF_extra_info. Note that extra.type will always be + * XEN_NETIF_EXTRA_TYPE_GSO. Currently, no known netfront or netback + * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_* + */ + netif_extra_info_t extra; -static unsigned long mmap_vstart; -#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) + /** Size of entire packet in bytes. */ + uint16_t size; -/* Freed TX mbufs get batched on this ring before return to pending_ring. */ -static uint16_t dealloc_ring[MAX_PENDING_REQS]; -static PEND_RING_IDX dealloc_prod, dealloc_cons; + /** The size of the first entry's data in bytes */ + uint16_t car_size; -static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; -static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; -static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE]; + /** + * Either NETTXF_ or NETRXF_ flags. Note that the flag values are + * not the same for TX and RX packets + */ + uint16_t flags; -static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; -static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; -static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; + /** + * The number of valid data-bearing entries (either netif_tx_request's + * or netif_rx_response's) in the packet. If this is 0, it means the + * entire packet is invalid. 
+ */ + uint16_t list_len; -static struct task net_tx_task, net_rx_task; -static struct callout rx_task_callout; + /** There was an error processing the packet */ + uint8_t error; +}; -static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list = - STAILQ_HEAD_INITIALIZER(tx_sched_list); -static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list = - STAILQ_HEAD_INITIALIZER(rx_sched_list); -static struct mtx tx_sched_list_lock; -static struct mtx rx_sched_list_lock; +/** xnb_pkt method: initialize it */ +static inline void +xnb_pkt_initialize(struct xnb_pkt *pxnb) +{ + bzero(pxnb, sizeof(*pxnb)); +} -static int vif_unit_maker = 0; +/** xnb_pkt method: mark the packet as valid */ +static inline void +xnb_pkt_validate(struct xnb_pkt *pxnb) +{ + pxnb->error = 0; +}; -/* Protos */ -static void netback_start(struct ifnet *ifp); -static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); -static int vif_add_dev(struct xenbus_device *xdev); -static void disconnect_rings(netif_t *netif); +/** xnb_pkt method: mark the packet as invalid */ +static inline void +xnb_pkt_invalidate(struct xnb_pkt *pxnb) +{ + pxnb->error = 1; +}; + +/** xnb_pkt method: Check whether the packet is valid */ +static inline int +xnb_pkt_is_valid(const struct xnb_pkt *pxnb) +{ + return (! pxnb->error); +} + +#ifdef XNB_DEBUG +/** xnb_pkt method: print the packet's contents in human-readable format*/ +static void __unused +xnb_dump_pkt(const struct xnb_pkt *pkt) { + if (pkt == NULL) { + DPRINTF("Was passed a null pointer.\n"); + return; + } + DPRINTF("pkt address= %p\n", pkt); + DPRINTF("pkt->size=%d\n", pkt->size); + DPRINTF("pkt->car_size=%d\n", pkt->car_size); + DPRINTF("pkt->flags=0x%04x\n", pkt->flags); + DPRINTF("pkt->list_len=%d\n", pkt->list_len); + /* DPRINTF("pkt->extra"); TODO */ + DPRINTF("pkt->car=%d\n", pkt->car); + DPRINTF("pkt->cdr=%d\n", pkt->cdr); + DPRINTF("pkt->error=%d\n", pkt->error); +} +#endif /* XNB_DEBUG */ -#ifdef XEN_NETBACK_DEBUG_LOTS -/* Debug code to display the contents of an mbuf */ static void -print_mbuf(struct mbuf *m, int max) +xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq) { - int i, j=0; - printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len); - for (; m; m = m->m_next) { - unsigned char *d = m->m_data; - for (i=0; i < m->m_len; i++) { - if (max && j == max) - break; - if ((j++ % 16) == 0) - printf("\n%04x:", j); - printf(" %02x", d[i]); - } + if (txreq != NULL) { + DPRINTF("netif_tx_request index =%u\n", idx); + DPRINTF("netif_tx_request.gref =%u\n", txreq->gref); + DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset); + DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags); + DPRINTF("netif_tx_request.id =%hu\n", txreq->id); + DPRINTF("netif_tx_request.size =%hu\n", txreq->size); } - printf("\n"); } -#endif -#define MAX_MFN_ALLOC 64 -static unsigned long mfn_list[MAX_MFN_ALLOC]; -static unsigned int alloc_index = 0; +/** + * \brief Configuration data for a shared memory request ring + * used to communicate with the front-end client of this + * this driver. + */ +struct xnb_ring_config { + /** + * Runtime structures for ring access. Unfortunately, TX and RX rings + * use different data structures, and that cannot be changed since it + * is part of the interdomain protocol. + */ + union{ + netif_rx_back_ring_t rx_ring; + netif_tx_back_ring_t tx_ring; + } back_ring; + + /** + * The device bus address returned by the hypervisor when + * mapping the ring and required to unmap it when a connection + * is torn down. 
+ */ + uint64_t bus_addr; -static unsigned long -alloc_mfn(void) -{ - unsigned long mfn = 0; - struct xen_memory_reservation reservation = { - .extent_start = mfn_list, - .nr_extents = MAX_MFN_ALLOC, - .extent_order = 0, - .domid = DOMID_SELF - }; - if ( unlikely(alloc_index == 0) ) - alloc_index = HYPERVISOR_memory_op( - XENMEM_increase_reservation, &reservation); - if ( alloc_index != 0 ) - mfn = mfn_list[--alloc_index]; - return mfn; -} + /** The pseudo-physical address where ring memory is mapped.*/ + uint64_t gnt_addr; + + /** KVA address where ring memory is mapped. */ + vm_offset_t va; + + /** + * Grant table handles, one per-ring page, returned by the + * hyperpervisor upon mapping of the ring and required to + * unmap it when a connection is torn down. + */ + grant_handle_t handle; + + /** The number of ring pages mapped for the current connection. */ + unsigned ring_pages; -static unsigned long -alloc_empty_page_range(unsigned long nr_pages) + /** + * The grant references, one per-ring page, supplied by the + * front-end, allowing us to reference the ring pages in the + * front-end's domain and to map these pages into our own domain. + */ + grant_ref_t ring_ref; +}; + +/** + * Per-instance connection state flags. + */ +typedef enum { - void *pages; - int i = 0, j = 0; - multicall_entry_t mcl[17]; - unsigned long mfn_list[16]; - struct xen_memory_reservation reservation = { - .extent_start = mfn_list, - .nr_extents = 0, - .address_bits = 0, - .extent_order = 0, - .domid = DOMID_SELF - }; + /** Communication with the front-end has been established. */ + XNBF_RING_CONNECTED = 0x01, - pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); - if (pages == NULL) - return 0; + /** + * Front-end requests exist in the ring and are waiting for + * xnb_xen_req objects to free up. + */ + XNBF_RESOURCE_SHORTAGE = 0x02, - memset(mcl, 0, sizeof(mcl)); + /** Connection teardown has started. */ + XNBF_SHUTDOWN = 0x04, - while (i < nr_pages) { - unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); + /** A thread is already performing shutdown processing. */ + XNBF_IN_SHUTDOWN = 0x08 +} xnb_flag_t; - mcl[j].op = __HYPERVISOR_update_va_mapping; - mcl[j].args[0] = va; +/** + * Types of rings. Used for array indices and to identify a ring's control + * data structure type + */ +typedef enum{ + XNB_RING_TYPE_TX = 0, /* ID of TX rings, used for array indices */ + XNB_RING_TYPE_RX = 1, /* ID of RX rings, used for array indices */ + XNB_NUM_RING_TYPES +} xnb_ring_type_t; - mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; +/** + * Per-instance configuration data. + */ +struct xnb_softc { + /** NewBus device corresponding to this instance. 
*/ + device_t dev; - xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; + /* Media related fields */ - if (j == 16 || i == nr_pages) { - mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; + /** Generic network media state */ + struct ifmedia sc_media; - reservation.nr_extents = j; + /** Media carrier info */ + struct ifnet *xnb_ifp; - mcl[j].op = __HYPERVISOR_memory_op; - mcl[j].args[0] = XENMEM_decrease_reservation; - mcl[j].args[1] = (unsigned long)&reservation; - - (void)HYPERVISOR_multicall(mcl, j+1); + /** Our own private carrier state */ + unsigned carrier; - mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; - j = 0; - } - } + /** Device MAC Address */ + uint8_t mac[ETHER_ADDR_LEN]; - return (unsigned long)pages; -} + /* Xen related fields */ -#ifdef XEN_NETBACK_FIXUP_CSUM -static void -fixup_checksum(struct mbuf *m) -{ - struct ether_header *eh = mtod(m, struct ether_header *); - struct ip *ip = (struct ip *)(eh + 1); - int iphlen = ip->ip_hl << 2; - int iplen = ntohs(ip->ip_len); - - if ((m->m_pkthdr.csum_flags & CSUM_TCP)) { - struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen); - th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(IPPROTO_TCP + (iplen - iphlen))); - th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen); - m->m_pkthdr.csum_flags &= ~CSUM_TCP; -#ifdef SCTP - } else if (sw_csum & CSUM_SCTP) { - sctp_delayed_cksum(m, iphlen); - sw_csum &= ~CSUM_SCTP; -#endif - } else { - u_short csum; - struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen); - uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(IPPROTO_UDP + (iplen - iphlen))); - if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0) - csum = 0xffff; - uh->uh_sum = csum; - m->m_pkthdr.csum_flags &= ~CSUM_UDP; - } -} + /** + * \brief The netif protocol abi in effect. + * + * There are situations where the back and front ends can + * have a different, native abi (e.g. intel x86_64 and + * 32bit x86 domains on the same machine). The back-end + * always accomodates the front-end's native abi. That + * value is pulled from the XenStore and recorded here. + */ + int abi; + + /** + * Name of the bridge to which this VIF is connected, if any + * This field is dynamically allocated by xenbus and must be free()ed + * when no longer needed + */ + char *bridge; + + /** The interrupt driven even channel used to signal ring events. */ + evtchn_port_t evtchn; + + /** Xen device handle.*/ + long handle; + + /** IRQ mapping for the communication ring event channel. */ + int irq; + + /** + * \brief Cached value of the front-end's domain id. + * + * This value is used at once for each mapped page in + * a transaction. We cache it to avoid incuring the + * cost of an ivar access every time this is needed. + */ + domid_t otherend_id; + + /** + * Undocumented frontend feature. Has something to do with + * scatter/gather IO + */ + uint8_t can_sg; + /** Undocumented frontend feature */ + uint8_t gso; + /** Undocumented frontend feature */ + uint8_t gso_prefix; + /** Can checksum TCP/UDP over IPv4 */ + uint8_t ip_csum; + + /* Implementation related fields */ + /** + * Preallocated grant table copy descriptor for RX operations. + * Access must be protected by rx_lock + */ + gnttab_copy_table rx_gnttab; + + /** + * Preallocated grant table copy descriptor for TX operations. 
+ * Access must be protected by tx_lock + */ + gnttab_copy_table tx_gnttab; + +#ifdef XENHVM + /** + * Resource representing allocated physical address space + * associated with our per-instance kva region. + */ + struct resource *pseudo_phys_res; + + /** Resource id for allocated physical address space. */ + int pseudo_phys_res_id; #endif -/* Add the interface to the specified bridge */ -static int -add_to_bridge(struct ifnet *ifp, char *bridge) -{ - struct ifdrv ifd; - struct ifbreq ifb; - struct ifnet *ifp_bridge = ifunit(bridge); + /** Ring mapping and interrupt configuration data. */ + struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES]; - if (!ifp_bridge) - return ENOENT; + /** + * Global pool of kva used for mapping remote domain ring + * and I/O transaction data. + */ + vm_offset_t kva; - bzero(&ifd, sizeof(ifd)); - bzero(&ifb, sizeof(ifb)); + /** Psuedo-physical address corresponding to kva. */ + uint64_t gnt_base_addr; - strcpy(ifb.ifbr_ifsname, ifp->if_xname); - strcpy(ifd.ifd_name, ifp->if_xname); - ifd.ifd_cmd = BRDGADD; - ifd.ifd_len = sizeof(ifb); - ifd.ifd_data = &ifb; + /** Various configuration and state bit flags. */ + xnb_flag_t flags; - return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd); - -} + /** Mutex protecting per-instance data in the receive path. */ + struct mtx rx_lock; -static int -netif_create(int handle, struct xenbus_device *xdev, char *bridge) -{ - netif_t *netif; - struct ifnet *ifp; + /** Mutex protecting per-instance data in the softc structure. */ + struct mtx sc_lock; - netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO); - if (!netif) - return ENOMEM; + /** Mutex protecting per-instance data in the transmit path. */ + struct mtx tx_lock; - netif->ref_cnt = 1; - netif->handle = handle; - netif->domid = xdev->otherend_id; - netif->xdev = xdev; - netif->bridge = bridge; - xdev->data = netif; - - /* Set up ifnet structure */ - ifp = netif->ifp = if_alloc(IFT_ETHER); - if (!ifp) { - if (bridge) - free(bridge, M_DEVBUF); - free(netif, M_DEVBUF); - return ENOMEM; + /** The size of the global kva pool. 
*/ + int kva_size; +}; + +/*---------------------------- Debugging functions ---------------------------*/ +#ifdef XNB_DEBUG +static void __unused +xnb_dump_gnttab_copy(const struct gnttab_copy *entry) +{ + if (entry == NULL) { + printf("NULL grant table pointer\n"); + return; } - ifp->if_softc = netif; - if_initname(ifp, "vif", - atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ ); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; - ifp->if_output = ether_output; - ifp->if_start = netback_start; - ifp->if_ioctl = netback_ioctl; - ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; - - DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle); + if (entry->flags & GNTCOPY_dest_gref) + printf("gnttab dest ref=\t%u\n", entry->dest.u.ref); + else + printf("gnttab dest gmfn=\t%lu\n", entry->dest.u.gmfn); + printf("gnttab dest offset=\t%hu\n", entry->dest.offset); + printf("gnttab dest domid=\t%hu\n", entry->dest.domid); + if (entry->flags & GNTCOPY_source_gref) + printf("gnttab source ref=\t%u\n", entry->source.u.ref); + else + printf("gnttab source gmfn=\t%lu\n", entry->source.u.gmfn); + printf("gnttab source offset=\t%hu\n", entry->source.offset); + printf("gnttab source domid=\t%hu\n", entry->source.domid); + printf("gnttab len=\t%hu\n", entry->len); + printf("gnttab flags=\t%hu\n", entry->flags); + printf("gnttab status=\t%hd\n", entry->status); +} - return 0; +static int +xnb_dump_rings(SYSCTL_HANDLER_ARGS) +{ + static char results[720]; + struct xnb_softc const* xnb = (struct xnb_softc*)arg1; + netif_rx_back_ring_t const* rxb = + &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; + netif_tx_back_ring_t const* txb = + &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; + + /* empty the result strings */ + results[0] = 0; + + if ( !txb || !txb->sring || !rxb || !rxb->sring ) + return (SYSCTL_OUT(req, results, strnlen(results, 720))); + + snprintf(results, 720, + "\n\t%35s %18s\n" /* TX, RX */ + "\t%16s %18d %18d\n" /* req_cons */ + "\t%16s %18d %18d\n" /* nr_ents */ + "\t%16s %18d %18d\n" /* rsp_prod_pvt */ + "\t%16s %18p %18p\n" /* sring */ + "\t%16s %18d %18d\n" /* req_prod */ + "\t%16s %18d %18d\n" /* req_event */ + "\t%16s %18d %18d\n" /* rsp_prod */ + "\t%16s %18d %18d\n", /* rsp_event */ + "TX", "RX", + "req_cons", txb->req_cons, rxb->req_cons, + "nr_ents", txb->nr_ents, rxb->nr_ents, + "rsp_prod_pvt", txb->rsp_prod_pvt, rxb->rsp_prod_pvt, + "sring", txb->sring, rxb->sring, + "sring->req_prod", txb->sring->req_prod, rxb->sring->req_prod, + "sring->req_event", txb->sring->req_event, rxb->sring->req_event, + "sring->rsp_prod", txb->sring->rsp_prod, rxb->sring->rsp_prod, + "sring->rsp_event", txb->sring->rsp_event, rxb->sring->rsp_event); + + return (SYSCTL_OUT(req, results, strnlen(results, 720))); } -static void -netif_get(netif_t *netif) +static void __unused +xnb_dump_mbuf(const struct mbuf *m) { - atomic_add_int(&netif->ref_cnt, 1); + int len; + uint8_t *d; + if (m == NULL) + return; + + printf("xnb_dump_mbuf:\n"); + if (m->m_flags & M_PKTHDR) { + printf(" flowid=%10d, csum_flags=%#8x, csum_data=%#8x, " + "tso_segsz=%5hd\n", + m->m_pkthdr.flowid, m->m_pkthdr.csum_flags, + m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz); + printf(" rcvif=%16p, header=%18p, len=%19d\n", + m->m_pkthdr.rcvif, m->m_pkthdr.header, m->m_pkthdr.len); + } + printf(" m_next=%16p, m_nextpk=%16p, m_data=%16p\n", + m->m_next, m->m_nextpkt, m->m_data); + printf(" m_len=%17d, m_flags=%#15x, m_type=%18hd\n", + m->m_len, m->m_flags, m->m_type); + + len = m->m_len; + d = mtod(m, 
uint8_t*); + while (len > 0) { + int i; + printf(" "); + for (i = 0; (i < 16) && (len > 0); i++, len--) { + printf("%02hhx ", *(d++)); + } + printf("\n"); + } } +#endif /* XNB_DEBUG */ +/*------------------------ Inter-Domain Communication ------------------------*/ +/** + * Free dynamically allocated KVA or pseudo-physical address allocations. + * + * \param xnb Per-instance xnb configuration structure. + */ static void -netif_put(netif_t *netif) +xnb_free_communication_mem(struct xnb_softc *xnb) { - if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) { - DPRINTF("%s\n", IFNAME(netif)); - disconnect_rings(netif); - if (netif->ifp) { - if_free(netif->ifp); - netif->ifp = NULL; + if (xnb->kva != 0) { +#ifndef XENHVM + kmem_free(kernel_map, xnb->kva, xnb->kva_size); +#else + if (xnb->pseudo_phys_res != NULL) { + bus_release_resource(xnb->dev, SYS_RES_MEMORY, + xnb->pseudo_phys_res_id, + xnb->pseudo_phys_res); + xnb->pseudo_phys_res = NULL; } - if (netif->bridge) - free(netif->bridge, M_DEVBUF); - free(netif, M_DEVBUF); +#endif /* XENHVM */ } + xnb->kva = 0; + xnb->gnt_base_addr = 0; } +/** + * Cleanup all inter-domain communication mechanisms. + * + * \param xnb Per-instance xnb configuration structure. + */ static int -netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +xnb_disconnect(struct xnb_softc *xnb) { - switch (cmd) { - case SIOCSIFFLAGS: - DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n", - IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags); - return 0; + struct gnttab_unmap_grant_ref gnts[XNB_NUM_RING_TYPES]; + int error; + int i; + + if (xnb->irq != 0) { + unbind_from_irqhandler(xnb->irq); + xnb->irq = 0; } - DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd); + /* + * We may still have another thread currently processing requests. We + * must acquire the rx and tx locks to make sure those threads are done, + * but we can release those locks as soon as we acquire them, because no + * more interrupts will be arriving. + */ + mtx_lock(&xnb->tx_lock); + mtx_unlock(&xnb->tx_lock); + mtx_lock(&xnb->rx_lock); + mtx_unlock(&xnb->rx_lock); + + /* Free malloc'd softc member variables */ + if (xnb->bridge != NULL) + free(xnb->bridge, M_XENSTORE); + + /* All request processing has stopped, so unmap the rings */ + for (i=0; i < XNB_NUM_RING_TYPES; i++) { + gnts[i].host_addr = xnb->ring_configs[i].gnt_addr; + gnts[i].dev_bus_addr = xnb->ring_configs[i].bus_addr; + gnts[i].handle = xnb->ring_configs[i].handle; + } + error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, gnts, + XNB_NUM_RING_TYPES); + KASSERT(error == 0, ("Grant table unmap op failed (%d)", error)); - return ether_ioctl(ifp, cmd, data); -} + xnb_free_communication_mem(xnb); + /* + * Zero the ring config structs because the pointers, handles, and + * grant refs contained therein are no longer valid. 
+ */ + bzero(&xnb->ring_configs[XNB_RING_TYPE_TX], + sizeof(struct xnb_ring_config)); + bzero(&xnb->ring_configs[XNB_RING_TYPE_RX], + sizeof(struct xnb_ring_config)); -static inline void -maybe_schedule_tx_action(void) -{ - smp_mb(); - if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list)) - taskqueue_enqueue(taskqueue_swi, &net_tx_task); + xnb->flags &= ~XNBF_RING_CONNECTED; + return (0); } -/* Removes netif from front of list and does not call netif_put() (caller must) */ -static netif_t * -remove_from_tx_schedule_list(void) +/** + * Map a single shared memory ring into domain local address space and + * initialize its control structure + * + * \param xnb Per-instance xnb configuration structure + * \param ring_type Array index of this ring in the xnb's array of rings + * \return An errno + */ +static int +xnb_connect_ring(struct xnb_softc *xnb, xnb_ring_type_t ring_type) { - netif_t *netif; + struct gnttab_map_grant_ref gnt; + struct xnb_ring_config *ring = &xnb->ring_configs[ring_type]; + int error; - mtx_lock(&tx_sched_list_lock); - - if ((netif = STAILQ_FIRST(&tx_sched_list))) { - STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx); - STAILQ_NEXT(netif, next_tx) = NULL; - netif->on_tx_sched_list = 0; - } + /* TX ring type = 0, RX =1 */ + ring->va = xnb->kva + ring_type * PAGE_SIZE; + ring->gnt_addr = xnb->gnt_base_addr + ring_type * PAGE_SIZE; - mtx_unlock(&tx_sched_list_lock); + gnt.host_addr = ring->gnt_addr; + gnt.flags = GNTMAP_host_map; + gnt.ref = ring->ring_ref; + gnt.dom = xnb->otherend_id; - return netif; -} - -/* Adds netif to end of list and calls netif_get() */ -static void -add_to_tx_schedule_list_tail(netif_t *netif) -{ - if (netif->on_tx_sched_list) - return; + error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &gnt, 1); + if (error != 0) + panic("netback: Ring page grant table op failed (%d)", error); - mtx_lock(&tx_sched_list_lock); - if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { - netif_get(netif); - STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx); - netif->on_tx_sched_list = 1; + if (gnt.status != 0) { + ring->va = 0; + error = EACCES; + xenbus_dev_fatal(xnb->dev, error, + "Ring shared page mapping failed. " + "Status %d.", gnt.status); + } else { + ring->handle = gnt.handle; + ring->bus_addr = gnt.dev_bus_addr; + + if (ring_type == XNB_RING_TYPE_TX) { + BACK_RING_INIT(&ring->back_ring.tx_ring, + (netif_tx_sring_t*)ring->va, + ring->ring_pages * PAGE_SIZE); + } else if (ring_type == XNB_RING_TYPE_RX) { + BACK_RING_INIT(&ring->back_ring.rx_ring, + (netif_rx_sring_t*)ring->va, + ring->ring_pages * PAGE_SIZE); + } else { + xenbus_dev_fatal(xnb->dev, error, + "Unknown ring type %d", ring_type); + } } - mtx_unlock(&tx_sched_list_lock); + + return error; } -/* - * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER: - * If this driver is pipelining transmit requests then we can be very - * aggressive in avoiding new-packet notifications -- frontend only needs to - * send a notification if there are no outstanding unreceived responses. - * If we may be buffer transmit buffers for any reason then we must be rather - * more conservative and treat this as the final check for pending work. +/** + * Setup the shared memory rings and bind an interrupt to the event channel + * used to notify us of ring changes. + * + * \param xnb Per-instance xnb configuration structure. 
*/ -static void -netif_schedule_tx_work(netif_t *netif) +static int +xnb_connect_comms(struct xnb_softc *xnb) { - int more_to_do; + int error; + xnb_ring_type_t i; -#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER - more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx); -#else - RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); -#endif + if ((xnb->flags & XNBF_RING_CONNECTED) != 0) + return (0); - if (more_to_do) { - DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif)); - add_to_tx_schedule_list_tail(netif); - maybe_schedule_tx_action(); + /* + * Kva for our rings are at the tail of the region of kva allocated + * by xnb_alloc_communication_mem(). + */ + for (i=0; i < XNB_NUM_RING_TYPES; i++) { + error = xnb_connect_ring(xnb, i); + if (error != 0) + return error; } -} -static struct mtx dealloc_lock; -MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS); + xnb->flags |= XNBF_RING_CONNECTED; + + error = + bind_interdomain_evtchn_to_irqhandler(xnb->otherend_id, + xnb->evtchn, + device_get_nameunit(xnb->dev), + xnb_intr, /*arg*/xnb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xnb->irq); + if (error != 0) { + (void)xnb_disconnect(xnb); + xenbus_dev_fatal(xnb->dev, error, "binding event channel"); + return (error); + } -static void -netif_idx_release(uint16_t pending_idx) -{ - mtx_lock_spin(&dealloc_lock); - dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx; - mtx_unlock_spin(&dealloc_lock); + DPRINTF("rings connected!\n"); - taskqueue_enqueue(taskqueue_swi, &net_tx_task); + return (0); } -static void -make_tx_response(netif_t *netif, - uint16_t id, - int8_t st) +/** + * Size KVA and pseudo-physical address allocations based on negotiated + * values for the size and number of I/O requests, and the size of our + * communication ring. + * + * \param xnb Per-instance xnb configuration structure. + * + * These address spaces are used to dynamically map pages in the + * front-end's domain into our own. + */ +static int +xnb_alloc_communication_mem(struct xnb_softc *xnb) { - RING_IDX i = netif->tx.rsp_prod_pvt; - netif_tx_response_t *resp; - int notify; - - resp = RING_GET_RESPONSE(&netif->tx, i); - resp->id = id; - resp->status = st; - - netif->tx.rsp_prod_pvt = ++i; - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); - if (notify) - notify_remote_via_irq(netif->irq); - -#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER - if (i == netif->tx.req_cons) { - int more_to_do; - RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); - if (more_to_do) - add_to_tx_schedule_list_tail(netif); + xnb_ring_type_t i; + + xnb->kva_size = 0; + for (i=0; i < XNB_NUM_RING_TYPES; i++) { + xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE; } -#endif +#ifndef XENHVM + xnb->kva = kmem_alloc_nofault(kernel_map, xnb->kva_size); + if (xnb->kva == 0) + return (ENOMEM); + xnb->gnt_base_addr = xnb->kva; +#else /* defined XENHVM */ + /* + * Reserve a range of pseudo physical memory that we can map + * into kva. These pages will only be backed by machine + * pages ("real memory") during the lifetime of front-end requests + * via grant table operations. We will map the netif tx and rx rings + * into this space. 
+ */ + xnb->pseudo_phys_res_id = 0; + xnb->pseudo_phys_res = bus_alloc_resource(xnb->dev, SYS_RES_MEMORY, + &xnb->pseudo_phys_res_id, + 0, ~0, xnb->kva_size, + RF_ACTIVE); + if (xnb->pseudo_phys_res == NULL) { + xnb->kva = 0; + return (ENOMEM); + } + xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res); + xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res); +#endif /* !defined XENHVM */ + return (0); } -static inline void -net_tx_action_dealloc(void) +/** + * Collect information from the XenStore related to our device and its frontend + * + * \param xnb Per-instance xnb configuration structure. + */ +static int +xnb_collect_xenstore_info(struct xnb_softc *xnb) { - gnttab_unmap_grant_ref_t *gop; - uint16_t pending_idx; - PEND_RING_IDX dc, dp; - netif_t *netif; - int ret; + /** + * \todo Linux collects the following info. We should collect most + * of this, too: + * "feature-rx-notify" + */ + const char *otherend_path; + const char *our_path; + int err; + unsigned int rx_copy, bridge_len; + uint8_t no_csum_offload; + + otherend_path = xenbus_get_otherend_path(xnb->dev); + our_path = xenbus_get_node(xnb->dev); + + /* Collect the critical communication parameters */ + err = xs_gather(XST_NIL, otherend_path, + "tx-ring-ref", "%l" PRIu32, + &xnb->ring_configs[XNB_RING_TYPE_TX].ring_ref, + "rx-ring-ref", "%l" PRIu32, + &xnb->ring_configs[XNB_RING_TYPE_RX].ring_ref, + "event-channel", "%" PRIu32, &xnb->evtchn, + NULL); + if (err != 0) { + xenbus_dev_fatal(xnb->dev, err, + "Unable to retrieve ring information from " + "frontend %s. Unable to connect.", + otherend_path); + return (err); + } - dc = dealloc_cons; - dp = dealloc_prod; + /* Collect the handle from xenstore */ + err = xs_scanf(XST_NIL, our_path, "handle", NULL, "%li", &xnb->handle); + if (err != 0) { + xenbus_dev_fatal(xnb->dev, err, + "Error reading handle from frontend %s. " + "Unable to connect.", otherend_path); + } /* - * Free up any grants we have finished using + * Collect the bridgename, if any. We do not need bridge_len; we just + * throw it away */ - gop = tx_unmap_ops; - while (dc != dp) { - pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; - gop->host_addr = MMAP_VADDR(pending_idx); - gop->dev_bus_addr = 0; - gop->handle = grant_tx_handle[pending_idx]; - gop++; - } - ret = HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); - BUG_ON(ret); + err = xs_read(XST_NIL, our_path, "bridge", &bridge_len, + (void**)&xnb->bridge); + if (err != 0) + xnb->bridge = NULL; - while (dealloc_cons != dp) { - pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; + /* + * Does the frontend request that we use rx copy? If not, return an + * error because this driver only supports rx copy. + */ + err = xs_scanf(XST_NIL, otherend_path, "request-rx-copy", NULL, + "%" PRIu32, &rx_copy); + if (err == ENOENT) { + err = 0; + rx_copy = 0; + } + if (err < 0) { + xenbus_dev_fatal(xnb->dev, err, "reading %s/request-rx-copy", + otherend_path); + return err; + } + /** + * \todo: figure out the exact meaning of this feature, and when + * the frontend will set it to true. It should be set to true + * at some point + */ +/* if (!rx_copy)*/ +/* return EOPNOTSUPP;*/ - netif = pending_tx_info[pending_idx].netif; + /** \todo Collect the rx notify feature */ - make_tx_response(netif, pending_tx_info[pending_idx].req.id, - NETIF_RSP_OKAY); - - pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + /* Collect the feature-sg. 
*/ + if (xs_scanf(XST_NIL, otherend_path, "feature-sg", NULL, + "%hhu", &xnb->can_sg) < 0) + xnb->can_sg = 0; - netif_put(netif); - } -} + /* Collect remaining frontend features */ + if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4", NULL, + "%hhu", &xnb->gso) < 0) + xnb->gso = 0; -static void -netif_page_release(void *buf, void *args) -{ - uint16_t pending_idx = (unsigned int)args; - - DDPRINTF("pending_idx=%u\n", pending_idx); + if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4-prefix", NULL, + "%hhu", &xnb->gso_prefix) < 0) + xnb->gso_prefix = 0; - KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx)); + if (xs_scanf(XST_NIL, otherend_path, "feature-no-csum-offload", NULL, + "%hhu", &no_csum_offload) < 0) + no_csum_offload = 0; + xnb->ip_csum = (no_csum_offload == 0); - netif_idx_release(pending_idx); + return (0); } -static void -net_tx_action(void *context, int pending) +/** + * Supply information about the physical device to the frontend + * via XenBus. + * + * \param xnb Per-instance xnb configuration structure. + */ +static int +xnb_publish_backend_info(struct xnb_softc *xnb) { - struct mbuf *m; - netif_t *netif; - netif_tx_request_t txreq; - uint16_t pending_idx; - RING_IDX i; - gnttab_map_grant_ref_t *mop; - int ret, work_to_do; - struct mbuf *txq = NULL, *txq_last = NULL; - - if (dealloc_cons != dealloc_prod) - net_tx_action_dealloc(); - - mop = tx_map_ops; - while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) { - - /* Get a netif from the list with work to do. */ - netif = remove_from_tx_schedule_list(); - - DDPRINTF("Processing %s (prod=%u, cons=%u)\n", - IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons); - - RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); - if (!work_to_do) { - netif_put(netif); - continue; + struct xs_transaction xst; + const char *our_path; + int error; + + our_path = xenbus_get_node(xnb->dev); + + do { + error = xs_transaction_start(&xst); + if (error != 0) { + xenbus_dev_fatal(xnb->dev, error, + "Error publishing backend info " + "(start transaction)"); + break; } - i = netif->tx.req_cons; - rmb(); /* Ensure that we see the request before we copy it. */ - memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq)); + error = xs_printf(xst, our_path, "feature-sg", + "%d", XNB_SG); + if (error != 0) + break; - /* If we want credit-based scheduling, coud add it here - WORK */ + error = xs_printf(xst, our_path, "feature-gso-tcpv4", + "%d", XNB_GSO_TCPV4); + if (error != 0) + break; - netif->tx.req_cons++; + error = xs_printf(xst, our_path, "feature-rx-copy", + "%d", XNB_RX_COPY); + if (error != 0) + break; - netif_schedule_tx_work(netif); + error = xs_printf(xst, our_path, "feature-rx-flip", + "%d", XNB_RX_FLIP); + if (error != 0) + break; - if (unlikely(txreq.size < ETHER_HDR_LEN) || - unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) { - WPRINTF("Bad packet size: %d\n", txreq.size); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); - continue; + error = xs_transaction_end(xst, 0); + if (error != 0 && error != EAGAIN) { + xenbus_dev_fatal(xnb->dev, error, "ending transaction"); + break; } - /* No crossing a page as the payload mustn't fragment. 
*/ - if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) { - WPRINTF("txreq.offset: %x, size: %u, end: %u\n", - txreq.offset, txreq.size, - (txreq.offset & PAGE_MASK) + txreq.size); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); - continue; - } + } while (error == EAGAIN); - pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; + return (error); +} - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (!m) { - WPRINTF("Failed to allocate mbuf\n"); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); - break; - } - m->m_pkthdr.rcvif = netif->ifp; - - if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) { - struct mbuf *n; - MGET(n, M_DONTWAIT, MT_DATA); - if (!(m->m_next = n)) { - m_freem(m); - WPRINTF("Failed to allocate second mbuf\n"); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); - break; - } - n->m_len = txreq.size - PKT_PROT_LEN; - m->m_len = PKT_PROT_LEN; - } else - m->m_len = txreq.size; - - mop->host_addr = MMAP_VADDR(pending_idx); - mop->dom = netif->domid; - mop->ref = txreq.gref; - mop->flags = GNTMAP_host_map | GNTMAP_readonly; - mop++; - - memcpy(&pending_tx_info[pending_idx].req, - &txreq, sizeof(txreq)); - pending_tx_info[pending_idx].netif = netif; - *((uint16_t *)m->m_data) = pending_idx; - - if (txq_last) - txq_last->m_nextpkt = m; - else - txq = m; - txq_last = m; - - pending_cons++; - - if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) - break; - } +/** + * Connect to our netfront peer now that it has completed publishing + * its configuration into the XenStore. + * + * \param xnb Per-instance xnb configuration structure. + */ +static void +xnb_connect(struct xnb_softc *xnb) +{ + int error; - if (!txq) + if (xenbus_get_state(xnb->dev) == XenbusStateConnected) return; - ret = HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); - BUG_ON(ret); - - mop = tx_map_ops; - while ((m = txq) != NULL) { - caddr_t data; - - txq = m->m_nextpkt; - m->m_nextpkt = NULL; - - pending_idx = *((uint16_t *)m->m_data); - netif = pending_tx_info[pending_idx].netif; - memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq)); - - /* Check the remap error code. */ - if (unlikely(mop->status)) { - WPRINTF("#### netback grant fails\n"); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); - m_freem(m); - mop++; - pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; - continue; - } + if (xnb_collect_xenstore_info(xnb) != 0) + return; -#if 0 - /* Can't do this in FreeBSD since vtophys() returns the pfn */ - /* of the remote domain who loaned us the machine page - DPT */ - xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] = - mop->dev_bus_addr >> PAGE_SHIFT; -#endif - grant_tx_handle[pending_idx] = mop->handle; - - /* Setup data in mbuf (lengths are already set) */ - data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset); - bcopy(data, m->m_data, m->m_len); - if (m->m_next) { - struct mbuf *n = m->m_next; - MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release, - (void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV); - n->m_data = &data[PKT_PROT_LEN]; - } else { - /* Schedule a response immediately. */ - netif_idx_release(pending_idx); - } + xnb->flags &= ~XNBF_SHUTDOWN; - if ((txreq.flags & NETTXF_data_validated)) { - /* Tell the stack the checksums are okay */ - m->m_pkthdr.csum_flags |= - (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); - m->m_pkthdr.csum_data = 0xffff; - } + /* Read front end configuration. 
*/ - /* If necessary, inform stack to compute the checksums if it forwards the packet */ - if ((txreq.flags & NETTXF_csum_blank)) { - struct ether_header *eh = mtod(m, struct ether_header *); - if (ntohs(eh->ether_type) == ETHERTYPE_IP) { - struct ip *ip = (struct ip *)&m->m_data[14]; - if (ip->ip_p == IPPROTO_TCP) - m->m_pkthdr.csum_flags |= CSUM_TCP; - else if (ip->ip_p == IPPROTO_UDP) - m->m_pkthdr.csum_flags |= CSUM_UDP; - } - } + /* Allocate resources whose size depends on front-end configuration. */ + error = xnb_alloc_communication_mem(xnb); + if (error != 0) { + xenbus_dev_fatal(xnb->dev, error, + "Unable to allocate communication memory"); + return; + } - netif->ifp->if_ibytes += m->m_pkthdr.len; - netif->ifp->if_ipackets++; + /* + * Connect communication channel. + */ + error = xnb_connect_comms(xnb); + if (error != 0) { + /* Specific errors are reported by xnb_connect_comms(). */ + return; + } + xnb->carrier = 1; + + /* Ready for I/O. */ + xenbus_set_state(xnb->dev, XenbusStateConnected); +} + +/*-------------------------- Device Teardown Support -------------------------*/ +/** + * Perform device shutdown functions. + * + * \param xnb Per-instance xnb configuration structure. + * + * Mark this instance as shutting down, wait for any active requests + * to drain, disconnect from the front-end, and notify any waiters (e.g. + * a thread invoking our detach method) that detach can now proceed. + */ +static int +xnb_shutdown(struct xnb_softc *xnb) +{ + /* + * Due to the need to drop our mutex during some + * xenbus operations, it is possible for two threads + * to attempt to close out shutdown processing at + * the same time. Tell the caller that hits this + * race to try back later. + */ + if ((xnb->flags & XNBF_IN_SHUTDOWN) != 0) + return (EAGAIN); - DDPRINTF("RECV %d bytes from %s (cflags=%x)\n", - m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags); - DPRINTF_MBUF_LEN(m, 128); + xnb->flags |= XNBF_SHUTDOWN; - (*netif->ifp->if_input)(netif->ifp, m); + xnb->flags |= XNBF_IN_SHUTDOWN; - mop++; + mtx_unlock(&xnb->sc_lock); + /* Free the network interface */ + xnb->carrier = 0; + if (xnb->xnb_ifp != NULL) { + ether_ifdetach(xnb->xnb_ifp); + if_free(xnb->xnb_ifp); + xnb->xnb_ifp = NULL; } + mtx_lock(&xnb->sc_lock); + + xnb_disconnect(xnb); + + mtx_unlock(&xnb->sc_lock); + if (xenbus_get_state(xnb->dev) < XenbusStateClosing) + xenbus_set_state(xnb->dev, XenbusStateClosing); + mtx_lock(&xnb->sc_lock); + + xnb->flags &= ~XNBF_IN_SHUTDOWN; + + + /* Indicate to xnb_detach() that is it safe to proceed. */ + wakeup(xnb); + + return (0); } -/* Handle interrupt from a frontend */ +/** + * Report an attach time error to the console and Xen, and cleanup + * this instance by forcing immediate detach processing. + * + * \param xnb Per-instance xnb configuration structure. + * \param err Errno describing the error. + * \param fmt Printf style format and arguments + */ static void -netback_intr(void *arg) +xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) 
{ - netif_t *netif = arg; - DDPRINTF("%s\n", IFNAME(netif)); - add_to_tx_schedule_list_tail(netif); - maybe_schedule_tx_action(); + va_list ap; + va_list ap_hotplug; + + va_start(ap, fmt); + va_copy(ap_hotplug, ap); + xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev), + "hotplug-error", fmt, ap_hotplug); + va_end(ap_hotplug); + xs_printf(XST_NIL, xenbus_get_node(xnb->dev), + "hotplug-status", "error"); + + xenbus_dev_vfatal(xnb->dev, err, fmt, ap); + va_end(ap); + + xs_printf(XST_NIL, xenbus_get_node(xnb->dev), + "online", "0"); + xnb_detach(xnb->dev); } -/* Removes netif from front of list and does not call netif_put() (caller must) */ -static netif_t * -remove_from_rx_schedule_list(void) +/*---------------------------- NewBus Entrypoints ----------------------------*/ +/** + * Inspect a XenBus device and claim it if is of the appropriate type. + * + * \param dev NewBus device object representing a candidate XenBus device. + * + * \return 0 for success, errno codes for failure. + */ +static int +xnb_probe(device_t dev) { - netif_t *netif; - - mtx_lock(&rx_sched_list_lock); - - if ((netif = STAILQ_FIRST(&rx_sched_list))) { - STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx); - STAILQ_NEXT(netif, next_rx) = NULL; - netif->on_rx_sched_list = 0; + if (!strcmp(xenbus_get_type(dev), "vif")) { + DPRINTF("Claiming device %d, %s\n", device_get_unit(dev), + devclass_get_name(device_get_devclass(dev))); + device_set_desc(dev, "Backend Virtual Network Device"); + device_quiet(dev); + return (0); } - - mtx_unlock(&rx_sched_list_lock); - - return netif; + return (ENXIO); } -/* Adds netif to end of list and calls netif_get() */ +/** + * Setup sysctl variables to control various Network Back parameters. + * + * \param xnb Xen Net Back softc. + * + */ static void -add_to_rx_schedule_list_tail(netif_t *netif) +xnb_setup_sysctl(struct xnb_softc *xnb) { - if (netif->on_rx_sched_list) + struct sysctl_ctx_list *sysctl_ctx = NULL; + struct sysctl_oid *sysctl_tree = NULL; + + sysctl_ctx = device_get_sysctl_ctx(xnb->dev); + if (sysctl_ctx == NULL) return; - mtx_lock(&rx_sched_list_lock); - if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { - netif_get(netif); - STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx); - netif->on_rx_sched_list = 1; - } - mtx_unlock(&rx_sched_list_lock); + sysctl_tree = device_get_sysctl_tree(xnb->dev); + if (sysctl_tree == NULL) + return; + +#ifdef XNB_DEBUG + SYSCTL_ADD_PROC(sysctl_ctx, + SYSCTL_CHILDREN(sysctl_tree), + OID_AUTO, + "unit_test_results", + CTLTYPE_STRING | CTLFLAG_RD, + xnb, + 0, + xnb_unit_test_main, + "A", + "Results of builtin unit tests"); + + SYSCTL_ADD_PROC(sysctl_ctx, + SYSCTL_CHILDREN(sysctl_tree), + OID_AUTO, + "dump_rings", + CTLTYPE_STRING | CTLFLAG_RD, + xnb, + 0, + xnb_dump_rings, + "A", + "Xennet Back Rings"); +#endif /* XNB_DEBUG */ } -static int -make_rx_response(netif_t *netif, uint16_t id, int8_t st, - uint16_t offset, uint16_t size, uint16_t flags) +/** + * Create a network device. 
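+ * Allocates the softc locks and ifmedia state, reads the MAC address
+ * from the XenStore, and attaches an ifnet via ether_ifattach(); the
+ * carrier is left down until the rings are connected.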
+ * @param handle device handle + */ +int +create_netdev(device_t dev) { - RING_IDX i = netif->rx.rsp_prod_pvt; - netif_rx_response_t *resp; - int notify; + struct ifnet *ifp; + struct xnb_softc *xnb; + int err = 0; - resp = RING_GET_RESPONSE(&netif->rx, i); - resp->offset = offset; - resp->flags = flags; - resp->id = id; - resp->status = (int16_t)size; - if (st < 0) - resp->status = (int16_t)st; + xnb = device_get_softc(dev); + mtx_init(&xnb->sc_lock, "xnb_softc", "xen netback softc lock", MTX_DEF); + mtx_init(&xnb->tx_lock, "xnb_tx", "xen netback tx lock", MTX_DEF); + mtx_init(&xnb->rx_lock, "xnb_rx", "xen netback rx lock", MTX_DEF); + + xnb->dev = dev; + + ifmedia_init(&xnb->sc_media, 0, xnb_ifmedia_upd, xnb_ifmedia_sts); + ifmedia_add(&xnb->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); + ifmedia_set(&xnb->sc_media, IFM_ETHER|IFM_MANUAL); + + err = xen_net_read_mac(dev, xnb->mac); + if (err == 0) { + /* Set up ifnet structure */ + ifp = xnb->xnb_ifp = if_alloc(IFT_ETHER); + ifp->if_softc = xnb; + if_initname(ifp, "xnb", device_get_unit(dev)); + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_ioctl = xnb_ioctl; + ifp->if_output = ether_output; + ifp->if_start = xnb_start; +#ifdef notyet + ifp->if_watchdog = xnb_watchdog; +#endif + ifp->if_init = xnb_ifinit; + ifp->if_mtu = ETHERMTU; + ifp->if_snd.ifq_maxlen = NET_RX_RING_SIZE - 1; - DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n", - i, resp->offset, resp->flags, resp->id, resp->status); + ifp->if_hwassist = XNB_CSUM_FEATURES; + ifp->if_capabilities = IFCAP_HWCSUM; + ifp->if_capenable = IFCAP_HWCSUM; - netif->rx.rsp_prod_pvt = ++i; - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify); + ether_ifattach(ifp, xnb->mac); + xnb->carrier = 0; + } - return notify; + return err; } +/** + * Attach to a XenBus device that has been claimed by our probe routine. + * + * \param dev NewBus device object representing this Xen Net Back instance. + * + * \return 0 for success, errno codes for failure. + */ static int -netif_rx(netif_t *netif) +xnb_attach(device_t dev) { - struct ifnet *ifp = netif->ifp; - struct mbuf *m; - multicall_entry_t *mcl; - mmu_update_t *mmu; - gnttab_transfer_t *gop; - unsigned long vdata, old_mfn, new_mfn; - struct mbuf *rxq = NULL, *rxq_last = NULL; - int ret, notify = 0, pkts_dequeued = 0; + struct xnb_softc *xnb; + int error; + xnb_ring_type_t i; + + error = create_netdev(dev); + if (error != 0) { + xenbus_dev_fatal(dev, error, "creating netdev"); + return (error); + } - DDPRINTF("%s\n", IFNAME(netif)); + DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); - mcl = rx_mcl; - mmu = rx_mmu; - gop = grant_rx_op; + /* + * Basic initialization. + * After this block it is safe to call xnb_detach() + * to clean up any allocated data for this instance. + */ + xnb = device_get_softc(dev); + xnb->otherend_id = xenbus_get_otherend_id(dev); + for (i=0; i < XNB_NUM_RING_TYPES; i++) { + xnb->ring_configs[i].ring_pages = 1; + } - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - - /* Quit if the target domain has no receive buffers */ - if (netif->rx.req_cons == netif->rx.sring->req_prod) - break; + /* + * Setup sysctl variables. + */ + xnb_setup_sysctl(xnb); + + /* Update hot-plug status to satisfy xend. 
*/ + error = xs_printf(XST_NIL, xenbus_get_node(xnb->dev), + "hotplug-status", "connected"); + if (error != 0) { + xnb_attach_failed(xnb, error, "writing %s/hotplug-status", + xenbus_get_node(xnb->dev)); + return (error); + } - IFQ_DRV_DEQUEUE(&ifp->if_snd, m); - if (m == NULL) - break; + if ((error = xnb_publish_backend_info(xnb)) != 0) { + /* + * If we can't publish our data, we cannot participate + * in this connection, and waiting for a front-end state + * change will not help the situation. + */ + xnb_attach_failed(xnb, error, + "Publishing backend status for %s", + xenbus_get_node(xnb->dev)); + return error; + } - pkts_dequeued++; - - /* Check if we need to copy the data */ - if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) || - (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) { - struct mbuf *n; - - DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n", - m->m_flags, - (m->m_flags & M_EXT) ? m->m_ext.ext_type : 0, - (m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0, - (unsigned int)m->m_next); - - /* Make copy */ - MGETHDR(n, M_DONTWAIT, MT_DATA); - if (!n) - goto drop; - - MCLGET(n, M_DONTWAIT); - if (!(n->m_flags & M_EXT)) { - m_freem(n); - goto drop; - } + /* Tell the front end that we are ready to connect. */ + xenbus_set_state(dev, XenbusStateInitWait); + + return (0); +} - /* Leave space at front and keep current alignment */ - n->m_data += 16 + ((unsigned int)m->m_data & 0x3); +/** + * Detach from a net back device instance. + * + * \param dev NewBus device object representing this Xen Net Back instance. + * + * \return 0 for success, errno codes for failure. + * + * \note A net back device may be detached at any time in its life-cycle, + * including part way through the attach process. For this reason, + * initialization order and the intialization state checks in this + * routine must be carefully coupled so that attach time failures + * are gracefully handled. + */ +static int +xnb_detach(device_t dev) +{ + struct xnb_softc *xnb; - if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) { - WPRINTF("pkt to big %d\n", m->m_pkthdr.len); - m_freem(n); - goto drop; - } - m_copydata(m, 0, m->m_pkthdr.len, n->m_data); - n->m_pkthdr.len = n->m_len = m->m_pkthdr.len; - n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA); - m_freem(m); - m = n; - } + DPRINTF("\n"); - vdata = (unsigned long)m->m_data; - old_mfn = vtomach(vdata) >> PAGE_SHIFT; + xnb = device_get_softc(dev); + mtx_lock(&xnb->sc_lock); + while (xnb_shutdown(xnb) == EAGAIN) { + msleep(xnb, &xnb->sc_lock, /*wakeup prio unchanged*/0, + "xnb_shutdown", 0); + } + mtx_unlock(&xnb->sc_lock); + DPRINTF("\n"); - if ((new_mfn = alloc_mfn()) == 0) - goto drop; + mtx_destroy(&xnb->tx_lock); + mtx_destroy(&xnb->rx_lock); + mtx_destroy(&xnb->sc_lock); + return (0); +} -#ifdef XEN_NETBACK_FIXUP_CSUM - /* Check if we need to compute a checksum. This happens */ - /* when bridging from one domain to another. */ - if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) || - (m->m_pkthdr.csum_flags & CSUM_SCTP)) - fixup_checksum(m); -#endif +/** + * Prepare this net back device for suspension of this VM. + * + * \param dev NewBus device object representing this Xen net Back instance. + * + * \return 0 for success, errno codes for failure. 
+ */ +static int +xnb_suspend(device_t dev) +{ + return (0); +} - xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn; - - mcl->op = __HYPERVISOR_update_va_mapping; - mcl->args[0] = vdata; - mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A; - mcl->args[2] = 0; - mcl->args[3] = 0; - mcl++; - - gop->mfn = old_mfn; - gop->domid = netif->domid; - gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref; - netif->rx.req_cons++; - gop++; - - mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; - mmu->val = vtophys(vdata) >> PAGE_SHIFT; - mmu++; - - if (rxq_last) - rxq_last->m_nextpkt = m; - else - rxq = m; - rxq_last = m; - - DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif)); - DPRINTF_MBUF_LEN(m, 128); - - /* Filled the batch queue? */ - if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op)) - break; - - continue; - drop: - DDPRINTF("dropping pkt\n"); - ifp->if_oerrors++; - m_freem(m); - } +/** + * Perform any processing required to recover from a suspended state. + * + * \param dev NewBus device object representing this Xen Net Back instance. + * + * \return 0 for success, errno codes for failure. + */ +static int +xnb_resume(device_t dev) +{ + return (0); +} - if (mcl == rx_mcl) - return pkts_dequeued; +/** + * Handle state changes expressed via the XenStore by our front-end peer. + * + * \param dev NewBus device object representing this Xen + * Net Back instance. + * \param frontend_state The new state of the front-end. + * + * \return 0 for success, errno codes for failure. + */ +static void +xnb_frontend_changed(device_t dev, XenbusState frontend_state) +{ + struct xnb_softc *xnb; - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)rx_mmu; - mcl->args[1] = mmu - rx_mmu; - mcl->args[2] = 0; - mcl->args[3] = DOMID_SELF; - mcl++; + xnb = device_get_softc(dev); - mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; - ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); - BUG_ON(ret != 0); + DPRINTF("frontend_state=%s, xnb_state=%s\n", + xenbus_strstate(frontend_state), + xenbus_strstate(xenbus_get_state(xnb->dev))); - ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op); - BUG_ON(ret != 0); + switch (frontend_state) { + case XenbusStateInitialising: + break; + case XenbusStateInitialised: + case XenbusStateConnected: + xnb_connect(xnb); + break; + case XenbusStateClosing: + case XenbusStateClosed: + mtx_lock(&xnb->sc_lock); + xnb_shutdown(xnb); + mtx_unlock(&xnb->sc_lock); + if (frontend_state == XenbusStateClosed) + xenbus_set_state(xnb->dev, XenbusStateClosed); + break; + default: + xenbus_dev_fatal(xnb->dev, EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +/*---------------------------- Request Processing ----------------------------*/ +/** + * Interrupt handler bound to the shared ring's event channel. + * Entry point for the xennet transmit path in netback + * Transfers packets from the Xen ring to the host's generic networking stack + * + * \param arg Callback argument registerd during event channel + * binding - the xnb_softc for this instance. 
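+ *
+ * For example, if the frontend queues another request after the final
+ * xnb_recv() pass but before req_event is re-armed, the closing
+ * do/while test sees req_prod move past the snapshot taken at the top
+ * of the loop and simply makes another pass, so no request is left
+ * waiting for a notification that never arrives.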
+ */ +static void +xnb_intr(void *arg) +{ + struct xnb_softc *xnb; + struct ifnet *ifp; + netif_tx_back_ring_t *txb; + RING_IDX req_prod_local; + + xnb = (struct xnb_softc *)arg; + ifp = xnb->xnb_ifp; + txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; + + mtx_lock(&xnb->tx_lock); + do { + int notify; + req_prod_local = txb->sring->req_prod; + xen_rmb(); + + for (;;) { + struct mbuf *mbufc; + int err; + + err = xnb_recv(txb, xnb->otherend_id, &mbufc, ifp, + xnb->tx_gnttab); + if (err || (mbufc == NULL)) + break; - mcl = rx_mcl; - gop = grant_rx_op; + /* Send the packet to the generic network stack */ + (*xnb->xnb_ifp->if_input)(xnb->xnb_ifp, mbufc); + } - while ((m = rxq) != NULL) { - int8_t status; - uint16_t id, flags = 0; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify); + if (notify != 0) + notify_remote_via_irq(xnb->irq); - rxq = m->m_nextpkt; - m->m_nextpkt = NULL; + txb->sring->req_event = txb->req_cons + 1; + xen_mb(); + } while (txb->sring->req_prod != req_prod_local) ; + mtx_unlock(&xnb->tx_lock); - /* Rederive the machine addresses. */ - new_mfn = mcl->args[1] >> PAGE_SHIFT; - old_mfn = gop->mfn; + xnb_start(ifp); +} - ifp->if_obytes += m->m_pkthdr.len; - ifp->if_opackets++; - /* The update_va_mapping() must not fail. */ - BUG_ON(mcl->result != 0); +/** + * Build a struct xnb_pkt based on netif_tx_request's from a netif tx ring. + * Will read exactly 0 or 1 packets from the ring; never a partial packet. + * \param[out] pkt The returned packet. If there is an error building + * the packet, pkt.list_len will be set to 0. + * \param[in] tx_ring Pointer to the Ring that is the input to this function + * \param[in] start The ring index of the first potential request + * \return The number of requests consumed to build this packet + */ +static int +xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, + RING_IDX start) +{ + /* + * Outline: + * 1) Initialize pkt + * 2) Read the first request of the packet + * 3) Read the extras + * 4) Set cdr + * 5) Loop on the remainder of the packet + * 6) Finalize pkt (stuff like car_size and list_len) + */ + int idx = start; + int discard = 0; /* whether to discard the packet */ + int more_data = 0; /* there are more request past the last one */ + uint16_t cdr_size = 0; /* accumulated size of requests 2 through n */ + + xnb_pkt_initialize(pkt); + + /* Read the first request */ + if (RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { + netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); + pkt->size = tx->size; + pkt->flags = tx->flags & ~NETTXF_more_data; + more_data = tx->flags & NETTXF_more_data; + pkt->list_len++; + pkt->car = idx; + idx++; + } - /* Setup flags */ - if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) - flags |= NETRXF_csum_blank | NETRXF_data_validated; - else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) - flags |= NETRXF_data_validated; + /* Read the extra info */ + if ((pkt->flags & NETTXF_extra_info) && + RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { + netif_extra_info_t *ext = + (netif_extra_info_t*) RING_GET_REQUEST(tx_ring, idx); + pkt->extra.type = ext->type; + switch (pkt->extra.type) { + case XEN_NETIF_EXTRA_TYPE_GSO: + pkt->extra.u.gso = ext->u.gso; + break; + default: + /* + * The reference Linux netfront driver will + * never set any other extra.type. So we don't + * know what to do with it. Let's print an + * error, then consume and discard the packet + */ + printf("xnb(%s:%d): Unknown extra info type %d." 
+ " Discarding packet\n", + __func__, __LINE__, pkt->extra.type); + xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, + start)); + xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, + idx)); + discard = 1; + break; + } - /* Check the reassignment error code. */ - status = NETIF_RSP_OKAY; - if (gop->status != 0) { - DPRINTF("Bad status %d from grant transfer to DOM%u\n", - gop->status, netif->domid); + pkt->extra.flags = ext->flags; + if (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE) { /* - * Page no longer belongs to us unless GNTST_bad_page, - * but that should be a fatal error anyway. + * The reference linux netfront driver never sets this + * flag (nor does any other known netfront). So we + * will discard the packet. */ - BUG_ON(gop->status == GNTST_bad_page); - status = NETIF_RSP_ERROR; + printf("xnb(%s:%d): Request sets " + "XEN_NETIF_EXTRA_FLAG_MORE, but we can't handle " + "that\n", __func__, __LINE__); + xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); + xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); + discard = 1; } - id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id; - notify |= make_rx_response(netif, id, status, - (unsigned long)m->m_data & PAGE_MASK, - m->m_pkthdr.len, flags); - - m_freem(m); - mcl++; - gop++; + + idx++; } - if (notify) - notify_remote_via_irq(netif->irq); + /* Set cdr. If there is not more data, cdr is invalid */ + pkt->cdr = idx; + + /* Loop on remainder of packet */ + while (more_data && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { + netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); + pkt->list_len++; + cdr_size += tx->size; + if (tx->flags & ~NETTXF_more_data) { + /* There should be no other flags set at this point */ + printf("xnb(%s:%d): Request sets unknown flags %d " + "after the 1st request in the packet.\n", + __func__, __LINE__, tx->flags); + xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); + xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); + } - return pkts_dequeued; -} + more_data = tx->flags & NETTXF_more_data; + idx++; + } -static void -rx_task_timer(void *arg) -{ - DDPRINTF("\n"); - taskqueue_enqueue(taskqueue_swi, &net_rx_task); + /* Finalize packet */ + if (more_data != 0) { + /* The ring ran out of requests before finishing the packet */ + xnb_pkt_invalidate(pkt); + idx = start; /* tell caller that we consumed no requests */ + } else { + /* Calculate car_size */ + pkt->car_size = pkt->size - cdr_size; + } + if (discard != 0) { + xnb_pkt_invalidate(pkt); + } + + return idx - start; } + +/** + * Respond to all the requests that constituted pkt. Builds the responses and + * writes them to the ring, but doesn't push them to the shared ring. 
+ * \param[in] pkt the packet that needs a response + * \param[in] error true if there was an error handling the packet, such + * as in the hypervisor copy op or mbuf allocation + * \param[out] ring Responses go here + */ static void -net_rx_action(void *context, int pending) +xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, + int error) { - netif_t *netif, *last_zero_work = NULL; - - DDPRINTF("\n"); - - while ((netif = remove_from_rx_schedule_list())) { - struct ifnet *ifp = netif->ifp; - - if (netif == last_zero_work) { - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - add_to_rx_schedule_list_tail(netif); - netif_put(netif); - if (!STAILQ_EMPTY(&rx_sched_list)) - callout_reset(&rx_task_callout, 1, rx_task_timer, NULL); - break; - } - - if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if (netif_rx(netif)) - last_zero_work = NULL; - else if (!last_zero_work) - last_zero_work = netif; - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - add_to_rx_schedule_list_tail(netif); + /* + * Outline: + * 1) Respond to the first request + * 2) Respond to the extra info reques + * Loop through every remaining request in the packet, generating + * responses that copy those requests' ids and sets the status + * appropriately. + */ + netif_tx_request_t *tx; + netif_tx_response_t *rsp; + int i; + uint16_t status; + + status = (xnb_pkt_is_valid(pkt) == 0) || error ? + NETIF_RSP_ERROR : NETIF_RSP_OKAY; + KASSERT((pkt->list_len == 0) || (ring->rsp_prod_pvt == pkt->car), + ("Cannot respond to ring requests out of order")); + + if (pkt->list_len >= 1) { + uint16_t id; + tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); + id = tx->id; + rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); + rsp->id = id; + rsp->status = status; + ring->rsp_prod_pvt++; + + if (pkt->flags & NETRXF_extra_info) { + rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); + rsp->status = NETIF_RSP_NULL; + ring->rsp_prod_pvt++; } + } - netif_put(netif); + for (i=0; i < pkt->list_len - 1; i++) { + uint16_t id; + tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); + id = tx->id; + rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); + rsp->id = id; + rsp->status = status; + ring->rsp_prod_pvt++; } } -static void -netback_start(struct ifnet *ifp) +/** + * Create an mbuf chain to represent a packet. Initializes all of the headers + * in the mbuf chain, but does not copy the data. The returned chain must be + * free()'d when no longer needed + * \param[in] pkt A packet to model the mbuf chain after + * \return A newly allocated mbuf chain, possibly with clusters attached. + * NULL on failure + */ +static struct mbuf* +xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp) { - netif_t *netif = (netif_t *)ifp->if_softc; + /** + * \todo consider using a memory pool for mbufs instead of + * reallocating them for every packet + */ + /** \todo handle extra data */ + struct mbuf *m; - DDPRINTF("%s\n", IFNAME(netif)); + m = m_getm(NULL, pkt->size, M_NOWAIT, MT_DATA); - add_to_rx_schedule_list_tail(netif); - taskqueue_enqueue(taskqueue_swi, &net_rx_task); + if (m != NULL) { + m->m_pkthdr.rcvif = ifp; + if (pkt->flags & NETTXF_data_validated) { + /* + * We lie to the host OS and always tell it that the + * checksums are ok, because the packet is unlikely to + * get corrupted going across domains. 
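+ * The copy is an intra-host grant-table operation, so there is no
+ * wire on which bits could be flipped; the flags set below are the
+ * same ones a NIC that had verified the checksums in hardware would
+ * report.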
+ */ + m->m_pkthdr.csum_flags = ( + CSUM_IP_CHECKED | + CSUM_IP_VALID | + CSUM_DATA_VALID | + CSUM_PSEUDO_HDR + ); + m->m_pkthdr.csum_data = 0xffff; + } + } + return m; } -/* Map a grant ref to a ring */ +/** + * Build a gnttab_copy table that can be used to copy data from a pkt + * to an mbufc. Does not actually perform the copy. Always uses gref's on + * the packet side. + * \param[in] pkt pkt's associated requests form the src for + * the copy operation + * \param[in] mbufc mbufc's storage forms the dest for the copy operation + * \param[out] gnttab Storage for the returned grant table + * \param[in] txb Pointer to the backend ring structure + * \param[in] otherend_id The domain ID of the other end of the copy + * \return The number of gnttab entries filled + */ static int -map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring) +xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, + gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, + domid_t otherend_id) { - struct gnttab_map_grant_ref op; - - ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); - if (ring->va == 0) - return ENOMEM; - op.host_addr = ring->va; - op.flags = GNTMAP_host_map; - op.ref = ref; - op.dom = dom; - HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); - if (op.status) { - WPRINTF("grant table op err=%d\n", op.status); - kmem_free(kernel_map, ring->va, PAGE_SIZE); - ring->va = 0; - return EACCES; + const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ + int gnt_idx = 0; /* index into grant table */ + RING_IDX r_idx = pkt->car; /* index into tx ring buffer */ + int r_ofs = 0; /* offset of next data within tx request's data area */ + int m_ofs = 0; /* offset of next data within mbuf's data area */ + /* size in bytes that still needs to be represented in the table */ + uint16_t size_remaining = pkt->size; + + while (size_remaining > 0) { + const netif_tx_request_t *txq = RING_GET_REQUEST(txb, r_idx); + const size_t mbuf_space = M_TRAILINGSPACE(mbuf) - m_ofs; + const size_t req_size = + r_idx == pkt->car ? pkt->car_size : txq->size; + const size_t pkt_space = req_size - r_ofs; + /* + * space is the largest amount of data that can be copied in the + * grant table's next entry + */ + const size_t space = MIN(pkt_space, mbuf_space); + + /* TODO: handle this error condition without panicking */ + KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); + + gnttab[gnt_idx].source.u.ref = txq->gref; + gnttab[gnt_idx].source.domid = otherend_id; + gnttab[gnt_idx].source.offset = txq->offset + r_ofs; + gnttab[gnt_idx].dest.u.gmfn = virt_to_mfn( + mtod(mbuf, vm_offset_t) + m_ofs); + gnttab[gnt_idx].dest.offset = virt_to_offset( + mtod(mbuf, vm_offset_t) + m_ofs); + gnttab[gnt_idx].dest.domid = DOMID_SELF; + gnttab[gnt_idx].len = space; + gnttab[gnt_idx].flags = GNTCOPY_source_gref; + + gnt_idx++; + r_ofs += space; + m_ofs += space; + size_remaining -= space; + if (req_size - r_ofs <= 0) { + /* Must move to the next tx request */ + r_ofs = 0; + r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; + } + if (M_TRAILINGSPACE(mbuf) - m_ofs <= 0) { + /* Must move to the next mbuf */ + m_ofs = 0; + mbuf = mbuf->m_next; + } } - ring->handle = op.handle; - ring->bus_addr = op.dev_bus_addr; - - return 0; + return gnt_idx; } -/* Unmap grant ref for a ring */ +/** + * Check the status of the grant copy operations, and update mbufs various + * non-data fields to reflect the data present. + * \param[in,out] mbufc mbuf chain to update. 
The chain must be valid and of + * the correct length, and data should already be present + * \param[in] gnttab A grant table for a just completed copy op + * \param[in] n_entries The number of valid entries in the grant table + */ static void -unmap_ring(struct ring_ref *ring) +xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, + int n_entries) { - struct gnttab_unmap_grant_ref op; - - op.host_addr = ring->va; - op.dev_bus_addr = ring->bus_addr; - op.handle = ring->handle; - HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); - if (op.status) - WPRINTF("grant table op err=%d\n", op.status); + struct mbuf *mbuf = mbufc; + int i; + size_t total_size = 0; + + for (i = 0; i < n_entries; i++) { + KASSERT(gnttab[i].status == GNTST_okay, + ("Some gnttab_copy entry had error status %hd\n", + gnttab[i].status)); + + mbuf->m_len += gnttab[i].len; + total_size += gnttab[i].len; + if (M_TRAILINGSPACE(mbuf) <= 0) { + mbuf = mbuf->m_next; + } + } + mbufc->m_pkthdr.len = total_size; - kmem_free(kernel_map, ring->va, PAGE_SIZE); - ring->va = 0; + xnb_add_mbuf_cksum(mbufc); } +/** + * Dequeue at most one packet from the shared ring + * \param[in,out] txb Netif tx ring. A packet will be removed from it, and + * its private indices will be updated. But the indices + * will not be pushed to the shared ring. + * \param[in] ifnet Interface to which the packet will be sent + * \param[in] otherend Domain ID of the other end of the ring + * \param[out] mbufc The assembled mbuf chain, ready to send to the generic + * networking stack + * \param[in,out] gnttab Pointer to enough memory for a grant table. We make + * this a function parameter so that we will take less + * stack space. + * \return An error code + */ static int -connect_rings(netif_t *netif) +xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, + struct ifnet *ifnet, gnttab_copy_table gnttab) { - struct xenbus_device *xdev = netif->xdev; - netif_tx_sring_t *txs; - netif_rx_sring_t *rxs; - unsigned long tx_ring_ref, rx_ring_ref; - evtchn_port_t evtchn; - evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; - int err; + struct xnb_pkt pkt; + /* number of tx requests consumed to build the last packet */ + int num_consumed; + int nr_ents; - // Grab FE data and map his memory - err = xenbus_gather(NULL, xdev->otherend, - "tx-ring-ref", "%lu", &tx_ring_ref, - "rx-ring-ref", "%lu", &rx_ring_ref, - "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(xdev, err, - "reading %s/ring-ref and event-channel", - xdev->otherend); - return err; - } + *mbufc = NULL; + num_consumed = xnb_ring2pkt(&pkt, txb, txb->req_cons); + if (num_consumed == 0) + return 0; /* Nothing to receive */ - err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref); - if (err) { - xenbus_dev_fatal(xdev, err, "mapping tx ring"); - return err; - } - txs = (netif_tx_sring_t *)netif->tx_ring_ref.va; - BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); + /* update statistics indepdent of errors */ + ifnet->if_ipackets++; - err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref); - if (err) { - unmap_ring(&netif->tx_ring_ref); - xenbus_dev_fatal(xdev, err, "mapping rx ring"); - return err; - } - rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va; - BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); - - op.u.bind_interdomain.remote_dom = netif->domid; - op.u.bind_interdomain.remote_port = evtchn; - err = HYPERVISOR_event_channel_op(&op); - if (err) { - unmap_ring(&netif->tx_ring_ref); - unmap_ring(&netif->rx_ring_ref); - 
xenbus_dev_fatal(xdev, err, "binding event channel"); - return err; + /* + * if we got here, then 1 or more requests was consumed, but the packet + * is not necesarily valid. + */ + if (xnb_pkt_is_valid(&pkt) == 0) { + /* got a garbage packet, respond and drop it */ + xnb_txpkt2rsp(&pkt, txb, 1); + txb->req_cons += num_consumed; + DPRINTF("xnb_intr: garbage packet, num_consumed=%d\n", + num_consumed); + ifnet->if_ierrors++; + return EINVAL; } - netif->evtchn = op.u.bind_interdomain.local_port; - /* bind evtchn to irq handler */ - netif->irq = - bind_evtchn_to_irqhandler(netif->evtchn, "netback", - netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie); + *mbufc = xnb_pkt2mbufc(&pkt, ifnet); + + if (*mbufc == NULL) { + /* + * Couldn't allocate mbufs. Respond and drop the packet. Do + * not consume the requests + */ + xnb_txpkt2rsp(&pkt, txb, 1); + DPRINTF("xnb_intr: Couldn't allocate mbufs, num_consumed=%d\n", + num_consumed); + ifnet->if_iqdrops++; + return ENOMEM; + } - netif->rings_connected = 1; + nr_ents = xnb_txpkt2gnttab(&pkt, *mbufc, gnttab, txb, otherend); - DPRINTF("%s connected! evtchn=%d irq=%d\n", - IFNAME(netif), netif->evtchn, netif->irq); + if (nr_ents > 0) { + int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, + gnttab, nr_ents); + KASSERT(hv_ret == 0, + ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); + xnb_update_mbufc(*mbufc, gnttab, nr_ents); + } + xnb_txpkt2rsp(&pkt, txb, 0); + txb->req_cons += num_consumed; return 0; } -static void -disconnect_rings(netif_t *netif) +/** + * Create an xnb_pkt based on the contents of an mbuf chain. + * \param[in] mbufc mbuf chain to transform into a packet + * \param[out] pkt Storage for the newly generated xnb_pkt + * \param[in] start The ring index of the first available slot in the rx + * ring + * \param[in] space The number of free slots in the rx ring + * \retval 0 Success + * \retval EINVAL mbufc was corrupt or not convertible into a pkt + * \retval EAGAIN There was not enough space in the ring to queue the + * packet + */ +static int +xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, + RING_IDX start, int space) { - DPRINTF("\n"); - if (netif->rings_connected) { - unbind_from_irqhandler(netif->irq, netif->irq_cookie); - netif->irq = 0; - unmap_ring(&netif->tx_ring_ref); - unmap_ring(&netif->rx_ring_ref); - netif->rings_connected = 0; - } -} + int retval = 0; -static void -connect(netif_t *netif) -{ - if (!netif->xdev || - !netif->attached || - netif->frontend_state != XenbusStateConnected) { - return; - } + if ((mbufc == NULL) || + ( (mbufc->m_flags & M_PKTHDR) == 0) || + (mbufc->m_pkthdr.len == 0)) { + xnb_pkt_invalidate(pkt); + retval = EINVAL; + } else { + int slots_required; + + xnb_pkt_validate(pkt); + pkt->flags = 0; + pkt->size = mbufc->m_pkthdr.len; + pkt->car = start; + pkt->car_size = mbufc->m_len; + + if (mbufc->m_pkthdr.csum_flags & CSUM_TSO) { + pkt->flags |= NETRXF_extra_info; + pkt->extra.u.gso.size = mbufc->m_pkthdr.tso_segsz; + pkt->extra.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + pkt->extra.u.gso.pad = 0; + pkt->extra.u.gso.features = 0; + pkt->extra.type = XEN_NETIF_EXTRA_TYPE_GSO; + pkt->extra.flags = 0; + pkt->cdr = start + 2; + } else { + pkt->cdr = start + 1; + } + if (mbufc->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA)) { + pkt->flags |= + (NETRXF_csum_blank | NETRXF_data_validated); + } - if (!connect_rings(netif)) { - xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected); + /* + * Each ring response can have up to PAGE_SIZE of data. 
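+ * For example, with 4096-byte pages a 6000-byte packet needs
+ * (6000 + 4095) / 4096 = 2 responses, and one more ring slot is
+ * required below if the packet also carries a GSO extra-info segment.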
+ * Assume that we can defragment the mbuf chain efficiently + * into responses so that each response but the last uses all + * PAGE_SIZE bytes. + */ + pkt->list_len = (pkt->size + PAGE_SIZE - 1) / PAGE_SIZE; - /* Turn on interface */ - netif->ifp->if_drv_flags |= IFF_DRV_RUNNING; - netif->ifp->if_flags |= IFF_UP; + if (pkt->list_len > 1) { + pkt->flags |= NETRXF_more_data; + } + + slots_required = pkt->list_len + + (pkt->flags & NETRXF_extra_info ? 1 : 0); + if (slots_required > space) { + xnb_pkt_invalidate(pkt); + retval = EAGAIN; + } } + + return retval; } +/** + * Build a gnttab_copy table that can be used to copy data from an mbuf chain + * to the frontend's shared buffers. Does not actually perform the copy. + * Always uses gref's on the other end's side. + * \param[in] pkt pkt's associated responses form the dest for the copy + * operatoin + * \param[in] mbufc The source for the copy operation + * \param[out] gnttab Storage for the returned grant table + * \param[in] rxb Pointer to the backend ring structure + * \param[in] otherend_id The domain ID of the other end of the copy + * \return The number of gnttab entries filled + */ static int -netback_remove(struct xenbus_device *xdev) +xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, + gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, + domid_t otherend_id) { - netif_t *netif = xdev->data; - device_t ndev; - - DPRINTF("remove %s\n", xdev->nodename); - if ((ndev = netif->ndev)) { - netif->ndev = NULL; - mtx_lock(&Giant); - device_detach(ndev); - mtx_unlock(&Giant); + const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ + int gnt_idx = 0; /* index into grant table */ + RING_IDX r_idx = pkt->car; /* index into rx ring buffer */ + int r_ofs = 0; /* offset of next data within rx request's data area */ + int m_ofs = 0; /* offset of next data within mbuf's data area */ + /* size in bytes that still needs to be represented in the table */ + uint16_t size_remaining; + + size_remaining = (xnb_pkt_is_valid(pkt) != 0) ? pkt->size : 0; + + while (size_remaining > 0) { + const netif_rx_request_t *rxq = RING_GET_REQUEST(rxb, r_idx); + const size_t mbuf_space = mbuf->m_len - m_ofs; + /* Xen shared pages have an implied size of PAGE_SIZE */ + const size_t req_size = PAGE_SIZE; + const size_t pkt_space = req_size - r_ofs; + /* + * space is the largest amount of data that can be copied in the + * grant table's next entry + */ + const size_t space = MIN(pkt_space, mbuf_space); + + /* TODO: handle this error condition without panicing */ + KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); + + gnttab[gnt_idx].dest.u.ref = rxq->gref; + gnttab[gnt_idx].dest.domid = otherend_id; + gnttab[gnt_idx].dest.offset = r_ofs; + gnttab[gnt_idx].source.u.gmfn = virt_to_mfn( + mtod(mbuf, vm_offset_t) + m_ofs); + gnttab[gnt_idx].source.offset = virt_to_offset( + mtod(mbuf, vm_offset_t) + m_ofs); + gnttab[gnt_idx].source.domid = DOMID_SELF; + gnttab[gnt_idx].len = space; + gnttab[gnt_idx].flags = GNTCOPY_dest_gref; + + gnt_idx++; + + r_ofs += space; + m_ofs += space; + size_remaining -= space; + if (req_size - r_ofs <= 0) { + /* Must move to the next rx request */ + r_ofs = 0; + r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; + } + if (mbuf->m_len - m_ofs <= 0) { + /* Must move to the next mbuf */ + m_ofs = 0; + mbuf = mbuf->m_next; + } } - xdev->data = NULL; - netif->xdev = NULL; - netif_put(netif); - - return 0; + return gnt_idx; } /** - * Entry point to this code when a new device is created. 
Allocate the basic - * structures and the ring buffers for communication with the frontend. - * Switch to Connected state. + * Generates responses for all the requests that constituted pkt. Builds + * responses and writes them to the ring, but doesn't push the shared ring + * indices. + * \param[in] pkt the packet that needs a response + * \param[in] gnttab The grant copy table corresponding to this packet. + * Used to determine how many rsp->netif_rx_response_t's to + * generate. + * \param[in] n_entries Number of relevant entries in the grant table + * \param[out] ring Responses go here + * \return The number of RX requests that were consumed to generate + * the responses */ static int -netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id) +xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, + int n_entries, netif_rx_back_ring_t *ring) { - int err; - long handle; - char *bridge; - - DPRINTF("node=%s\n", xdev->nodename); - - /* Grab the handle */ - err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle); - if (err != 1) { - xenbus_dev_fatal(xdev, err, "reading handle"); - return err; - } + /* + * This code makes the following assumptions: + * * All entries in gnttab set GNTCOPY_dest_gref + * * The entries in gnttab are grouped by their grefs: any two + * entries with the same gref must be adjacent + */ + int error = 0; + int gnt_idx, i; + int n_responses = 0; + grant_ref_t last_gref = GRANT_REF_INVALID; + RING_IDX r_idx; - /* Check for bridge */ - bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL); - if (IS_ERR(bridge)) - bridge = NULL; + KASSERT(gnttab != NULL, ("Received a null granttable copy")); - err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait); - if (err) { - xenbus_dev_fatal(xdev, err, "writing switch state"); - return err; + /* + * In the event of an error, we only need to send one response to the + * netfront. In that case, we musn't write any data to the responses + * after the one we send. So we must loop all the way through gnttab + * looking for errors before we generate any responses + * + * Since we're looping through the grant table anyway, we'll count the + * number of different gref's in it, which will tell us how many + * responses to generate + */ + for (gnt_idx = 0; gnt_idx < n_entries; gnt_idx++) { + int16_t status = gnttab[gnt_idx].status; + if (status != GNTST_okay) { + DPRINTF( + "Got error %d for hypervisor gnttab_copy status\n", + status); + error = 1; + break; + } + if (gnttab[gnt_idx].dest.u.ref != last_gref) { + n_responses++; + last_gref = gnttab[gnt_idx].dest.u.ref; + } } - err = netif_create(handle, xdev, bridge); - if (err) { - xenbus_dev_fatal(xdev, err, "creating netif"); - return err; - } + if (error != 0) { + uint16_t id; + netif_rx_response_t *rsp; + + id = RING_GET_REQUEST(ring, ring->rsp_prod_pvt)->id; + rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); + rsp->id = id; + rsp->status = NETIF_RSP_ERROR; + n_responses = 1; + } else { + gnt_idx = 0; + const int has_extra = pkt->flags & NETRXF_extra_info; + if (has_extra != 0) + n_responses++; - err = vif_add_dev(xdev); - if (err) { - netif_put((netif_t *)xdev->data); - xenbus_dev_fatal(xdev, err, "adding vif device"); - return err; + for (i = 0; i < n_responses; i++) { + netif_rx_request_t rxq; + netif_rx_response_t *rsp; + + r_idx = ring->rsp_prod_pvt + i; + /* + * We copy the structure of rxq instead of making a + * pointer because it shares the same memory as rsp. 
+ */ + rxq = *(RING_GET_REQUEST(ring, r_idx)); + rsp = RING_GET_RESPONSE(ring, r_idx); + if (has_extra && (i == 1)) { + netif_extra_info_t *ext = + (netif_extra_info_t*)rsp; + ext->type = XEN_NETIF_EXTRA_TYPE_GSO; + ext->flags = 0; + ext->u.gso.size = pkt->extra.u.gso.size; + ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + ext->u.gso.pad = 0; + ext->u.gso.features = 0; + } else { + rsp->id = rxq.id; + rsp->status = GNTST_okay; + rsp->offset = 0; + rsp->flags = 0; + if (i < pkt->list_len - 1) + rsp->flags |= NETRXF_more_data; + if ((i == 0) && has_extra) + rsp->flags |= NETRXF_extra_info; + if ((i == 0) && + (pkt->flags & NETRXF_data_validated)) { + rsp->flags |= NETRXF_data_validated; + rsp->flags |= NETRXF_csum_blank; + } + rsp->status = 0; + for (; gnttab[gnt_idx].dest.u.ref == rxq.gref; + gnt_idx++) { + rsp->status += gnttab[gnt_idx].len; + } + } + } } - return 0; + ring->req_cons += n_responses; + ring->rsp_prod_pvt += n_responses; + return n_responses; } /** - * We are reconnecting to the backend, due to a suspend/resume, or a backend - * driver restart. We tear down our netif structure and recreate it, but - * leave the device-layer structures intact so that this is transparent to the - * rest of the kernel. - */ -static int netback_resume(struct xenbus_device *xdev) -{ - DPRINTF("node=%s\n", xdev->nodename); - return 0; -} - - -/** - * Callback received when the frontend's state changes. + * Add IP, TCP, and/or UDP checksums to every mbuf in a chain. The first mbuf + * in the chain must start with a struct ether_header. + * + * XXX This function will perform incorrectly on UDP packets that are split up + * into multiple ethernet frames. */ -static void frontend_changed(struct xenbus_device *xdev, - XenbusState frontend_state) +static void +xnb_add_mbuf_cksum(struct mbuf *mbufc) { - netif_t *netif = xdev->data; + struct ether_header *eh; + struct ip *iph; + uint16_t ether_type; + + eh = mtod(mbufc, struct ether_header*); + ether_type = ntohs(eh->ether_type); + if (ether_type != ETHERTYPE_IP) { + /* Nothing to calculate */ + return; + } - DPRINTF("state=%d\n", frontend_state); - - netif->frontend_state = frontend_state; + iph = (struct ip*)(eh + 1); + if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { + iph->ip_sum = 0; + iph->ip_sum = in_cksum_hdr(iph); + } - switch (frontend_state) { - case XenbusStateInitialising: - case XenbusStateInitialised: - break; - case XenbusStateConnected: - connect(netif); - break; - case XenbusStateClosing: - xenbus_switch_state(xdev, NULL, XenbusStateClosing); + switch (iph->ip_p) { + case IPPROTO_TCP: + if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { + size_t tcplen = ntohs(iph->ip_len) - sizeof(struct ip); + struct tcphdr *th = (struct tcphdr*)(iph + 1); + th->th_sum = in_pseudo(iph->ip_src.s_addr, + iph->ip_dst.s_addr, htons(IPPROTO_TCP + tcplen)); + th->th_sum = in_cksum_skip(mbufc, + sizeof(struct ether_header) + ntohs(iph->ip_len), + sizeof(struct ether_header) + (iph->ip_hl << 2)); + } break; - case XenbusStateClosed: - xenbus_remove_device(xdev); + case IPPROTO_UDP: + if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { + size_t udplen = ntohs(iph->ip_len) - sizeof(struct ip); + struct udphdr *uh = (struct udphdr*)(iph + 1); + uh->uh_sum = in_pseudo(iph->ip_src.s_addr, + iph->ip_dst.s_addr, htons(IPPROTO_UDP + udplen)); + uh->uh_sum = in_cksum_skip(mbufc, + sizeof(struct ether_header) + ntohs(iph->ip_len), + sizeof(struct ether_header) + (iph->ip_hl << 2)); + } break; - case XenbusStateUnknown: - case XenbusStateInitWait: - xenbus_dev_fatal(xdev, 
EINVAL, "saw state %d at frontend", - frontend_state); + default: break; } } -/* ** Driver registration ** */ - -static struct xenbus_device_id netback_ids[] = { - { "vif" }, - { "" } -}; - -static struct xenbus_driver netback = { - .name = "netback", - .ids = netback_ids, - .probe = netback_probe, - .remove = netback_remove, - .resume= netback_resume, - .otherend_changed = frontend_changed, -}; - static void -netback_init(void *unused) +xnb_stop(struct xnb_softc *xnb) { - callout_init(&rx_task_callout, CALLOUT_MPSAFE); - - mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS); - BUG_ON(!mmap_vstart); - - pending_cons = 0; - for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++) - pending_ring[pending_prod] = pending_prod; - - TASK_INIT(&net_tx_task, 0, net_tx_action, NULL); - TASK_INIT(&net_rx_task, 0, net_rx_action, NULL); - mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF); - mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF); - - DPRINTF("registering %s\n", netback.name); + struct ifnet *ifp; - xenbus_register_backend(&netback); + mtx_assert(&xnb->sc_lock, MA_OWNED); + ifp = xnb->xnb_ifp; + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + if_link_state_change(ifp, LINK_STATE_DOWN); } -SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL) - static int -vif_add_dev(struct xenbus_device *xdev) +xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - netif_t *netif = xdev->data; - device_t nexus, ndev; - devclass_t dc; - int err = 0; - - mtx_lock(&Giant); - - /* We will add a vif device as a child of nexus0 (for now) */ - if (!(dc = devclass_find("nexus")) || - !(nexus = devclass_get_device(dc, 0))) { - WPRINTF("could not find nexus0!\n"); - err = ENOENT; - goto done; - } - + struct xnb_softc *xnb = ifp->if_softc; +#ifdef INET + struct ifreq *ifr = (struct ifreq*) data; + struct ifaddr *ifa = (struct ifaddr*)data; +#endif + int error = 0; - /* Create a newbus device representing the vif */ - ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit); - if (!ndev) { - WPRINTF("could not create newbus device %s!\n", IFNAME(netif)); - err = EFAULT; - goto done; + switch (cmd) { + case SIOCSIFFLAGS: + mtx_lock(&xnb->sc_lock); + if (ifp->if_flags & IFF_UP) { + xnb_ifinit_locked(xnb); + } else { + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + xnb_stop(xnb); + } + } + /* + * Note: netfront sets a variable named xn_if_flags + * here, but that variable is never read + */ + mtx_unlock(&xnb->sc_lock); + break; + case SIOCSIFADDR: + case SIOCGIFADDR: +#ifdef INET + mtx_lock(&xnb->sc_lock); + if (ifa->ifa_addr->sa_family == AF_INET) { + ifp->if_flags |= IFF_UP; + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | + IFF_DRV_OACTIVE); + if_link_state_change(ifp, + LINK_STATE_DOWN); + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + if_link_state_change(ifp, + LINK_STATE_UP); + } + arp_ifinit(ifp, ifa); + mtx_unlock(&xnb->sc_lock); + } else { + mtx_unlock(&xnb->sc_lock); +#endif + error = ether_ioctl(ifp, cmd, data); +#ifdef INET + } +#endif + break; + case SIOCSIFCAP: + mtx_lock(&xnb->sc_lock); + if (ifr->ifr_reqcap & IFCAP_TXCSUM) { + ifp->if_capenable |= IFCAP_TXCSUM; + ifp->if_hwassist |= XNB_CSUM_FEATURES; + } else { + ifp->if_capenable &= ~(IFCAP_TXCSUM); + ifp->if_hwassist &= ~(XNB_CSUM_FEATURES); + } + if ((ifr->ifr_reqcap & IFCAP_RXCSUM)) { + ifp->if_capenable |= IFCAP_RXCSUM; + } else { + ifp->if_capenable &= 
~(IFCAP_RXCSUM); + } + /* + * TODO enable TSO4 and LRO once we no longer need + * to calculate checksums in software + */ +#if 0 + if (ifr->if_reqcap |= IFCAP_TSO4) { + if (IFCAP_TXCSUM & ifp->if_capenable) { + printf("xnb: Xen netif requires that " + "TXCSUM be enabled in order " + "to use TSO4\n"); + error = EINVAL; + } else { + ifp->if_capenable |= IFCAP_TSO4; + ifp->if_hwassist |= CSUM_TSO; + } + } else { + ifp->if_capenable &= ~(IFCAP_TSO4); + ifp->if_hwassist &= ~(CSUM_TSO); + } + if (ifr->ifreqcap |= IFCAP_LRO) { + ifp->if_capenable |= IFCAP_LRO; + } else { + ifp->if_capenable &= ~(IFCAP_LRO); + } +#endif + mtx_unlock(&xnb->sc_lock); + break; + case SIOCSIFMTU: + ifp->if_mtu = ifr->ifr_mtu; + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + xnb_ifinit(xnb); + break; + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + error = ifmedia_ioctl(ifp, ifr, &xnb->sc_media, cmd); + break; + default: + error = ether_ioctl(ifp, cmd, data); + break; } - - netif_get(netif); - device_set_ivars(ndev, netif); - netif->ndev = ndev; - - device_probe_and_attach(ndev); + return (error); +} - done: +static void +xnb_start_locked(struct ifnet *ifp) +{ + netif_rx_back_ring_t *rxb; + struct xnb_softc *xnb; + struct mbuf *mbufc; + RING_IDX req_prod_local; - mtx_unlock(&Giant); + xnb = ifp->if_softc; + rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; - return err; -} + if (!xnb->carrier) + return; -enum { - VIF_SYSCTL_DOMID, - VIF_SYSCTL_HANDLE, - VIF_SYSCTL_TXRING, - VIF_SYSCTL_RXRING, -}; + do { + int out_of_space = 0; + int notify; + req_prod_local = rxb->sring->req_prod; + xen_rmb(); + for (;;) { + int error; -static char * -vif_sysctl_ring_info(netif_t *netif, int cmd) -{ - char *buf = malloc(256, M_DEVBUF, M_WAITOK); - if (buf) { - if (!netif->rings_connected) - sprintf(buf, "rings not connected\n"); - else if (cmd == VIF_SYSCTL_TXRING) { - netif_tx_back_ring_t *tx = &netif->tx; - sprintf(buf, "nr_ents=%x req_cons=%x" - " req_prod=%x req_event=%x" - " rsp_prod=%x rsp_event=%x", - tx->nr_ents, tx->req_cons, - tx->sring->req_prod, tx->sring->req_event, - tx->sring->rsp_prod, tx->sring->rsp_event); - } else { - netif_rx_back_ring_t *rx = &netif->rx; - sprintf(buf, "nr_ents=%x req_cons=%x" - " req_prod=%x req_event=%x" - " rsp_prod=%x rsp_event=%x", - rx->nr_ents, rx->req_cons, - rx->sring->req_prod, rx->sring->req_event, - rx->sring->rsp_prod, rx->sring->rsp_event); + IF_DEQUEUE(&ifp->if_snd, mbufc); + if (mbufc == NULL) + break; + error = xnb_send(rxb, xnb->otherend_id, mbufc, + xnb->rx_gnttab); + switch (error) { + case EAGAIN: + /* + * Insufficient space in the ring. + * Requeue pkt and send when space is + * available. + */ + IF_PREPEND(&ifp->if_snd, mbufc); + /* + * Perhaps the frontend missed an IRQ + * and went to sleep. Notify it to wake + * it up. + */ + out_of_space = 1; + break; + + case EINVAL: + /* OS gave a corrupt packet. Drop it.*/ + ifp->if_oerrors++; + /* FALLTHROUGH */ + default: + /* Send succeeded, or packet had error. + * Free the packet */ + ifp->if_opackets++; + if (mbufc) + m_freem(mbufc); + break; + } + if (out_of_space != 0) + break; } - } - return buf; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify); + if ((notify != 0) || (out_of_space != 0)) + notify_remote_via_irq(xnb->irq); + rxb->sring->req_event = req_prod_local + 1; + xen_mb(); + } while (rxb->sring->req_prod != req_prod_local) ; } +/** + * Sends one packet to the ring. Blocks until the packet is on the ring + * \param[in] mbufc Contains one packet to send. 
Caller must free + * \param[in,out] rxb The packet will be pushed onto this ring, but the + * otherend will not be notified. + * \param[in] otherend The domain ID of the other end of the connection + * \retval EAGAIN The ring did not have enough space for the packet. + * The ring has not been modified + * \param[in,out] gnttab Pointer to enough memory for a grant table. We make + * this a function parameter so that we will take less + * stack space. + * \retval EINVAL mbufc was corrupt or not convertible into a pkt + */ static int -vif_sysctl_handler(SYSCTL_HANDLER_ARGS) +xnb_send(netif_rx_back_ring_t *ring, domid_t otherend, const struct mbuf *mbufc, + gnttab_copy_table gnttab) { - device_t dev = (device_t)arg1; - netif_t *netif = (netif_t *)device_get_ivars(dev); - const char *value; - char *buf = NULL; - int err; - - switch (arg2) { - case VIF_SYSCTL_DOMID: - return sysctl_handle_int(oidp, NULL, netif->domid, req); - case VIF_SYSCTL_HANDLE: - return sysctl_handle_int(oidp, NULL, netif->handle, req); - case VIF_SYSCTL_TXRING: - case VIF_SYSCTL_RXRING: - value = buf = vif_sysctl_ring_info(netif, arg2); - break; - default: - return (EINVAL); + struct xnb_pkt pkt; + int error, n_entries, n_reqs; + RING_IDX space; + + space = ring->sring->req_prod - ring->req_cons; + error = xnb_mbufc2pkt(mbufc, &pkt, ring->rsp_prod_pvt, space); + if (error != 0) + return error; + n_entries = xnb_rxpkt2gnttab(&pkt, mbufc, gnttab, ring, otherend); + if (n_entries != 0) { + int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, + gnttab, n_entries); + KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", + hv_ret)); } - err = SYSCTL_OUT(req, value, strlen(value)); - if (buf != NULL) - free(buf, M_DEVBUF); + n_reqs = xnb_rxpkt2rsp(&pkt, gnttab, n_entries, ring); - return err; + return 0; } -/* Newbus vif device driver probe */ -static int -vif_probe(device_t dev) +static void +xnb_start(struct ifnet *ifp) { - DDPRINTF("vif%d\n", device_get_unit(dev)); - return 0; + struct xnb_softc *xnb; + + xnb = ifp->if_softc; + mtx_lock(&xnb->rx_lock); + xnb_start_locked(ifp); + mtx_unlock(&xnb->rx_lock); } -/* Newbus vif device driver attach */ -static int -vif_attach(device_t dev) +/* equivalent of network_open() in Linux */ +static void +xnb_ifinit_locked(struct xnb_softc *xnb) { - netif_t *netif = (netif_t *)device_get_ivars(dev); - uint8_t mac[ETHER_ADDR_LEN]; - - DDPRINTF("%s\n", IFNAME(netif)); - - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD, - dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I", - "domid of frontend"); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD, - dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I", - "handle of frontend"); -#ifdef XEN_NETBACK_DEBUG - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "txring", CTLTYPE_STRING | CTLFLAG_RD, - dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A", - "tx ring info"); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "rxring", CTLTYPE_STRING | CTLFLAG_RD, - dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A", - "rx ring info"); -#endif + struct ifnet *ifp; - memset(mac, 0xff, sizeof(mac)); - mac[0] &= ~0x01; - - ether_ifattach(netif->ifp, mac); - netif->attached = 1; + ifp = xnb->xnb_ifp; - connect(netif); + mtx_assert(&xnb->sc_lock, MA_OWNED); - if (netif->bridge) { - 
DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge); - int err = add_to_bridge(netif->ifp, netif->bridge); - if (err) { - WPRINTF("Error adding %s to %s; err=%d\n", - IFNAME(netif), netif->bridge, err); - } - } + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + return; + + xnb_stop(xnb); - return bus_generic_attach(dev); + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + if_link_state_change(ifp, LINK_STATE_UP); } -/* Newbus vif device driver detach */ -static int -vif_detach(device_t dev) -{ - netif_t *netif = (netif_t *)device_get_ivars(dev); - struct ifnet *ifp = netif->ifp; - DDPRINTF("%s\n", IFNAME(netif)); +static void +xnb_ifinit(void *xsc) +{ + struct xnb_softc *xnb = xsc; - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + mtx_lock(&xnb->sc_lock); + xnb_ifinit_locked(xnb); + mtx_unlock(&xnb->sc_lock); +} - ether_ifdetach(ifp); - bus_generic_detach(dev); +/** + * Read the 'mac' node at the given device's node in the store, and parse that + * as colon-separated octets, placing result the given mac array. mac must be + * a preallocated array of length ETHER_ADDR_LEN ETH_ALEN (as declared in + * net/ethernet.h). + * Return 0 on success, or errno on error. + */ +static int +xen_net_read_mac(device_t dev, uint8_t mac[]) +{ + char *s, *e, *macstr; + const char *path; + int error = 0; + int i; + + path = xenbus_get_node(dev); + error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); + if (error != 0) { + xenbus_dev_fatal(dev, error, "parsing %s/mac", path); + } else { + s = macstr; + for (i = 0; i < ETHER_ADDR_LEN; i++) { + mac[i] = strtoul(s, &e, 16); + if (s == e || (e[0] != ':' && e[0] != 0)) { + error = ENOENT; + break; + } + s = &e[1]; + } + free(macstr, M_XENBUS); + } + return error; +} - netif->attached = 0; - netif_put(netif); +/** + * Callback used by the generic networking code to tell us when our carrier + * state has changed. Since we don't have a physical carrier, we don't care + */ +static int +xnb_ifmedia_upd(struct ifnet *ifp) +{ + return (0); +} - return 0; +/** + * Callback used by the generic networking code to ask us what our carrier + * state is. 
Since we don't have a physical carrier, this is very simple + */ +static void +xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) +{ + ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; + ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } -static device_method_t vif_methods[] = { + +/*---------------------------- NewBus Registration ---------------------------*/ +static device_method_t xnb_methods[] = { /* Device interface */ - DEVMETHOD(device_probe, vif_probe), - DEVMETHOD(device_attach, vif_attach), - DEVMETHOD(device_detach, vif_detach), + DEVMETHOD(device_probe, xnb_probe), + DEVMETHOD(device_attach, xnb_attach), + DEVMETHOD(device_detach, xnb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - {0, 0} -}; + DEVMETHOD(device_suspend, xnb_suspend), + DEVMETHOD(device_resume, xnb_resume), -static devclass_t vif_devclass; + /* Xenbus interface */ + DEVMETHOD(xenbus_otherend_changed, xnb_frontend_changed), -static driver_t vif_driver = { - "vif", - vif_methods, - 0, + { 0, 0 } }; -DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0); +static driver_t xnb_driver = { + "xnb", + xnb_methods, + sizeof(struct xnb_softc), +}; +devclass_t xnb_devclass; +DRIVER_MODULE(xnb, xenbusb_back, xnb_driver, xnb_devclass, 0, 0); -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: t - * End: - */ + +/*-------------------------- Unit Tests -------------------------------------*/ +#ifdef XNB_DEBUG +#include "netback_unit_tests.c" +#endif diff --git a/sys/dev/xen/netback/netback_unit_tests.c b/sys/dev/xen/netback/netback_unit_tests.c new file mode 100644 index 0000000..a3b0bc8 --- /dev/null +++ b/sys/dev/xen/netback/netback_unit_tests.c @@ -0,0 +1,2530 @@ +/*- + * Copyright (c) 2009-2011 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. 
Gibbs (Spectra Logic Corporation) + * Alan Somers (Spectra Logic Corporation) + * John Suykerbuyk (Spectra Logic Corporation) + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/** + * \file netback_unit_tests.c + * + * \brief Unit tests for the Xen netback driver. + * + * Due to the driver's use of static functions, these tests cannot be compiled + * standalone; they must be #include'd from the driver's .c file. + */ + + +/** Helper macro used to snprintf to a buffer and update the buffer pointer */ +#define SNCATF(buffer, buflen, ...) do { \ + size_t new_chars = snprintf(buffer, buflen, __VA_ARGS__); \ + buffer += new_chars; \ + /* be careful; snprintf's return value can be > buflen */ \ + buflen -= MIN(buflen, new_chars); \ +} while (0) + +/* STRINGIFY and TOSTRING are used only to help turn __LINE__ into a string */ +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + +/** + * Writes an error message to buffer if cond is false, and returns true + * iff the assertion failed. Note the implied parameters buffer and + * buflen + */ +#define XNB_ASSERT(cond) ({ \ + int passed = (cond); \ + char *_buffer = (buffer); \ + size_t _buflen = (buflen); \ + if (! passed) { \ + strlcat(_buffer, __func__, _buflen); \ + strlcat(_buffer, ":" TOSTRING(__LINE__) \ + " Assertion Error: " #cond "\n", _buflen); \ + } \ + ! passed; }) + + +/** + * The signature used by all testcases. If the test writes anything + * to buffer, then it will be considered a failure + * \param buffer Return storage for error messages + * \param buflen The space available in the buffer + */ +typedef void testcase_t(char *buffer, size_t buflen); + +/** + * Signature used by setup functions + * \return nonzero on error + */ +typedef int setup_t(void); + +typedef void teardown_t(void); + +/** A simple test fixture comprising setup, teardown, and test */ +struct test_fixture { + /** Will be run before the test to allocate and initialize variables */ + setup_t *setup; + + /** Will be run if setup succeeds */ + testcase_t *test; + + /** Cleans up test data whether or not the setup suceeded*/ + teardown_t *teardown; +}; + +typedef struct test_fixture test_fixture_t; + +static void xnb_fill_eh_and_ip(struct mbuf *m, uint16_t ip_len, + uint16_t ip_id, uint16_t ip_p, + uint16_t ip_off, uint16_t ip_sum); +static void xnb_fill_tcp(struct mbuf *m); +static int xnb_get1pkt(struct xnb_pkt *pkt, size_t size, uint16_t flags); +static int xnb_unit_test_runner(test_fixture_t const tests[], int ntests, + char *buffer, size_t buflen); + +static int __unused +null_setup(void) { return 0; } + +static void __unused +null_teardown(void) { } + +static setup_t setup_pvt_data; +static teardown_t teardown_pvt_data; +static testcase_t xnb_ring2pkt_emptyring; +static testcase_t xnb_ring2pkt_1req; +static testcase_t xnb_ring2pkt_2req; +static testcase_t xnb_ring2pkt_3req; +static testcase_t xnb_ring2pkt_extra; +static testcase_t xnb_ring2pkt_partial; +static testcase_t xnb_ring2pkt_wraps; +static testcase_t xnb_txpkt2rsp_emptypkt; +static testcase_t xnb_txpkt2rsp_1req; +static testcase_t xnb_txpkt2rsp_extra; +static testcase_t xnb_txpkt2rsp_long; +static testcase_t xnb_txpkt2rsp_invalid; +static testcase_t xnb_txpkt2rsp_error; +static testcase_t xnb_txpkt2rsp_wraps; +static testcase_t xnb_pkt2mbufc_empty; +static testcase_t xnb_pkt2mbufc_short; +static testcase_t xnb_pkt2mbufc_csum; +static testcase_t xnb_pkt2mbufc_1cluster; +static testcase_t xnb_pkt2mbufc_largecluster; +static testcase_t xnb_pkt2mbufc_2cluster; +static testcase_t 
xnb_txpkt2gnttab_empty; +static testcase_t xnb_txpkt2gnttab_short; +static testcase_t xnb_txpkt2gnttab_2req; +static testcase_t xnb_txpkt2gnttab_2cluster; +static testcase_t xnb_update_mbufc_short; +static testcase_t xnb_update_mbufc_2req; +static testcase_t xnb_update_mbufc_2cluster; +static testcase_t xnb_mbufc2pkt_empty; +static testcase_t xnb_mbufc2pkt_short; +static testcase_t xnb_mbufc2pkt_1cluster; +static testcase_t xnb_mbufc2pkt_2short; +static testcase_t xnb_mbufc2pkt_long; +static testcase_t xnb_mbufc2pkt_extra; +static testcase_t xnb_mbufc2pkt_nospace; +static testcase_t xnb_rxpkt2gnttab_empty; +static testcase_t xnb_rxpkt2gnttab_short; +static testcase_t xnb_rxpkt2gnttab_2req; +static testcase_t xnb_rxpkt2rsp_empty; +static testcase_t xnb_rxpkt2rsp_short; +static testcase_t xnb_rxpkt2rsp_extra; +static testcase_t xnb_rxpkt2rsp_2short; +static testcase_t xnb_rxpkt2rsp_2slots; +static testcase_t xnb_rxpkt2rsp_copyerror; +/* TODO: add test cases for xnb_add_mbuf_cksum for IPV6 tcp and udp */ +static testcase_t xnb_add_mbuf_cksum_arp; +static testcase_t xnb_add_mbuf_cksum_tcp; +static testcase_t xnb_add_mbuf_cksum_udp; +static testcase_t xnb_add_mbuf_cksum_icmp; +static testcase_t xnb_add_mbuf_cksum_tcp_swcksum; +static testcase_t xnb_sscanf_llu; +static testcase_t xnb_sscanf_lld; +static testcase_t xnb_sscanf_hhu; +static testcase_t xnb_sscanf_hhd; +static testcase_t xnb_sscanf_hhn; + +/** Private data used by unit tests */ +static struct { + gnttab_copy_table gnttab; + netif_rx_back_ring_t rxb; + netif_rx_front_ring_t rxf; + netif_tx_back_ring_t txb; + netif_tx_front_ring_t txf; + struct ifnet* ifp; + netif_rx_sring_t* rxs; + netif_tx_sring_t* txs; +} xnb_unit_pvt; + +static inline void safe_m_freem(struct mbuf **ppMbuf) { + if (*ppMbuf != NULL) { + m_freem(*ppMbuf); + *ppMbuf = NULL; + } +} + +/** + * The unit test runner. It will run every supplied test and return an + * output message as a string + * \param tests An array of tests. Every test will be attempted. + * \param ntests The length of tests + * \param buffer Return storage for the result string + * \param buflen The length of buffer + * \return The number of tests that failed + */ +static int +xnb_unit_test_runner(test_fixture_t const tests[], int ntests, char *buffer, + size_t buflen) +{ + int i; + int n_passes; + int n_failures = 0; + + for (i = 0; i < ntests; i++) { + int error = tests[i].setup(); + if (error != 0) { + SNCATF(buffer, buflen, + "Setup failed for test idx %d\n", i); + n_failures++; + } else { + size_t new_chars; + + tests[i].test(buffer, buflen); + new_chars = strnlen(buffer, buflen); + buffer += new_chars; + buflen -= new_chars; + + if (new_chars > 0) { + n_failures++; + } + } + tests[i].teardown(); + } + + n_passes = ntests - n_failures; + if (n_passes > 0) { + SNCATF(buffer, buflen, "%d Tests Passed\n", n_passes); + } + if (n_failures > 0) { + SNCATF(buffer, buflen, "%d Tests FAILED\n", n_failures); + } + + return n_failures; +} + +/** Number of unit tests. Must match the length of the tests array below */ +#define TOTAL_TESTS (53) +/** + * Max memory available for returning results. 400 chars/test should give + * enough space for a five line error message for every test + */ +#define TOTAL_BUFLEN (400 * TOTAL_TESTS + 2) + +/** + * Called from userspace by a sysctl. 
Runs all internal unit tests, and + * returns the results to userspace as a string + * \param oidp unused + * \param arg1 pointer to an xnb_softc for a specific xnb device + * \param arg2 unused + * \param req sysctl access structure + * \return a string via the special SYSCTL_OUT macro. + */ + +static int +xnb_unit_test_main(SYSCTL_HANDLER_ARGS) { + test_fixture_t const tests[TOTAL_TESTS] = { + {setup_pvt_data, xnb_ring2pkt_emptyring, teardown_pvt_data}, + {setup_pvt_data, xnb_ring2pkt_1req, teardown_pvt_data}, + {setup_pvt_data, xnb_ring2pkt_2req, teardown_pvt_data}, + {setup_pvt_data, xnb_ring2pkt_3req, teardown_pvt_data}, + {setup_pvt_data, xnb_ring2pkt_extra, teardown_pvt_data}, + {setup_pvt_data, xnb_ring2pkt_partial, teardown_pvt_data}, + {setup_pvt_data, xnb_ring2pkt_wraps, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2rsp_emptypkt, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2rsp_1req, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2rsp_extra, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2rsp_long, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2rsp_invalid, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2rsp_error, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2rsp_wraps, teardown_pvt_data}, + {setup_pvt_data, xnb_pkt2mbufc_empty, teardown_pvt_data}, + {setup_pvt_data, xnb_pkt2mbufc_short, teardown_pvt_data}, + {setup_pvt_data, xnb_pkt2mbufc_csum, teardown_pvt_data}, + {setup_pvt_data, xnb_pkt2mbufc_1cluster, teardown_pvt_data}, + {setup_pvt_data, xnb_pkt2mbufc_largecluster, teardown_pvt_data}, + {setup_pvt_data, xnb_pkt2mbufc_2cluster, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2gnttab_empty, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2gnttab_short, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2gnttab_2req, teardown_pvt_data}, + {setup_pvt_data, xnb_txpkt2gnttab_2cluster, teardown_pvt_data}, + {setup_pvt_data, xnb_update_mbufc_short, teardown_pvt_data}, + {setup_pvt_data, xnb_update_mbufc_2req, teardown_pvt_data}, + {setup_pvt_data, xnb_update_mbufc_2cluster, teardown_pvt_data}, + {setup_pvt_data, xnb_mbufc2pkt_empty, teardown_pvt_data}, + {setup_pvt_data, xnb_mbufc2pkt_short, teardown_pvt_data}, + {setup_pvt_data, xnb_mbufc2pkt_1cluster, teardown_pvt_data}, + {setup_pvt_data, xnb_mbufc2pkt_2short, teardown_pvt_data}, + {setup_pvt_data, xnb_mbufc2pkt_long, teardown_pvt_data}, + {setup_pvt_data, xnb_mbufc2pkt_extra, teardown_pvt_data}, + {setup_pvt_data, xnb_mbufc2pkt_nospace, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2gnttab_empty, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2gnttab_short, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2gnttab_2req, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2rsp_empty, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2rsp_short, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2rsp_extra, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2rsp_2short, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2rsp_2slots, teardown_pvt_data}, + {setup_pvt_data, xnb_rxpkt2rsp_copyerror, teardown_pvt_data}, + {null_setup, xnb_add_mbuf_cksum_arp, null_teardown}, + {null_setup, xnb_add_mbuf_cksum_icmp, null_teardown}, + {null_setup, xnb_add_mbuf_cksum_tcp, null_teardown}, + {null_setup, xnb_add_mbuf_cksum_tcp_swcksum, null_teardown}, + {null_setup, xnb_add_mbuf_cksum_udp, null_teardown}, + {null_setup, xnb_sscanf_hhd, null_teardown}, + {null_setup, xnb_sscanf_hhu, null_teardown}, + {null_setup, xnb_sscanf_lld, null_teardown}, + {null_setup, xnb_sscanf_llu, null_teardown}, + {null_setup, xnb_sscanf_hhn, null_teardown}, + 
}; + /** + * results is static so that the data will persist after this function + * returns. The sysctl code expects us to return a constant string. + * \todo: the static variable is not thread safe. Put a mutex around + * it. + */ + static char results[TOTAL_BUFLEN]; + + /* empty the result strings */ + results[0] = 0; + xnb_unit_test_runner(tests, TOTAL_TESTS, results, TOTAL_BUFLEN); + + return (SYSCTL_OUT(req, results, strnlen(results, TOTAL_BUFLEN))); +} + +static int +setup_pvt_data(void) +{ + int error = 0; + + bzero(xnb_unit_pvt.gnttab, sizeof(xnb_unit_pvt.gnttab)); + + xnb_unit_pvt.txs = malloc(PAGE_SIZE, M_XENNETBACK, M_WAITOK|M_ZERO); + if (xnb_unit_pvt.txs != NULL) { + SHARED_RING_INIT(xnb_unit_pvt.txs); + BACK_RING_INIT(&xnb_unit_pvt.txb, xnb_unit_pvt.txs, PAGE_SIZE); + FRONT_RING_INIT(&xnb_unit_pvt.txf, xnb_unit_pvt.txs, PAGE_SIZE); + } else { + error = 1; + } + + xnb_unit_pvt.ifp = if_alloc(IFT_ETHER); + if (xnb_unit_pvt.ifp == NULL) { + error = 1; + } + + xnb_unit_pvt.rxs = malloc(PAGE_SIZE, M_XENNETBACK, M_WAITOK|M_ZERO); + if (xnb_unit_pvt.rxs != NULL) { + SHARED_RING_INIT(xnb_unit_pvt.rxs); + BACK_RING_INIT(&xnb_unit_pvt.rxb, xnb_unit_pvt.rxs, PAGE_SIZE); + FRONT_RING_INIT(&xnb_unit_pvt.rxf, xnb_unit_pvt.rxs, PAGE_SIZE); + } else { + error = 1; + } + + return error; +} + +static void +teardown_pvt_data(void) +{ + if (xnb_unit_pvt.txs != NULL) { + free(xnb_unit_pvt.txs, M_XENNETBACK); + } + if (xnb_unit_pvt.rxs != NULL) { + free(xnb_unit_pvt.rxs, M_XENNETBACK); + } + if (xnb_unit_pvt.ifp != NULL) { + if_free(xnb_unit_pvt.ifp); + } +} + +/** + * Verify that xnb_ring2pkt will not consume any requests from an empty ring + */ +static void +xnb_ring2pkt_emptyring(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int num_consumed; + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + XNB_ASSERT(num_consumed == 0); +} + +/** + * Verify that xnb_ring2pkt can convert a single request packet correctly + */ +static void +xnb_ring2pkt_1req(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int num_consumed; + struct netif_tx_request *req; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + + req->flags = 0; + req->size = 69; /* arbitrary number for test */ + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + XNB_ASSERT(num_consumed == 1); + XNB_ASSERT(pkt.size == 69); + XNB_ASSERT(pkt.car_size == 69); + XNB_ASSERT(pkt.flags == 0); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.list_len == 1); + XNB_ASSERT(pkt.car == 0); +} + +/** + * Verify that xnb_ring2pkt can convert a two request packet correctly. 
+ * This tests handling of the MORE_DATA flag and cdr + */ +static void +xnb_ring2pkt_2req(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int num_consumed; + struct netif_tx_request *req; + RING_IDX start_idx = xnb_unit_pvt.txf.req_prod_pvt; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 100; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = 40; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + XNB_ASSERT(num_consumed == 2); + XNB_ASSERT(pkt.size == 100); + XNB_ASSERT(pkt.car_size == 60); + XNB_ASSERT(pkt.flags == 0); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.list_len == 2); + XNB_ASSERT(pkt.car == start_idx); + XNB_ASSERT(pkt.cdr == start_idx + 1); +} + +/** + * Verify that xnb_ring2pkt can convert a three request packet correctly + */ +static void +xnb_ring2pkt_3req(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int num_consumed; + struct netif_tx_request *req; + RING_IDX start_idx = xnb_unit_pvt.txf.req_prod_pvt; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 200; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 40; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = 50; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + XNB_ASSERT(num_consumed == 3); + XNB_ASSERT(pkt.size == 200); + XNB_ASSERT(pkt.car_size == 110); + XNB_ASSERT(pkt.flags == 0); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.list_len == 3); + XNB_ASSERT(pkt.car == start_idx); + XNB_ASSERT(pkt.cdr == start_idx + 1); + XNB_ASSERT(RING_GET_REQUEST(&xnb_unit_pvt.txb, pkt.cdr + 1) == req); +} + +/** + * Verify that xnb_ring2pkt can read extra info + */ +static void +xnb_ring2pkt_extra(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int num_consumed; + struct netif_tx_request *req; + struct netif_extra_info *ext; + RING_IDX start_idx = xnb_unit_pvt.txf.req_prod_pvt; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_extra_info | NETTXF_more_data; + req->size = 150; + xnb_unit_pvt.txf.req_prod_pvt++; + + ext = (struct netif_extra_info*) RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + ext->flags = 0; + ext->type = XEN_NETIF_EXTRA_TYPE_GSO; + ext->u.gso.size = 250; + ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + ext->u.gso.features = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = 50; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + XNB_ASSERT(num_consumed == 3); + XNB_ASSERT(pkt.extra.flags == 0); + XNB_ASSERT(pkt.extra.type == XEN_NETIF_EXTRA_TYPE_GSO); + XNB_ASSERT(pkt.extra.u.gso.size == 250); + XNB_ASSERT(pkt.extra.u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4); + XNB_ASSERT(pkt.size == 150); +
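+ /* car_size is the 150-byte total minus the 50 bytes carried by the cdr request */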
XNB_ASSERT(pkt.car_size == 100); + XNB_ASSERT(pkt.flags == NETTXF_extra_info); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.list_len == 2); + XNB_ASSERT(pkt.car == start_idx); + XNB_ASSERT(pkt.cdr == start_idx + 2); + XNB_ASSERT(RING_GET_REQUEST(&xnb_unit_pvt.txb, pkt.cdr) == req); +} + +/** + * Verify that xnb_ring2pkt will consume no requests if the entire packet is + * not yet in the ring + */ +static void +xnb_ring2pkt_partial(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int num_consumed; + struct netif_tx_request *req; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 150; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + XNB_ASSERT(num_consumed == 0); + XNB_ASSERT(! xnb_pkt_is_valid(&pkt)); +} + +/** + * Verity that xnb_ring2pkt can read a packet whose requests wrap around + * the end of the ring + */ +static void +xnb_ring2pkt_wraps(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int num_consumed; + struct netif_tx_request *req; + unsigned int rsize; + + /* + * Manually tweak the ring indices to create a ring with no responses + * and the next request slot at position 2 from the end + */ + rsize = RING_SIZE(&xnb_unit_pvt.txf); + xnb_unit_pvt.txf.req_prod_pvt = rsize - 2; + xnb_unit_pvt.txf.rsp_cons = rsize - 2; + xnb_unit_pvt.txs->req_prod = rsize - 2; + xnb_unit_pvt.txs->req_event = rsize - 1; + xnb_unit_pvt.txs->rsp_prod = rsize - 2; + xnb_unit_pvt.txs->rsp_event = rsize - 1; + xnb_unit_pvt.txb.rsp_prod_pvt = rsize - 2; + xnb_unit_pvt.txb.req_cons = rsize - 2; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 550; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 100; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = 50; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + XNB_ASSERT(num_consumed == 3); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.list_len == 3); + XNB_ASSERT(RING_GET_REQUEST(&xnb_unit_pvt.txb, pkt.cdr + 1) == req); +} + + +/** + * xnb_txpkt2rsp should do nothing for an empty packet + */ +static void +xnb_txpkt2rsp_emptypkt(char *buffer, size_t buflen) +{ + int num_consumed; + struct xnb_pkt pkt; + netif_tx_back_ring_t txb_backup = xnb_unit_pvt.txb; + netif_tx_sring_t txs_backup = *xnb_unit_pvt.txs; + pkt.list_len = 0; + + /* must call xnb_ring2pkt just to intialize pkt */ + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + xnb_txpkt2rsp(&pkt, &xnb_unit_pvt.txb, 0); + XNB_ASSERT( + memcmp(&txb_backup, &xnb_unit_pvt.txb, sizeof(txb_backup)) == 0); + XNB_ASSERT( + memcmp(&txs_backup, xnb_unit_pvt.txs, sizeof(txs_backup)) == 0); +} + +/** + * xnb_txpkt2rsp responding to one request + */ +static void +xnb_txpkt2rsp_1req(char *buffer, size_t buflen) +{ + uint16_t num_consumed; + struct xnb_pkt pkt; + struct netif_tx_request *req; + struct netif_tx_response *rsp; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->size = 1000; + req->flags = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + 
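+ /* Publish the new request to the shared ring so the backend can consume it */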
+ RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + xnb_unit_pvt.txb.req_cons += num_consumed; + + xnb_txpkt2rsp(&pkt, &xnb_unit_pvt.txb, 0); + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, xnb_unit_pvt.txf.rsp_cons); + + XNB_ASSERT( + xnb_unit_pvt.txb.rsp_prod_pvt == xnb_unit_pvt.txs->req_prod); + XNB_ASSERT(rsp->id == req->id); + XNB_ASSERT(rsp->status == NETIF_RSP_OKAY); +}; + +/** + * xnb_txpkt2rsp responding to 1 data request and 1 extra info + */ +static void +xnb_txpkt2rsp_extra(char *buffer, size_t buflen) +{ + uint16_t num_consumed; + struct xnb_pkt pkt; + struct netif_tx_request *req; + netif_extra_info_t *ext; + struct netif_tx_response *rsp; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->size = 1000; + req->flags = NETTXF_extra_info; + req->id = 69; + xnb_unit_pvt.txf.req_prod_pvt++; + + ext = (netif_extra_info_t*) RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + ext->type = XEN_NETIF_EXTRA_TYPE_GSO; + ext->flags = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + xnb_unit_pvt.txb.req_cons += num_consumed; + + xnb_txpkt2rsp(&pkt, &xnb_unit_pvt.txb, 0); + + XNB_ASSERT( + xnb_unit_pvt.txb.rsp_prod_pvt == xnb_unit_pvt.txs->req_prod); + + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, xnb_unit_pvt.txf.rsp_cons); + XNB_ASSERT(rsp->id == req->id); + XNB_ASSERT(rsp->status == NETIF_RSP_OKAY); + + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, + xnb_unit_pvt.txf.rsp_cons + 1); + XNB_ASSERT(rsp->status == NETIF_RSP_NULL); +}; + +/** + * xnb_pkg2rsp responding to 3 data requests and 1 extra info + */ +static void +xnb_txpkt2rsp_long(char *buffer, size_t buflen) +{ + uint16_t num_consumed; + struct xnb_pkt pkt; + struct netif_tx_request *req; + netif_extra_info_t *ext; + struct netif_tx_response *rsp; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->size = 1000; + req->flags = NETTXF_extra_info | NETTXF_more_data; + req->id = 254; + xnb_unit_pvt.txf.req_prod_pvt++; + + ext = (netif_extra_info_t*) RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + ext->type = XEN_NETIF_EXTRA_TYPE_GSO; + ext->flags = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->size = 300; + req->flags = NETTXF_more_data; + req->id = 1034; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->size = 400; + req->flags = 0; + req->id = 34; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + xnb_unit_pvt.txb.req_cons += num_consumed; + + xnb_txpkt2rsp(&pkt, &xnb_unit_pvt.txb, 0); + + XNB_ASSERT( + xnb_unit_pvt.txb.rsp_prod_pvt == xnb_unit_pvt.txs->req_prod); + + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, xnb_unit_pvt.txf.rsp_cons); + XNB_ASSERT(rsp->id == + RING_GET_REQUEST(&xnb_unit_pvt.txf, 0)->id); + XNB_ASSERT(rsp->status == NETIF_RSP_OKAY); + + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, + xnb_unit_pvt.txf.rsp_cons + 1); + XNB_ASSERT(rsp->status == NETIF_RSP_NULL); + + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, + xnb_unit_pvt.txf.rsp_cons + 2); + XNB_ASSERT(rsp->id == + RING_GET_REQUEST(&xnb_unit_pvt.txf, 2)->id); + XNB_ASSERT(rsp->status == NETIF_RSP_OKAY); + + 
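+ /* The final data request should also receive an OKAY response */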
rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, + xnb_unit_pvt.txf.rsp_cons + 3); + XNB_ASSERT(rsp->id == + RING_GET_REQUEST(&xnb_unit_pvt.txf, 3)->id); + XNB_ASSERT(rsp->status == NETIF_RSP_OKAY); +} + +/** + * xnb_txpkt2rsp responding to an invalid packet. + * Note: this test will result in an error message being printed to the console + * such as: + * xnb(xnb_ring2pkt:1306): Unknown extra info type 255. Discarding packet + */ +static void +xnb_txpkt2rsp_invalid(char *buffer, size_t buflen) +{ + uint16_t num_consumed; + struct xnb_pkt pkt; + struct netif_tx_request *req; + netif_extra_info_t *ext; + struct netif_tx_response *rsp; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->size = 1000; + req->flags = NETTXF_extra_info; + req->id = 69; + xnb_unit_pvt.txf.req_prod_pvt++; + + ext = (netif_extra_info_t*) RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + ext->type = 0xFF; /* Invalid extra type */ + ext->flags = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + xnb_unit_pvt.txb.req_cons += num_consumed; + XNB_ASSERT(! xnb_pkt_is_valid(&pkt)); + + xnb_txpkt2rsp(&pkt, &xnb_unit_pvt.txb, 0); + + XNB_ASSERT( + xnb_unit_pvt.txb.rsp_prod_pvt == xnb_unit_pvt.txs->req_prod); + + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, xnb_unit_pvt.txf.rsp_cons); + XNB_ASSERT(rsp->id == req->id); + XNB_ASSERT(rsp->status == NETIF_RSP_ERROR); + + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, + xnb_unit_pvt.txf.rsp_cons + 1); + XNB_ASSERT(rsp->status == NETIF_RSP_NULL); +}; + +/** + * xnb_txpkt2rsp responding to one request which caused an error + */ +static void +xnb_txpkt2rsp_error(char *buffer, size_t buflen) +{ + uint16_t num_consumed; + struct xnb_pkt pkt; + struct netif_tx_request *req; + struct netif_tx_response *rsp; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->size = 1000; + req->flags = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + xnb_unit_pvt.txb.req_cons += num_consumed; + + xnb_txpkt2rsp(&pkt, &xnb_unit_pvt.txb, 1); + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, xnb_unit_pvt.txf.rsp_cons); + + XNB_ASSERT( + xnb_unit_pvt.txb.rsp_prod_pvt == xnb_unit_pvt.txs->req_prod); + XNB_ASSERT(rsp->id == req->id); + XNB_ASSERT(rsp->status == NETIF_RSP_ERROR); +}; + +/** + * xnb_txpkt2rsp's responses wrap around the end of the ring + */ +static void +xnb_txpkt2rsp_wraps(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int num_consumed; + struct netif_tx_request *req; + struct netif_tx_response *rsp; + unsigned int rsize; + + /* + * Manually tweak the ring indices to create a ring with no responses + * and the next request slot at position 2 from the end + */ + rsize = RING_SIZE(&xnb_unit_pvt.txf); + xnb_unit_pvt.txf.req_prod_pvt = rsize - 2; + xnb_unit_pvt.txf.rsp_cons = rsize - 2; + xnb_unit_pvt.txs->req_prod = rsize - 2; + xnb_unit_pvt.txs->req_event = rsize - 1; + xnb_unit_pvt.txs->rsp_prod = rsize - 2; + xnb_unit_pvt.txs->rsp_event = rsize - 1; + xnb_unit_pvt.txb.rsp_prod_pvt = rsize - 2; + xnb_unit_pvt.txb.req_cons = rsize - 2; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 550; + req->id = 1; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + 
xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 100; + req->id = 2; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = 50; + req->id = 3; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + + xnb_txpkt2rsp(&pkt, &xnb_unit_pvt.txb, 0); + + XNB_ASSERT( + xnb_unit_pvt.txb.rsp_prod_pvt == xnb_unit_pvt.txs->req_prod); + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.txb, + xnb_unit_pvt.txf.rsp_cons + 2); + XNB_ASSERT(rsp->id == req->id); + XNB_ASSERT(rsp->status == NETIF_RSP_OKAY); +} + + +/** + * Helper function used to setup pkt2mbufc tests + * \param size size in bytes of the single request to push to the ring + * \param flags optional flags to put in the netif request + * \param[out] pkt the returned packet object + * \return number of requests consumed from the ring + */ +static int +xnb_get1pkt(struct xnb_pkt *pkt, size_t size, uint16_t flags) +{ + struct netif_tx_request *req; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = flags; + req->size = size; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + return xnb_ring2pkt(pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); +} + +/** + * xnb_pkt2mbufc on an empty packet + */ +static void +xnb_pkt2mbufc_empty(char *buffer, size_t buflen) +{ + int num_consumed; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + pkt.list_len = 0; + + /* must call xnb_ring2pkt just to intialize pkt */ + num_consumed = xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, + xnb_unit_pvt.txb.req_cons); + pkt.size = 0; + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + safe_m_freem(&pMbuf); +} + +/** + * xnb_pkt2mbufc on short packet that can fit in an mbuf internal buffer + */ +static void +xnb_pkt2mbufc_short(char *buffer, size_t buflen) +{ + const size_t size = MINCLSIZE - 1; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + + xnb_get1pkt(&pkt, size, 0); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + XNB_ASSERT(M_TRAILINGSPACE(pMbuf) >= size); + safe_m_freem(&pMbuf); +} + +/** + * xnb_pkt2mbufc on short packet whose checksum was validated by the netfron + */ +static void +xnb_pkt2mbufc_csum(char *buffer, size_t buflen) +{ + const size_t size = MINCLSIZE - 1; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + + xnb_get1pkt(&pkt, size, NETTXF_data_validated); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + XNB_ASSERT(M_TRAILINGSPACE(pMbuf) >= size); + XNB_ASSERT(pMbuf->m_pkthdr.csum_flags & CSUM_IP_CHECKED); + XNB_ASSERT(pMbuf->m_pkthdr.csum_flags & CSUM_IP_VALID); + XNB_ASSERT(pMbuf->m_pkthdr.csum_flags & CSUM_DATA_VALID); + XNB_ASSERT(pMbuf->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR); + safe_m_freem(&pMbuf); +} + +/** + * xnb_pkt2mbufc on packet that can fit in one cluster + */ +static void +xnb_pkt2mbufc_1cluster(char *buffer, size_t buflen) +{ + const size_t size = MINCLSIZE; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + + xnb_get1pkt(&pkt, size, 0); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + XNB_ASSERT(M_TRAILINGSPACE(pMbuf) >= size); + safe_m_freem(&pMbuf); +} + +/** + * xnb_pkt2mbufc on packet that cannot fit in one regular cluster + */ +static void +xnb_pkt2mbufc_largecluster(char *buffer, size_t buflen) +{ + const size_t size = MCLBYTES + 1; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + + xnb_get1pkt(&pkt, size, 0); + + pMbuf = xnb_pkt2mbufc(&pkt, 
xnb_unit_pvt.ifp); + XNB_ASSERT(M_TRAILINGSPACE(pMbuf) >= size); + safe_m_freem(&pMbuf); +} + +/** + * xnb_pkt2mbufc on packet that cannot fit in one clusters + */ +static void +xnb_pkt2mbufc_2cluster(char *buffer, size_t buflen) +{ + const size_t size = 2 * MCLBYTES + 1; + size_t space = 0; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + struct mbuf *m; + + xnb_get1pkt(&pkt, size, 0); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + + for (m = pMbuf; m != NULL; m = m->m_next) { + space += M_TRAILINGSPACE(m); + } + XNB_ASSERT(space >= size); + safe_m_freem(&pMbuf); +} + +/** + * xnb_txpkt2gnttab on an empty packet. Should return empty gnttab + */ +static void +xnb_txpkt2gnttab_empty(char *buffer, size_t buflen) +{ + int n_entries; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + pkt.list_len = 0; + + /* must call xnb_ring2pkt just to intialize pkt */ + xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons); + pkt.size = 0; + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.txb, DOMID_FIRST_RESERVED); + XNB_ASSERT(n_entries == 0); + safe_m_freem(&pMbuf); +} + +/** + * xnb_txpkt2gnttab on a short packet, that can fit in one mbuf internal buffer + * and has one request + */ +static void +xnb_txpkt2gnttab_short(char *buffer, size_t buflen) +{ + const size_t size = MINCLSIZE - 1; + int n_entries; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + + struct netif_tx_request *req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = size; + req->gref = 7; + req->offset = 17; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.txb, DOMID_FIRST_RESERVED); + XNB_ASSERT(n_entries == 1); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].len == size); + /* flags should indicate gref's for source */ + XNB_ASSERT(xnb_unit_pvt.gnttab[0].flags & GNTCOPY_source_gref); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].source.offset == req->offset); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].source.domid == DOMID_SELF); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].dest.offset == virt_to_offset( + mtod(pMbuf, vm_offset_t))); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].dest.u.gmfn == + virt_to_mfn(mtod(pMbuf, vm_offset_t))); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].dest.domid == DOMID_FIRST_RESERVED); + safe_m_freem(&pMbuf); +} + +/** + * xnb_txpkt2gnttab on a packet with two requests, that can fit into a single + * mbuf cluster + */ +static void +xnb_txpkt2gnttab_2req(char *buffer, size_t buflen) +{ + int n_entries; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + + struct netif_tx_request *req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 1900; + req->gref = 7; + req->offset = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = 500; + req->gref = 8; + req->offset = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.txb, DOMID_FIRST_RESERVED); + + XNB_ASSERT(n_entries == 2); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].len == 
1400); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].dest.offset == virt_to_offset( + mtod(pMbuf, vm_offset_t))); + + XNB_ASSERT(xnb_unit_pvt.gnttab[1].len == 500); + XNB_ASSERT(xnb_unit_pvt.gnttab[1].dest.offset == virt_to_offset( + mtod(pMbuf, vm_offset_t) + 1400)); + safe_m_freem(&pMbuf); +} + +/** + * xnb_txpkt2gnttab on a single request that spans two mbuf clusters + */ +static void +xnb_txpkt2gnttab_2cluster(char *buffer, size_t buflen) +{ + int n_entries; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + const uint16_t data_this_transaction = (MCLBYTES*2) + 1; + + struct netif_tx_request *req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = data_this_transaction; + req->gref = 8; + req->offset = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.txb, DOMID_FIRST_RESERVED); + + if (M_TRAILINGSPACE(pMbuf) == MCLBYTES) { + /* there should be three mbufs and three gnttab entries */ + XNB_ASSERT(n_entries == 3); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].len == MCLBYTES); + XNB_ASSERT( + xnb_unit_pvt.gnttab[0].dest.offset == virt_to_offset( + mtod(pMbuf, vm_offset_t))); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].source.offset == 0); + + XNB_ASSERT(xnb_unit_pvt.gnttab[1].len == MCLBYTES); + XNB_ASSERT( + xnb_unit_pvt.gnttab[1].dest.offset == virt_to_offset( + mtod(pMbuf->m_next, vm_offset_t))); + XNB_ASSERT(xnb_unit_pvt.gnttab[1].source.offset == MCLBYTES); + + XNB_ASSERT(xnb_unit_pvt.gnttab[2].len == 1); + XNB_ASSERT( + xnb_unit_pvt.gnttab[2].dest.offset == virt_to_offset( + mtod(pMbuf->m_next, vm_offset_t))); + XNB_ASSERT(xnb_unit_pvt.gnttab[2].source.offset == 2 * + MCLBYTES); + } else if (M_TRAILINGSPACE(pMbuf) == 2 * MCLBYTES) { + /* there should be two mbufs and two gnttab entries */ + XNB_ASSERT(n_entries == 2); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].len == 2 * MCLBYTES); + XNB_ASSERT( + xnb_unit_pvt.gnttab[0].dest.offset == virt_to_offset( + mtod(pMbuf, vm_offset_t))); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].source.offset == 0); + + XNB_ASSERT(xnb_unit_pvt.gnttab[1].len == 1); + XNB_ASSERT( + xnb_unit_pvt.gnttab[1].dest.offset == virt_to_offset( + mtod(pMbuf->m_next, vm_offset_t))); + XNB_ASSERT( + xnb_unit_pvt.gnttab[1].source.offset == 2 * MCLBYTES); + + } else { + /* should never get here */ + XNB_ASSERT(0); + } + if (pMbuf != NULL) + m_freem(pMbuf); +} + + +/** + * xnb_update_mbufc on a short packet that only has one gnttab entry + */ +static void +xnb_update_mbufc_short(char *buffer, size_t buflen) +{ + const size_t size = MINCLSIZE - 1; + int n_entries; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + + struct netif_tx_request *req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = size; + req->gref = 7; + req->offset = 17; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.txb, DOMID_FIRST_RESERVED); + + /* Update grant table's status fields as the hypervisor call would */ + xnb_unit_pvt.gnttab[0].status = GNTST_okay; + + xnb_update_mbufc(pMbuf, xnb_unit_pvt.gnttab, n_entries); + XNB_ASSERT(pMbuf->m_len == size); + XNB_ASSERT(pMbuf->m_pkthdr.len == size); 
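+ /* Release the mbuf chain allocated by xnb_pkt2mbufc */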
+ safe_m_freem(&pMbuf); +} + +/** + * xnb_update_mbufc on a packet with two requests, that can fit into a single + * mbuf cluster + */ +static void +xnb_update_mbufc_2req(char *buffer, size_t buflen) +{ + int n_entries; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + + struct netif_tx_request *req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = NETTXF_more_data; + req->size = 1900; + req->gref = 7; + req->offset = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = 500; + req->gref = 8; + req->offset = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + + xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.txb, DOMID_FIRST_RESERVED); + + /* Update grant table's status fields as the hypervisor call would */ + xnb_unit_pvt.gnttab[0].status = GNTST_okay; + xnb_unit_pvt.gnttab[1].status = GNTST_okay; + + xnb_update_mbufc(pMbuf, xnb_unit_pvt.gnttab, n_entries); + XNB_ASSERT(n_entries == 2); + XNB_ASSERT(pMbuf->m_pkthdr.len == 1900); + XNB_ASSERT(pMbuf->m_len == 1900); + + safe_m_freem(&pMbuf); +} + +/** + * xnb_update_mbufc on a single request that spans two mbuf clusters + */ +static void +xnb_update_mbufc_2cluster(char *buffer, size_t buflen) +{ + int i; + int n_entries; + struct xnb_pkt pkt; + struct mbuf *pMbuf; + const uint16_t data_this_transaction = (MCLBYTES*2) + 1; + + struct netif_tx_request *req = RING_GET_REQUEST(&xnb_unit_pvt.txf, + xnb_unit_pvt.txf.req_prod_pvt); + req->flags = 0; + req->size = data_this_transaction; + req->gref = 8; + req->offset = 0; + xnb_unit_pvt.txf.req_prod_pvt++; + + RING_PUSH_REQUESTS(&xnb_unit_pvt.txf); + xnb_ring2pkt(&pkt, &xnb_unit_pvt.txb, xnb_unit_pvt.txb.req_cons); + + pMbuf = xnb_pkt2mbufc(&pkt, xnb_unit_pvt.ifp); + n_entries = xnb_txpkt2gnttab(&pkt, pMbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.txb, DOMID_FIRST_RESERVED); + + /* Update grant table's status fields */ + for (i = 0; i < n_entries; i++) { + xnb_unit_pvt.gnttab[0].status = GNTST_okay; + } + xnb_update_mbufc(pMbuf, xnb_unit_pvt.gnttab, n_entries); + + if (n_entries == 3) { + /* there should be three mbufs and three gnttab entries */ + XNB_ASSERT(pMbuf->m_pkthdr.len == data_this_transaction); + XNB_ASSERT(pMbuf->m_len == MCLBYTES); + XNB_ASSERT(pMbuf->m_next->m_len == MCLBYTES); + XNB_ASSERT(pMbuf->m_next->m_next->m_len == 1); + } else if (n_entries == 2) { + /* there should be two mbufs and two gnttab entries */ + XNB_ASSERT(n_entries == 2); + XNB_ASSERT(pMbuf->m_pkthdr.len == data_this_transaction); + XNB_ASSERT(pMbuf->m_len == 2 * MCLBYTES); + XNB_ASSERT(pMbuf->m_next->m_len == 1); + } else { + /* should never get here */ + XNB_ASSERT(0); + } + safe_m_freem(&pMbuf); +} + +/** xnb_mbufc2pkt on an empty mbufc */ +static void +xnb_mbufc2pkt_empty(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + int free_slots = 64; + struct mbuf *mbuf; + + mbuf = m_get(M_WAITOK, MT_DATA); + /* + * note: it is illegal to set M_PKTHDR on a mbuf with no data. Doing so + * will cause m_freem to segfault + */ + XNB_ASSERT(mbuf->m_len == 0); + + xnb_mbufc2pkt(mbuf, &pkt, 0, free_slots); + XNB_ASSERT(! 
xnb_pkt_is_valid(&pkt)); + + safe_m_freem(&mbuf); +} + +/** xnb_mbufc2pkt on a short mbufc */ +static void +xnb_mbufc2pkt_short(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + size_t size = 128; + int free_slots = 64; + RING_IDX start = 9; + struct mbuf *mbuf; + + mbuf = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbuf->m_flags |= M_PKTHDR; + mbuf->m_pkthdr.len = size; + mbuf->m_len = size; + + xnb_mbufc2pkt(mbuf, &pkt, start, free_slots); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.size == size); + XNB_ASSERT(pkt.car_size == size); + XNB_ASSERT(! (pkt.flags & + (NETRXF_more_data | NETRXF_extra_info))); + XNB_ASSERT(pkt.list_len == 1); + XNB_ASSERT(pkt.car == start); + + safe_m_freem(&mbuf); +} + +/** xnb_mbufc2pkt on a single mbuf with an mbuf cluster */ +static void +xnb_mbufc2pkt_1cluster(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + size_t size = MCLBYTES; + int free_slots = 32; + RING_IDX start = 12; + struct mbuf *mbuf; + + mbuf = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbuf->m_flags |= M_PKTHDR; + mbuf->m_pkthdr.len = size; + mbuf->m_len = size; + + xnb_mbufc2pkt(mbuf, &pkt, start, free_slots); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.size == size); + XNB_ASSERT(pkt.car_size == size); + XNB_ASSERT(! (pkt.flags & + (NETRXF_more_data | NETRXF_extra_info))); + XNB_ASSERT(pkt.list_len == 1); + XNB_ASSERT(pkt.car == start); + + safe_m_freem(&mbuf); +} + +/** xnb_mbufc2pkt on a a two-mbuf chain with short data regions */ +static void +xnb_mbufc2pkt_2short(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + size_t size1 = MHLEN - 5; + size_t size2 = MHLEN - 15; + int free_slots = 32; + RING_IDX start = 14; + struct mbuf *mbufc, *mbufc2; + + mbufc = m_getm(NULL, size1, M_WAITOK, MT_DATA); + mbufc->m_flags |= M_PKTHDR; + if (mbufc == NULL) { + XNB_ASSERT(mbufc != NULL); + return; + } + + mbufc2 = m_getm(mbufc, size2, M_WAITOK, MT_DATA); + if (mbufc2 == NULL) { + XNB_ASSERT(mbufc2 != NULL); + safe_m_freem(&mbufc); + return; + } + mbufc2->m_pkthdr.len = size1 + size2; + mbufc2->m_len = size1; + + xnb_mbufc2pkt(mbufc2, &pkt, start, free_slots); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.size == size1 + size2); + XNB_ASSERT(pkt.car == start); + /* + * The second m_getm may allocate a new mbuf and append + * it to the chain, or it may simply extend the first mbuf. + */ + if (mbufc2->m_next != NULL) { + XNB_ASSERT(pkt.car_size == size1); + XNB_ASSERT(pkt.list_len == 1); + XNB_ASSERT(pkt.cdr == start + 1); + } + + safe_m_freem(&mbufc2); +} + +/** xnb_mbufc2pkt on a a mbuf chain with >1 mbuf cluster */ +static void +xnb_mbufc2pkt_long(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + size_t size = 14 * MCLBYTES / 3; + size_t size_remaining; + int free_slots = 15; + RING_IDX start = 3; + struct mbuf *mbufc, *m; + + mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbufc->m_flags |= M_PKTHDR; + if (mbufc == NULL) { + XNB_ASSERT(mbufc != NULL); + return; + } + + mbufc->m_pkthdr.len = size; + size_remaining = size; + for (m = mbufc; m != NULL; m = m->m_next) { + m->m_len = MAX(M_TRAILINGSPACE(m), size_remaining); + size_remaining -= m->m_len; + } + + xnb_mbufc2pkt(mbufc, &pkt, start, free_slots); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.size == size); + XNB_ASSERT(pkt.car == start); + XNB_ASSERT(pkt.car_size = mbufc->m_len); + /* + * There should be >1 response in the packet, and there is no + * extra info. + */ + XNB_ASSERT(! 
(pkt.flags & NETRXF_extra_info)); + XNB_ASSERT(pkt.cdr == pkt.car + 1); + + safe_m_freem(&mbufc); +} + +/** xnb_mbufc2pkt on a a mbuf chain with >1 mbuf cluster and extra info */ +static void +xnb_mbufc2pkt_extra(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + size_t size = 14 * MCLBYTES / 3; + size_t size_remaining; + int free_slots = 15; + RING_IDX start = 3; + struct mbuf *mbufc, *m; + + mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA); + if (mbufc == NULL) { + XNB_ASSERT(mbufc != NULL); + return; + } + + mbufc->m_flags |= M_PKTHDR; + mbufc->m_pkthdr.len = size; + mbufc->m_pkthdr.csum_flags |= CSUM_TSO; + mbufc->m_pkthdr.tso_segsz = TCP_MSS - 40; + size_remaining = size; + for (m = mbufc; m != NULL; m = m->m_next) { + m->m_len = MAX(M_TRAILINGSPACE(m), size_remaining); + size_remaining -= m->m_len; + } + + xnb_mbufc2pkt(mbufc, &pkt, start, free_slots); + XNB_ASSERT(xnb_pkt_is_valid(&pkt)); + XNB_ASSERT(pkt.size == size); + XNB_ASSERT(pkt.car == start); + XNB_ASSERT(pkt.car_size = mbufc->m_len); + /* There should be >1 response in the packet, there is extra info */ + XNB_ASSERT(pkt.flags & NETRXF_extra_info); + XNB_ASSERT(pkt.flags & NETRXF_data_validated); + XNB_ASSERT(pkt.cdr == pkt.car + 2); + XNB_ASSERT(pkt.extra.u.gso.size = mbufc->m_pkthdr.tso_segsz); + XNB_ASSERT(pkt.extra.type == XEN_NETIF_EXTRA_TYPE_GSO); + XNB_ASSERT(! (pkt.extra.flags & XEN_NETIF_EXTRA_FLAG_MORE)); + + safe_m_freem(&mbufc); +} + +/** xnb_mbufc2pkt with insufficient space in the ring */ +static void +xnb_mbufc2pkt_nospace(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + size_t size = 14 * MCLBYTES / 3; + size_t size_remaining; + int free_slots = 2; + RING_IDX start = 3; + struct mbuf *mbufc, *m; + int error; + + mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbufc->m_flags |= M_PKTHDR; + if (mbufc == NULL) { + XNB_ASSERT(mbufc != NULL); + return; + } + + mbufc->m_pkthdr.len = size; + size_remaining = size; + for (m = mbufc; m != NULL; m = m->m_next) { + m->m_len = MAX(M_TRAILINGSPACE(m), size_remaining); + size_remaining -= m->m_len; + } + + error = xnb_mbufc2pkt(mbufc, &pkt, start, free_slots); + XNB_ASSERT(error == EAGAIN); + XNB_ASSERT(! xnb_pkt_is_valid(&pkt)); + + safe_m_freem(&mbufc); +} + +/** + * xnb_rxpkt2gnttab on an empty packet. 
Should return empty gnttab + */ +static void +xnb_rxpkt2gnttab_empty(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int nr_entries; + int free_slots = 60; + struct mbuf *mbuf; + + mbuf = m_get(M_WAITOK, MT_DATA); + + xnb_mbufc2pkt(mbuf, &pkt, 0, free_slots); + nr_entries = xnb_rxpkt2gnttab(&pkt, mbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + + XNB_ASSERT(nr_entries == 0); + + safe_m_freem(&mbuf); +} + +/** xnb_rxpkt2gnttab on a short packet without extra data */ +static void +xnb_rxpkt2gnttab_short(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + int nr_entries; + size_t size = 128; + int free_slots = 60; + RING_IDX start = 9; + struct netif_rx_request *req; + struct mbuf *mbuf; + + mbuf = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbuf->m_flags |= M_PKTHDR; + mbuf->m_pkthdr.len = size; + mbuf->m_len = size; + + xnb_mbufc2pkt(mbuf, &pkt, start, free_slots); + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, + xnb_unit_pvt.txf.req_prod_pvt); + req->gref = 7; + + nr_entries = xnb_rxpkt2gnttab(&pkt, mbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + + XNB_ASSERT(nr_entries == 1); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].len == size); + /* flags should indicate gref's for dest */ + XNB_ASSERT(xnb_unit_pvt.gnttab[0].flags & GNTCOPY_dest_gref); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].dest.offset == 0); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].source.domid == DOMID_SELF); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].source.offset == virt_to_offset( + mtod(mbuf, vm_offset_t))); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].source.u.gmfn == + virt_to_mfn(mtod(mbuf, vm_offset_t))); + XNB_ASSERT(xnb_unit_pvt.gnttab[0].dest.domid == DOMID_FIRST_RESERVED); + + safe_m_freem(&mbuf); +} + +/** + * xnb_rxpkt2gnttab on a packet with two different mbufs in a single chai + */ +static void +xnb_rxpkt2gnttab_2req(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int nr_entries; + int i, num_mbufs; + size_t total_granted_size = 0; + size_t size = MJUMPAGESIZE + 1; + int free_slots = 60; + RING_IDX start = 11; + struct netif_rx_request *req; + struct mbuf *mbuf, *m; + + mbuf = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbuf->m_flags |= M_PKTHDR; + mbuf->m_pkthdr.len = size; + mbuf->m_len = size; + + xnb_mbufc2pkt(mbuf, &pkt, start, free_slots); + + for (i = 0, m=mbuf; m != NULL; i++, m = m->m_next) { + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, + xnb_unit_pvt.txf.req_prod_pvt); + req->gref = i; + req->id = 5; + } + num_mbufs = i; + + nr_entries = xnb_rxpkt2gnttab(&pkt, mbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + + XNB_ASSERT(nr_entries >= num_mbufs); + for (i = 0; i < nr_entries; i++) { + int end_offset = xnb_unit_pvt.gnttab[i].len + + xnb_unit_pvt.gnttab[i].dest.offset; + XNB_ASSERT(end_offset <= PAGE_SIZE); + total_granted_size += xnb_unit_pvt.gnttab[i].len; + } + XNB_ASSERT(total_granted_size == size); +} + +/** + * xnb_rxpkt2rsp on an empty packet. 
Shouldn't make any response + */ +static void +xnb_rxpkt2rsp_empty(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int nr_entries; + int nr_reqs; + int free_slots = 60; + netif_rx_back_ring_t rxb_backup = xnb_unit_pvt.rxb; + netif_rx_sring_t rxs_backup = *xnb_unit_pvt.rxs; + struct mbuf *mbuf; + + mbuf = m_get(M_WAITOK, MT_DATA); + + xnb_mbufc2pkt(mbuf, &pkt, 0, free_slots); + nr_entries = xnb_rxpkt2gnttab(&pkt, mbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + + nr_reqs = xnb_rxpkt2rsp(&pkt, xnb_unit_pvt.gnttab, nr_entries, + &xnb_unit_pvt.rxb); + XNB_ASSERT(nr_reqs == 0); + XNB_ASSERT( + memcmp(&rxb_backup, &xnb_unit_pvt.rxb, sizeof(rxb_backup)) == 0); + XNB_ASSERT( + memcmp(&rxs_backup, xnb_unit_pvt.rxs, sizeof(rxs_backup)) == 0); + + safe_m_freem(&mbuf); +} + +/** + * xnb_rxpkt2rsp on a short packet with no extras + */ +static void +xnb_rxpkt2rsp_short(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int nr_entries, nr_reqs; + size_t size = 128; + int free_slots = 60; + RING_IDX start = 5; + struct netif_rx_request *req; + struct netif_rx_response *rsp; + struct mbuf *mbuf; + + mbuf = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbuf->m_flags |= M_PKTHDR; + mbuf->m_pkthdr.len = size; + mbuf->m_len = size; + + xnb_mbufc2pkt(mbuf, &pkt, start, free_slots); + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start); + req->gref = 7; + xnb_unit_pvt.rxb.req_cons = start; + xnb_unit_pvt.rxb.rsp_prod_pvt = start; + xnb_unit_pvt.rxs->req_prod = start + 1; + xnb_unit_pvt.rxs->rsp_prod = start; + + nr_entries = xnb_rxpkt2gnttab(&pkt, mbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + + nr_reqs = xnb_rxpkt2rsp(&pkt, xnb_unit_pvt.gnttab, nr_entries, + &xnb_unit_pvt.rxb); + + XNB_ASSERT(nr_reqs == 1); + XNB_ASSERT(xnb_unit_pvt.rxb.rsp_prod_pvt == start + 1); + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.rxb, start); + XNB_ASSERT(rsp->id == req->id); + XNB_ASSERT(rsp->offset == 0); + XNB_ASSERT((rsp->flags & (NETRXF_more_data | NETRXF_extra_info)) == 0); + XNB_ASSERT(rsp->status == size); + + safe_m_freem(&mbuf); +} + +/** + * xnb_rxpkt2rsp with extra data + */ +static void +xnb_rxpkt2rsp_extra(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int nr_entries, nr_reqs; + size_t size = 14; + int free_slots = 15; + RING_IDX start = 3; + uint16_t id = 49; + uint16_t gref = 65; + uint16_t mss = TCP_MSS - 40; + struct mbuf *mbufc; + struct netif_rx_request *req; + struct netif_rx_response *rsp; + struct netif_extra_info *ext; + + mbufc = m_getm(NULL, size, M_WAITOK, MT_DATA); + if (mbufc == NULL) { + XNB_ASSERT(mbufc != NULL); + return; + } + + mbufc->m_flags |= M_PKTHDR; + mbufc->m_pkthdr.len = size; + mbufc->m_pkthdr.csum_flags |= CSUM_TSO; + mbufc->m_pkthdr.tso_segsz = mss; + mbufc->m_len = size; + + xnb_mbufc2pkt(mbufc, &pkt, start, free_slots); + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start); + req->id = id; + req->gref = gref; + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start + 1); + req->id = id + 1; + req->gref = gref + 1; + xnb_unit_pvt.rxb.req_cons = start; + xnb_unit_pvt.rxb.rsp_prod_pvt = start; + xnb_unit_pvt.rxs->req_prod = start + 2; + xnb_unit_pvt.rxs->rsp_prod = start; + + nr_entries = xnb_rxpkt2gnttab(&pkt, mbufc, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + + nr_reqs = xnb_rxpkt2rsp(&pkt, xnb_unit_pvt.gnttab, nr_entries, + &xnb_unit_pvt.rxb); + + XNB_ASSERT(nr_reqs == 2); + XNB_ASSERT(xnb_unit_pvt.rxb.rsp_prod_pvt == start + 2); + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.rxb, start); + XNB_ASSERT(rsp->id == id); + 
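+ /* CSUM_TSO on the mbuf should surface as extra info and checksum flags in the response */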
XNB_ASSERT((rsp->flags & NETRXF_more_data) == 0); + XNB_ASSERT((rsp->flags & NETRXF_extra_info)); + XNB_ASSERT((rsp->flags & NETRXF_data_validated)); + XNB_ASSERT((rsp->flags & NETRXF_csum_blank)); + XNB_ASSERT(rsp->status == size); + + ext = (struct netif_extra_info*) + RING_GET_RESPONSE(&xnb_unit_pvt.rxb, start + 1); + XNB_ASSERT(ext->type == XEN_NETIF_EXTRA_TYPE_GSO); + XNB_ASSERT(! (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE)); + XNB_ASSERT(ext->u.gso.size == mss); + XNB_ASSERT(ext->u.gso.type == XEN_NETIF_EXTRA_TYPE_GSO); + + safe_m_freem(&mbufc); +} + +/** + * xnb_rxpkt2rsp on a packet with more than a pages's worth of data. It should + * generate two response slot + */ +static void +xnb_rxpkt2rsp_2slots(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int nr_entries, nr_reqs; + size_t size = PAGE_SIZE + 100; + int free_slots = 3; + uint16_t id1 = 17; + uint16_t id2 = 37; + uint16_t gref1 = 24; + uint16_t gref2 = 34; + RING_IDX start = 15; + struct netif_rx_request *req; + struct netif_rx_response *rsp; + struct mbuf *mbuf; + + mbuf = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbuf->m_flags |= M_PKTHDR; + mbuf->m_pkthdr.len = size; + if (mbuf->m_next != NULL) { + size_t first_len = MIN(M_TRAILINGSPACE(mbuf), size); + mbuf->m_len = first_len; + mbuf->m_next->m_len = size - first_len; + + } else { + mbuf->m_len = size; + } + + xnb_mbufc2pkt(mbuf, &pkt, start, free_slots); + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start); + req->gref = gref1; + req->id = id1; + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start + 1); + req->gref = gref2; + req->id = id2; + xnb_unit_pvt.rxb.req_cons = start; + xnb_unit_pvt.rxb.rsp_prod_pvt = start; + xnb_unit_pvt.rxs->req_prod = start + 2; + xnb_unit_pvt.rxs->rsp_prod = start; + + nr_entries = xnb_rxpkt2gnttab(&pkt, mbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + + nr_reqs = xnb_rxpkt2rsp(&pkt, xnb_unit_pvt.gnttab, nr_entries, + &xnb_unit_pvt.rxb); + + XNB_ASSERT(nr_reqs == 2); + XNB_ASSERT(xnb_unit_pvt.rxb.rsp_prod_pvt == start + 2); + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.rxb, start); + XNB_ASSERT(rsp->id == id1); + XNB_ASSERT(rsp->offset == 0); + XNB_ASSERT((rsp->flags & NETRXF_extra_info) == 0); + XNB_ASSERT(rsp->flags & NETRXF_more_data); + XNB_ASSERT(rsp->status == PAGE_SIZE); + + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.rxb, start + 1); + XNB_ASSERT(rsp->id == id2); + XNB_ASSERT(rsp->offset == 0); + XNB_ASSERT((rsp->flags & NETRXF_extra_info) == 0); + XNB_ASSERT(! 
(rsp->flags & NETRXF_more_data)); + XNB_ASSERT(rsp->status == size - PAGE_SIZE); + + safe_m_freem(&mbuf); +} + +/** xnb_rxpkt2rsp on a grant table with two sub-page entries */ +static void +xnb_rxpkt2rsp_2short(char *buffer, size_t buflen) { + struct xnb_pkt pkt; + int nr_reqs, nr_entries; + size_t size1 = MHLEN - 5; + size_t size2 = MHLEN - 15; + int free_slots = 32; + RING_IDX start = 14; + uint16_t id = 47; + uint16_t gref = 54; + struct netif_rx_request *req; + struct netif_rx_response *rsp; + struct mbuf *mbufc; + + mbufc = m_getm(NULL, size1, M_WAITOK, MT_DATA); + mbufc->m_flags |= M_PKTHDR; + if (mbufc == NULL) { + XNB_ASSERT(mbufc != NULL); + return; + } + + m_getm(mbufc, size2, M_WAITOK, MT_DATA); + XNB_ASSERT(mbufc->m_next != NULL); + mbufc->m_pkthdr.len = size1 + size2; + mbufc->m_len = size1; + mbufc->m_next->m_len = size2; + + xnb_mbufc2pkt(mbufc, &pkt, start, free_slots); + + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start); + req->gref = gref; + req->id = id; + xnb_unit_pvt.rxb.req_cons = start; + xnb_unit_pvt.rxb.rsp_prod_pvt = start; + xnb_unit_pvt.rxs->req_prod = start + 1; + xnb_unit_pvt.rxs->rsp_prod = start; + + nr_entries = xnb_rxpkt2gnttab(&pkt, mbufc, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + + nr_reqs = xnb_rxpkt2rsp(&pkt, xnb_unit_pvt.gnttab, nr_entries, + &xnb_unit_pvt.rxb); + + XNB_ASSERT(nr_entries == 2); + XNB_ASSERT(nr_reqs == 1); + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.rxb, start); + XNB_ASSERT(rsp->id == id); + XNB_ASSERT(rsp->status == size1 + size2); + XNB_ASSERT(rsp->offset == 0); + XNB_ASSERT(! (rsp->flags & (NETRXF_more_data | NETRXF_extra_info))); + + safe_m_freem(&mbufc); +} + +/** + * xnb_rxpkt2rsp on a long packet with a hypervisor gnttab_copy error + * Note: this test will result in an error message being printed to the console + * such as: + * xnb(xnb_rxpkt2rsp:1720): Got error -1 for hypervisor gnttab_copy status + */ +static void +xnb_rxpkt2rsp_copyerror(char *buffer, size_t buflen) +{ + struct xnb_pkt pkt; + int nr_entries, nr_reqs; + int id = 7; + int gref = 42; + uint16_t canary = 6859; + size_t size = 7 * MCLBYTES; + int free_slots = 9; + RING_IDX start = 2; + struct netif_rx_request *req; + struct netif_rx_response *rsp; + struct mbuf *mbuf; + + mbuf = m_getm(NULL, size, M_WAITOK, MT_DATA); + mbuf->m_flags |= M_PKTHDR; + mbuf->m_pkthdr.len = size; + mbuf->m_len = size; + + xnb_mbufc2pkt(mbuf, &pkt, start, free_slots); + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start); + req->gref = gref; + req->id = id; + xnb_unit_pvt.rxb.req_cons = start; + xnb_unit_pvt.rxb.rsp_prod_pvt = start; + xnb_unit_pvt.rxs->req_prod = start + 1; + xnb_unit_pvt.rxs->rsp_prod = start; + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start + 1); + req->gref = canary; + req->id = canary; + + nr_entries = xnb_rxpkt2gnttab(&pkt, mbuf, xnb_unit_pvt.gnttab, + &xnb_unit_pvt.rxb, DOMID_FIRST_RESERVED); + /* Inject the error*/ + xnb_unit_pvt.gnttab[2].status = GNTST_general_error; + + nr_reqs = xnb_rxpkt2rsp(&pkt, xnb_unit_pvt.gnttab, nr_entries, + &xnb_unit_pvt.rxb); + + XNB_ASSERT(nr_reqs == 1); + XNB_ASSERT(xnb_unit_pvt.rxb.rsp_prod_pvt == start + 1); + rsp = RING_GET_RESPONSE(&xnb_unit_pvt.rxb, start); + XNB_ASSERT(rsp->id == id); + XNB_ASSERT(rsp->status == NETIF_RSP_ERROR); + req = RING_GET_REQUEST(&xnb_unit_pvt.rxf, start + 1); + XNB_ASSERT(req->gref == canary); + XNB_ASSERT(req->id == canary); + + safe_m_freem(&mbuf); +} + +/** + * xnb_add_mbuf_cksum on an ARP request packet + */ +static void +xnb_add_mbuf_cksum_arp(char *buffer, size_t buflen) 
+{ + const size_t pkt_len = sizeof(struct ether_header) + + sizeof(struct ether_arp); + struct mbuf *mbufc; + struct ether_header *eh; + struct ether_arp *ep; + unsigned char pkt_orig[pkt_len]; + + mbufc = m_getm(NULL, pkt_len, M_WAITOK, MT_DATA); + /* Fill in an example arp request */ + eh = mtod(mbufc, struct ether_header*); + eh->ether_dhost[0] = 0xff; + eh->ether_dhost[1] = 0xff; + eh->ether_dhost[2] = 0xff; + eh->ether_dhost[3] = 0xff; + eh->ether_dhost[4] = 0xff; + eh->ether_dhost[5] = 0xff; + eh->ether_shost[0] = 0x00; + eh->ether_shost[1] = 0x15; + eh->ether_shost[2] = 0x17; + eh->ether_shost[3] = 0xe9; + eh->ether_shost[4] = 0x30; + eh->ether_shost[5] = 0x68; + eh->ether_type = htons(ETHERTYPE_ARP); + ep = (struct ether_arp*)(eh + 1); + ep->ea_hdr.ar_hrd = htons(ARPHRD_ETHER); + ep->ea_hdr.ar_pro = htons(ETHERTYPE_IP); + ep->ea_hdr.ar_hln = 6; + ep->ea_hdr.ar_pln = 4; + ep->ea_hdr.ar_op = htons(ARPOP_REQUEST); + ep->arp_sha[0] = 0x00; + ep->arp_sha[1] = 0x15; + ep->arp_sha[2] = 0x17; + ep->arp_sha[3] = 0xe9; + ep->arp_sha[4] = 0x30; + ep->arp_sha[5] = 0x68; + ep->arp_spa[0] = 0xc0; + ep->arp_spa[1] = 0xa8; + ep->arp_spa[2] = 0x0a; + ep->arp_spa[3] = 0x04; + bzero(&(ep->arp_tha), ETHER_ADDR_LEN); + ep->arp_tpa[0] = 0xc0; + ep->arp_tpa[1] = 0xa8; + ep->arp_tpa[2] = 0x0a; + ep->arp_tpa[3] = 0x06; + + /* fill in the length field */ + mbufc->m_len = pkt_len; + mbufc->m_pkthdr.len = pkt_len; + /* indicate that the netfront uses hw-assisted checksums */ + mbufc->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + + /* Make a backup copy of the packet */ + bcopy(mtod(mbufc, const void*), pkt_orig, pkt_len); + + /* Function under test */ + xnb_add_mbuf_cksum(mbufc); + + /* Verify that the packet's data did not change */ + XNB_ASSERT(bcmp(mtod(mbufc, const void*), pkt_orig, pkt_len) == 0); + m_freem(mbufc); +} + +/** + * Helper function that populates the ethernet header and IP header used by + * some of the xnb_add_mbuf_cksum unit tests. 
m must already be allocated + * and must be large enough + */ +static void +xnb_fill_eh_and_ip(struct mbuf *m, uint16_t ip_len, uint16_t ip_id, + uint16_t ip_p, uint16_t ip_off, uint16_t ip_sum) +{ + struct ether_header *eh; + struct ip *iph; + + eh = mtod(m, struct ether_header*); + eh->ether_dhost[0] = 0x00; + eh->ether_dhost[1] = 0x16; + eh->ether_dhost[2] = 0x3e; + eh->ether_dhost[3] = 0x23; + eh->ether_dhost[4] = 0x50; + eh->ether_dhost[5] = 0x0b; + eh->ether_shost[0] = 0x00; + eh->ether_shost[1] = 0x16; + eh->ether_shost[2] = 0x30; + eh->ether_shost[3] = 0x00; + eh->ether_shost[4] = 0x00; + eh->ether_shost[5] = 0x00; + eh->ether_type = htons(ETHERTYPE_IP); + iph = (struct ip*)(eh + 1); + iph->ip_hl = 0x5; /* 5 dwords == 20 bytes */ + iph->ip_v = 4; /* IP v4 */ + iph->ip_tos = 0; + iph->ip_len = htons(ip_len); + iph->ip_id = htons(ip_id); + iph->ip_off = htons(ip_off); + iph->ip_ttl = 64; + iph->ip_p = ip_p; + iph->ip_sum = htons(ip_sum); + iph->ip_src.s_addr = htonl(0xc0a80a04); + iph->ip_dst.s_addr = htonl(0xc0a80a05); +} + +/** + * xnb_add_mbuf_cksum on an ICMP packet, based on a tcpdump of an actual + * ICMP packet + */ +static void +xnb_add_mbuf_cksum_icmp(char *buffer, size_t buflen) +{ + const size_t icmp_len = 64; /* set by ping(1) */ + const size_t pkt_len = sizeof(struct ether_header) + + sizeof(struct ip) + icmp_len; + struct mbuf *mbufc; + struct ether_header *eh; + struct ip *iph; + struct icmp *icmph; + unsigned char pkt_orig[icmp_len]; + uint32_t *tv_field; + uint8_t *data_payload; + int i; + const uint16_t ICMP_CSUM = 0xaed7; + const uint16_t IP_CSUM = 0xe533; + + mbufc = m_getm(NULL, pkt_len, M_WAITOK, MT_DATA); + /* Fill in an example ICMP ping request */ + eh = mtod(mbufc, struct ether_header*); + xnb_fill_eh_and_ip(mbufc, 84, 28, IPPROTO_ICMP, 0, 0); + iph = (struct ip*)(eh + 1); + icmph = (struct icmp*)(iph + 1); + icmph->icmp_type = ICMP_ECHO; + icmph->icmp_code = 0; + icmph->icmp_cksum = htons(ICMP_CSUM); + icmph->icmp_id = htons(31492); + icmph->icmp_seq = htons(0); + /* + * ping(1) uses bcopy to insert a native-endian timeval after icmp_seq. + * For this test, we will set the bytes individually for portability. 
+ */ + tv_field = (uint32_t*)(&(icmph->icmp_hun)); + tv_field[0] = 0x4f02cfac; + tv_field[1] = 0x0007c46a; + /* + * Remainder of packet is an incrmenting 8 bit integer, starting with 8 + */ + data_payload = (uint8_t*)(&tv_field[2]); + for (i = 8; i < 37; i++) { + *data_payload++ = i; + } + + /* fill in the length field */ + mbufc->m_len = pkt_len; + mbufc->m_pkthdr.len = pkt_len; + /* indicate that the netfront uses hw-assisted checksums */ + mbufc->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + + bcopy(mtod(mbufc, const void*), pkt_orig, icmp_len); + /* Function under test */ + xnb_add_mbuf_cksum(mbufc); + + /* Check the IP checksum */ + XNB_ASSERT(iph->ip_sum == htons(IP_CSUM)); + + /* Check that the ICMP packet did not change */ + XNB_ASSERT(bcmp(icmph, pkt_orig, icmp_len)); + m_freem(mbufc); +} + +/** + * xnb_add_mbuf_cksum on a UDP packet, based on a tcpdump of an actual + * UDP packet + */ +static void +xnb_add_mbuf_cksum_udp(char *buffer, size_t buflen) +{ + const size_t udp_len = 16; + const size_t pkt_len = sizeof(struct ether_header) + + sizeof(struct ip) + udp_len; + struct mbuf *mbufc; + struct ether_header *eh; + struct ip *iph; + struct udphdr *udp; + uint8_t *data_payload; + const uint16_t IP_CSUM = 0xe56b; + const uint16_t UDP_CSUM = 0xdde2; + + mbufc = m_getm(NULL, pkt_len, M_WAITOK, MT_DATA); + /* Fill in an example UDP packet made by 'uname | nc -u <host> 2222 */ + eh = mtod(mbufc, struct ether_header*); + xnb_fill_eh_and_ip(mbufc, 36, 4, IPPROTO_UDP, 0, 0xbaad); + iph = (struct ip*)(eh + 1); + udp = (struct udphdr*)(iph + 1); + udp->uh_sport = htons(0x51ae); + udp->uh_dport = htons(0x08ae); + udp->uh_ulen = htons(udp_len); + udp->uh_sum = htons(0xbaad); /* xnb_add_mbuf_cksum will fill this in */ + data_payload = (uint8_t*)(udp + 1); + data_payload[0] = 'F'; + data_payload[1] = 'r'; + data_payload[2] = 'e'; + data_payload[3] = 'e'; + data_payload[4] = 'B'; + data_payload[5] = 'S'; + data_payload[6] = 'D'; + data_payload[7] = '\n'; + + /* fill in the length field */ + mbufc->m_len = pkt_len; + mbufc->m_pkthdr.len = pkt_len; + /* indicate that the netfront uses hw-assisted checksums */ + mbufc->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + + /* Function under test */ + xnb_add_mbuf_cksum(mbufc); + + /* Check the checksums */ + XNB_ASSERT(iph->ip_sum == htons(IP_CSUM)); + XNB_ASSERT(udp->uh_sum == htons(UDP_CSUM)); + + m_freem(mbufc); +} + +/** + * Helper function that populates a TCP packet used by all of the + * xnb_add_mbuf_cksum tcp unit tests. 
m must already be allocated and must be + * large enough + */ +static void +xnb_fill_tcp(struct mbuf *m) +{ + struct ether_header *eh; + struct ip *iph; + struct tcphdr *tcp; + uint32_t *options; + uint8_t *data_payload; + + /* Fill in an example TCP packet made by 'uname | nc <host> 2222' */ + eh = mtod(m, struct ether_header*); + xnb_fill_eh_and_ip(m, 60, 8, IPPROTO_TCP, IP_DF, 0); + iph = (struct ip*)(eh + 1); + tcp = (struct tcphdr*)(iph + 1); + tcp->th_sport = htons(0x9cd9); + tcp->th_dport = htons(2222); + tcp->th_seq = htonl(0x00f72b10); + tcp->th_ack = htonl(0x7f37ba6c); + tcp->th_x2 = 0; + tcp->th_off = 8; + tcp->th_flags = 0x18; + tcp->th_win = htons(0x410); + /* th_sum is incorrect; will be inserted by function under test */ + tcp->th_sum = htons(0xbaad); + tcp->th_urp = htons(0); + /* + * The following 12 bytes of options encode: + * [nop, nop, TS val 33247 ecr 3457687679] + */ + options = (uint32_t*)(tcp + 1); + options[0] = htonl(0x0101080a); + options[1] = htonl(0x000081df); + options[2] = htonl(0xce18207f); + data_payload = (uint8_t*)(&options[3]); + data_payload[0] = 'F'; + data_payload[1] = 'r'; + data_payload[2] = 'e'; + data_payload[3] = 'e'; + data_payload[4] = 'B'; + data_payload[5] = 'S'; + data_payload[6] = 'D'; + data_payload[7] = '\n'; +} + +/** + * xnb_add_mbuf_cksum on a TCP packet, based on a tcpdump of an actual TCP + * packet + */ +static void +xnb_add_mbuf_cksum_tcp(char *buffer, size_t buflen) +{ + const size_t payload_len = 8; + const size_t tcp_options_len = 12; + const size_t pkt_len = sizeof(struct ether_header) + sizeof(struct ip) + + sizeof(struct tcphdr) + tcp_options_len + payload_len; + struct mbuf *mbufc; + struct ether_header *eh; + struct ip *iph; + struct tcphdr *tcp; + const uint16_t IP_CSUM = 0xa55a; + const uint16_t TCP_CSUM = 0x2f64; + + mbufc = m_getm(NULL, pkt_len, M_WAITOK, MT_DATA); + /* Fill in an example TCP packet made by 'uname | nc <host> 2222' */ + xnb_fill_tcp(mbufc); + eh = mtod(mbufc, struct ether_header*); + iph = (struct ip*)(eh + 1); + tcp = (struct tcphdr*)(iph + 1); + + /* fill in the length field */ + mbufc->m_len = pkt_len; + mbufc->m_pkthdr.len = pkt_len; + /* indicate that the netfront uses hw-assisted checksums */ + mbufc->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + + /* Function under test */ + xnb_add_mbuf_cksum(mbufc); + + /* Check the checksums */ + XNB_ASSERT(iph->ip_sum == htons(IP_CSUM)); + XNB_ASSERT(tcp->th_sum == htons(TCP_CSUM)); + + m_freem(mbufc); +} + +/** + * xnb_add_mbuf_cksum on a TCP packet that does not use HW assisted checksums + */ +static void +xnb_add_mbuf_cksum_tcp_swcksum(char *buffer, size_t buflen) +{ + const size_t payload_len = 8; + const size_t tcp_options_len = 12; + const size_t pkt_len = sizeof(struct ether_header) + sizeof(struct ip) + + sizeof(struct tcphdr) + tcp_options_len + payload_len; + struct mbuf *mbufc; + struct ether_header *eh; + struct ip *iph; + struct tcphdr *tcp; + /* Use deliberately bad checksums, and verify that they don't get */ + /* corrected by xnb_add_mbuf_cksum */ + const uint16_t IP_CSUM = 0xdead; + const uint16_t TCP_CSUM = 0xbeef; + + mbufc = m_getm(NULL, pkt_len, M_WAITOK, MT_DATA); + /* Fill in an example TCP packet made by 'uname | nc <host> 2222' */ + xnb_fill_tcp(mbufc); + eh = mtod(mbufc, struct ether_header*); + iph = (struct ip*)(eh + 1); + iph->ip_sum = htons(IP_CSUM); + tcp = (struct tcphdr*)(iph + 1); + tcp->th_sum = htons(TCP_CSUM); + + /* fill in the length field */ + mbufc->m_len = pkt_len; + 
mbufc->m_pkthdr.len = pkt_len; + /* indicate that the netfront does not use hw-assisted checksums */ + mbufc->m_pkthdr.csum_flags = 0; + + /* Function under test */ + xnb_add_mbuf_cksum(mbufc); + + /* Check that the checksums didn't change */ + XNB_ASSERT(iph->ip_sum == htons(IP_CSUM)); + XNB_ASSERT(tcp->th_sum == htons(TCP_CSUM)); + + m_freem(mbufc); +} + +/** + * sscanf on unsigned chars + */ +static void +xnb_sscanf_hhu(char *buffer, size_t buflen) +{ + const char mystr[] = "137"; + uint8_t dest[12]; + int i; + + for (i = 0; i < 12; i++) + dest[i] = 'X'; + + sscanf(mystr, "%hhu", &dest[4]); + for (i = 0; i < 12; i++) + XNB_ASSERT(dest[i] == (i == 4 ? 137 : 'X')); +} + +/** + * sscanf on signed chars + */ +static void +xnb_sscanf_hhd(char *buffer, size_t buflen) +{ + const char mystr[] = "-27"; + int8_t dest[12]; + int i; + + for (i = 0; i < 12; i++) + dest[i] = 'X'; + + sscanf(mystr, "%hhd", &dest[4]); + for (i = 0; i < 12; i++) + XNB_ASSERT(dest[i] == (i == 4 ? -27 : 'X')); +} + +/** + * sscanf on signed long longs + */ +static void +xnb_sscanf_lld(char *buffer, size_t buflen) +{ + const char mystr[] = "-123456789012345"; /* about -2**47 */ + long long dest[3]; + int i; + + for (i = 0; i < 3; i++) + dest[i] = (long long)0xdeadbeefdeadbeef; + + sscanf(mystr, "%lld", &dest[1]); + for (i = 0; i < 3; i++) + XNB_ASSERT(dest[i] == (i != 1 ? (long long)0xdeadbeefdeadbeef : + -123456789012345)); +} + +/** + * sscanf on unsigned long longs + */ +static void +xnb_sscanf_llu(char *buffer, size_t buflen) +{ + const char mystr[] = "12802747070103273189"; + unsigned long long dest[3]; + int i; + + for (i = 0; i < 3; i++) + dest[i] = (long long)0xdeadbeefdeadbeef; + + sscanf(mystr, "%llu", &dest[1]); + for (i = 0; i < 3; i++) + XNB_ASSERT(dest[i] == (i != 1 ? (long long)0xdeadbeefdeadbeef : + 12802747070103273189ull)); +} + +/** + * sscanf on unsigned short short n's + */ +static void +xnb_sscanf_hhn(char *buffer, size_t buflen) +{ + const char mystr[] = + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f"; + unsigned char dest[12]; + int i; + + for (i = 0; i < 12; i++) + dest[i] = (unsigned char)'X'; + + sscanf(mystr, + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f%hhn", &dest[4]); + for (i = 0; i < 12; i++) + XNB_ASSERT(dest[i] == (i == 4 ? 160 : 'X')); +} diff --git a/sys/kern/subr_scanf.c b/sys/kern/subr_scanf.c index 0814953..824e392 100644 --- a/sys/kern/subr_scanf.c +++ b/sys/kern/subr_scanf.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #define POINTER 0x10 /* weird %p pointer (`fake hex') */ #define NOSKIP 0x20 /* do not skip blanks */ #define QUAD 0x400 +#define SHORTSHORT 0x4000 /** hh: char */ /* * The following are used in numeric conversions only: @@ -160,13 +161,23 @@ literal: flags |= SUPPRESS; goto again; case 'l': - flags |= LONG; + if (flags & LONG){ + flags &= ~LONG; + flags |= QUAD; + } else { + flags |= LONG; + } goto again; case 'q': flags |= QUAD; goto again; case 'h': - flags |= SHORT; + if (flags & SHORT){ + flags &= ~SHORT; + flags |= SHORTSHORT; + } else { + flags |= SHORT; + } goto again; case '0': case '1': case '2': case '3': case '4': @@ -235,7 +246,9 @@ literal: nconversions++; if (flags & SUPPRESS) /* ??? 
*/ continue; - if (flags & SHORT) + if (flags & SHORTSHORT) + *va_arg(ap, char *) = nread; + else if (flags & SHORT) *va_arg(ap, short *) = nread; else if (flags & LONG) *va_arg(ap, long *) = nread; @@ -510,6 +523,8 @@ literal: if (flags & POINTER) *va_arg(ap, void **) = (void *)(uintptr_t)res; + else if (flags & SHORTSHORT) + *va_arg(ap, char *) = res; else if (flags & SHORT) *va_arg(ap, short *) = res; else if (flags & LONG) diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index 871132c..620246b 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -1410,6 +1410,11 @@ m_print(const struct mbuf *m, int maxlen) int pdata; const struct mbuf *m2; + if (m == NULL) { + printf("mbuf: %p\n", m); + return; + } + if (m->m_flags & M_PKTHDR) len = m->m_pkthdr.len; else diff --git a/sys/xen/interface/io/netif.h b/sys/xen/interface/io/netif.h index fbb5c27..261b7d9 100644 --- a/sys/xen/interface/io/netif.h +++ b/sys/xen/interface/io/netif.h @@ -42,7 +42,7 @@ * This is the 'wire' format for packets: * Request 1: netif_tx_request -- NETTXF_* (any flags) * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info) - * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE) + * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_FLAG_MORE) * Request 4: netif_tx_request -- NETTXF_more_data * Request 5: netif_tx_request -- NETTXF_more_data * ... @@ -70,7 +70,9 @@ struct netif_tx_request { uint16_t offset; /* Offset within buffer page */ uint16_t flags; /* NETTXF_* */ uint16_t id; /* Echoed in response message. */ - uint16_t size; /* Packet size in bytes. */ + uint16_t size; /* For the first request in a packet, the packet + size in bytes. For subsequent requests, the + size of that request's associated data in bytes*/ }; typedef struct netif_tx_request netif_tx_request_t; @@ -175,7 +177,7 @@ struct netif_rx_response { uint16_t id; uint16_t offset; /* Offset in page of start of received packet */ uint16_t flags; /* NETRXF_* */ - int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */ + int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed response size. */ }; typedef struct netif_rx_response netif_rx_response_t; |
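
The expanded comment on netif_tx_request.size above can be read as follows. This is an illustrative sketch only, not code from this commit; the helper name and the byte counts are hypothetical, and the include path is assumed to match the header's location in the tree.

/*
 * Sketch: how the size field differs between the first and a later
 * request of a packet that spans two tx slots, per the comment above.
 * Hypothetical values; not part of the commit.
 */
#include <xen/interface/io/netif.h>

static void
example_fill_tx_sizes(struct netif_tx_request *first,
    struct netif_tx_request *rest)
{
	/* A 1600-byte packet split 1400 + 200 across two requests. */
	first->size = 1600;			/* first slot: total packet size */
	first->flags = NETTXF_more_data;	/* another slot follows */
	rest->size = 200;			/* later slot: only its own data */
	rest->flags = 0;
}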
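
The subr_scanf.c hunk above makes a doubled "h" store through a char pointer (SHORTSHORT) and a doubled "l" behave as "q" (long long). A minimal userland sketch of the same conversions the xnb_sscanf_* unit tests exercise, assuming standard C library semantics; it is not part of the commit.

#include <stdio.h>

int
main(void)
{
	unsigned char u8;	/* %hhu stores into an unsigned char */
	signed char s8;		/* %hhd stores into a signed char */
	long long ll;		/* %lld stores into a long long */
	unsigned long long ull;	/* %llu stores into an unsigned long long */

	sscanf("137", "%hhu", &u8);			/* u8 == 137 */
	sscanf("-27", "%hhd", &s8);			/* s8 == -27 */
	sscanf("-123456789012345", "%lld", &ll);
	sscanf("12802747070103273189", "%llu", &ull);

	printf("%d %d %lld %llu\n", u8, s8, ll, ull);
	return (0);
}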