summaryrefslogtreecommitdiffstats
path: root/sys/net
diff options
context:
space:
mode:
authoradrian <adrian@FreeBSD.org>2015-10-15 20:36:04 +0000
committeradrian <adrian@FreeBSD.org>2015-10-15 20:36:04 +0000
commit766753c6f7d99bd0ec7c6d3dd429424cb1b000b6 (patch)
tree405e433dfe84a7df7d2f70a4d8a694d6fa93cc7e /sys/net
parentfa397f3a4bf24a389a778d0a2c88676e33b91ac7 (diff)
downloadFreeBSD-src-766753c6f7d99bd0ec7c6d3dd429424cb1b000b6.zip
FreeBSD-src-766753c6f7d99bd0ec7c6d3dd429424cb1b000b6.tar.gz
Add the paravirt.h support from -HEAD.
Submitted by: eric Sponsored by: Norse Corp, Inc.
Diffstat (limited to 'sys/net')
-rw-r--r--sys/net/paravirt.h157
1 files changed, 157 insertions, 0 deletions
diff --git a/sys/net/paravirt.h b/sys/net/paravirt.h
new file mode 100644
index 0000000..e2885bc
--- /dev/null
+++ b/sys/net/paravirt.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NET_PARAVIRT_H
+#define NET_PARAVIRT_H
+
+ /*
+ * $FreeBSD$
+ *
+ Support for virtio-like communication between host (H) and guest (G) NICs.
+
+ THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.
+
+ The guest allocates the shared Communication Status Block (csb) and
+ write its physical address at CSBAL and CSBAH (data is little endian).
+ csb->csb_on enables the mode. If disabled, the device acts a regular one.
+
+ Notifications for tx and rx are exchanged without vm exits
+ if possible. In particular (only mentioning csb mode below),
+ the following actions are performed. In the description below,
+ "double check" means verifying again the condition that caused
+ the previous action, and reverting the action if the condition has
+ changed. The condition typically depends on a variable set by the
+ other party, and the double check is done to avoid races. E.g.
+
+ // start with A=0
+ again:
+ // do something
+ if ( cond(C) ) { // C is written by the other side
+ A = 1;
+ // barrier
+ if ( !cond(C) ) {
+ A = 0;
+ goto again;
+ }
+ }
+
+ TX: start from idle:
+ H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
+ transmissions, G always updates guest_tdt. If host_need_txkick == 1,
+ G also writes to the TDT, which acts as a kick to H (so pending
+ writes are always dispatched to H as soon as possible.)
+
+ TX: active state:
+ On the kick (TDT write) H sets host_need_txkick == 0 (if not
+ done already by G), and starts an I/O thread trying to consume
+ packets from TDH to guest_tdt, periodically refreshing host_tdh
+ and TDH. When host_tdh == guest_tdt, H sets host_need_txkick=1,
+ and then does the "double check" for race avoidance.
+
+ TX: G runs out of buffers
+ XXX there are two mechanisms, one boolean (using guest_need_txkick)
+ and one with a threshold (using guest_txkick_at). They are mutually
+ exclusive.
+ BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
+ the double check. If H finds guest_need_txkick== 1 on a write
+ to TDH, it also generates an interrupt.
+ THRESHOLD: G sets guest_txkick_at to the TDH value for which it
+ wants to receive an interrupt. When H detects that TDH moves
+ across guest_txkick_at, it generates an interrupt.
+ This second mechanism reduces the number of interrupts and
+ TDT writes on the transmit side when the host is too slow.
+
+ RX: start from idle
+ G starts with guest_need_rxkick = 1 when the receive ring is empty.
+ As packets arrive, H updates host_rdh (and RDH) and also generates an
+ interrupt when guest_need_rxkick == 1 (so incoming packets are
+ always reported to G as soon as possible, apart from interrupt
+ moderation delays). It also tracks guest_rdt for new buffers.
+
+ RX: active state
+ As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
+ draining packets from the receive ring, while updating guest_rdt
+ When G runs out of packets it sets guest_need_rxkick=1 and does the
+ double check.
+
+ RX: H runs out of buffers
+ XXX there are two mechanisms, one boolean (using host_need_rxkick)
+ and one with a threshold (using host_xxkick_at). They are mutually
+ exclusive.
+ BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
+ double check. If G finds host_need_rxkick==1 on updating guest_rdt,
+ it also writes to RDT causing a kick to H.
+ THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
+ to receive a kick. When G detects that guest_rdt moves across
+ host_rxkick_at, it writes to RDT thus generates a kick.
+ This second mechanism reduces the number of kicks and
+ RDT writes on the receive side when the guest is too slow and
+ would free only a few buffers at a time.
+
+ */
+struct paravirt_csb {
+ /* XXX revise the layout to minimize cache bounces.
+ * Usage is described as follows:
+ * [GH][RW][+-0] guest/host reads/writes frequently/rarely/almost never
+ */
+ /* these are (mostly) written by the guest */
+ uint32_t guest_tdt; /* GW+ HR+ pkt to transmit */
+ uint32_t guest_need_txkick; /* GW- HR+ G ran out of tx bufs, request kick */
+ uint32_t guest_need_rxkick; /* GW- HR+ G ran out of rx pkts, request kick */
+ uint32_t guest_csb_on; /* GW- HR+ enable paravirtual mode */
+ uint32_t guest_rdt; /* GW+ HR+ rx buffers available */
+ uint32_t guest_txkick_at; /* GW- HR+ tx ring pos. where G expects an intr */
+ uint32_t guest_use_msix; /* GW0 HR0 guest uses MSI-X interrupts. */
+ uint32_t pad[9];
+
+ /* these are (mostly) written by the host */
+ uint32_t host_tdh; /* GR0 HW- shadow register, mostly unused */
+ uint32_t host_need_txkick; /* GR+ HW- start the iothread */
+ uint32_t host_txcycles_lim; /* GW- HR- how much to spin before sleep.
+ * set by the guest */
+ uint32_t host_txcycles; /* GR0 HW- counter, but no need to be exported */
+ uint32_t host_rdh; /* GR0 HW- shadow register, mostly unused */
+ uint32_t host_need_rxkick; /* GR+ HW- flush rx queued packets */
+ uint32_t host_isr; /* GR* HW* shadow copy of ISR */
+ uint32_t host_rxkick_at; /* GR+ HW- rx ring pos where H expects a kick */
+ uint32_t vnet_ring_high; /* Vnet ring physical address high. */
+ uint32_t vnet_ring_low; /* Vnet ring physical address low. */
+};
+
+#define NET_PARAVIRT_CSB_SIZE 4096
+#define NET_PARAVIRT_NONE (~((uint32_t)0))
+
+#ifdef QEMU_PCI_H
+
+/*
+ * API functions only available within QEMU
+ */
+
+void paravirt_configure_csb(struct paravirt_csb** csb, uint32_t csbbal,
+ uint32_t csbbah, QEMUBH* tx_bh, AddressSpace *as);
+
+#endif /* QEMU_PCI_H */
+
+#endif /* NET_PARAVIRT_H */
OpenPOWER on IntegriCloud