From 510c672e13b9748ab91e72576bf72a6ac7a325af Mon Sep 17 00:00:00 2001
From: kmacy
Date: Tue, 12 Aug 2008 20:01:57 +0000
Subject: Import Xen paravirtual drivers.

MFC after: 2 weeks
---
 sys/dev/xen/netback/netback.c | 1585 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1585 insertions(+)
 create mode 100644 sys/dev/xen/netback/netback.c

diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c
new file mode 100644
index 0000000..950a68c
--- /dev/null
+++ b/sys/dev/xen/netback/netback.c
@@ -0,0 +1,1585 @@
+/*
+ * Copyright (c) 2006, Cisco Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#ifdef XEN_NETBACK_DEBUG
+#define DPRINTF(fmt, args...) \
+	printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) ((void)0)
+#endif
+
+#ifdef XEN_NETBACK_DEBUG_LOTS
+#define DDPRINTF(fmt, args...) \
+	printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
+#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
+#else
+#define DDPRINTF(fmt, args...) ((void)0)
+#define DPRINTF_MBUF(_m) ((void)0)
+#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
+#endif
+
+#define WPRINTF(fmt, args...)
\ + printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) +#define BUG_ON PANIC_IF + +#define IFNAME(_np) (_np)->ifp->if_xname + +#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) + +struct ring_ref { + vm_offset_t va; + grant_handle_t handle; + uint64_t bus_addr; +}; + +typedef struct netback_info { + + /* Schedule lists */ + STAILQ_ENTRY(netback_info) next_tx; + STAILQ_ENTRY(netback_info) next_rx; + int on_tx_sched_list; + int on_rx_sched_list; + + struct xenbus_device *xdev; + XenbusState frontend_state; + + domid_t domid; + int handle; + char *bridge; + + int rings_connected; + struct ring_ref tx_ring_ref; + struct ring_ref rx_ring_ref; + netif_tx_back_ring_t tx; + netif_rx_back_ring_t rx; + evtchn_port_t evtchn; + int irq; + void *irq_cookie; + + struct ifnet *ifp; + int ref_cnt; + + device_t ndev; + int attached; +} netif_t; + + +#define MAX_PENDING_REQS 256 +#define PKT_PROT_LEN 64 + +static struct { + netif_tx_request_t req; + netif_t *netif; +} pending_tx_info[MAX_PENDING_REQS]; +static uint16_t pending_ring[MAX_PENDING_REQS]; +typedef unsigned int PEND_RING_IDX; +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) +static PEND_RING_IDX pending_prod, pending_cons; +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +static unsigned long mmap_vstart; +#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) + +/* Freed TX mbufs get batched on this ring before return to pending_ring. */ +static uint16_t dealloc_ring[MAX_PENDING_REQS]; +static PEND_RING_IDX dealloc_prod, dealloc_cons; + +static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; +static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; +static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE]; + +static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; +static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; +static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; + +static struct task net_tx_task, net_rx_task; +static struct callout rx_task_callout; + +static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list = + STAILQ_HEAD_INITIALIZER(tx_sched_list); +static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list = + STAILQ_HEAD_INITIALIZER(rx_sched_list); +static struct mtx tx_sched_list_lock; +static struct mtx rx_sched_list_lock; + +static int vif_unit_maker = 0; + +/* Protos */ +static void netback_start(struct ifnet *ifp); +static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); +static int vif_add_dev(struct xenbus_device *xdev); +static void disconnect_rings(netif_t *netif); + +#ifdef XEN_NETBACK_DEBUG_LOTS +/* Debug code to display the contents of an mbuf */ +static void +print_mbuf(struct mbuf *m, int max) +{ + int i, j=0; + printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len); + for (; m; m = m->m_next) { + unsigned char *d = m->m_data; + for (i=0; i < m->m_len; i++) { + if (max && j == max) + break; + if ((j++ % 16) == 0) + printf("\n%04x:", j); + printf(" %02x", d[i]); + } + } + printf("\n"); +} +#endif + + +#define MAX_MFN_ALLOC 64 +static unsigned long mfn_list[MAX_MFN_ALLOC]; +static unsigned int alloc_index = 0; + +static unsigned long +alloc_mfn(void) +{ + unsigned long mfn = 0; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = MAX_MFN_ALLOC, + .extent_order = 0, + .domid = DOMID_SELF + }; + if ( unlikely(alloc_index == 0) ) + alloc_index 
= HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation); + if ( alloc_index != 0 ) + mfn = mfn_list[--alloc_index]; + return mfn; +} + +static unsigned long +alloc_empty_page_range(unsigned long nr_pages) +{ + void *pages; + int i = 0, j = 0; + multicall_entry_t mcl[17]; + unsigned long mfn_list[16]; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = 0, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (pages == NULL) + return 0; + + memset(mcl, 0, sizeof(mcl)); + + while (i < nr_pages) { + unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); + + mcl[j].op = __HYPERVISOR_update_va_mapping; + mcl[j].args[0] = va; + + mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; + + xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; + + if (j == 16 || i == nr_pages) { + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; + + reservation.nr_extents = j; + + mcl[j].op = __HYPERVISOR_memory_op; + mcl[j].args[0] = XENMEM_decrease_reservation; + mcl[j].args[1] = (unsigned long)&reservation; + + (void)HYPERVISOR_multicall(mcl, j+1); + + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; + j = 0; + } + } + + return (unsigned long)pages; +} + +#ifdef XEN_NETBACK_FIXUP_CSUM +static void +fixup_checksum(struct mbuf *m) +{ + struct ether_header *eh = mtod(m, struct ether_header *); + struct ip *ip = (struct ip *)(eh + 1); + int iphlen = ip->ip_hl << 2; + int iplen = ntohs(ip->ip_len); + + if ((m->m_pkthdr.csum_flags & CSUM_TCP)) { + struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen); + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(IPPROTO_TCP + (iplen - iphlen))); + th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen); + m->m_pkthdr.csum_flags &= ~CSUM_TCP; + } else { + u_short csum; + struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen); + uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(IPPROTO_UDP + (iplen - iphlen))); + if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0) + csum = 0xffff; + uh->uh_sum = csum; + m->m_pkthdr.csum_flags &= ~CSUM_UDP; + } +} +#endif + +/* Add the interface to the specified bridge */ +static int +add_to_bridge(struct ifnet *ifp, char *bridge) +{ + struct ifdrv ifd; + struct ifbreq ifb; + struct ifnet *ifp_bridge = ifunit(bridge); + + if (!ifp_bridge) + return ENOENT; + + bzero(&ifd, sizeof(ifd)); + bzero(&ifb, sizeof(ifb)); + + strcpy(ifb.ifbr_ifsname, ifp->if_xname); + strcpy(ifd.ifd_name, ifp->if_xname); + ifd.ifd_cmd = BRDGADD; + ifd.ifd_len = sizeof(ifb); + ifd.ifd_data = &ifb; + + return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd); + +} + +static int +netif_create(int handle, struct xenbus_device *xdev, char *bridge) +{ + netif_t *netif; + struct ifnet *ifp; + + netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO); + if (!netif) + return ENOMEM; + + netif->ref_cnt = 1; + netif->handle = handle; + netif->domid = xdev->otherend_id; + netif->xdev = xdev; + netif->bridge = bridge; + xdev->data = netif; + + /* Set up ifnet structure */ + ifp = netif->ifp = if_alloc(IFT_ETHER); + if (!ifp) { + if (bridge) + free(bridge, M_DEVBUF); + free(netif, M_DEVBUF); + return ENOMEM; + } + + ifp->if_softc = netif; + if_initname(ifp, "vif", + atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ ); + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; + ifp->if_output = ether_output; + ifp->if_start = netback_start; + 
ifp->if_ioctl = netback_ioctl;
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
+
+	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
+
+	return 0;
+}
+
+static void
+netif_get(netif_t *netif)
+{
+	atomic_add_int(&netif->ref_cnt, 1);
+}
+
+static void
+netif_put(netif_t *netif)
+{
+	if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) {
+		DPRINTF("%s\n", IFNAME(netif));
+		disconnect_rings(netif);
+		if (netif->ifp) {
+			if_free(netif->ifp);
+			netif->ifp = NULL;
+		}
+		if (netif->bridge)
+			free(netif->bridge, M_DEVBUF);
+		free(netif, M_DEVBUF);
+	}
+}
+
+static int
+netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	switch (cmd) {
+	case SIOCSIFFLAGS:
+		DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
+			IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags);
+		return 0;
+	}
+
+	DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);
+
+	return ether_ioctl(ifp, cmd, data);
+}
+
+static inline void
+maybe_schedule_tx_action(void)
+{
+	smp_mb();
+	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
+		taskqueue_enqueue(taskqueue_swi, &net_tx_task);
+}
+
+/* Removes netif from front of list and does not call netif_put() (caller must) */
+static netif_t *
+remove_from_tx_schedule_list(void)
+{
+	netif_t *netif;
+
+	mtx_lock(&tx_sched_list_lock);
+
+	if ((netif = STAILQ_FIRST(&tx_sched_list))) {
+		STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx);
+		STAILQ_NEXT(netif, next_tx) = NULL;
+		netif->on_tx_sched_list = 0;
+	}
+
+	mtx_unlock(&tx_sched_list_lock);
+
+	return netif;
+}
+
+/* Adds netif to end of list and calls netif_get() */
+static void
+add_to_tx_schedule_list_tail(netif_t *netif)
+{
+	if (netif->on_tx_sched_list)
+		return;
+
+	mtx_lock(&tx_sched_list_lock);
+	if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+		netif_get(netif);
+		STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
+		netif->on_tx_sched_list = 1;
+	}
+	mtx_unlock(&tx_sched_list_lock);
+}
+
+/*
+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
+ * If this driver is pipelining transmit requests then we can be very
+ * aggressive in avoiding new-packet notifications -- frontend only needs to
+ * send a notification if there are no outstanding unreceived responses.
+ * If we may be buffering transmit requests for any reason then we must be
+ * rather more conservative and treat this as the final check for pending work.
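+ *
+ * Nothing in this file defines CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER,
+ * so the conservative RING_FINAL_CHECK_FOR_REQUESTS() path in
+ * netif_schedule_tx_work() below is the one normally compiled in.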
+ */ +static void +netif_schedule_tx_work(netif_t *netif) +{ + int more_to_do; + +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER + more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx); +#else + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); +#endif + + if (more_to_do) { + DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif)); + add_to_tx_schedule_list_tail(netif); + maybe_schedule_tx_action(); + } +} + +static struct mtx dealloc_lock; +MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS); + +static void +netif_idx_release(uint16_t pending_idx) +{ + mtx_lock_spin(&dealloc_lock); + dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx; + mtx_unlock_spin(&dealloc_lock); + + taskqueue_enqueue(taskqueue_swi, &net_tx_task); +} + +static void +make_tx_response(netif_t *netif, + uint16_t id, + int8_t st) +{ + RING_IDX i = netif->tx.rsp_prod_pvt; + netif_tx_response_t *resp; + int notify; + + resp = RING_GET_RESPONSE(&netif->tx, i); + resp->id = id; + resp->status = st; + + netif->tx.rsp_prod_pvt = ++i; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); + if (notify) + notify_remote_via_irq(netif->irq); + +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER + if (i == netif->tx.req_cons) { + int more_to_do; + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); + if (more_to_do) + add_to_tx_schedule_list_tail(netif); + } +#endif +} + +inline static void +net_tx_action_dealloc(void) +{ + gnttab_unmap_grant_ref_t *gop; + uint16_t pending_idx; + PEND_RING_IDX dc, dp; + netif_t *netif; + int ret; + + dc = dealloc_cons; + dp = dealloc_prod; + + /* + * Free up any grants we have finished using + */ + gop = tx_unmap_ops; + while (dc != dp) { + pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; + gop->host_addr = MMAP_VADDR(pending_idx); + gop->dev_bus_addr = 0; + gop->handle = grant_tx_handle[pending_idx]; + gop++; + } + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); + BUG_ON(ret); + + while (dealloc_cons != dp) { + pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; + + netif = pending_tx_info[pending_idx].netif; + + make_tx_response(netif, pending_tx_info[pending_idx].req.id, + NETIF_RSP_OKAY); + + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + + netif_put(netif); + } +} + +static void +netif_page_release(void *buf, void *args) +{ + uint16_t pending_idx = (unsigned int)args; + + DDPRINTF("pending_idx=%u\n", pending_idx); + + KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx)); + + netif_idx_release(pending_idx); +} + +static void +net_tx_action(void *context, int pending) +{ + struct mbuf *m; + netif_t *netif; + netif_tx_request_t txreq; + uint16_t pending_idx; + RING_IDX i; + gnttab_map_grant_ref_t *mop; + int ret, work_to_do; + struct mbuf *txq = NULL, *txq_last = NULL; + + if (dealloc_cons != dealloc_prod) + net_tx_action_dealloc(); + + mop = tx_map_ops; + while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) { + + /* Get a netif from the list with work to do. */ + netif = remove_from_tx_schedule_list(); + + DDPRINTF("Processing %s (prod=%u, cons=%u)\n", + IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons); + + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); + if (!work_to_do) { + netif_put(netif); + continue; + } + + i = netif->tx.req_cons; + rmb(); /* Ensure that we see the request before we copy it. 
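			  The frontend fills in the request slot before it
			  advances req_prod, so this read barrier keeps the
			  copy below from being reordered ahead of the
			  req_prod check made by
			  RING_FINAL_CHECK_FOR_REQUESTS() above.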
*/
+		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
+		/* If we want credit-based scheduling, we could add it here - WORK */
+
+		netif->tx.req_cons++;
+
+		netif_schedule_tx_work(netif);
+
+		if (unlikely(txreq.size < ETHER_HDR_LEN) ||
+			unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
+			WPRINTF("Bad packet size: %d\n", txreq.size);
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			continue;
+		}
+
+		/* No crossing a page as the payload mustn't fragment. */
+		if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+			WPRINTF("txreq.offset: %x, size: %u, end: %u\n",
+				txreq.offset, txreq.size,
+				(txreq.offset & PAGE_MASK) + txreq.size);
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			continue;
+		}
+
+		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+		MGETHDR(m, M_DONTWAIT, MT_DATA);
+		if (!m) {
+			WPRINTF("Failed to allocate mbuf\n");
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			break;
+		}
+		m->m_pkthdr.rcvif = netif->ifp;
+
+		if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
+			struct mbuf *n;
+			MGET(n, M_DONTWAIT, MT_DATA);
+			if (!(m->m_next = n)) {
+				m_freem(m);
+				WPRINTF("Failed to allocate second mbuf\n");
+				make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+				netif_put(netif);
+				break;
+			}
+			n->m_len = txreq.size - PKT_PROT_LEN;
+			m->m_len = PKT_PROT_LEN;
+		} else
+			m->m_len = txreq.size;
+
+		mop->host_addr = MMAP_VADDR(pending_idx);
+		mop->dom = netif->domid;
+		mop->ref = txreq.gref;
+		mop->flags = GNTMAP_host_map | GNTMAP_readonly;
+		mop++;
+
+		memcpy(&pending_tx_info[pending_idx].req,
+		       &txreq, sizeof(txreq));
+		pending_tx_info[pending_idx].netif = netif;
+		*((uint16_t *)m->m_data) = pending_idx;
+
+		if (txq_last)
+			txq_last->m_nextpkt = m;
+		else
+			txq = m;
+		txq_last = m;
+
+		pending_cons++;
+
+		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+			break;
+	}
+
+	if (!txq)
+		return;
+
+	ret = HYPERVISOR_grant_table_op(
+		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+	BUG_ON(ret);
+
+	mop = tx_map_ops;
+	while ((m = txq) != NULL) {
+		caddr_t data;
+
+		txq = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		pending_idx = *((uint16_t *)m->m_data);
+		netif = pending_tx_info[pending_idx].netif;
+		memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
+
+		/* Check the remap error code. */
+		if (unlikely(mop->status)) {
+			WPRINTF("#### netback grant fails\n");
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			m_freem(m);
+			mop++;
+			pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+			continue;
+		}
+
+#if 0
+		/* Can't do this in FreeBSD since vtophys() returns the pfn */
+		/* of the remote domain who loaned us the machine page - DPT */
+		xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
+			mop->dev_bus_addr >> PAGE_SHIFT;
+#endif
+		grant_tx_handle[pending_idx] = mop->handle;
+
+		/* Setup data in mbuf (lengths are already set) */
+		data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
+		bcopy(data, m->m_data, m->m_len);
+		if (m->m_next) {
+			struct mbuf *n = m->m_next;
+			MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
+				(void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
+			n->m_data = &data[PKT_PROT_LEN];
+		} else {
+			/* Schedule a response immediately.
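			   The whole request fit in PKT_PROT_LEN and was
			   copied out of the granted page by the bcopy()
			   above, so no mbuf still references the mapping;
			   netif_idx_release() just batches the slot on
			   dealloc_ring for net_tx_action_dealloc().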
*/ + netif_idx_release(pending_idx); + } + + if ((txreq.flags & NETTXF_data_validated)) { + /* Tell the stack the checksums are okay */ + m->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } + + /* If necessary, inform stack to compute the checksums if it forwards the packet */ + if ((txreq.flags & NETTXF_csum_blank)) { + struct ether_header *eh = mtod(m, struct ether_header *); + if (ntohs(eh->ether_type) == ETHERTYPE_IP) { + struct ip *ip = (struct ip *)&m->m_data[14]; + if (ip->ip_p == IPPROTO_TCP) + m->m_pkthdr.csum_flags |= CSUM_TCP; + else if (ip->ip_p == IPPROTO_UDP) + m->m_pkthdr.csum_flags |= CSUM_UDP; + } + } + + netif->ifp->if_ibytes += m->m_pkthdr.len; + netif->ifp->if_ipackets++; + + DDPRINTF("RECV %d bytes from %s (cflags=%x)\n", + m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags); + DPRINTF_MBUF_LEN(m, 128); + + (*netif->ifp->if_input)(netif->ifp, m); + + mop++; + } +} + +/* Handle interrupt from a frontend */ +static void +netback_intr(void *arg) +{ + netif_t *netif = arg; + DDPRINTF("%s\n", IFNAME(netif)); + add_to_tx_schedule_list_tail(netif); + maybe_schedule_tx_action(); +} + +/* Removes netif from front of list and does not call netif_put() (caller must) */ +static netif_t * +remove_from_rx_schedule_list(void) +{ + netif_t *netif; + + mtx_lock(&rx_sched_list_lock); + + if ((netif = STAILQ_FIRST(&rx_sched_list))) { + STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx); + STAILQ_NEXT(netif, next_rx) = NULL; + netif->on_rx_sched_list = 0; + } + + mtx_unlock(&rx_sched_list_lock); + + return netif; +} + +/* Adds netif to end of list and calls netif_get() */ +static void +add_to_rx_schedule_list_tail(netif_t *netif) +{ + if (netif->on_rx_sched_list) + return; + + mtx_lock(&rx_sched_list_lock); + if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { + netif_get(netif); + STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx); + netif->on_rx_sched_list = 1; + } + mtx_unlock(&rx_sched_list_lock); +} + +static int +make_rx_response(netif_t *netif, uint16_t id, int8_t st, + uint16_t offset, uint16_t size, uint16_t flags) +{ + RING_IDX i = netif->rx.rsp_prod_pvt; + netif_rx_response_t *resp; + int notify; + + resp = RING_GET_RESPONSE(&netif->rx, i); + resp->offset = offset; + resp->flags = flags; + resp->id = id; + resp->status = (int16_t)size; + if (st < 0) + resp->status = (int16_t)st; + + DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n", + i, resp->offset, resp->flags, resp->id, resp->status); + + netif->rx.rsp_prod_pvt = ++i; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify); + + return notify; +} + +static int +netif_rx(netif_t *netif) +{ + struct ifnet *ifp = netif->ifp; + struct mbuf *m; + multicall_entry_t *mcl; + mmu_update_t *mmu; + gnttab_transfer_t *gop; + unsigned long vdata, old_mfn, new_mfn; + struct mbuf *rxq = NULL, *rxq_last = NULL; + int ret, notify = 0, pkts_dequeued = 0; + + DDPRINTF("%s\n", IFNAME(netif)); + + mcl = rx_mcl; + mmu = rx_mmu; + gop = grant_rx_op; + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + + /* Quit if the target domain has no receive buffers */ + if (netif->rx.req_cons == netif->rx.sring->req_prod) + break; + + IFQ_DRV_DEQUEUE(&ifp->if_snd, m); + if (m == NULL) + break; + + pkts_dequeued++; + + /* Check if we need to copy the data */ + if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) || + (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) { + struct mbuf *n; + + DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n", + 
m->m_flags,
+				(m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
+				(m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
+				(unsigned int)m->m_next);
+
+			/* Make copy */
+			MGETHDR(n, M_DONTWAIT, MT_DATA);
+			if (!n)
+				goto drop;
+
+			MCLGET(n, M_DONTWAIT);
+			if (!(n->m_flags & M_EXT)) {
+				m_freem(n);
+				goto drop;
+			}
+
+			/* Leave space at front and keep current alignment */
+			n->m_data += 16 + ((unsigned int)m->m_data & 0x3);
+
+			if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
+				WPRINTF("pkt too big %d\n", m->m_pkthdr.len);
+				m_freem(n);
+				goto drop;
+			}
+			m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
+			n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
+			n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
+			m_freem(m);
+			m = n;
+		}
+
+		vdata = (unsigned long)m->m_data;
+		old_mfn = vtomach(vdata) >> PAGE_SHIFT;
+
+		if ((new_mfn = alloc_mfn()) == 0)
+			goto drop;
+
+#ifdef XEN_NETBACK_FIXUP_CSUM
+		/* Check if we need to compute a checksum.  This happens */
+		/* when bridging from one domain to another. */
+		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
+			fixup_checksum(m);
+#endif
+
+		xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;
+
+		mcl->op = __HYPERVISOR_update_va_mapping;
+		mcl->args[0] = vdata;
+		mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
+		mcl->args[2] = 0;
+		mcl->args[3] = 0;
+		mcl++;
+
+		gop->mfn = old_mfn;
+		gop->domid = netif->domid;
+		gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
+		netif->rx.req_cons++;
+		gop++;
+
+		mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+		mmu->val = vtophys(vdata) >> PAGE_SHIFT;
+		mmu++;
+
+		if (rxq_last)
+			rxq_last->m_nextpkt = m;
+		else
+			rxq = m;
+		rxq_last = m;
+
+		DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
+		DPRINTF_MBUF_LEN(m, 128);
+
+		/* Filled the batch queue? */
+		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
+			break;
+
+		continue;
+	drop:
+		DDPRINTF("dropping pkt\n");
+		ifp->if_oerrors++;
+		m_freem(m);
+	}
+
+	if (mcl == rx_mcl)
+		return pkts_dequeued;
+
+	mcl->op = __HYPERVISOR_mmu_update;
+	mcl->args[0] = (unsigned long)rx_mmu;
+	mcl->args[1] = mmu - rx_mmu;
+	mcl->args[2] = 0;
+	mcl->args[3] = DOMID_SELF;
+	mcl++;
+
+	mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+	ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+	BUG_ON(ret != 0);
+
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
+	BUG_ON(ret != 0);
+
+	mcl = rx_mcl;
+	gop = grant_rx_op;
+
+	while ((m = rxq) != NULL) {
+		int8_t status;
+		uint16_t id, flags = 0;
+
+		rxq = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		/* Rederive the machine addresses. */
+		new_mfn = mcl->args[1] >> PAGE_SHIFT;
+		old_mfn = gop->mfn;
+
+		ifp->if_obytes += m->m_pkthdr.len;
+		ifp->if_opackets++;
+
+		/* The update_va_mapping() must not fail. */
+		BUG_ON(mcl->result != 0);
+
+		/* Setup flags */
+		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
+			flags |= NETRXF_csum_blank | NETRXF_data_validated;
+		else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
+			flags |= NETRXF_data_validated;
+
+		/* Check the reassignment error code. */
+		status = NETIF_RSP_OKAY;
+		if (gop->status != 0) {
+			DPRINTF("Bad status %d from grant transfer to DOM%u\n",
+				gop->status, netif->domid);
+			/*
+			 * Page no longer belongs to us unless GNTST_bad_page,
+			 * but that should be a fatal error anyway.
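+			 * On any other failure the transfer has already
+			 * consumed the page, so all we can do is report
+			 * NETIF_RSP_ERROR for this request below.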
+			 */
+			BUG_ON(gop->status == GNTST_bad_page);
+			status = NETIF_RSP_ERROR;
+		}
+		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
+		notify |= make_rx_response(netif, id, status,
+			(unsigned long)m->m_data & PAGE_MASK,
+			m->m_pkthdr.len, flags);
+
+		m_freem(m);
+		mcl++;
+		gop++;
+	}
+
+	if (notify)
+		notify_remote_via_irq(netif->irq);
+
+	return pkts_dequeued;
+}
+
+static void
+rx_task_timer(void *arg)
+{
+	DDPRINTF("\n");
+	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
+}
+
+static void
+net_rx_action(void *context, int pending)
+{
+	netif_t *netif, *last_zero_work = NULL;
+
+	DDPRINTF("\n");
+
+	while ((netif = remove_from_rx_schedule_list())) {
+		struct ifnet *ifp = netif->ifp;
+
+		if (netif == last_zero_work) {
+			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+				add_to_rx_schedule_list_tail(netif);
+			netif_put(netif);
+			if (!STAILQ_EMPTY(&rx_sched_list))
+				callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
+			break;
+		}
+
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+			if (netif_rx(netif))
+				last_zero_work = NULL;
+			else if (!last_zero_work)
+				last_zero_work = netif;
+			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+				add_to_rx_schedule_list_tail(netif);
+		}
+
+		netif_put(netif);
+	}
+}
+
+static void
+netback_start(struct ifnet *ifp)
+{
+	netif_t *netif = (netif_t *)ifp->if_softc;
+
+	DDPRINTF("%s\n", IFNAME(netif));
+
+	add_to_rx_schedule_list_tail(netif);
+	taskqueue_enqueue(taskqueue_swi, &net_rx_task);
+}
+
+/* Map a grant ref to a ring */
+static int
+map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
+{
+	struct gnttab_map_grant_ref op;
+
+	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	if (ring->va == 0)
+		return ENOMEM;
+
+	op.host_addr = ring->va;
+	op.flags = GNTMAP_host_map;
+	op.ref = ref;
+	op.dom = dom;
+	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+	if (op.status) {
+		WPRINTF("grant table op err=%d\n", op.status);
+		kmem_free(kernel_map, ring->va, PAGE_SIZE);
+		ring->va = 0;
+		return EACCES;
+	}
+
+	ring->handle = op.handle;
+	ring->bus_addr = op.dev_bus_addr;
+
+	return 0;
+}
+
+/* Unmap grant ref for a ring */
+static void
+unmap_ring(struct ring_ref *ring)
+{
+	struct gnttab_unmap_grant_ref op;
+
+	op.host_addr = ring->va;
+	op.dev_bus_addr = ring->bus_addr;
+	op.handle = ring->handle;
+	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+	if (op.status)
+		WPRINTF("grant table op err=%d\n", op.status);
+
+	kmem_free(kernel_map, ring->va, PAGE_SIZE);
+	ring->va = 0;
+}
+
+static int
+connect_rings(netif_t *netif)
+{
+	struct xenbus_device *xdev = netif->xdev;
+	netif_tx_sring_t *txs;
+	netif_rx_sring_t *rxs;
+	unsigned long tx_ring_ref, rx_ring_ref;
+	evtchn_port_t evtchn;
+	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+	int err;
+
+	// Grab FE data and map its memory
+	err = xenbus_gather(NULL, xdev->otherend,
+		"tx-ring-ref", "%lu", &tx_ring_ref,
+		"rx-ring-ref", "%lu", &rx_ring_ref,
+		"event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(xdev, err,
+			"reading %s/ring-ref and event-channel",
+			xdev->otherend);
+		return err;
+	}
+
+	err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "mapping tx ring");
+		return err;
+	}
+	txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
+	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+
+	err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
+	if (err) {
+		unmap_ring(&netif->tx_ring_ref);
+		xenbus_dev_fatal(xdev, err, "mapping rx ring");
+		return err;
+	}
+	rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
+	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+
+	op.u.bind_interdomain.remote_dom = netif->domid;
+	op.u.bind_interdomain.remote_port = evtchn;
+	err = HYPERVISOR_event_channel_op(&op);
+	if (err) {
+		unmap_ring(&netif->tx_ring_ref);
+		unmap_ring(&netif->rx_ring_ref);
+		xenbus_dev_fatal(xdev, err, "binding event channel");
+		return err;
+	}
+	netif->evtchn = op.u.bind_interdomain.local_port;
+
+	/* bind evtchn to irq handler */
+	netif->irq =
+		bind_evtchn_to_irqhandler(netif->evtchn, "netback",
+			netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);
+
+	netif->rings_connected = 1;
+
+	DPRINTF("%s connected! evtchn=%d irq=%d\n",
+		IFNAME(netif), netif->evtchn, netif->irq);
+
+	return 0;
+}
+
+static void
+disconnect_rings(netif_t *netif)
+{
+	DPRINTF("\n");
+
+	if (netif->rings_connected) {
+		unbind_from_irqhandler(netif->irq, netif->irq_cookie);
+		netif->irq = 0;
+		unmap_ring(&netif->tx_ring_ref);
+		unmap_ring(&netif->rx_ring_ref);
+		netif->rings_connected = 0;
+	}
+}
+
+static void
+connect(netif_t *netif)
+{
+	if (!netif->xdev ||
+		!netif->attached ||
+		netif->frontend_state != XenbusStateConnected) {
+		return;
+	}
+
+	if (!connect_rings(netif)) {
+		xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);
+
+		/* Turn on interface */
+		netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+		netif->ifp->if_flags |= IFF_UP;
+	}
+}
+
+static int
+netback_remove(struct xenbus_device *xdev)
+{
+	netif_t *netif = xdev->data;
+	device_t ndev;
+
+	DPRINTF("remove %s\n", xdev->nodename);
+
+	if ((ndev = netif->ndev)) {
+		netif->ndev = NULL;
+		mtx_lock(&Giant);
+		device_detach(ndev);
+		mtx_unlock(&Giant);
+	}
+
+	xdev->data = NULL;
+	netif->xdev = NULL;
+	netif_put(netif);
+
+	return 0;
+}
+
+/**
+ * Entry point to this code when a new device is created.  Allocate the basic
+ * structures and the ring buffers for communication with the frontend.
+ * Switch to Connected state.
+ */
+static int
+netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
+{
+	int err;
+	long handle;
+	char *bridge;
+
+	DPRINTF("node=%s\n", xdev->nodename);
+
+	/* Grab the handle */
+	err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
+	if (err != 1) {
+		xenbus_dev_fatal(xdev, err, "reading handle");
+		return err;
+	}
+
+	/* Check for bridge */
+	bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
+	if (IS_ERR(bridge))
+		bridge = NULL;
+
+	err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "writing switch state");
+		return err;
+	}
+
+	err = netif_create(handle, xdev, bridge);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "creating netif");
+		return err;
+	}
+
+	err = vif_add_dev(xdev);
+	if (err) {
+		netif_put((netif_t *)xdev->data);
+		xenbus_dev_fatal(xdev, err, "adding vif device");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * We are reconnecting to the frontend, due to a suspend/resume, or a backend
+ * driver restart.  We tear down our netif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int netback_resume(struct xenbus_device *xdev)
+{
+	DPRINTF("node=%s\n", xdev->nodename);
+	return 0;
+}
+
+
+/**
+ * Callback received when the frontend's state changes.
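+ *
+ * The handshake is driven from here: a frontend that reaches Connected
+ * triggers connect() below, Closing is acknowledged by switching our own
+ * state, and Closed removes the xenbus device.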
+ */ +static void frontend_changed(struct xenbus_device *xdev, + XenbusState frontend_state) +{ + netif_t *netif = xdev->data; + + DPRINTF("state=%d\n", frontend_state); + + netif->frontend_state = frontend_state; + + switch (frontend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + break; + case XenbusStateConnected: + connect(netif); + break; + case XenbusStateClosing: + xenbus_switch_state(xdev, NULL, XenbusStateClosing); + break; + case XenbusStateClosed: + xenbus_remove_device(xdev); + break; + case XenbusStateUnknown: + case XenbusStateInitWait: + xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + +/* ** Driver registration ** */ + +static struct xenbus_device_id netback_ids[] = { + { "vif" }, + { "" } +}; + +static struct xenbus_driver netback = { + .name = "netback", + .ids = netback_ids, + .probe = netback_probe, + .remove = netback_remove, + .resume= netback_resume, + .otherend_changed = frontend_changed, +}; + +static void +netback_init(void *unused) +{ + callout_init(&rx_task_callout, CALLOUT_MPSAFE); + + mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS); + BUG_ON(!mmap_vstart); + + pending_cons = 0; + for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++) + pending_ring[pending_prod] = pending_prod; + + TASK_INIT(&net_tx_task, 0, net_tx_action, NULL); + TASK_INIT(&net_rx_task, 0, net_rx_action, NULL); + mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF); + mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF); + + DPRINTF("registering %s\n", netback.name); + + xenbus_register_backend(&netback); +} + +SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL) + +static int +vif_add_dev(struct xenbus_device *xdev) +{ + netif_t *netif = xdev->data; + device_t nexus, ndev; + devclass_t dc; + int err = 0; + + mtx_lock(&Giant); + + /* We will add a vif device as a child of nexus0 (for now) */ + if (!(dc = devclass_find("nexus")) || + !(nexus = devclass_get_device(dc, 0))) { + WPRINTF("could not find nexus0!\n"); + err = ENOENT; + goto done; + } + + + /* Create a newbus device representing the vif */ + ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit); + if (!ndev) { + WPRINTF("could not create newbus device %s!\n", IFNAME(netif)); + err = EFAULT; + goto done; + } + + netif_get(netif); + device_set_ivars(ndev, netif); + netif->ndev = ndev; + + device_probe_and_attach(ndev); + + done: + + mtx_unlock(&Giant); + + return err; +} + +enum { + VIF_SYSCTL_DOMID, + VIF_SYSCTL_HANDLE, + VIF_SYSCTL_TXRING, + VIF_SYSCTL_RXRING, +}; + +static char * +vif_sysctl_ring_info(netif_t *netif, int cmd) +{ + char *buf = malloc(256, M_DEVBUF, M_WAITOK); + if (buf) { + if (!netif->rings_connected) + sprintf(buf, "rings not connected\n"); + else if (cmd == VIF_SYSCTL_TXRING) { + netif_tx_back_ring_t *tx = &netif->tx; + sprintf(buf, "nr_ents=%x req_cons=%x" + " req_prod=%x req_event=%x" + " rsp_prod=%x rsp_event=%x", + tx->nr_ents, tx->req_cons, + tx->sring->req_prod, tx->sring->req_event, + tx->sring->rsp_prod, tx->sring->rsp_event); + } else { + netif_rx_back_ring_t *rx = &netif->rx; + sprintf(buf, "nr_ents=%x req_cons=%x" + " req_prod=%x req_event=%x" + " rsp_prod=%x rsp_event=%x", + rx->nr_ents, rx->req_cons, + rx->sring->req_prod, rx->sring->req_event, + rx->sring->rsp_prod, rx->sring->rsp_event); + } + } + return buf; +} + +static int +vif_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + device_t dev = (device_t)arg1; + netif_t *netif = 
(netif_t *)device_get_ivars(dev);
+	const char *value;
+	char *buf = NULL;
+	int err;
+
+	switch (arg2) {
+	case VIF_SYSCTL_DOMID:
+		return sysctl_handle_int(oidp, NULL, netif->domid, req);
+	case VIF_SYSCTL_HANDLE:
+		return sysctl_handle_int(oidp, NULL, netif->handle, req);
+	case VIF_SYSCTL_TXRING:
+	case VIF_SYSCTL_RXRING:
+		value = buf = vif_sysctl_ring_info(netif, arg2);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	err = SYSCTL_OUT(req, value, strlen(value));
+	if (buf != NULL)
+		free(buf, M_DEVBUF);
+
+	return err;
+}
+
+/* Newbus vif device driver probe */
+static int
+vif_probe(device_t dev)
+{
+	DDPRINTF("vif%d\n", device_get_unit(dev));
+	return 0;
+}
+
+/* Newbus vif device driver attach */
+static int
+vif_attach(device_t dev)
+{
+	netif_t *netif = (netif_t *)device_get_ivars(dev);
+	uint8_t mac[ETHER_ADDR_LEN];
+
+	DDPRINTF("%s\n", IFNAME(netif));
+
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+		OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
+		dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
+		"domid of frontend");
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+		OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
+		dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
+		"handle of frontend");
+#ifdef XEN_NETBACK_DEBUG
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+		OID_AUTO, "txring", CTLFLAG_RD,
+		dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
+		"tx ring info");
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+		OID_AUTO, "rxring", CTLFLAG_RD,
+		dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
+		"rx ring info");
+#endif
+
+	memset(mac, 0xff, sizeof(mac));
+	mac[0] &= ~0x01;
+
+	ether_ifattach(netif->ifp, mac);
+	netif->attached = 1;
+
+	connect(netif);
+
+	if (netif->bridge) {
+		DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
+		int err = add_to_bridge(netif->ifp, netif->bridge);
+		if (err) {
+			WPRINTF("Error adding %s to %s; err=%d\n",
+				IFNAME(netif), netif->bridge, err);
+		}
+	}
+
+	return bus_generic_attach(dev);
+}
+
+/* Newbus vif device driver detach */
+static int
+vif_detach(device_t dev)
+{
+	netif_t *netif = (netif_t *)device_get_ivars(dev);
+	struct ifnet *ifp = netif->ifp;
+
+	DDPRINTF("%s\n", IFNAME(netif));
+
+	/* Tell the stack that the interface is no longer active */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	ether_ifdetach(ifp);
+
+	bus_generic_detach(dev);
+
+	netif->attached = 0;
+
+	netif_put(netif);
+
+	return 0;
+}
+
+static device_method_t vif_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, vif_probe),
+	DEVMETHOD(device_attach, vif_attach),
+	DEVMETHOD(device_detach, vif_detach),
+	DEVMETHOD(device_shutdown, bus_generic_shutdown),
+	DEVMETHOD(device_suspend, bus_generic_suspend),
+	DEVMETHOD(device_resume, bus_generic_resume),
+	{0, 0}
+};
+
+static devclass_t vif_devclass;
+
+static driver_t vif_driver = {
+	"vif",
+	vif_methods,
+	0,
+};
+
+DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);
+
+
+/*
+ * Local variables:
+ *  mode: C
+ *  c-set-style: "BSD"
+ *  c-basic-offset: 4
+ *  tab-width: 4
+ *  indent-tabs-mode: t
+ * End:
+ */