diff options
-rw-r--r-- | share/man/man4/Makefile | 5 | ||||
-rw-r--r-- | share/man/man4/ntb.4 | 114 | ||||
-rw-r--r-- | sys/amd64/conf/NOTES | 4 | ||||
-rw-r--r-- | sys/conf/files.amd64 | 2 | ||||
-rw-r--r-- | sys/dev/ntb/if_ntb/if_ntb.c | 1366 | ||||
-rw-r--r-- | sys/dev/ntb/ntb_hw/ntb_hw.c | 1288 | ||||
-rw-r--r-- | sys/dev/ntb/ntb_hw/ntb_hw.h | 73 | ||||
-rw-r--r-- | sys/dev/ntb/ntb_hw/ntb_regs.h | 146 | ||||
-rw-r--r-- | sys/modules/Makefile | 2 | ||||
-rw-r--r-- | sys/modules/ntb/Makefile | 5 | ||||
-rw-r--r-- | sys/modules/ntb/if_ntb/Makefile | 9 | ||||
-rw-r--r-- | sys/modules/ntb/ntb_hw/Makefile | 9 |
12 files changed, 3023 insertions, 0 deletions
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 22bb8d1..2c4eda0 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -333,6 +333,7 @@ MAN= aac.4 \ ng_vlan.4 \ nmdm.4 \ nsp.4 \ + ${_ntb.4} \ null.4 \ ${_nvd.4} \ ${_nve.4} \ @@ -647,6 +648,7 @@ MLINKS+=netintro.4 net.4 \ netintro.4 networking.4 MLINKS+=${_nfe.4} ${_if_nfe.4} MLINKS+=nge.4 if_nge.4 +MLINKS+=${_ntb.4} ${_if_ntb.4} ${_ntb_hw.4} MLINKS+=${_nve.4} ${_if_nve.4} MLINKS+=${_nxge.4} ${_if_nxge.4} MLINKS+=patm.4 if_patm.4 @@ -784,6 +786,9 @@ MLINKS+=lindev.4 full.4 .if ${MACHINE_CPUARCH} == "amd64" _bhyve.4= bhyve.4 +_if_ntb.4= if_ntb.4 +_ntb.4= ntb.4 +_ntb_hw.4= ntb_hw.4 _qlxgb.4= qlxgb.4 _sfxge.4= sfxge.4 diff --git a/share/man/man4/ntb.4 b/share/man/man4/ntb.4 new file mode 100644 index 0000000..c576526 --- /dev/null +++ b/share/man/man4/ntb.4 @@ -0,0 +1,114 @@ +.\" +.\" Copyright (c) 2013 Intel Corporation +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions, and the following disclaimer, +.\" without modification. +.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer +.\" substantially similar to the "NO WARRANTY" disclaimer below +.\" ("Disclaimer") and any redistribution must be conditioned upon +.\" including a substantially similar Disclaimer requirement for further +.\" binary redistribution. +.\" +.\" NO WARRANTY +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR +.\" A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGES. +.\" +.\" ntb driver man page. +.\" +.\" Author: Carl Delsey <carl@FreeBSD.org> +.\" +.\" $FreeBSD$ +.\" +.Dd Apr 11, 2013 +.Dt NTB 4 +.Os +.Sh NAME +.Nm ntb , +.Nm ntb_hw , +.Nm if_ntb +.Nd Intel(R) Non-Transparent Bridge driver +.Sh SYNOPSIS +To compile this driver into your kernel, +place the following lines in your kernel configuration file: +.Bd -ragged -offset indent +.Cd "device ntb_hw" +.Cd "device if_ntb" +.Ed +.Pp +Or, to load the driver as a module at boot, place the following line in +.Xr loader.conf 5 : +.Bd -literal -offset indent +if_ntb_load="YES" +.Ed +.Sh DESCRIPTION +The +.Nm +driver provides support for the Non-Transparent Bridge (NTB) in the Intel S1200, +Xeon E3 and Xeon E5 processor families. +.Pp +The NTB allows you to connect two computer systems using a PCI-e link if they +have the correct equipment and connectors. +.Sh CONFIGURATION +The NTB memory windows need to be configured by the BIOS. +If your BIOS allows you to set their size, you should set the size of both +memory windows to 1 MiB. +This needs to be done on both systems. +.Pp +Each system needs to have a different IP address assigned. +The MAC address is randomly generated. +Also for maximum performance, the MTU should be set to 16 kiB. 
+This can be done by adding the line below to +.Xr rc.conf 5 : +.Bd -literal -offset indent +ifconfig_ntb0="inet 192.168.1.10 netmask 255.255.255.0 mtu 16384" +.Ed +.Pp +And on the second system: +.Bd -literal -offset indent +ifconfig_ntb0="inet 192.168.1.11 netmask 255.255.255.0 mtu 16384" +.Ed +.Pp +If you are using the UDP protocol, you may want to increase the +.Va net.inet.udp.maxdgram +.Xr sysctl 8 +variable. +.Sh SEE ALSO +.Xr rc.conf 5 , +.Xr sysctl 8 +.Sh AUTHORS +.An -nosplit +The +.Nm +driver was developed by Intel and originally written by +.An Carl Delsey Aq carl@FreeBSD.org. +.Sh BUGS +If the driver is unloaded, it cannot be reloaded without a system reboot. +.Pp +The network support is limited. +It isn't fully configurable yet. +It also isn't integrated into +.Xr netgraph 4 +or +.Xr bpf 4 . +.Pp +NTB to Root Port mode is not yet supported. +.Pp +There is no way to protect your system from malicious behavior on the other +system once the link is brought up. +Anyone with root or kernel access on the other system can read or write to +any location on your system. +In other words, only connect two systems that completely trust each other. 
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index 7a41464..5689ed3 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -366,6 +366,10 @@ device iwn6000fw device iwn6050fw device wpifw +# Intel Non-Transparent Bridge (NTB) hardware +device ntb_hw # Hardware Abstraction Layer for the NTB +device if_ntb # Simulated ethernet device using the NTB + # #XXX this stores pointers in a 32bit field that is defined by the hardware #device pst diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index ece99da..2d6db7a 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -211,6 +211,8 @@ dev/kbd/kbd.c optional atkbd | sc | ukbd dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev dev/nfe/if_nfe.c optional nfe pci +dev/ntb/if_ntb/if_ntb.c optional if_ntb +dev/ntb/ntb_hw/ntb_hw.c optional if_ntb ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nve/if_nve.c optional nve pci dev/nvme/nvme.c optional nvme diff --git a/sys/dev/ntb/if_ntb/if_ntb.c b/sys/dev/ntb/if_ntb/if_ntb.c new file mode 100644 index 0000000..55b19c5 --- /dev/null +++ b/sys/dev/ntb/if_ntb/if_ntb.c @@ -0,0 +1,1366 @@ +/*- + * Copyright (C) 2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/queue.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/taskqueue.h> +#include <net/if.h> +#include <net/if_media.h> +#include <net/if_types.h> +#include <net/if_var.h> +#include <net/bpf.h> +#include <net/ethernet.h> +#include <vm/vm.h> +#include <vm/pmap.h> +#include <machine/bus.h> +#include <machine/cpufunc.h> +#include <machine/pmap.h> + +#include "../ntb_hw/ntb_hw.h" + +/* + * The Non-Transparent Bridge (NTB) is a device on some Intel processors that + * allows you to connect two systems using a PCI-e link. + * + * This module contains a protocol for sending and receiving messages, and + * exposes that protocol through a simulated ethernet device called ntb. + * + * NOTE: Much of the code in this module is shared with Linux. Any patches may + * be picked up and redistributed in Linux with a dual GPL/BSD license. 
+ */ + +/* TODO: These functions should really be part of the kernel */ +#define test_bit(pos, bitmap_addr) (*(bitmap_addr) & 1UL << (pos)) +#define set_bit(pos, bitmap_addr) *(bitmap_addr) |= 1UL << (pos) +#define clear_bit(pos, bitmap_addr) *(bitmap_addr) &= ~(1UL << (pos)) + +#define KTR_NTB KTR_SPARE3 + +#define NTB_TRANSPORT_VERSION 3 +#define NTB_RX_MAX_PKTS 64 +#define NTB_RXQ_SIZE 300 + +static unsigned int transport_mtu = 0x4000 + ETHER_HDR_LEN + ETHER_CRC_LEN; +static unsigned int max_num_clients = 1; + +STAILQ_HEAD(ntb_queue_list, ntb_queue_entry); + +struct ntb_queue_entry { + /* ntb_queue list reference */ + STAILQ_ENTRY(ntb_queue_entry) entry; + + /* info on data to be transferred */ + void *cb_data; + void *buf; + uint64_t len; + uint64_t flags; +}; + +struct ntb_rx_info { + unsigned int entry; +}; + +struct ntb_transport_qp { + struct ntb_netdev *transport; + struct ntb_softc *ntb; + + void *cb_data; + + bool client_ready; + bool qp_link; + uint8_t qp_num; /* Only 64 QP's are allowed. 
0-63 */ + + struct ntb_rx_info *rx_info; + struct ntb_rx_info *remote_rx_info; + + void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + struct ntb_queue_list tx_free_q; + struct mtx ntb_tx_free_q_lock; + void *tx_mw; + uint64_t tx_index; + uint64_t tx_max_entry; + uint64_t tx_max_frame; + + void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + struct ntb_queue_list rx_pend_q; + struct ntb_queue_list rx_free_q; + struct mtx ntb_rx_pend_q_lock; + struct mtx ntb_rx_free_q_lock; + struct task rx_completion_task; + void *rx_buff; + uint64_t rx_index; + uint64_t rx_max_entry; + uint64_t rx_max_frame; + + void (*event_handler) (void *data, int status); + struct callout link_work; + struct callout queue_full; + struct callout rx_full; + + uint64_t last_rx_no_buf; + + /* Stats */ + uint64_t rx_bytes; + uint64_t rx_pkts; + uint64_t rx_ring_empty; + uint64_t rx_err_no_buf; + uint64_t rx_err_oflow; + uint64_t rx_err_ver; + uint64_t tx_bytes; + uint64_t tx_pkts; + uint64_t tx_ring_full; +}; + +struct ntb_queue_handlers { + void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*event_handler) (void *data, int status); +}; + + +struct ntb_transport_mw { + size_t size; + void *virt_addr; + vm_paddr_t dma_addr; +}; + +struct ntb_netdev { + struct ntb_softc *ntb; + struct ifnet *ifp; + struct ntb_transport_mw mw[NTB_NUM_MW]; + struct ntb_transport_qp *qps; + uint64_t max_qps; + uint64_t qp_bitmap; + bool transport_link; + struct callout link_work; + struct ntb_transport_qp *qp; + uint64_t bufsize; + u_char eaddr[ETHER_ADDR_LEN]; + struct mtx tx_lock; + struct mtx rx_lock; +}; + +static struct ntb_netdev net_softc; + +enum { + IF_NTB_DESC_DONE_FLAG = 1 << 0, + IF_NTB_LINK_DOWN_FLAG = 1 << 1, +}; + +struct ntb_payload_header { + uint64_t ver; + uint64_t len; + uint64_t flags; +}; + 
+enum { + IF_NTB_VERSION = 0, + IF_NTB_MW0_SZ, + IF_NTB_MW1_SZ, + IF_NTB_NUM_QPS, + IF_NTB_QP_LINKS, + IF_NTB_MAX_SPAD, +}; + +#define QP_TO_MW(qp) ((qp) % NTB_NUM_MW) +#define NTB_QP_DEF_NUM_ENTRIES 100 +#define NTB_LINK_DOWN_TIMEOUT 10 + +static int ntb_handle_module_events(struct module *m, int what, void *arg); +static int ntb_setup_interface(void); +static int ntb_teardown_interface(void); +static void ntb_net_init(void *arg); +static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data); +static void ntb_start(struct ifnet *ifp); +static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); +static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); +static void ntb_net_event_handler(void *data, int status); +static int ntb_transport_init(struct ntb_softc *ntb); +static void ntb_transport_free(void *transport); +static void ntb_transport_init_queue(struct ntb_netdev *nt, + unsigned int qp_num); +static void ntb_transport_free_queue(struct ntb_transport_qp *qp); +static struct ntb_transport_qp * ntb_transport_create_queue(void *data, + struct ntb_softc *pdev, const struct ntb_queue_handlers *handlers); +static void ntb_transport_link_up(struct ntb_transport_qp *qp); +static int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, + void *data, unsigned int len); +static int ntb_process_tx(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry); +static void ntb_tx_copy_task(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry, void *offset); +static void ntb_qp_full(void *arg); +static void ntb_transport_rxc_db(void *data, int db_num); +static void ntb_rx_pendq_full(void *arg); +static void ntb_transport_rx(struct ntb_transport_qp *qp); +static int ntb_process_rxc(struct ntb_transport_qp *qp); +static void ntb_rx_copy_task(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry, void *offset); +static void ntb_rx_completion_task(void *arg, int 
pending); +static void ntb_transport_event_callback(void *data, enum ntb_hw_event event); +static void ntb_transport_link_work(void *arg); +static int ntb_set_mw(struct ntb_netdev *nt, int num_mw, unsigned int size); +static void ntb_transport_setup_qp_mw(struct ntb_netdev *nt, + unsigned int qp_num); +static void ntb_qp_link_work(void *arg); +static void ntb_transport_link_cleanup(struct ntb_netdev *nt); +static void ntb_qp_link_down(struct ntb_transport_qp *qp); +static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp); +static void ntb_transport_link_down(struct ntb_transport_qp *qp); +static void ntb_send_link_down(struct ntb_transport_qp *qp); +static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry, + struct ntb_queue_list *list); +static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock, + struct ntb_queue_list *list); +static void create_random_local_eui48(u_char *eaddr); +static unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); + +MALLOC_DEFINE(M_NTB_IF, "if_ntb", "ntb network driver"); + +/* Module setup and teardown */ +static int +ntb_handle_module_events(struct module *m, int what, void *arg) +{ + int err = 0; + + switch (what) { + case MOD_LOAD: + err = ntb_setup_interface(); + break; + case MOD_UNLOAD: + err = ntb_teardown_interface(); + break; + default: + err = EOPNOTSUPP; + break; + } + return (err); +} + +static moduledata_t ntb_transport_mod = { + "ntb_transport", + ntb_handle_module_events, + NULL +}; + +DECLARE_MODULE(ntb_transport, ntb_transport_mod, SI_SUB_KLD, SI_ORDER_ANY); +MODULE_DEPEND(ntb_transport, ntb_hw, 1, 1, 1); + +static int +ntb_setup_interface() +{ + struct ifnet *ifp; + struct ntb_queue_handlers handlers = { ntb_net_rx_handler, + ntb_net_tx_handler, ntb_net_event_handler }; + + net_softc.ntb = devclass_get_softc(devclass_find("ntb_hw"), 0); + if (net_softc.ntb == NULL) { + printf("ntb: Can't find devclass\n"); + return (ENXIO); + } + + ntb_transport_init(net_softc.ntb); + + ifp = 
net_softc.ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) { + printf("ntb: cannot allocate ifnet structure\n"); + return (ENOMEM); + } + + net_softc.qp = ntb_transport_create_queue(ifp, net_softc.ntb, + &handlers); + if_initname(ifp, "ntb", 0); + ifp->if_init = ntb_net_init; + ifp->if_softc = &net_softc; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; + ifp->if_ioctl = ntb_ioctl; + ifp->if_start = ntb_start; + IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_READY(&ifp->if_snd); + create_random_local_eui48(net_softc.eaddr); + ether_ifattach(ifp, net_softc.eaddr); + ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU; + ifp->if_capenable = ifp->if_capabilities; + + ntb_transport_link_up(net_softc.qp); + net_softc.bufsize = ntb_transport_max_size(net_softc.qp) + + sizeof(struct ether_header); + return (0); +} + +static int +ntb_teardown_interface() +{ + struct ifnet *ifp = net_softc.ifp; + + ntb_transport_link_down(net_softc.qp); + + ether_ifdetach(ifp); + if_free(ifp); + ntb_transport_free_queue(net_softc.qp); + ntb_transport_free(&net_softc); + + return (0); +} + +/* Network device interface */ + +static void +ntb_net_init(void *arg) +{ + struct ntb_netdev *ntb_softc = arg; + struct ifnet *ifp = ntb_softc->ifp; + + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + ifp->if_flags |= IFF_UP; + if_link_state_change(ifp, LINK_STATE_UP); +} + +static int +ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) +{ + struct ntb_netdev *nt = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; + + switch (command) { + case SIOCSIFMTU: + { + if (ifr->ifr_mtu > ntb_transport_max_size(nt->qp) - + ETHER_HDR_LEN - ETHER_CRC_LEN) { + error = EINVAL; + break; + } + + ifp->if_mtu = ifr->ifr_mtu; + break; + } + default: + error = ether_ioctl(ifp, command, data); + break; + } + + return (error); +} + + +static void +ntb_start(struct ifnet *ifp) +{ + struct mbuf *m_head; + struct 
ntb_netdev *nt = ifp->if_softc; + int rc; + + mtx_lock(&nt->tx_lock); + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + CTR0(KTR_NTB, "TX: ntb_start"); + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); + CTR1(KTR_NTB, "TX: start mbuf %p", m_head); + rc = ntb_transport_tx_enqueue(nt->qp, m_head, m_head, + m_length(m_head, NULL)); + if (rc != 0) { + CTR1(KTR_NTB, + "TX: couldn't tx mbuf %p. Returning to snd q", + m_head); + if (rc == EAGAIN) { + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + callout_reset(&nt->qp->queue_full, hz / 1000, + ntb_qp_full, ifp); + } + break; + } + + } + mtx_unlock(&nt->tx_lock); +} + +/* Network Device Callbacks */ +static void +ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, + int len) +{ + + m_freem(data); + CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data); +} + +static void +ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, + int len) +{ + struct mbuf *m = data; + struct ifnet *ifp = qp_data; + + CTR0(KTR_NTB, "RX: rx handler"); + (*ifp->if_input)(ifp, m); +} + +static void +ntb_net_event_handler(void *data, int status) +{ + +} + +/* Transport Init and teardown */ + +static int +ntb_transport_init(struct ntb_softc *ntb) +{ + struct ntb_netdev *nt = &net_softc; + int rc, i; + + nt->max_qps = max_num_clients; + ntb_register_transport(ntb, nt); + mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF); + mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF); + + nt->qps = malloc(nt->max_qps * sizeof(struct ntb_transport_qp), + M_NTB_IF, M_WAITOK|M_ZERO); + + nt->qp_bitmap = ((uint64_t) 1 << nt->max_qps) - 1; + + for (i = 0; i < nt->max_qps; i++) + ntb_transport_init_queue(nt, i); + + callout_init(&nt->link_work, 0); + + rc = ntb_register_event_callback(ntb, + ntb_transport_event_callback); + if (rc != 0) + goto err; + + if (ntb_query_link_status(ntb)) + callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); 
+ + return (0); + +err: + free(nt->qps, M_NTB_IF); + ntb_unregister_transport(ntb); + return (rc); +} + +static void +ntb_transport_free(void *transport) +{ + struct ntb_netdev *nt = transport; + struct ntb_softc *ntb = nt->ntb; + int i; + + nt->transport_link = NTB_LINK_DOWN; + + callout_drain(&nt->link_work); + + /* verify that all the qp's are freed */ + for (i = 0; i < nt->max_qps; i++) + if (!test_bit(i, &nt->qp_bitmap)) + ntb_transport_free_queue(&nt->qps[i]); + + + ntb_unregister_event_callback(ntb); + + for (i = 0; i < NTB_NUM_MW; i++) + if (nt->mw[i].virt_addr != NULL) + contigfree(nt->mw[i].virt_addr, nt->mw[i].size, + M_NTB_IF); + + free(nt->qps, M_NTB_IF); + ntb_unregister_transport(ntb); +} + +static void +ntb_transport_init_queue(struct ntb_netdev *nt, unsigned int qp_num) +{ + struct ntb_transport_qp *qp; + unsigned int num_qps_mw, tx_size; + uint8_t mw_num = QP_TO_MW(qp_num); + + qp = &nt->qps[qp_num]; + qp->qp_num = qp_num; + qp->transport = nt; + qp->ntb = nt->ntb; + qp->qp_link = NTB_LINK_DOWN; + qp->client_ready = NTB_LINK_DOWN; + qp->event_handler = NULL; + + if (nt->max_qps % NTB_NUM_MW && mw_num < nt->max_qps % NTB_NUM_MW) + num_qps_mw = nt->max_qps / NTB_NUM_MW + 1; + else + num_qps_mw = nt->max_qps / NTB_NUM_MW; + + tx_size = (unsigned int) ntb_get_mw_size(qp->ntb, mw_num) / num_qps_mw; + qp->rx_info = (struct ntb_rx_info *) + ((char *)ntb_get_mw_vbase(qp->ntb, mw_num) + + (qp_num / NTB_NUM_MW * tx_size)); + tx_size -= sizeof(struct ntb_rx_info); + + qp->tx_mw = qp->rx_info + sizeof(struct ntb_rx_info); + qp->tx_max_frame = min(transport_mtu + sizeof(struct ntb_payload_header), + tx_size); + qp->tx_max_entry = tx_size / qp->tx_max_frame; + qp->tx_index = 0; + + callout_init(&qp->link_work, 0); + callout_init(&qp->queue_full, CALLOUT_MPSAFE); + callout_init(&qp->rx_full, CALLOUT_MPSAFE); + + mtx_init(&qp->ntb_rx_pend_q_lock, "ntb rx pend q", NULL, MTX_SPIN); + mtx_init(&qp->ntb_rx_free_q_lock, "ntb rx free q", NULL, MTX_SPIN); + 
mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN); + TASK_INIT(&qp->rx_completion_task, 0, ntb_rx_completion_task, qp); + + STAILQ_INIT(&qp->rx_pend_q); + STAILQ_INIT(&qp->rx_free_q); + STAILQ_INIT(&qp->tx_free_q); +} + +static void +ntb_transport_free_queue(struct ntb_transport_qp *qp) +{ + struct ntb_queue_entry *entry; + + if (qp == NULL) + return; + + callout_drain(&qp->link_work); + + ntb_unregister_db_callback(qp->ntb, qp->qp_num); + + while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) + free(entry, M_NTB_IF); + + while ((entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q))) + free(entry, M_NTB_IF); + + while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) + free(entry, M_NTB_IF); + + set_bit(qp->qp_num, &qp->transport->qp_bitmap); +} + +/** + * ntb_transport_create_queue - Create a new NTB transport layer queue + * @data: opaque client pointer, saved as the queue's callback data + * @pdev: NTB device whose registered transport supplies the queue + * @handlers: receive, transmit and event callback functions for the queue + * + * Create a new NTB transport layer queue and provide the queue with a callback + * routine for both transmit and receive. The receive callback routine will be + * used to pass up data when the transport has received it on the queue. The + * transmit callback routine will be called when the transport has completed the + * transmission of the data on the queue and the data is ready to be freed. + * + * RETURNS: pointer to newly created ntb_queue, NULL on error. 
+ */ +static struct ntb_transport_qp * +ntb_transport_create_queue(void *data, struct ntb_softc *pdev, + const struct ntb_queue_handlers *handlers) +{ + struct ntb_queue_entry *entry; + struct ntb_transport_qp *qp; + struct ntb_netdev *nt; + unsigned int free_queue; + int rc, i; + + nt = ntb_find_transport(pdev); + if (nt == NULL) + goto err; + + free_queue = ffs(nt->qp_bitmap); + if (free_queue == 0) + goto err; + + /* decrement free_queue to make it zero based */ + free_queue--; + + clear_bit(free_queue, &nt->qp_bitmap); + + qp = &nt->qps[free_queue]; + qp->cb_data = data; + qp->rx_handler = handlers->rx_handler; + qp->tx_handler = handlers->tx_handler; + qp->event_handler = handlers->event_handler; + + for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + entry = malloc(sizeof(struct ntb_queue_entry), M_NTB_IF, + M_WAITOK|M_ZERO); + entry->cb_data = nt->ifp; + entry->buf = NULL; + entry->len = transport_mtu; + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + } + + for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + entry = malloc(sizeof(struct ntb_queue_entry), M_NTB_IF, + M_WAITOK|M_ZERO); + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); + } + + rc = ntb_register_db_callback(qp->ntb, free_queue, qp, + ntb_transport_rxc_db); + if (rc != 0) + goto err1; + + return (qp); + +err1: + while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) + free(entry, M_NTB_IF); + while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) + free(entry, M_NTB_IF); + set_bit(free_queue, &nt->qp_bitmap); +err: + return (NULL); +} + +/** + * ntb_transport_link_up - Notify NTB transport of client readiness to use queue + * @qp: NTB transport layer queue to be enabled + * + * Notify NTB transport layer of client readiness to use queue + */ +static void +ntb_transport_link_up(struct ntb_transport_qp *qp) +{ + + if (qp == NULL) + return; + + qp->client_ready = NTB_LINK_UP; + + if (qp->transport->transport_link == NTB_LINK_UP) + 
callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); +} + + + +/* Transport Tx */ + +/** + * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry + * @qp: NTB transport layer queue the entry is to be enqueued on + * @cb: per buffer pointer for callback function to use + * @data: pointer to data buffer that will be sent + * @len: length of the data buffer + * + * Enqueue a new transmit buffer onto the transport queue from which a NTB + * payload will be transmitted. This assumes that a lock is being held to + * serialize access to the qp. + * + * RETURNS: An appropriate ERRNO error value on error, or zero for success. + */ +static int +ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len) +{ + struct ntb_queue_entry *entry; + int rc; + + if (qp == NULL || qp->qp_link != NTB_LINK_UP || len == 0) { + CTR0(KTR_NTB, "TX: link not up"); + return (EINVAL); + } + + entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); + if (entry == NULL) { + CTR0(KTR_NTB, "TX: couldn't get entry from tx_free_q"); + return (ENOMEM); + } + CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry); + + entry->cb_data = cb; + entry->buf = data; + entry->len = len; + entry->flags = 0; + + rc = ntb_process_tx(qp, entry); + if (rc != 0) { + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); + CTR1(KTR_NTB, + "TX: process_tx failed. 
Returning entry %p to tx_free_q", + entry); + } + return (rc); +} + +static int +ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) +{ + void *offset; + + offset = (char *)qp->tx_mw + qp->tx_max_frame * qp->tx_index; + CTR3(KTR_NTB, + "TX: process_tx: tx_pkts=%u, tx_index=%u, remote entry=%u", + qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry); + if (qp->tx_index == qp->remote_rx_info->entry) { + CTR0(KTR_NTB, "TX: ring full"); + qp->tx_ring_full++; + return (EAGAIN); + } + + if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) { + if (qp->tx_handler != NULL) + qp->tx_handler(qp, qp->cb_data, entry->buf, + EIO); + + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); + CTR1(KTR_NTB, + "TX: frame too big. returning entry %p to tx_free_q", + entry); + return (0); + } + CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset); + ntb_tx_copy_task(qp, entry, offset); + + qp->tx_index++; + qp->tx_index %= qp->tx_max_entry; + + qp->tx_pkts++; + + return (0); +} + +static void +ntb_tx_copy_task(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, + void *offset) +{ + struct ntb_payload_header *hdr; + + CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset); + if (entry->buf != NULL) + m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset); + + hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame - + sizeof(struct ntb_payload_header)); + hdr->len = entry->len; /* TODO: replace with bus_space_write */ + hdr->ver = qp->tx_pkts; /* TODO: replace with bus_space_write */ + wmb(); + /* TODO: replace with bus_space_write */ + hdr->flags = entry->flags | IF_NTB_DESC_DONE_FLAG; + + ntb_ring_sdb(qp->ntb, qp->qp_num); + + /* + * The entry length can only be zero if the packet is intended to be a + * "link down" or similar. Since no payload is being sent in these + * cases, there is nothing to add to the completion queue. 
+ */ + if (entry->len > 0) { + qp->tx_bytes += entry->len; + + if (qp->tx_handler) + qp->tx_handler(qp, qp->cb_data, entry->cb_data, + entry->len); + } + + CTR2(KTR_NTB, + "TX: entry %p sent. hdr->ver = %d, Returning to tx_free_q", entry, + hdr->ver); + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); +} + +static void +ntb_qp_full(void *arg) +{ + + CTR0(KTR_NTB, "TX: qp_full callout"); + ntb_start(arg); +} + +/* Transport Rx */ +static void +ntb_transport_rxc_db(void *data, int db_num) +{ + struct ntb_transport_qp *qp = data; + + ntb_transport_rx(qp); +} + +static void +ntb_rx_pendq_full(void *arg) +{ + + CTR0(KTR_NTB, "RX: ntb_rx_pendq_full callout"); + ntb_transport_rx(arg); +} + +static void +ntb_transport_rx(struct ntb_transport_qp *qp) +{ + int rc, i; + + /* + * Limit the number of packets processed in a single interrupt to + * provide fairness to others + */ + mtx_lock(&qp->transport->rx_lock); + CTR0(KTR_NTB, "RX: transport_rx"); + for (i = 0; i < NTB_RX_MAX_PKTS; i++) { + rc = ntb_process_rxc(qp); + if (rc != 0) { + CTR0(KTR_NTB, "RX: process_rxc failed"); + break; + } + } + mtx_unlock(&qp->transport->rx_lock); +} + +static int +ntb_process_rxc(struct ntb_transport_qp *qp) +{ + struct ntb_payload_header *hdr; + struct ntb_queue_entry *entry; + void *offset; + + offset = (void *) + ((char *)qp->rx_buff + qp->rx_max_frame * qp->rx_index); + hdr = (void *) + ((char *)offset + qp->rx_max_frame - + sizeof(struct ntb_payload_header)); + + CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index); + entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); + if (entry == NULL) { + qp->rx_err_no_buf++; + CTR0(KTR_NTB, "RX: No entries in rx_pend_q"); + return (ENOMEM); + } + callout_stop(&qp->rx_full); + CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry); + + if ((hdr->flags & IF_NTB_DESC_DONE_FLAG) == 0) { + CTR1(KTR_NTB, + "RX: hdr not done. 
Returning entry %p to rx_pend_q", entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + qp->rx_ring_empty++; + return (EAGAIN); + } + + if (hdr->ver != (uint32_t) qp->rx_pkts) { + CTR3(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). " + "Returning entry %p to rx_pend_q", hdr->ver, qp->rx_pkts, + entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + qp->rx_err_ver++; + return (EIO); + } + + if ((hdr->flags & IF_NTB_LINK_DOWN_FLAG) != 0) { + ntb_qp_link_down(qp); + CTR1(KTR_NTB, + "RX: link down. adding entry %p back to rx_pend_q", entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + goto out; + } + + if (hdr->len <= entry->len) { + entry->len = hdr->len; + ntb_rx_copy_task(qp, entry, offset); + } else { + CTR1(KTR_NTB, + "RX: len too long. Returning entry %p to rx_pend_q", entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + + qp->rx_err_oflow++; + } + + qp->rx_bytes += hdr->len; + qp->rx_pkts++; + CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts); + + +out: + /* Ensure that the data is globally visible before clearing the flag */ + wmb(); + hdr->flags = 0; + /* TODO: replace with bus_space_write */ + qp->rx_info->entry = qp->rx_index; + + qp->rx_index++; + qp->rx_index %= qp->rx_max_entry; + + return (0); +} + +static void +ntb_rx_copy_task(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, + void *offset) +{ + struct ifnet *ifp = entry->cb_data; + unsigned int len = entry->len; + struct mbuf *m; + + CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset); + m = m_devget(offset, len, 0, ifp, NULL); + m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; + + entry->buf = (void *)m; + + CTR2(KTR_NTB, + "RX: copied entry %p to mbuf %p. 
Adding entry to rx_free_q", entry, + m); + ntb_list_add(&qp->ntb_rx_free_q_lock, entry, &qp->rx_free_q); + + taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task); +} + +static void +ntb_rx_completion_task(void *arg, int pending) +{ + struct ntb_transport_qp *qp = arg; + struct mbuf *m; + struct ntb_queue_entry *entry; + + CTR0(KTR_NTB, "RX: rx_completion_task"); + + while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) { + m = entry->buf; + CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m); + if (qp->rx_handler && qp->client_ready == NTB_LINK_UP) + qp->rx_handler(qp, qp->cb_data, m, entry->len); + + entry->buf = NULL; + entry->len = qp->transport->bufsize; + + CTR1(KTR_NTB,"RX: entry %p removed from rx_free_q " + "and added to rx_pend_q", entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + if (qp->rx_err_no_buf > qp->last_rx_no_buf) { + qp->last_rx_no_buf = qp->rx_err_no_buf; + CTR0(KTR_NTB, "RX: could spawn rx task"); + callout_reset(&qp->rx_full, hz / 1000, ntb_rx_pendq_full, + qp); + } + } +} + +/* Link Event handler */ +static void +ntb_transport_event_callback(void *data, enum ntb_hw_event event) +{ + struct ntb_netdev *nt = data; + + switch (event) { + case NTB_EVENT_HW_LINK_UP: + callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); + break; + case NTB_EVENT_HW_LINK_DOWN: + ntb_transport_link_cleanup(nt); + break; + default: + panic("ntb: Unknown NTB event"); + } +} + +/* Link bring up */ +static void +ntb_transport_link_work(void *arg) +{ + struct ntb_netdev *nt = arg; + struct ntb_softc *ntb = nt->ntb; + struct ntb_transport_qp *qp; + uint32_t val; + int rc, i; + + /* send the local info */ + rc = ntb_write_remote_spad(ntb, IF_NTB_VERSION, NTB_TRANSPORT_VERSION); + if (rc != 0) + goto out; + + rc = ntb_write_remote_spad(ntb, IF_NTB_MW0_SZ, ntb_get_mw_size(ntb, 0)); + if (rc != 0) + goto out; + + rc = ntb_write_remote_spad(ntb, IF_NTB_MW1_SZ, ntb_get_mw_size(ntb, 1)); + if (rc != 0) + goto out; 
+ + rc = ntb_write_remote_spad(ntb, IF_NTB_NUM_QPS, nt->max_qps); + if (rc != 0) + goto out; + + rc = ntb_read_remote_spad(ntb, IF_NTB_QP_LINKS, &val); + if (rc != 0) + goto out; + + rc = ntb_write_remote_spad(ntb, IF_NTB_QP_LINKS, val); + if (rc != 0) + goto out; + + /* Query the remote side for its info */ + rc = ntb_read_local_spad(ntb, IF_NTB_VERSION, &val); + if (rc != 0) + goto out; + + if (val != NTB_TRANSPORT_VERSION) + goto out; + + rc = ntb_read_local_spad(ntb, IF_NTB_NUM_QPS, &val); + if (rc != 0) + goto out; + + if (val != nt->max_qps) + goto out; + + rc = ntb_read_local_spad(ntb, IF_NTB_MW0_SZ, &val); + if (rc != 0) + goto out; + + if (val == 0) + goto out; + + rc = ntb_set_mw(nt, 0, val); + if (rc != 0) + return; + + rc = ntb_read_local_spad(ntb, IF_NTB_MW1_SZ, &val); + if (rc != 0) + goto out; + + if (val == 0) + goto out; + + rc = ntb_set_mw(nt, 1, val); + if (rc != 0) + return; + + nt->transport_link = NTB_LINK_UP; + + for (i = 0; i < nt->max_qps; i++) { + qp = &nt->qps[i]; + + ntb_transport_setup_qp_mw(nt, i); + + if (qp->client_ready == NTB_LINK_UP) + callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); + } + + return; + +out: + if (ntb_query_link_status(ntb)) + callout_reset(&nt->link_work, + NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt); +} + +static int +ntb_set_mw(struct ntb_netdev *nt, int num_mw, unsigned int size) +{ + struct ntb_transport_mw *mw = &nt->mw[num_mw]; + + /* Alloc memory for receiving data. 
Must be 4k aligned */ + mw->size = size; + + mw->virt_addr = contigmalloc(mw->size, M_NTB_IF, M_ZERO, 0, + BUS_SPACE_MAXADDR, mw->size, 0); + if (mw->virt_addr == NULL) { + printf("ntb: Unable to allocate MW buffer of size %d\n", + (int)mw->size); + return (ENOMEM); + } + /* TODO: replace with bus_space_* functions */ + mw->dma_addr = vtophys(mw->virt_addr); + + /* Notify HW the memory location of the receive buffer */ + ntb_set_mw_addr(nt->ntb, num_mw, mw->dma_addr); + + return (0); +} + +static void +ntb_transport_setup_qp_mw(struct ntb_netdev *nt, unsigned int qp_num) +{ + struct ntb_transport_qp *qp = &nt->qps[qp_num]; + void *offset; + unsigned int rx_size, num_qps_mw; + uint8_t mw_num = QP_TO_MW(qp_num); + unsigned int i; + + if (nt->max_qps % NTB_NUM_MW && mw_num < nt->max_qps % NTB_NUM_MW) + num_qps_mw = nt->max_qps / NTB_NUM_MW + 1; + else + num_qps_mw = nt->max_qps / NTB_NUM_MW; + + rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw; + qp->remote_rx_info = (void *)((uint8_t *)nt->mw[mw_num].virt_addr + + (qp_num / NTB_NUM_MW * rx_size)); + rx_size -= sizeof(struct ntb_rx_info); + + qp->rx_buff = qp->remote_rx_info + sizeof(struct ntb_rx_info); + qp->rx_max_frame = min(transport_mtu + sizeof(struct ntb_payload_header), + rx_size); + qp->rx_max_entry = rx_size / qp->rx_max_frame; + qp->rx_index = 0; + qp->tx_index = 0; + + qp->remote_rx_info->entry = qp->rx_max_entry; + + /* setup the hdr offsets with 0's */ + for (i = 0; i < qp->rx_max_entry; i++) { + offset = (void *)((uint8_t *)qp->rx_buff + + qp->rx_max_frame * (i + 1) - + sizeof(struct ntb_payload_header)); + memset(offset, 0, sizeof(struct ntb_payload_header)); + } + + qp->rx_pkts = 0; + qp->tx_pkts = 0; +} + +static void +ntb_qp_link_work(void *arg) +{ + struct ntb_transport_qp *qp = arg; + struct ntb_softc *ntb = qp->ntb; + struct ntb_netdev *nt = qp->transport; + int rc, val; + + + rc = ntb_read_remote_spad(ntb, IF_NTB_QP_LINKS, &val); + if (rc != 0) + return; + + rc = 
ntb_write_remote_spad(ntb, IF_NTB_QP_LINKS, val | 1 << qp->qp_num); + + /* query remote spad for qp ready bits */ + rc = ntb_read_local_spad(ntb, IF_NTB_QP_LINKS, &val); + + /* See if the remote side is up */ + if ((1 << qp->qp_num & val) != 0) { + qp->qp_link = NTB_LINK_UP; + if (qp->event_handler != NULL) + qp->event_handler(qp->cb_data, NTB_LINK_UP); + } else if (nt->transport_link == NTB_LINK_UP) { + callout_reset(&qp->link_work, + NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp); + } +} + +/* Link down event*/ +static void +ntb_transport_link_cleanup(struct ntb_netdev *nt) +{ + int i; + + if (nt->transport_link == NTB_LINK_DOWN) + callout_drain(&nt->link_work); + else + nt->transport_link = NTB_LINK_DOWN; + + /* Pass along the info to any clients */ + for (i = 0; i < nt->max_qps; i++) + if (!test_bit(i, &nt->qp_bitmap)) + ntb_qp_link_down(&nt->qps[i]); + + /* + * The scratchpad registers keep the values if the remote side + * goes down, blast them now to give them a sane value the next + * time they are accessed + */ + for (i = 0; i < IF_NTB_MAX_SPAD; i++) + ntb_write_local_spad(nt->ntb, i, 0); +} + + +static void +ntb_qp_link_down(struct ntb_transport_qp *qp) +{ + + ntb_qp_link_cleanup(qp); +} + +static void +ntb_qp_link_cleanup(struct ntb_transport_qp *qp) +{ + struct ntb_netdev *nt = qp->transport; + + if (qp->qp_link == NTB_LINK_DOWN) { + callout_drain(&qp->link_work); + return; + } + + if (qp->event_handler != NULL) + qp->event_handler(qp->cb_data, NTB_LINK_DOWN); + + qp->qp_link = NTB_LINK_DOWN; + + if (nt->transport_link == NTB_LINK_UP) + callout_reset(&qp->link_work, + NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp); +} + +/* Link commanded down */ +/** + * ntb_transport_link_down - Notify NTB transport to no longer enqueue data + * @qp: NTB transport layer queue to be disabled + * + * Notify NTB transport layer of client's desire to no longer receive data on + * transport queue specified. 
It is the client's responsibility to ensure all + * entries on queue are purged or otherwise handled appropriately. + */ +static void +ntb_transport_link_down(struct ntb_transport_qp *qp) +{ + int rc, val; + + if (qp == NULL) + return; + + qp->client_ready = NTB_LINK_DOWN; + + rc = ntb_read_remote_spad(qp->ntb, IF_NTB_QP_LINKS, &val); + if (rc != 0) + return; + + rc = ntb_write_remote_spad(qp->ntb, IF_NTB_QP_LINKS, + val & ~(1 << qp->qp_num)); + + if (qp->qp_link == NTB_LINK_UP) + ntb_send_link_down(qp); + else + callout_drain(&qp->link_work); + +} + +static void +ntb_send_link_down(struct ntb_transport_qp *qp) +{ + struct ntb_queue_entry *entry; + int i, rc; + + if (qp->qp_link == NTB_LINK_DOWN) + return; + + qp->qp_link = NTB_LINK_DOWN; + + for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) { + entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); + if (entry != NULL) + break; + pause("NTB Wait for link down", hz / 10); + } + + if (entry == NULL) + return; + + entry->cb_data = NULL; + entry->buf = NULL; + entry->len = 0; + entry->flags = IF_NTB_LINK_DOWN_FLAG; + + mtx_lock(&qp->transport->tx_lock); + rc = ntb_process_tx(qp, entry); + if (rc != 0) + printf("ntb: Failed to send link down\n"); + mtx_unlock(&qp->transport->tx_lock); +} + + +/* List Management */ + +static void +ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry, + struct ntb_queue_list *list) +{ + + mtx_lock_spin(lock); + STAILQ_INSERT_TAIL(list, entry, entry); + mtx_unlock_spin(lock); +} + +static struct ntb_queue_entry * +ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list) +{ + struct ntb_queue_entry *entry; + + mtx_lock_spin(lock); + if (STAILQ_EMPTY(list)) { + entry = NULL; + goto out; + } + entry = STAILQ_FIRST(list); + STAILQ_REMOVE_HEAD(list, entry); +out: + mtx_unlock_spin(lock); + + return (entry); +} + +/* Helper functions */ +/* TODO: This too should really be part of the kernel */ +#define EUI48_MULTICAST 1 << 0 +#define EUI48_LOCALLY_ADMINISTERED 1 << 1 +static void 
+create_random_local_eui48(u_char *eaddr) +{ + static uint8_t counter = 0; + uint32_t seed = ticks; + + eaddr[0] = EUI48_LOCALLY_ADMINISTERED; + memcpy(&eaddr[1], &seed, sizeof(uint32_t)); + eaddr[5] = counter++; +} + +/** + * ntb_transport_max_size - Query the max payload size of a qp + * @qp: NTB transport layer queue to be queried + * + * Query the maximum payload size permissible on the given qp + * + * RETURNS: the max payload size of a qp + */ +static unsigned int +ntb_transport_max_size(struct ntb_transport_qp *qp) +{ + + if (qp == NULL) + return (0); + + return (qp->tx_max_frame - sizeof(struct ntb_payload_header)); +} diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.c b/sys/dev/ntb/ntb_hw/ntb_hw.c new file mode 100644 index 0000000..72314dd --- /dev/null +++ b/sys/dev/ntb/ntb_hw/ntb_hw.c @@ -0,0 +1,1288 @@ +/*- + * Copyright (C) 2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/queue.h> +#include <sys/rman.h> +#include <vm/vm.h> +#include <vm/pmap.h> +#include <machine/bus.h> +#include <machine/pmap.h> +#include <machine/resource.h> +#include <dev/pci/pcireg.h> +#include <dev/pci/pcivar.h> + +#include "ntb_regs.h" +#include "ntb_hw.h" + +/* + * The Non-Transparent Bridge (NTB) is a device on some Intel processors that + * allows you to connect two systems using a PCI-e link. + * + * This module contains the hardware abstraction layer for the NTB. It allows + * you to send and receive interrupts, map the memory windows and send and + * receive messages in the scratch-pad registers. + * + * NOTE: Much of the code in this module is shared with Linux. Any patches may + * be picked up and redistributed in Linux with a dual GPL/BSD license. 
+ */ + +#define NTB_CONFIG_BAR 0 +#define NTB_B2B_BAR_1 1 +#define NTB_B2B_BAR_2 2 +#define NTB_MAX_BARS 3 +#define NTB_MW_TO_BAR(mw) ((mw) + 1) + +#define MAX_MSIX_INTERRUPTS MAX(XEON_MAX_DB_BITS, SOC_MAX_DB_BITS) + +#define NTB_HB_TIMEOUT 1 /* second */ +#define SOC_LINK_RECOVERY_TIME 500 + +#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev)) + +enum ntb_device_type { + NTB_XEON, + NTB_SOC +}; + +struct ntb_hw_info { + uint32_t device_id; + enum ntb_device_type type; + const char *desc; +}; + +struct ntb_pci_bar_info { + bus_space_tag_t pci_bus_tag; + bus_space_handle_t pci_bus_handle; + int pci_resource_id; + struct resource *pci_resource; + vm_paddr_t pbase; + void *vbase; + u_long size; +}; + +struct ntb_int_info { + struct resource *res; + int rid; + void *tag; +}; + +struct ntb_db_cb { + ntb_db_callback callback; + unsigned int db_num; + void *data; + struct ntb_softc *ntb; +}; + +struct ntb_softc { + device_t device; + enum ntb_device_type type; + + struct ntb_pci_bar_info bar_info[NTB_MAX_BARS]; + struct ntb_int_info int_info[MAX_MSIX_INTERRUPTS]; + uint32_t allocated_interrupts; + + struct callout heartbeat_timer; + struct callout lr_timer; + + void *ntb_transport; + ntb_event_callback event_cb; + struct ntb_db_cb *db_cb; + + struct { + uint32_t max_spads; + uint32_t max_db_bits; + uint32_t msix_cnt; + } limits; + struct { + uint32_t pdb; + uint32_t pdb_mask; + uint32_t sdb; + uint32_t sbar2_xlat; + uint32_t sbar4_xlat; + uint32_t spad_remote; + uint32_t spad_local; + uint32_t lnk_cntl; + uint32_t lnk_stat; + uint32_t spci_cmd; + } reg_ofs; + uint8_t conn_type; + uint8_t dev_type; + uint8_t bits_per_vector; + uint8_t link_status; + uint8_t link_width; + uint8_t link_speed; +}; + +#define ntb_reg_read(SIZE, offset) \ + bus_space_read_ ## SIZE (ntb->bar_info[NTB_CONFIG_BAR].pci_bus_tag, \ + ntb->bar_info[NTB_CONFIG_BAR].pci_bus_handle, (offset)) +#define ntb_reg_write(SIZE, offset, val) \ + bus_space_write_ ## SIZE 
(ntb->bar_info[NTB_CONFIG_BAR].pci_bus_tag, \ + ntb->bar_info[NTB_CONFIG_BAR].pci_bus_handle, (offset), (val)) + +#define ntb_read_1(offset) ntb_reg_read(1, (offset)) +#define ntb_read_2(offset) ntb_reg_read(2, (offset)) +#define ntb_read_4(offset) ntb_reg_read(4, (offset)) +#define ntb_read_8(offset) ntb_reg_read(8, (offset)) +#define ntb_write_1(offset, val) ntb_reg_write(1, (offset), (val)) +#define ntb_write_2(offset, val) ntb_reg_write(2, (offset), (val)) +#define ntb_write_4(offset, val) ntb_reg_write(4, (offset), (val)) +#define ntb_write_8(offset, val) ntb_reg_write(8, (offset), (val)) + +static int ntb_probe(device_t device); +static int ntb_attach(device_t device); +static int ntb_detach(device_t device); +static int ntb_map_pci_bar(struct ntb_softc *ntb); +static void ntb_unmap_pci_bar(struct ntb_softc *ntb); +static int ntb_setup_interrupts(struct ntb_softc *ntb); +static void ntb_teardown_interrupts(struct ntb_softc *ntb); +static void handle_soc_irq(void *arg); +static void handle_xeon_irq(void *arg); +static void handle_xeon_event_irq(void *arg); +static void ntb_handle_legacy_interrupt(void *arg); +static int ntb_create_callbacks(struct ntb_softc *ntb, int num_vectors); +static void ntb_free_callbacks(struct ntb_softc *ntb); +static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id); +static int ntb_initialize_hw(struct ntb_softc *ntb); +static int ntb_setup_xeon(struct ntb_softc *ntb); +static int ntb_setup_soc(struct ntb_softc *ntb); +static void ntb_handle_heartbeat(void *arg); +static void ntb_handle_link_event(struct ntb_softc *ntb, int link_state); +static void recover_soc_link(void *arg); +static int ntb_check_link_status(struct ntb_softc *ntb); +static bool is_bar_for_data_transfer(int bar_num); + +static struct ntb_hw_info pci_ids[] = { + { 0x3C0D8086, NTB_XEON, "Xeon E5/Core i7 Non-Transparent Bridge B2B" }, + { 0x0C4E8086, NTB_SOC, "Atom Processor S1200 NTB Primary B2B" }, + { 0x0E0D8086, NTB_XEON, "Xeon E5 V2 Non-Transparent 
Bridge B2B" }, + { 0x00000000, NTB_SOC, NULL } +}; + +/* + * OS <-> Driver interface structures + */ +MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations"); + +static device_method_t ntb_pci_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ntb_probe), + DEVMETHOD(device_attach, ntb_attach), + DEVMETHOD(device_detach, ntb_detach), + DEVMETHOD_END +}; + +static driver_t ntb_pci_driver = { + "ntb_hw", + ntb_pci_methods, + sizeof(struct ntb_softc), +}; + +static devclass_t ntb_devclass; +DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL); +MODULE_VERSION(ntb_hw, 1); + +/* + * OS <-> Driver linkage functions + */ +static int +ntb_probe(device_t device) +{ + struct ntb_hw_info *p = ntb_get_device_info(pci_get_devid(device)); + + if (p != NULL) { + device_set_desc(device, p->desc); + return (0); + } else + return (ENXIO); +} + +#define DETACH_ON_ERROR(func) \ + error = func; \ + if (error < 0) { \ + ntb_detach(device); \ + return (error); \ + } + +static int +ntb_attach(device_t device) +{ + struct ntb_softc *ntb = DEVICE2SOFTC(device); + struct ntb_hw_info *p = ntb_get_device_info(pci_get_devid(device)); + int error; + + ntb->device = device; + ntb->type = p->type; + + /* Heartbeat timer for NTB_SOC since there is no link interrupt */ + callout_init(&ntb->heartbeat_timer, CALLOUT_MPSAFE); + callout_init(&ntb->lr_timer, CALLOUT_MPSAFE); + + DETACH_ON_ERROR(ntb_map_pci_bar(ntb)); + DETACH_ON_ERROR(ntb_initialize_hw(ntb)); + DETACH_ON_ERROR(ntb_setup_interrupts(ntb)); + + pci_enable_busmaster(ntb->device); + + return (error); +} + +static int +ntb_detach(device_t device) +{ + struct ntb_softc *ntb = DEVICE2SOFTC(device); + + callout_drain(&ntb->heartbeat_timer); + callout_drain(&ntb->lr_timer); + ntb_teardown_interrupts(ntb); + ntb_unmap_pci_bar(ntb); + + return (0); +} + +static int +ntb_map_pci_bar(struct ntb_softc *ntb) +{ + struct ntb_pci_bar_info *current_bar; + int rc, i; + + ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id 
= PCIR_BAR(0); + ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2); + ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4); + + for (i = 0; i< NTB_MAX_BARS; i++) { + current_bar = &ntb->bar_info[i]; + /* The rid is passed by address, as bus_alloc_resource() requires */ + current_bar->pci_resource = + bus_alloc_resource(ntb->device, + SYS_RES_MEMORY, + &current_bar->pci_resource_id, 0, ~0, 1, + RF_ACTIVE); + + if (current_bar->pci_resource == NULL) { + device_printf(ntb->device, + "unable to allocate pci resource\n"); + return (ENXIO); + } + else { + current_bar->pci_bus_tag = + rman_get_bustag(current_bar->pci_resource); + current_bar->pci_bus_handle = + rman_get_bushandle(current_bar->pci_resource); + current_bar->pbase = + rman_get_start(current_bar->pci_resource); + current_bar->size = + rman_get_size(current_bar->pci_resource); + current_bar->vbase = + rman_get_virtual(current_bar->pci_resource); + if (is_bar_for_data_transfer(i)) { + /* + * Mark bar region as write combining to improve + * performance. + */ + rc = pmap_change_attr( + (vm_offset_t)current_bar->vbase, + current_bar->size, + VM_MEMATTR_WRITE_COMBINING); + if (rc != 0) { + device_printf(ntb->device, + "Couldn't mark bar as" + " WRITE_COMBINING\n"); + return (rc); + } + } + device_printf(ntb->device, + "Bar size = %lx, v %p, p %p\n", + current_bar->size, current_bar->vbase, + (void *)(current_bar->pbase)); + } + } + return (0); +} + +/* Release every BAR resource that ntb_map_pci_bar() managed to allocate. */ +static void +ntb_unmap_pci_bar(struct ntb_softc *ntb) +{ + struct ntb_pci_bar_info *current_bar; + int i; + + for (i = 0; i< NTB_MAX_BARS; i++) { + current_bar = &ntb->bar_info[i]; + if (current_bar->pci_resource != NULL) + bus_release_resource(ntb->device, SYS_RES_MEMORY, + current_bar->pci_resource_id, + current_bar->pci_resource); + } +} + +static int +ntb_setup_interrupts(struct ntb_softc *ntb) +{ + void (*interrupt_handler)(void *); + void *int_arg; + bool use_msix = 0; + uint32_t num_vectors; + int i; + + ntb->allocated_interrupts = 0; + /* + * On SOC, disable all interrupts. On XEON, disable all but Link + * Interrupt. 
The rest will be unmasked as callbacks are registered. + */ + if (ntb->type == NTB_SOC) + ntb_write_8(ntb->reg_ofs.pdb_mask, ~0); + else + ntb_write_2(ntb->reg_ofs.pdb_mask, + ~(1 << ntb->limits.max_db_bits)); + + num_vectors = MIN(pci_msix_count(ntb->device), + ntb->limits.max_db_bits); + if (num_vectors >= 1) { + pci_alloc_msix(ntb->device, &num_vectors); + if (num_vectors >= 4) + use_msix = TRUE; + } + + ntb_create_callbacks(ntb, num_vectors); + if (use_msix == TRUE) { + for (i = 0; i < num_vectors; i++) { + ntb->int_info[i].rid = i + 1; + ntb->int_info[i].res = bus_alloc_resource_any( + ntb->device, SYS_RES_IRQ, &ntb->int_info[i].rid, + RF_ACTIVE); + if (ntb->int_info[i].res == NULL) { + device_printf(ntb->device, + "bus_alloc_resource failed\n"); + return (-1); + } + ntb->int_info[i].tag = NULL; + ntb->allocated_interrupts++; + if (ntb->type == NTB_SOC) { + interrupt_handler = handle_soc_irq; + int_arg = &ntb->db_cb[i]; + } else { + if (i == num_vectors - 1) { + interrupt_handler = handle_xeon_event_irq; + int_arg = ntb; + } else { + interrupt_handler = + handle_xeon_irq; + int_arg = &ntb->db_cb[i]; + } + } + if (bus_setup_intr(ntb->device, ntb->int_info[i].res, + INTR_MPSAFE | INTR_TYPE_MISC, NULL, + interrupt_handler, int_arg, + &ntb->int_info[i].tag) != 0) { + device_printf(ntb->device, + "bus_setup_intr failed\n"); + return (ENXIO); + } + } + } + else { + ntb->int_info[0].rid = 0; + ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, + &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE); + interrupt_handler = ntb_handle_legacy_interrupt; + if (ntb->int_info[0].res == NULL) { + device_printf(ntb->device, + "bus_alloc_resource failed\n"); + return (-1); + } + ntb->int_info[0].tag = NULL; + ntb->allocated_interrupts = 1; + + if (bus_setup_intr(ntb->device, ntb->int_info[0].res, + INTR_MPSAFE | INTR_TYPE_MISC, NULL, + interrupt_handler, ntb, &ntb->int_info[0].tag) != 0) { + + device_printf(ntb->device, "bus_setup_intr failed\n"); + return 
(ENXIO); + } + } + + return (0); +} + +static void +ntb_teardown_interrupts(struct ntb_softc *ntb) +{ + struct ntb_int_info *current_int; + int i; + + for (i=0; i<ntb->allocated_interrupts; i++) { + current_int = &ntb->int_info[i]; + if (current_int->tag != NULL) + bus_teardown_intr(ntb->device, current_int->res, + current_int->tag); + + if (current_int->res != NULL) + bus_release_resource(ntb->device, SYS_RES_IRQ, + rman_get_rid(current_int->res), current_int->res); + } + + ntb_free_callbacks(ntb); + pci_release_msi(ntb->device); +} + +static void +handle_soc_irq(void *arg) +{ + struct ntb_db_cb *db_cb = arg; + struct ntb_softc *ntb = db_cb->ntb; + + ntb_write_8(ntb->reg_ofs.pdb, (uint64_t) 1 << db_cb->db_num); + + if (db_cb->callback != NULL) + db_cb->callback(db_cb->data, db_cb->db_num); +} + +static void +handle_xeon_irq(void *arg) +{ + struct ntb_db_cb *db_cb = arg; + struct ntb_softc *ntb = db_cb->ntb; + + /* + * On Xeon, there are 16 bits in the interrupt register + * but only 4 vectors. So, 5 bits are assigned to the first 3 + * vectors, with the 4th having a single bit for link + * interrupts. 
+ */ + ntb_write_2(ntb->reg_ofs.pdb, + ((1 << ntb->bits_per_vector) - 1) << + (db_cb->db_num * ntb->bits_per_vector)); + + if (db_cb->callback != NULL) + db_cb->callback(db_cb->data, db_cb->db_num); +} + +/* Since we do not have a HW doorbell in SOC, this is only used in JF/JT */ +static void +handle_xeon_event_irq(void *arg) +{ + struct ntb_softc *ntb = arg; + int rc; + + rc = ntb_check_link_status(ntb); + if (rc != 0) + device_printf(ntb->device, "Error determining link status\n"); + + /* bit 15 is always the link bit */ + ntb_write_2(ntb->reg_ofs.pdb, 1 << ntb->limits.max_db_bits); +} + +static void +ntb_handle_legacy_interrupt(void *arg) +{ + struct ntb_softc *ntb = arg; + unsigned int i = 0; + uint64_t pdb64; + uint16_t pdb16; + + if (ntb->type == NTB_SOC) { + pdb64 = ntb_read_8(ntb->reg_ofs.pdb); + + while (pdb64) { + i = ffs(pdb64); + pdb64 &= pdb64 - 1; + handle_soc_irq(&ntb->db_cb[i]); + } + } else { + pdb16 = ntb_read_2(ntb->reg_ofs.pdb); + + if ((pdb16 & XEON_DB_HW_LINK) != 0) { + handle_xeon_event_irq(ntb); + pdb16 &= ~XEON_DB_HW_LINK; + } + + while (pdb16 != 0) { + i = ffs(pdb16); + pdb16 &= pdb16 - 1; + handle_xeon_irq(&ntb->db_cb[i]); + } + } + +} + +static int +ntb_create_callbacks(struct ntb_softc *ntb, int num_vectors) +{ + int i; + + ntb->db_cb = malloc(num_vectors * sizeof(struct ntb_db_cb), M_NTB, + M_ZERO | M_WAITOK); + for (i = 0; i < num_vectors; i++) { + ntb->db_cb[i].db_num = i; + ntb->db_cb[i].ntb = ntb; + } + + return (0); +} + +static void +ntb_free_callbacks(struct ntb_softc *ntb) +{ + int i; + + for (i = 0; i < ntb->limits.max_db_bits; i++) + ntb_unregister_db_callback(ntb, i); + + free(ntb->db_cb, M_NTB); +} + +static struct ntb_hw_info * +ntb_get_device_info(uint32_t device_id) +{ + struct ntb_hw_info *ep = pci_ids; + + while (ep->device_id) { + if (ep->device_id == device_id) + return (ep); + ++ep; + } + return (NULL); +} + +static int +ntb_initialize_hw(struct ntb_softc *ntb) +{ + + if (ntb->type == NTB_SOC) + return 
(ntb_setup_soc(ntb)); + else + return (ntb_setup_xeon(ntb)); +} + +static int +ntb_setup_xeon(struct ntb_softc *ntb) +{ + uint8_t val, connection_type; + + val = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1); + + connection_type = val & XEON_PPD_CONN_TYPE; + switch (connection_type) { + case NTB_CONN_B2B: + ntb->conn_type = NTB_CONN_B2B; + break; + case NTB_CONN_CLASSIC: + case NTB_CONN_RP: + default: + device_printf(ntb->device, "Connection type %d not supported\n", + connection_type); + return (ENXIO); + } + + if ((val & XEON_PPD_DEV_TYPE) != 0) + ntb->dev_type = NTB_DEV_DSD; + else + ntb->dev_type = NTB_DEV_USD; + + ntb->reg_ofs.pdb = XEON_PDOORBELL_OFFSET; + ntb->reg_ofs.pdb_mask = XEON_PDBMSK_OFFSET; + ntb->reg_ofs.sbar2_xlat = XEON_SBAR2XLAT_OFFSET; + ntb->reg_ofs.sbar4_xlat = XEON_SBAR4XLAT_OFFSET; + ntb->reg_ofs.lnk_cntl = XEON_NTBCNTL_OFFSET; + ntb->reg_ofs.lnk_stat = XEON_LINK_STATUS_OFFSET; + ntb->reg_ofs.spad_local = XEON_SPAD_OFFSET; + ntb->reg_ofs.spci_cmd = XEON_PCICMD_OFFSET; + + if (ntb->conn_type == NTB_CONN_B2B) { + ntb->reg_ofs.sdb = XEON_B2B_DOORBELL_OFFSET; + ntb->reg_ofs.spad_remote = XEON_B2B_SPAD_OFFSET; + ntb->limits.max_spads = XEON_MAX_SPADS; + } else { + ntb->reg_ofs.sdb = XEON_SDOORBELL_OFFSET; + ntb->reg_ofs.spad_remote = XEON_SPAD_OFFSET; + ntb->limits.max_spads = XEON_MAX_COMPAT_SPADS; + } + + ntb->limits.max_db_bits = XEON_MAX_DB_BITS; + ntb->limits.msix_cnt = XEON_MSIX_CNT; + ntb->bits_per_vector = XEON_DB_BITS_PER_VEC; + + /* Enable Bus Master and Memory Space on the secondary side */ + ntb_write_2(ntb->reg_ofs.spci_cmd, + PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); + + return (0); +} + +static int +ntb_setup_soc(struct ntb_softc *ntb) +{ + uint32_t val, connection_type; + + val = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); + + connection_type = (val & SOC_PPD_CONN_TYPE) >> 8; + switch (connection_type) { + case NTB_CONN_B2B: + ntb->conn_type = NTB_CONN_B2B; + break; + case NTB_CONN_RP: + default: + device_printf(ntb->device, 
"Connection type %d not supported\n", + connection_type); + return (ENXIO); + } + + if ((val & SOC_PPD_DEV_TYPE) != 0) + ntb->dev_type = NTB_DEV_DSD; + else + ntb->dev_type = NTB_DEV_USD; + + /* Initiate PCI-E link training */ + pci_write_config(ntb->device, NTB_PPD_OFFSET, val | SOC_PPD_INIT_LINK, + 4); + + ntb->reg_ofs.pdb = SOC_PDOORBELL_OFFSET; + ntb->reg_ofs.pdb_mask = SOC_PDBMSK_OFFSET; + ntb->reg_ofs.sbar2_xlat = SOC_SBAR2XLAT_OFFSET; + ntb->reg_ofs.sbar4_xlat = SOC_SBAR4XLAT_OFFSET; + ntb->reg_ofs.lnk_cntl = SOC_NTBCNTL_OFFSET; + ntb->reg_ofs.lnk_stat = SOC_LINK_STATUS_OFFSET; + ntb->reg_ofs.spad_local = SOC_SPAD_OFFSET; + ntb->reg_ofs.spci_cmd = SOC_PCICMD_OFFSET; + + if (ntb->conn_type == NTB_CONN_B2B) { + ntb->reg_ofs.sdb = SOC_B2B_DOORBELL_OFFSET; + ntb->reg_ofs.spad_remote = SOC_B2B_SPAD_OFFSET; + ntb->limits.max_spads = SOC_MAX_SPADS; + } else { + ntb->reg_ofs.sdb = SOC_PDOORBELL_OFFSET; + ntb->reg_ofs.spad_remote = SOC_SPAD_OFFSET; + ntb->limits.max_spads = SOC_MAX_COMPAT_SPADS; + } + + ntb->limits.max_db_bits = SOC_MAX_DB_BITS; + ntb->limits.msix_cnt = SOC_MSIX_CNT; + ntb->bits_per_vector = SOC_DB_BITS_PER_VEC; + + /* + * FIXME - MSI-X bug on early SOC HW, remove once internal issue is + * resolved. Mask transaction layer internal parity errors. + */ + pci_write_config(ntb->device, 0xFC, 0x4, 4); + + /* + * Some BIOSes aren't filling out the XLAT offsets. + * Check and correct the issue. 
+ */ + if (ntb->dev_type == NTB_DEV_USD) { + if (ntb_read_8(SOC_PBAR2XLAT_OFFSET) == 0) + ntb_write_8(SOC_PBAR2XLAT_OFFSET, + SOC_PBAR2XLAT_USD_ADDR); + + if (ntb_read_8(SOC_PBAR4XLAT_OFFSET) == 0) + ntb_write_8(SOC_PBAR4XLAT_OFFSET, + SOC_PBAR4XLAT_USD_ADDR); + + if (ntb_read_8(SOC_MBAR23_OFFSET) == 0xC) + ntb_write_8(SOC_MBAR23_OFFSET, SOC_MBAR23_USD_ADDR); + + if (ntb_read_8(SOC_MBAR45_OFFSET) == 0xC) + ntb_write_8(SOC_MBAR45_OFFSET, SOC_MBAR45_USD_ADDR); + } else { + if (ntb_read_8(SOC_PBAR2XLAT_OFFSET) == 0) + ntb_write_8(SOC_PBAR2XLAT_OFFSET, + SOC_PBAR2XLAT_DSD_ADDR); + + if (ntb_read_8(SOC_PBAR4XLAT_OFFSET) == 0) + ntb_write_8(SOC_PBAR4XLAT_OFFSET, + SOC_PBAR4XLAT_DSD_ADDR); + + if (ntb_read_8(SOC_MBAR23_OFFSET) == 0xC) + ntb_write_8(SOC_MBAR23_OFFSET, SOC_MBAR23_DSD_ADDR); + + if (ntb_read_8(SOC_MBAR45_OFFSET) == 0xC) + ntb_write_8(SOC_MBAR45_OFFSET, SOC_MBAR45_DSD_ADDR); + } + + /* Enable Bus Master and Memory Space on the secondary side */ + ntb_write_2(ntb->reg_ofs.spci_cmd, + PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); + callout_reset(&ntb->heartbeat_timer, 0, ntb_handle_heartbeat, ntb); + + return (0); +} + +/* + * SOC doesn't have link status interrupt, poll on that platform. + * + * Callout handler: refreshes the link state through ntb_check_link_status(), + * hands off to recover_soc_link() when the link is down with + * SOC_LTSSMSTATEJMP_FORCEDETECT set, and otherwise re-arms itself after + * NTB_HB_TIMEOUT * hz ticks. ntb_check_link_status() in this file always + * returns 0, so the error message below is currently unreachable. + */ +static void +ntb_handle_heartbeat(void *arg) +{ + struct ntb_softc *ntb = arg; + uint32_t status32; + int rc = ntb_check_link_status(ntb); + + if (rc != 0) + device_printf(ntb->device, + "Error determining link status\n"); + /* Check to see if a link error is the cause of the link down */ + if (ntb->link_status == NTB_LINK_DOWN) { + status32 = ntb_read_4(SOC_LTSSMSTATEJMP_OFFSET); + if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0) { + callout_reset(&ntb->lr_timer, 0, recover_soc_link, + ntb); + return; + } + } + + callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, + ntb_handle_heartbeat, ntb); +} + +static void +soc_perform_link_restart(struct ntb_softc *ntb) +{ + uint32_t status; + + /* Driver resets the NTB ModPhy lanes - magic!
*/ + ntb_write_1(SOC_MODPHY_PCSREG6, 0xe0); + ntb_write_1(SOC_MODPHY_PCSREG4, 0x40); + ntb_write_1(SOC_MODPHY_PCSREG4, 0x60); + ntb_write_1(SOC_MODPHY_PCSREG6, 0x60); + + /* + * Driver waits 100ms to allow the NTB ModPhy to settle. + * NOTE(review): pause() sleeps, and this function is called from + * recover_soc_link(), which is scheduled with callout_reset() - confirm + * that sleeping is allowed in that context. + */ + pause("ModPhy", hz / 10); + + /* Clear AER Errors, write to clear */ + status = ntb_read_4(SOC_ERRCORSTS_OFFSET); + status &= PCIM_AER_COR_REPLAY_ROLLOVER; + ntb_write_4(SOC_ERRCORSTS_OFFSET, status); + + /* Clear unexpected electrical idle event in LTSSM, write to clear */ + status = ntb_read_4(SOC_LTSSMERRSTS0_OFFSET); + status |= SOC_LTSSMERRSTS0_UNEXPECTEDEI; + ntb_write_4(SOC_LTSSMERRSTS0_OFFSET, status); + + /* Clear DeSkew Buffer error, write to clear */ + status = ntb_read_4(SOC_DESKEWSTS_OFFSET); + status |= SOC_DESKEWSTS_DBERR; + ntb_write_4(SOC_DESKEWSTS_OFFSET, status); + + status = ntb_read_4(SOC_IBSTERRRCRVSTS0_OFFSET); + status &= SOC_IBIST_ERR_OFLOW; + ntb_write_4(SOC_IBSTERRRCRVSTS0_OFFSET, status); + + /* Releases the NTB state machine to allow the link to retrain */ + status = ntb_read_4(SOC_LTSSMSTATEJMP_OFFSET); + status &= ~SOC_LTSSMSTATEJMP_FORCEDETECT; + ntb_write_4(SOC_LTSSMSTATEJMP_OFFSET, status); +} + +static void +ntb_handle_link_event(struct ntb_softc *ntb, int link_state) +{ + enum ntb_hw_event event; + uint16_t status; + + if (ntb->link_status == link_state) + return; + + if (link_state == NTB_LINK_UP) { + device_printf(ntb->device, "Link Up\n"); + ntb->link_status = NTB_LINK_UP; + event = NTB_EVENT_HW_LINK_UP; + + if (ntb->type == NTB_SOC) + status = ntb_read_2(ntb->reg_ofs.lnk_stat); + else + status = pci_read_config(ntb->device, + XEON_LINK_STATUS_OFFSET, 2); + ntb->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4; + ntb->link_speed = (status & NTB_LINK_SPEED_MASK); + device_printf(ntb->device, "Link Width %d, Link Speed %d\n", + ntb->link_width, ntb->link_speed); + callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, + ntb_handle_heartbeat, ntb); + } else { + device_printf(ntb->device, "Link
Down\n"); + ntb->link_status = NTB_LINK_DOWN; + event = NTB_EVENT_HW_LINK_DOWN; + /* Don't modify link width/speed, we need it in link recovery */ + } + + /* notify the upper layer if we have an event change */ + if (ntb->event_cb != NULL) + ntb->event_cb(ntb->ntb_transport, event); +} + +static void +recover_soc_link(void *arg) +{ + struct ntb_softc *ntb = arg; + uint8_t speed, width; + uint32_t status32; + uint16_t status16; + + soc_perform_link_restart(ntb); + pause("Link", SOC_LINK_RECOVERY_TIME * hz / 1000); + + status32 = ntb_read_4(SOC_LTSSMSTATEJMP_OFFSET); + if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0) + goto retry; + + status32 = ntb_read_4(SOC_IBSTERRRCRVSTS0_OFFSET); + if ((status32 & SOC_IBIST_ERR_OFLOW) != 0) + goto retry; + + status16 = ntb_read_2(ntb->reg_ofs.lnk_stat); + width = (status16 & NTB_LINK_WIDTH_MASK) >> 4; + speed = (status16 & NTB_LINK_SPEED_MASK); + if (ntb->link_width != width || ntb->link_speed != speed) + goto retry; + + callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, + ntb_handle_heartbeat, ntb); + return; + +retry: + callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_soc_link, + ntb); +} + +static int +ntb_check_link_status(struct ntb_softc *ntb) +{ + int link_state; + uint32_t ntb_cntl; + uint16_t status; + + if (ntb->type == NTB_SOC) { + ntb_cntl = ntb_read_4(ntb->reg_ofs.lnk_cntl); + if ((ntb_cntl & SOC_CNTL_LINK_DOWN) != 0) + link_state = NTB_LINK_DOWN; + else + link_state = NTB_LINK_UP; + } else { + status = pci_read_config(ntb->device, XEON_LINK_STATUS_OFFSET, + 2); + + if ((status & NTB_LINK_STATUS_ACTIVE) != 0) + link_state = NTB_LINK_UP; + else + link_state = NTB_LINK_DOWN; + } + + ntb_handle_link_event(ntb, link_state); + + return (0); +} + +/** + * ntb_register_event_callback() - register event callback + * @ntb: pointer to ntb_softc instance + * @func: callback function to register + * + * This function registers a callback for any HW driver events such as link + * up/down, power management
notices and etc. + * + * RETURNS: EINVAL if an event callback is already registered, or zero for + * success. + */ +int +ntb_register_event_callback(struct ntb_softc *ntb, ntb_event_callback func) +{ + + if (ntb->event_cb != NULL) + return (EINVAL); + + ntb->event_cb = func; + + return (0); +} + +/** + * ntb_unregister_event_callback() - unregisters the event callback + * @ntb: pointer to ntb_softc instance + * + * This function unregisters the existing callback from transport by resetting + * the stored event callback pointer to NULL. + */ +void +ntb_unregister_event_callback(struct ntb_softc *ntb) +{ + + ntb->event_cb = NULL; +} + +/** + * ntb_register_db_callback() - register a callback for doorbell interrupt + * @ntb: pointer to ntb_softc instance + * @idx: doorbell index to register callback, zero based + * @data: opaque pointer stored alongside the callback + * @func: callback function to register + * + * This function registers a callback function for the doorbell interrupt + * on the primary side. The function will unmask the doorbell as well to + * allow interrupt. + * + * RETURNS: EINVAL if @idx is not below allocated_interrupts or already has a + * callback registered, or zero for success. + */ +int +ntb_register_db_callback(struct ntb_softc *ntb, unsigned int idx, void *data, + ntb_db_callback func) +{ + uint64_t bit, mask; + + if (idx >= ntb->allocated_interrupts || ntb->db_cb[idx].callback) { + device_printf(ntb->device, "Invalid Index.\n"); + return (EINVAL); + } + + ntb->db_cb[idx].callback = func; + ntb->db_cb[idx].data = data; + + /* + * Unmask the interrupt. The SOC doorbell mask is accessed as a 64-bit + * register, matching ntb_ring_sdb() and the 34 SOC doorbell bits + * (SOC_MAX_DB_BITS); a 16-bit read-modify-write would never unmask + * doorbells 16 and above. + */ + bit = (uint64_t)1 << (idx * ntb->bits_per_vector); + if (ntb->type == NTB_SOC) { + mask = ntb_read_8(ntb->reg_ofs.pdb_mask); + ntb_write_8(ntb->reg_ofs.pdb_mask, mask & ~bit); + } else { + mask = ntb_read_2(ntb->reg_ofs.pdb_mask); + ntb_write_2(ntb->reg_ofs.pdb_mask, (uint16_t)(mask & ~bit)); + } + + return (0); +} + +/** + * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt + * @ntb: pointer to ntb_softc instance + * @idx: doorbell index to unregister callback, zero based + * + * This function unregisters a callback function for the doorbell interrupt + * on the primary side. The function will also mask the said doorbell. It does + * nothing if @idx is out of range or has no callback registered.
+ */ +void +ntb_unregister_db_callback(struct ntb_softc *ntb, unsigned int idx) +{ + uint64_t bit, mask; + + if (idx >= ntb->allocated_interrupts || !ntb->db_cb[idx].callback) + return; + + /* + * Mask the interrupt with the same register width that + * ntb_register_db_callback() uses: 64 bits on SOC, 16 bits otherwise. + */ + bit = (uint64_t)1 << (idx * ntb->bits_per_vector); + if (ntb->type == NTB_SOC) { + mask = ntb_read_8(ntb->reg_ofs.pdb_mask); + ntb_write_8(ntb->reg_ofs.pdb_mask, mask | bit); + } else { + mask = ntb_read_2(ntb->reg_ofs.pdb_mask); + ntb_write_2(ntb->reg_ofs.pdb_mask, (uint16_t)(mask | bit)); + } + + ntb->db_cb[idx].callback = NULL; +} + +/** + * ntb_find_transport() - find the transport pointer + * @ntb: pointer to ntb_softc instance + * + * Return the transport pointer currently stored in the softc, i.e. the value + * recorded by ntb_register_transport(). + * + * RETURNS: pointer to transport; NULL after ntb_unregister_transport(). + */ +void * +ntb_find_transport(struct ntb_softc *ntb) +{ + + return (ntb->ntb_transport); +} + +/** + * ntb_register_transport() - Register NTB transport with NTB HW driver + * @ntb: pointer to ntb_softc instance + * @transport: transport identifier + * + * This function allows a transport to reserve the hardware driver for + * NTB usage. + * + * RETURNS: pointer to ntb_softc, NULL if a transport is already registered. + */ +struct ntb_softc * +ntb_register_transport(struct ntb_softc *ntb, void *transport) +{ + + /* + * TODO: when we have more than one transport, we will need to rewrite + * this to prevent race conditions + */ + if (ntb->ntb_transport != NULL) + return (NULL); + + ntb->ntb_transport = transport; + return (ntb); +} + +/** + * ntb_unregister_transport() - Unregister the transport with the NTB HW driver + * @ntb - ntb_softc of the transport to be freed + * + * This function unregisters the transport from the HW driver and performs any + * necessary cleanups.
+ */ +void +ntb_unregister_transport(struct ntb_softc *ntb) +{ + int i; + + if (ntb->ntb_transport == NULL) + return; + + for (i = 0; i < ntb->allocated_interrupts; i++) + ntb_unregister_db_callback(ntb, i); + + ntb_unregister_event_callback(ntb); + ntb->ntb_transport = NULL; +} + +/** + * ntb_get_max_spads() - get the total scratch regs usable + * @ntb: pointer to ntb_softc instance + * + * This function returns the max 32bit scratchpad registers usable by the + * upper layer. + * + * RETURNS: total number of scratch pad registers available + * (limits.max_spads) + */ +int +ntb_get_max_spads(struct ntb_softc *ntb) +{ + + return (ntb->limits.max_spads); +} + +/** + * ntb_write_local_spad() - write to the secondary scratchpad register + * @ntb: pointer to ntb_softc instance + * @idx: index to the scratchpad register, 0 based + * @val: the data value to put into the register + * + * This function allows writing of a 32bit value to the indexed scratchpad + * register, addressed as reg_ofs.spad_local + idx * 4. + * NOTE(review): this header says the register resides on the secondary + * (external) side, the same wording as ntb_write_remote_spad() - confirm + * which side spad_local refers to. + * + * RETURNS: EINVAL if @idx is not below limits.max_spads, or zero for success. + */ +int +ntb_write_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val) +{ + + if (idx >= ntb->limits.max_spads) + return (EINVAL); + + ntb_write_4(ntb->reg_ofs.spad_local + idx * 4, val); + + return (0); +} + +/** + * ntb_read_local_spad() - read from the primary scratchpad register + * @ntb: pointer to ntb_softc instance + * @idx: index to scratchpad register, 0 based + * @val: pointer to 32bit integer for storing the register value + * + * This function allows reading of the 32bit scratchpad register addressed as + * reg_ofs.spad_local + idx * 4. + * NOTE(review): this header says the register is on the primary (internal) + * side, the same wording as ntb_read_remote_spad() - confirm which side + * spad_local refers to. + * + * RETURNS: EINVAL if @idx is not below limits.max_spads (@val is then left + * untouched), or zero for success.
+ */ +int +ntb_read_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val) +{ + + if (idx >= ntb->limits.max_spads) + return (EINVAL); + + *val = ntb_read_4(ntb->reg_ofs.spad_local + idx * 4); + + return (0); +} + +/** + * ntb_write_remote_spad() - write to the secondary scratchpad register + * @ntb: pointer to ntb_softc instance + * @idx: index to the scratchpad register, 0 based + * @val: the data value to put into the register + * + * This function allows writing of a 32bit value to the indexed scratchpad + * register, addressed as reg_ofs.spad_remote + idx * 4. The register resides + * on the secondary (external) side. + * + * RETURNS: EINVAL if @idx is not below limits.max_spads, or zero for success. + */ +int +ntb_write_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val) +{ + + if (idx >= ntb->limits.max_spads) + return (EINVAL); + + ntb_write_4(ntb->reg_ofs.spad_remote + idx * 4, val); + + return (0); +} + +/** + * ntb_read_remote_spad() - read from the primary scratchpad register + * @ntb: pointer to ntb_softc instance + * @idx: index to scratchpad register, 0 based + * @val: pointer to 32bit integer for storing the register value + * + * This function allows reading of the 32bit scratchpad register addressed as + * reg_ofs.spad_remote + idx * 4. + * NOTE(review): this header says the register is on the primary (internal) + * side, the same wording as ntb_read_local_spad() - confirm which side + * spad_remote refers to. + * + * RETURNS: EINVAL if @idx is not below limits.max_spads (@val is then left + * untouched), or zero for success. + */ +int +ntb_read_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val) +{ + + if (idx >= ntb->limits.max_spads) + return (EINVAL); + + *val = ntb_read_4(ntb->reg_ofs.spad_remote + idx * 4); + + return (0); +} + +/** + * ntb_get_mw_vbase() - get virtual addr for the NTB memory window + * @ntb: pointer to ntb_softc instance + * @mw: memory window number + * + * This function provides the base virtual address of the memory window + * specified. + * + * RETURNS: pointer to virtual address, or NULL on error.
+ */ +void * +ntb_get_mw_vbase(struct ntb_softc *ntb, unsigned int mw) +{ + + if (mw >= NTB_NUM_MW) + return (NULL); + + return (ntb->bar_info[NTB_MW_TO_BAR(mw)].vbase); +} + +vm_paddr_t +ntb_get_mw_pbase(struct ntb_softc *ntb, unsigned int mw) +{ + + if (mw >= NTB_NUM_MW) + return (0); + + return (ntb->bar_info[NTB_MW_TO_BAR(mw)].pbase); +} + +/** + * ntb_get_mw_size() - return size of NTB memory window + * @ntb: pointer to ntb_softc instance + * @mw: memory window number + * + * This function provides the physical size of the memory window specified, + * taken from the bar_info entry selected by NTB_MW_TO_BAR(mw). + * + * RETURNS: the size of the memory window, or zero if @mw is not below + * NTB_NUM_MW + */ +u_long +ntb_get_mw_size(struct ntb_softc *ntb, unsigned int mw) +{ + + if (mw >= NTB_NUM_MW) + return (0); + + return (ntb->bar_info[NTB_MW_TO_BAR(mw)].size); +} + +/** + * ntb_set_mw_addr - set the memory window address + * @ntb: pointer to ntb_softc instance + * @mw: memory window number + * @addr: base address for data + * + * This function sets the base physical address of the memory window. This + * memory address is where data from the remote system will be transferred + * into or out of depending on how the transport is configured. + * + * The address is written to reg_ofs.sbar2_xlat for NTB_B2B_BAR_1 and to + * reg_ofs.sbar4_xlat for NTB_B2B_BAR_2. The call is silently ignored when + * @mw is not below NTB_NUM_MW or maps to any other BAR. + */ +void +ntb_set_mw_addr(struct ntb_softc *ntb, unsigned int mw, uint64_t addr) +{ + + if (mw >= NTB_NUM_MW) + return; + + switch (NTB_MW_TO_BAR(mw)) { + case NTB_B2B_BAR_1: + ntb_write_8(ntb->reg_ofs.sbar2_xlat, addr); + break; + case NTB_B2B_BAR_2: + ntb_write_8(ntb->reg_ofs.sbar4_xlat, addr); + break; + } +} + +/** + * ntb_ring_sdb() - Set the doorbell on the secondary/external side + * @ntb: pointer to ntb_softc instance + * @db: doorbell to ring + * + * This function allows triggering of a doorbell on the secondary/external + * side that will initiate an interrupt on the remote host. On NTB_SOC it + * writes the single bit 1 << db as a 64-bit value; otherwise it writes, as a + * 16-bit value, all bits_per_vector bits belonging to doorbell @db. + * + * This function returns nothing, and @db is not range checked here.
+ */ +void +ntb_ring_sdb(struct ntb_softc *ntb, unsigned int db) +{ + + if (ntb->type == NTB_SOC) + ntb_write_8(ntb->reg_ofs.sdb, (uint64_t) 1 << db); + else + ntb_write_2(ntb->reg_ofs.sdb, + ((1 << ntb->bits_per_vector) - 1) << + (db * ntb->bits_per_vector)); +} + +/** + * ntb_query_link_status() - return the hardware link status + * @ntb: pointer to ntb_softc instance + * + * Returns true if the hardware is connected to the remote system. The answer + * is the link_status value cached in the softc; no register is read here. + * + * RETURNS: true if link_status is NTB_LINK_UP, false otherwise + */ +bool +ntb_query_link_status(struct ntb_softc *ntb) +{ + + return (ntb->link_status == NTB_LINK_UP); +} + +static bool +is_bar_for_data_transfer(int bar_num) +{ + if ((bar_num > NTB_CONFIG_BAR) && (bar_num < NTB_MAX_BARS)) + return true; + else + return false; +} diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.h b/sys/dev/ntb/ntb_hw/ntb_hw.h new file mode 100644 index 0000000..4f44031 --- /dev/null +++ b/sys/dev/ntb/ntb_hw/ntb_hw.h @@ -0,0 +1,73 @@ +/*- + * Copyright (C) 2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NTB_HW_H_ +#define _NTB_HW_H_ + +struct ntb_softc; + +#define NTB_NUM_MW 2 +#define NTB_LINK_DOWN 0 +#define NTB_LINK_UP 1 + +enum ntb_hw_event { + NTB_EVENT_SW_EVENT0 = 0, + NTB_EVENT_SW_EVENT1, + NTB_EVENT_SW_EVENT2, + NTB_EVENT_HW_ERROR, + NTB_EVENT_HW_LINK_UP, + NTB_EVENT_HW_LINK_DOWN, +}; + +typedef void (*ntb_db_callback)(void *data, int db_num); +typedef void (*ntb_event_callback)(void *data, enum ntb_hw_event event); + +int ntb_register_event_callback(struct ntb_softc *ntb, ntb_event_callback func); +void ntb_unregister_event_callback(struct ntb_softc *ntb); +int ntb_register_db_callback(struct ntb_softc *ntb, unsigned int idx, + void *data, ntb_db_callback func); +void ntb_unregister_db_callback(struct ntb_softc *ntb, unsigned int idx); +void *ntb_find_transport(struct ntb_softc *ntb); +struct ntb_softc *ntb_register_transport(struct ntb_softc *ntb, + void *transport); +void ntb_unregister_transport(struct ntb_softc *ntb); +int ntb_get_max_spads(struct ntb_softc *ntb); +int ntb_write_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val); +int ntb_read_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val); +int ntb_write_remote_spad(struct ntb_softc *ntb, unsigned int idx, + uint32_t val); +int ntb_read_remote_spad(struct ntb_softc *ntb, unsigned int idx, + uint32_t *val); +void *ntb_get_mw_vbase(struct ntb_softc *ntb, unsigned int mw); +vm_paddr_t ntb_get_mw_pbase(struct 
ntb_softc *ntb, unsigned int mw); +u_long ntb_get_mw_size(struct ntb_softc *ntb, unsigned int mw); +void ntb_set_mw_addr(struct ntb_softc *ntb, unsigned int mw, uint64_t addr); +void ntb_ring_sdb(struct ntb_softc *ntb, unsigned int db); +bool ntb_query_link_status(struct ntb_softc *ntb); + +#endif /* _NTB_HW_H_ */ diff --git a/sys/dev/ntb/ntb_hw/ntb_regs.h b/sys/dev/ntb/ntb_hw/ntb_regs.h new file mode 100644 index 0000000..34ad779 --- /dev/null +++ b/sys/dev/ntb/ntb_hw/ntb_regs.h @@ -0,0 +1,146 @@ +/*- + * Copyright (C) 2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NTB_REGS_H_ +#define _NTB_REGS_H_ + +#define NTB_LINK_ENABLE 0x0000 +#define NTB_LINK_DISABLE 0x0002 +#define NTB_LINK_STATUS_ACTIVE 0x2000 +#define NTB_LINK_SPEED_MASK 0x000f +#define NTB_LINK_WIDTH_MASK 0x03f0 + +#define XEON_MSIX_CNT 4 +#define XEON_MAX_SPADS 16 +#define XEON_MAX_COMPAT_SPADS 8 +/* Reserve the uppermost bit for link interrupt */ +#define XEON_MAX_DB_BITS 15 +#define XEON_DB_BITS_PER_VEC 5 + +#define XEON_DB_HW_LINK 0x8000 + +#define XEON_PCICMD_OFFSET 0x0504 +#define XEON_DEVCTRL_OFFSET 0x0598 +#define XEON_LINK_STATUS_OFFSET 0x01A2 + +#define XEON_PBAR2LMT_OFFSET 0x0000 +#define XEON_PBAR4LMT_OFFSET 0x0008 +#define XEON_PBAR2XLAT_OFFSET 0x0010 +#define XEON_PBAR4XLAT_OFFSET 0x0018 +#define XEON_SBAR2LMT_OFFSET 0x0020 +#define XEON_SBAR4LMT_OFFSET 0x0028 +#define XEON_SBAR2XLAT_OFFSET 0x0030 +#define XEON_SBAR4XLAT_OFFSET 0x0038 +#define XEON_SBAR0BASE_OFFSET 0x0040 +#define XEON_SBAR2BASE_OFFSET 0x0048 +#define XEON_SBAR4BASE_OFFSET 0x0050 +#define XEON_NTBCNTL_OFFSET 0x0058 +#define XEON_SBDF_OFFSET 0x005C +#define XEON_PDOORBELL_OFFSET 0x0060 +#define XEON_PDBMSK_OFFSET 0x0062 +#define XEON_SDOORBELL_OFFSET 0x0064 +#define XEON_SDBMSK_OFFSET 0x0066 +#define XEON_USMEMMISS 0x0070 +#define XEON_SPAD_OFFSET 0x0080 +#define XEON_SPADSEMA4_OFFSET 0x00c0 +#define XEON_WCCNTRL_OFFSET 0x00e0 +#define XEON_B2B_SPAD_OFFSET 0x0100 +#define XEON_B2B_DOORBELL_OFFSET 0x0140 +#define XEON_B2B_XLAT_OFFSET 0x0144 + +#define SOC_MSIX_CNT 34 +#define SOC_MAX_SPADS 16 +#define SOC_MAX_COMPAT_SPADS 16 +#define SOC_MAX_DB_BITS 34 +#define SOC_DB_BITS_PER_VEC 1 + +#define SOC_PCICMD_OFFSET 0xb004 +#define SOC_MBAR23_OFFSET 0xb018 +#define SOC_MBAR45_OFFSET 0xb020 +#define SOC_DEVCTRL_OFFSET 0xb048 +#define SOC_LINK_STATUS_OFFSET 0xb052 +#define SOC_ERRCORSTS_OFFSET 0xb110 + +#define SOC_SBAR2XLAT_OFFSET 0x0008 +#define SOC_SBAR4XLAT_OFFSET 0x0010 +#define SOC_PDOORBELL_OFFSET 0x0020 +#define SOC_PDBMSK_OFFSET 0x0028 +#define 
SOC_NTBCNTL_OFFSET 0x0060 +#define SOC_EBDF_OFFSET 0x0064 +#define SOC_SPAD_OFFSET 0x0080 +#define SOC_SPADSEMA_OFFSET 0x00c0 +#define SOC_STKYSPAD_OFFSET 0x00c4 +#define SOC_PBAR2XLAT_OFFSET 0x8008 +#define SOC_PBAR4XLAT_OFFSET 0x8010 +#define SOC_B2B_DOORBELL_OFFSET 0x8020 +#define SOC_B2B_SPAD_OFFSET 0x8080 +#define SOC_B2B_SPADSEMA_OFFSET 0x80c0 +#define SOC_B2B_STKYSPAD_OFFSET 0x80c4 + +#define SOC_MODPHY_PCSREG4 0x1c004 +#define SOC_MODPHY_PCSREG6 0x1c006 + +#define SOC_IP_BASE 0xC000 +#define SOC_DESKEWSTS_OFFSET (SOC_IP_BASE + 0x3024) +#define SOC_LTSSMERRSTS0_OFFSET (SOC_IP_BASE + 0x3180) +#define SOC_LTSSMSTATEJMP_OFFSET (SOC_IP_BASE + 0x3040) +#define SOC_IBSTERRRCRVSTS0_OFFSET (SOC_IP_BASE + 0x3324) + +#define SOC_DESKEWSTS_DBERR (1 << 15) +#define SOC_LTSSMERRSTS0_UNEXPECTEDEI (1 << 20) +#define SOC_LTSSMSTATEJMP_FORCEDETECT (1 << 2) +#define SOC_IBIST_ERR_OFLOW 0x7FFF7FFF + +#define NTB_CNTL_BAR23_SNOOP (1 << 2) +#define NTB_CNTL_BAR45_SNOOP (1 << 6) +#define SOC_CNTL_LINK_DOWN (1 << 16) + +#define NTB_PPD_OFFSET 0x00D4 +#define XEON_PPD_CONN_TYPE 0x0003 +#define XEON_PPD_DEV_TYPE 0x0010 +#define SOC_PPD_INIT_LINK 0x0008 +#define SOC_PPD_CONN_TYPE 0x0300 +#define SOC_PPD_DEV_TYPE 0x1000 + +#define NTB_CONN_CLASSIC 0 +#define NTB_CONN_B2B 1 +#define NTB_CONN_RP 2 + +#define NTB_DEV_DSD 1 +#define NTB_DEV_USD 0 + +#define SOC_PBAR2XLAT_USD_ADDR 0x0000004000000000 +#define SOC_PBAR4XLAT_USD_ADDR 0x0000008000000000 +#define SOC_MBAR23_USD_ADDR 0x000000410000000C +#define SOC_MBAR45_USD_ADDR 0x000000810000000C +#define SOC_PBAR2XLAT_DSD_ADDR 0x0000004100000000 +#define SOC_PBAR4XLAT_DSD_ADDR 0x0000008100000000 +#define SOC_MBAR23_DSD_ADDR 0x000000400000000C +#define SOC_MBAR45_DSD_ADDR 0x000000800000000C + +#endif /* _NTB_REGS_H_ */ diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 55af93a..c2dc799 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -243,6 +243,7 @@ SUBDIR= \ nmdm \ ${_nsp} \ nullfs \ + ${_ntb} \ ${_nvd} \ ${_nve} 
\ ${_nvme} \ @@ -693,6 +694,7 @@ _mthca= mthca .endif _ndis= ndis _nfe= nfe +_ntb= ntb _nvd= nvd .if ${MK_SOURCELESS_HOST} != "no" _nve= nve diff --git a/sys/modules/ntb/Makefile b/sys/modules/ntb/Makefile new file mode 100644 index 0000000..a5169a0 --- /dev/null +++ b/sys/modules/ntb/Makefile @@ -0,0 +1,5 @@ +# $FreeBSD$ + +SUBDIR= ntb_hw if_ntb + +.include <bsd.subdir.mk> diff --git a/sys/modules/ntb/if_ntb/Makefile b/sys/modules/ntb/if_ntb/Makefile new file mode 100644 index 0000000..468593d --- /dev/null +++ b/sys/modules/ntb/if_ntb/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/ntb/if_ntb + +KMOD = if_ntb +SRCS = if_ntb.c +SRCS += device_if.h bus_if.h pci_if.h + +.include <bsd.kmod.mk> diff --git a/sys/modules/ntb/ntb_hw/Makefile b/sys/modules/ntb/ntb_hw/Makefile new file mode 100644 index 0000000..fc46b46 --- /dev/null +++ b/sys/modules/ntb/ntb_hw/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/ntb/ntb_hw + +KMOD = ntb_hw +SRCS = ntb_hw.c +SRCS += device_if.h bus_if.h pci_if.h + +.include <bsd.kmod.mk> |