From 542feb6d728678f9a7f9437f0323a9bcfbaf7192 Mon Sep 17 00:00:00 2001 From: carl Date: Mon, 29 Apr 2013 22:48:53 +0000 Subject: Add a new driver to support the Intel Non-Transparent Bridge(NTB). The NTB allows you to connect two systems with this device using a PCI-e link. The driver is made of two modules: - ntb_hw which is a basic hardware abstraction layer for the device. - if_ntb which implements the ntb network device and the communication protocol. The driver is limited at the moment to CPU memcpy instead of using DMA, and only Back-to-Back mode is supported. Also the network device isn't full featured yet. These changes will be coming soon. The DMA change will also bring in the ioat driver from the project branch it is on now. This is an initial port of the GPL/BSD Linux driver contributed by Jon Mason from Intel. Any bugs are my contributions. Sponsored by: Intel Reviewed by: jimharris, joel (man page only) Approved by: jimharris (mentor) --- share/man/man4/Makefile | 5 + share/man/man4/ntb.4 | 114 ++++ sys/amd64/conf/NOTES | 4 + sys/conf/files.amd64 | 2 + sys/dev/ntb/if_ntb/if_ntb.c | 1366 +++++++++++++++++++++++++++++++++++++++ sys/dev/ntb/ntb_hw/ntb_hw.c | 1288 ++++++++++++++++++++++++++++++++++++ sys/dev/ntb/ntb_hw/ntb_hw.h | 73 +++ sys/dev/ntb/ntb_hw/ntb_regs.h | 146 +++++ sys/modules/Makefile | 2 + sys/modules/ntb/Makefile | 5 + sys/modules/ntb/if_ntb/Makefile | 9 + sys/modules/ntb/ntb_hw/Makefile | 9 + 12 files changed, 3023 insertions(+) create mode 100644 share/man/man4/ntb.4 create mode 100644 sys/dev/ntb/if_ntb/if_ntb.c create mode 100644 sys/dev/ntb/ntb_hw/ntb_hw.c create mode 100644 sys/dev/ntb/ntb_hw/ntb_hw.h create mode 100644 sys/dev/ntb/ntb_hw/ntb_regs.h create mode 100644 sys/modules/ntb/Makefile create mode 100644 sys/modules/ntb/if_ntb/Makefile create mode 100644 sys/modules/ntb/ntb_hw/Makefile diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 22bb8d1..2c4eda0 100644 --- a/share/man/man4/Makefile +++ 
b/share/man/man4/Makefile @@ -333,6 +333,7 @@ MAN= aac.4 \ ng_vlan.4 \ nmdm.4 \ nsp.4 \ + ${_ntb.4} \ null.4 \ ${_nvd.4} \ ${_nve.4} \ @@ -647,6 +648,7 @@ MLINKS+=netintro.4 net.4 \ netintro.4 networking.4 MLINKS+=${_nfe.4} ${_if_nfe.4} MLINKS+=nge.4 if_nge.4 +MLINKS+=${_ntb.4} ${_if_ntb.4} ${_ntb_hw.4} MLINKS+=${_nve.4} ${_if_nve.4} MLINKS+=${_nxge.4} ${_if_nxge.4} MLINKS+=patm.4 if_patm.4 @@ -784,6 +786,9 @@ MLINKS+=lindev.4 full.4 .if ${MACHINE_CPUARCH} == "amd64" _bhyve.4= bhyve.4 +_if_ntb.4= if_ntb.4 +_ntb.4= ntb.4 +_ntb_hw.4= ntb_hw.4 _qlxgb.4= qlxgb.4 _sfxge.4= sfxge.4 diff --git a/share/man/man4/ntb.4 b/share/man/man4/ntb.4 new file mode 100644 index 0000000..c576526 --- /dev/null +++ b/share/man/man4/ntb.4 @@ -0,0 +1,114 @@ +.\" +.\" Copyright (c) 2013 Intel Corporation +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions, and the following disclaimer, +.\" without modification. +.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer +.\" substantially similar to the "NO WARRANTY" disclaimer below +.\" ("Disclaimer") and any redistribution must be conditioned upon +.\" including a substantially similar Disclaimer requirement for further +.\" binary redistribution. +.\" +.\" NO WARRANTY +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR +.\" A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGES. +.\" +.\" ntb driver man page. +.\" +.\" Author: Carl Delsey +.\" +.\" $FreeBSD$ +.\" +.Dd Apr 11, 2013 +.Dt NTB 4 +.Os +.Sh NAME +.Nm ntb , +.Nm ntb_hw , +.Nm if_ntb +.Nd Intel(R) Non-Transparent Bridge driver +.Sh SYNOPSIS +To compile this driver into your kernel, +place the following lines in your kernel configuration file: +.Bd -ragged -offset indent +.Cd "device ntb_hw" +.Cd "device if_ntb" +.Ed +.Pp +Or, to load the driver as a module at boot, place the following line in +.Xr loader.conf 5 : +.Bd -literal -offset indent +if_ntb_load="YES" +.Ed +.Sh DESCRIPTION +The +.Nm +driver provides support for the Non-Transparent Bridge (NTB) in the Intel S1200, +Xeon E3 and Xeon E5 processor families. +.Pp +The NTB allows you to connect two computer systems using a PCI-e link if they +have the correct equipment and connectors. +.Sh CONFIGURATION +The NTB memory windows need to be configured by the BIOS. +If your BIOS allows you to set their size, you should set the size of both +memory windows to 1 MiB. +This needs to be done on both systems. +.Pp +Each system needs to have a different IP address assigned. +The MAC address is randomly generated. +Also for maximum performance, the MTU should be set to 16 kiB. 
+This can be done by adding the line below to
+.Xr rc.conf 5 :
+.Bd -literal -offset indent
+ifconfig_ntb0="inet 192.168.1.10 netmask 255.255.255.0 mtu 16384"
+.Ed
+.Pp
+And on the second system:
+.Bd -literal -offset indent
+ifconfig_ntb0="inet 192.168.1.11 netmask 255.255.255.0 mtu 16384"
+.Ed
+.Pp
+If you are using the UDP protocol, you may want to increase the
+.Va net.inet.udp.maxdgram
+.Xr sysctl 8
+variable.
+.Sh SEE ALSO
+.Xr rc.conf 5 ,
+.Xr sysctl 8
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+driver was developed by Intel and originally written by
+.An Carl Delsey Aq carl@FreeBSD.org.
+.Sh BUGS
+If the driver is unloaded, it cannot be reloaded without a system reboot.
+.Pp
+The network support is limited.
+It isn't fully configurable yet.
+It also isn't integrated into
+.Xr netgraph 4
+or
+.Xr bpf 4 .
+.Pp
+NTB to Root Port mode is not yet supported.
+.Pp
+There is no way to protect your system from malicious behavior on the other
+system once the link is brought up.
+Anyone with root or kernel access on the other system can read or write to
+any location on your system.
+In other words, only connect two systems that completely trust each other.
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index 7a41464..5689ed3 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -366,6 +366,10 @@ device iwn6000fw device iwn6050fw device wpifw +# Intel Non-Transparent Bridge (NTB) hardware +device ntb_hw # Hardware Abstraction Layer for the NTB +device if_ntb # Simulated ethernet device using the NTB + # #XXX this stores pointers in a 32bit field that is defined by the hardware #device pst diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index ece99da..2d6db7a 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -211,6 +211,8 @@ dev/kbd/kbd.c optional atkbd | sc | ukbd dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev dev/nfe/if_nfe.c optional nfe pci +dev/ntb/if_ntb/if_ntb.c optional if_ntb +dev/ntb/ntb_hw/ntb_hw.c optional if_ntb ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nve/if_nve.c optional nve pci dev/nvme/nvme.c optional nvme diff --git a/sys/dev/ntb/if_ntb/if_ntb.c b/sys/dev/ntb/if_ntb/if_ntb.c new file mode 100644 index 0000000..55b19c5 --- /dev/null +++ b/sys/dev/ntb/if_ntb/if_ntb.c @@ -0,0 +1,1366 @@ +/*- + * Copyright (C) 2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../ntb_hw/ntb_hw.h" + +/* + * The Non-Transparent Bridge (NTB) is a device on some Intel processors that + * allows you to connect two systems using a PCI-e link. + * + * This module contains a protocol for sending and receiving messages, and + * exposes that protocol through a simulated ethernet device called ntb. + * + * NOTE: Much of the code in this module is shared with Linux. Any patches may + * be picked up and redistributed in Linux with a dual GPL/BSD license. 
+ */
+
+/* TODO: These functions should really be part of the kernel */
+#define test_bit(pos, bitmap_addr) (*(bitmap_addr) & 1UL << (pos))
+#define set_bit(pos, bitmap_addr) *(bitmap_addr) |= 1UL << (pos)
+#define clear_bit(pos, bitmap_addr) *(bitmap_addr) &= ~(1UL << (pos))
+
+#define KTR_NTB KTR_SPARE3
+
+#define NTB_TRANSPORT_VERSION 3
+#define NTB_RX_MAX_PKTS 64
+#define NTB_RXQ_SIZE 300
+
+static unsigned int transport_mtu = 0x4000 + ETHER_HDR_LEN + ETHER_CRC_LEN;
+static unsigned int max_num_clients = 1;
+
+STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
+
+struct ntb_queue_entry {
+	/* ntb_queue list reference */
+	STAILQ_ENTRY(ntb_queue_entry) entry;
+
+	/* info on data to be transferred */
+	void *cb_data;
+	void *buf;
+	uint64_t len;
+	uint64_t flags;
+};
+
+struct ntb_rx_info {
+	unsigned int entry;
+};
+
+struct ntb_transport_qp {
+	struct ntb_netdev *transport;
+	struct ntb_softc *ntb;
+
+	void *cb_data;
+
+	bool client_ready;
+	bool qp_link;
+	uint8_t qp_num;	/* Only 64 QPs are allowed.
0-63 */ + + struct ntb_rx_info *rx_info; + struct ntb_rx_info *remote_rx_info; + + void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + struct ntb_queue_list tx_free_q; + struct mtx ntb_tx_free_q_lock; + void *tx_mw; + uint64_t tx_index; + uint64_t tx_max_entry; + uint64_t tx_max_frame; + + void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + struct ntb_queue_list rx_pend_q; + struct ntb_queue_list rx_free_q; + struct mtx ntb_rx_pend_q_lock; + struct mtx ntb_rx_free_q_lock; + struct task rx_completion_task; + void *rx_buff; + uint64_t rx_index; + uint64_t rx_max_entry; + uint64_t rx_max_frame; + + void (*event_handler) (void *data, int status); + struct callout link_work; + struct callout queue_full; + struct callout rx_full; + + uint64_t last_rx_no_buf; + + /* Stats */ + uint64_t rx_bytes; + uint64_t rx_pkts; + uint64_t rx_ring_empty; + uint64_t rx_err_no_buf; + uint64_t rx_err_oflow; + uint64_t rx_err_ver; + uint64_t tx_bytes; + uint64_t tx_pkts; + uint64_t tx_ring_full; +}; + +struct ntb_queue_handlers { + void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*event_handler) (void *data, int status); +}; + + +struct ntb_transport_mw { + size_t size; + void *virt_addr; + vm_paddr_t dma_addr; +}; + +struct ntb_netdev { + struct ntb_softc *ntb; + struct ifnet *ifp; + struct ntb_transport_mw mw[NTB_NUM_MW]; + struct ntb_transport_qp *qps; + uint64_t max_qps; + uint64_t qp_bitmap; + bool transport_link; + struct callout link_work; + struct ntb_transport_qp *qp; + uint64_t bufsize; + u_char eaddr[ETHER_ADDR_LEN]; + struct mtx tx_lock; + struct mtx rx_lock; +}; + +static struct ntb_netdev net_softc; + +enum { + IF_NTB_DESC_DONE_FLAG = 1 << 0, + IF_NTB_LINK_DOWN_FLAG = 1 << 1, +}; + +struct ntb_payload_header { + uint64_t ver; + uint64_t len; + uint64_t flags; +}; + 
+enum { + IF_NTB_VERSION = 0, + IF_NTB_MW0_SZ, + IF_NTB_MW1_SZ, + IF_NTB_NUM_QPS, + IF_NTB_QP_LINKS, + IF_NTB_MAX_SPAD, +}; + +#define QP_TO_MW(qp) ((qp) % NTB_NUM_MW) +#define NTB_QP_DEF_NUM_ENTRIES 100 +#define NTB_LINK_DOWN_TIMEOUT 10 + +static int ntb_handle_module_events(struct module *m, int what, void *arg); +static int ntb_setup_interface(void); +static int ntb_teardown_interface(void); +static void ntb_net_init(void *arg); +static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data); +static void ntb_start(struct ifnet *ifp); +static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); +static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); +static void ntb_net_event_handler(void *data, int status); +static int ntb_transport_init(struct ntb_softc *ntb); +static void ntb_transport_free(void *transport); +static void ntb_transport_init_queue(struct ntb_netdev *nt, + unsigned int qp_num); +static void ntb_transport_free_queue(struct ntb_transport_qp *qp); +static struct ntb_transport_qp * ntb_transport_create_queue(void *data, + struct ntb_softc *pdev, const struct ntb_queue_handlers *handlers); +static void ntb_transport_link_up(struct ntb_transport_qp *qp); +static int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, + void *data, unsigned int len); +static int ntb_process_tx(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry); +static void ntb_tx_copy_task(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry, void *offset); +static void ntb_qp_full(void *arg); +static void ntb_transport_rxc_db(void *data, int db_num); +static void ntb_rx_pendq_full(void *arg); +static void ntb_transport_rx(struct ntb_transport_qp *qp); +static int ntb_process_rxc(struct ntb_transport_qp *qp); +static void ntb_rx_copy_task(struct ntb_transport_qp *qp, + struct ntb_queue_entry *entry, void *offset); +static void ntb_rx_completion_task(void *arg, int 
pending); +static void ntb_transport_event_callback(void *data, enum ntb_hw_event event); +static void ntb_transport_link_work(void *arg); +static int ntb_set_mw(struct ntb_netdev *nt, int num_mw, unsigned int size); +static void ntb_transport_setup_qp_mw(struct ntb_netdev *nt, + unsigned int qp_num); +static void ntb_qp_link_work(void *arg); +static void ntb_transport_link_cleanup(struct ntb_netdev *nt); +static void ntb_qp_link_down(struct ntb_transport_qp *qp); +static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp); +static void ntb_transport_link_down(struct ntb_transport_qp *qp); +static void ntb_send_link_down(struct ntb_transport_qp *qp); +static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry, + struct ntb_queue_list *list); +static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock, + struct ntb_queue_list *list); +static void create_random_local_eui48(u_char *eaddr); +static unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); + +MALLOC_DEFINE(M_NTB_IF, "if_ntb", "ntb network driver"); + +/* Module setup and teardown */ +static int +ntb_handle_module_events(struct module *m, int what, void *arg) +{ + int err = 0; + + switch (what) { + case MOD_LOAD: + err = ntb_setup_interface(); + break; + case MOD_UNLOAD: + err = ntb_teardown_interface(); + break; + default: + err = EOPNOTSUPP; + break; + } + return (err); +} + +static moduledata_t ntb_transport_mod = { + "ntb_transport", + ntb_handle_module_events, + NULL +}; + +DECLARE_MODULE(ntb_transport, ntb_transport_mod, SI_SUB_KLD, SI_ORDER_ANY); +MODULE_DEPEND(ntb_transport, ntb_hw, 1, 1, 1); + +static int +ntb_setup_interface() +{ + struct ifnet *ifp; + struct ntb_queue_handlers handlers = { ntb_net_rx_handler, + ntb_net_tx_handler, ntb_net_event_handler }; + + net_softc.ntb = devclass_get_softc(devclass_find("ntb_hw"), 0); + if (net_softc.ntb == NULL) { + printf("ntb: Can't find devclass\n"); + return (ENXIO); + } + + ntb_transport_init(net_softc.ntb); + + ifp = 
net_softc.ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) { + printf("ntb: cannot allocate ifnet structure\n"); + return (ENOMEM); + } + + net_softc.qp = ntb_transport_create_queue(ifp, net_softc.ntb, + &handlers); + if_initname(ifp, "ntb", 0); + ifp->if_init = ntb_net_init; + ifp->if_softc = &net_softc; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; + ifp->if_ioctl = ntb_ioctl; + ifp->if_start = ntb_start; + IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_READY(&ifp->if_snd); + create_random_local_eui48(net_softc.eaddr); + ether_ifattach(ifp, net_softc.eaddr); + ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU; + ifp->if_capenable = ifp->if_capabilities; + + ntb_transport_link_up(net_softc.qp); + net_softc.bufsize = ntb_transport_max_size(net_softc.qp) + + sizeof(struct ether_header); + return (0); +} + +static int +ntb_teardown_interface() +{ + struct ifnet *ifp = net_softc.ifp; + + ntb_transport_link_down(net_softc.qp); + + ether_ifdetach(ifp); + if_free(ifp); + ntb_transport_free_queue(net_softc.qp); + ntb_transport_free(&net_softc); + + return (0); +} + +/* Network device interface */ + +static void +ntb_net_init(void *arg) +{ + struct ntb_netdev *ntb_softc = arg; + struct ifnet *ifp = ntb_softc->ifp; + + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + ifp->if_flags |= IFF_UP; + if_link_state_change(ifp, LINK_STATE_UP); +} + +static int +ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) +{ + struct ntb_netdev *nt = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; + + switch (command) { + case SIOCSIFMTU: + { + if (ifr->ifr_mtu > ntb_transport_max_size(nt->qp) - + ETHER_HDR_LEN - ETHER_CRC_LEN) { + error = EINVAL; + break; + } + + ifp->if_mtu = ifr->ifr_mtu; + break; + } + default: + error = ether_ioctl(ifp, command, data); + break; + } + + return (error); +} + + +static void +ntb_start(struct ifnet *ifp) +{ + struct mbuf *m_head; + struct 
ntb_netdev *nt = ifp->if_softc; + int rc; + + mtx_lock(&nt->tx_lock); + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + CTR0(KTR_NTB, "TX: ntb_start"); + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); + CTR1(KTR_NTB, "TX: start mbuf %p", m_head); + rc = ntb_transport_tx_enqueue(nt->qp, m_head, m_head, + m_length(m_head, NULL)); + if (rc != 0) { + CTR1(KTR_NTB, + "TX: couldn't tx mbuf %p. Returning to snd q", + m_head); + if (rc == EAGAIN) { + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + callout_reset(&nt->qp->queue_full, hz / 1000, + ntb_qp_full, ifp); + } + break; + } + + } + mtx_unlock(&nt->tx_lock); +} + +/* Network Device Callbacks */ +static void +ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, + int len) +{ + + m_freem(data); + CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data); +} + +static void +ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, + int len) +{ + struct mbuf *m = data; + struct ifnet *ifp = qp_data; + + CTR0(KTR_NTB, "RX: rx handler"); + (*ifp->if_input)(ifp, m); +} + +static void +ntb_net_event_handler(void *data, int status) +{ + +} + +/* Transport Init and teardown */ + +static int +ntb_transport_init(struct ntb_softc *ntb) +{ + struct ntb_netdev *nt = &net_softc; + int rc, i; + + nt->max_qps = max_num_clients; + ntb_register_transport(ntb, nt); + mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF); + mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF); + + nt->qps = malloc(nt->max_qps * sizeof(struct ntb_transport_qp), + M_NTB_IF, M_WAITOK|M_ZERO); + + nt->qp_bitmap = ((uint64_t) 1 << nt->max_qps) - 1; + + for (i = 0; i < nt->max_qps; i++) + ntb_transport_init_queue(nt, i); + + callout_init(&nt->link_work, 0); + + rc = ntb_register_event_callback(ntb, + ntb_transport_event_callback); + if (rc != 0) + goto err; + + if (ntb_query_link_status(ntb)) + callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); 
+ + return (0); + +err: + free(nt->qps, M_NTB_IF); + ntb_unregister_transport(ntb); + return (rc); +} + +static void +ntb_transport_free(void *transport) +{ + struct ntb_netdev *nt = transport; + struct ntb_softc *ntb = nt->ntb; + int i; + + nt->transport_link = NTB_LINK_DOWN; + + callout_drain(&nt->link_work); + + /* verify that all the qp's are freed */ + for (i = 0; i < nt->max_qps; i++) + if (!test_bit(i, &nt->qp_bitmap)) + ntb_transport_free_queue(&nt->qps[i]); + + + ntb_unregister_event_callback(ntb); + + for (i = 0; i < NTB_NUM_MW; i++) + if (nt->mw[i].virt_addr != NULL) + contigfree(nt->mw[i].virt_addr, nt->mw[i].size, + M_NTB_IF); + + free(nt->qps, M_NTB_IF); + ntb_unregister_transport(ntb); +} + +static void +ntb_transport_init_queue(struct ntb_netdev *nt, unsigned int qp_num) +{ + struct ntb_transport_qp *qp; + unsigned int num_qps_mw, tx_size; + uint8_t mw_num = QP_TO_MW(qp_num); + + qp = &nt->qps[qp_num]; + qp->qp_num = qp_num; + qp->transport = nt; + qp->ntb = nt->ntb; + qp->qp_link = NTB_LINK_DOWN; + qp->client_ready = NTB_LINK_DOWN; + qp->event_handler = NULL; + + if (nt->max_qps % NTB_NUM_MW && mw_num < nt->max_qps % NTB_NUM_MW) + num_qps_mw = nt->max_qps / NTB_NUM_MW + 1; + else + num_qps_mw = nt->max_qps / NTB_NUM_MW; + + tx_size = (unsigned int) ntb_get_mw_size(qp->ntb, mw_num) / num_qps_mw; + qp->rx_info = (struct ntb_rx_info *) + ((char *)ntb_get_mw_vbase(qp->ntb, mw_num) + + (qp_num / NTB_NUM_MW * tx_size)); + tx_size -= sizeof(struct ntb_rx_info); + + qp->tx_mw = qp->rx_info + sizeof(struct ntb_rx_info); + qp->tx_max_frame = min(transport_mtu + sizeof(struct ntb_payload_header), + tx_size); + qp->tx_max_entry = tx_size / qp->tx_max_frame; + qp->tx_index = 0; + + callout_init(&qp->link_work, 0); + callout_init(&qp->queue_full, CALLOUT_MPSAFE); + callout_init(&qp->rx_full, CALLOUT_MPSAFE); + + mtx_init(&qp->ntb_rx_pend_q_lock, "ntb rx pend q", NULL, MTX_SPIN); + mtx_init(&qp->ntb_rx_free_q_lock, "ntb rx free q", NULL, MTX_SPIN); + 
mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN); + TASK_INIT(&qp->rx_completion_task, 0, ntb_rx_completion_task, qp); + + STAILQ_INIT(&qp->rx_pend_q); + STAILQ_INIT(&qp->rx_free_q); + STAILQ_INIT(&qp->tx_free_q); +} + +static void +ntb_transport_free_queue(struct ntb_transport_qp *qp) +{ + struct ntb_queue_entry *entry; + + if (qp == NULL) + return; + + callout_drain(&qp->link_work); + + ntb_unregister_db_callback(qp->ntb, qp->qp_num); + + while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) + free(entry, M_NTB_IF); + + while ((entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q))) + free(entry, M_NTB_IF); + + while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) + free(entry, M_NTB_IF); + + set_bit(qp->qp_num, &qp->transport->qp_bitmap); +} + +/** + * ntb_transport_create_queue - Create a new NTB transport layer queue + * @rx_handler: receive callback function + * @tx_handler: transmit callback function + * @event_handler: event callback function + * + * Create a new NTB transport layer queue and provide the queue with a callback + * routine for both transmit and receive. The receive callback routine will be + * used to pass up data when the transport has received it on the queue. The + * transmit callback routine will be called when the transport has completed the + * transmission of the data on the queue and the data is ready to be freed. + * + * RETURNS: pointer to newly created ntb_queue, NULL on error. 
+ */ +static struct ntb_transport_qp * +ntb_transport_create_queue(void *data, struct ntb_softc *pdev, + const struct ntb_queue_handlers *handlers) +{ + struct ntb_queue_entry *entry; + struct ntb_transport_qp *qp; + struct ntb_netdev *nt; + unsigned int free_queue; + int rc, i; + + nt = ntb_find_transport(pdev); + if (nt == NULL) + goto err; + + free_queue = ffs(nt->qp_bitmap); + if (free_queue == 0) + goto err; + + /* decrement free_queue to make it zero based */ + free_queue--; + + clear_bit(free_queue, &nt->qp_bitmap); + + qp = &nt->qps[free_queue]; + qp->cb_data = data; + qp->rx_handler = handlers->rx_handler; + qp->tx_handler = handlers->tx_handler; + qp->event_handler = handlers->event_handler; + + for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + entry = malloc(sizeof(struct ntb_queue_entry), M_NTB_IF, + M_WAITOK|M_ZERO); + entry->cb_data = nt->ifp; + entry->buf = NULL; + entry->len = transport_mtu; + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + } + + for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + entry = malloc(sizeof(struct ntb_queue_entry), M_NTB_IF, + M_WAITOK|M_ZERO); + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); + } + + rc = ntb_register_db_callback(qp->ntb, free_queue, qp, + ntb_transport_rxc_db); + if (rc != 0) + goto err1; + + return (qp); + +err1: + while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) + free(entry, M_NTB_IF); + while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) + free(entry, M_NTB_IF); + set_bit(free_queue, &nt->qp_bitmap); +err: + return (NULL); +} + +/** + * ntb_transport_link_up - Notify NTB transport of client readiness to use queue + * @qp: NTB transport layer queue to be enabled + * + * Notify NTB transport layer of client readiness to use queue + */ +static void +ntb_transport_link_up(struct ntb_transport_qp *qp) +{ + + if (qp == NULL) + return; + + qp->client_ready = NTB_LINK_UP; + + if (qp->transport->transport_link == NTB_LINK_UP) + 
callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+}
+
+
+
+/* Transport Tx */
+
+/**
+ * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that will be sent
+ * @len: length of the data buffer
+ *
+ * Enqueue a new transmit buffer onto the transport queue from which an NTB
+ * payload will be transmitted. This assumes that a lock is being held to
+ * serialize access to the qp.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+static int
+ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+    unsigned int len)
+{
+	struct ntb_queue_entry *entry;
+	int rc;
+
+	if (qp == NULL || qp->qp_link != NTB_LINK_UP || len == 0) {
+		CTR0(KTR_NTB, "TX: link not up");
+		return (EINVAL);
+	}
+
+	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+	if (entry == NULL) {
+		CTR0(KTR_NTB, "TX: couldn't get entry from tx_free_q");
+		return (ENOMEM);
+	}
+	CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);
+
+	entry->cb_data = cb;
+	entry->buf = data;
+	entry->len = len;
+	entry->flags = 0;
+
+	rc = ntb_process_tx(qp, entry);
+	if (rc != 0) {
+		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+		CTR1(KTR_NTB,
+		    "TX: process_tx failed. 
Returning entry %p to tx_free_q", + entry); + } + return (rc); +} + +static int +ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) +{ + void *offset; + + offset = (char *)qp->tx_mw + qp->tx_max_frame * qp->tx_index; + CTR3(KTR_NTB, + "TX: process_tx: tx_pkts=%u, tx_index=%u, remote entry=%u", + qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry); + if (qp->tx_index == qp->remote_rx_info->entry) { + CTR0(KTR_NTB, "TX: ring full"); + qp->tx_ring_full++; + return (EAGAIN); + } + + if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) { + if (qp->tx_handler != NULL) + qp->tx_handler(qp, qp->cb_data, entry->buf, + EIO); + + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); + CTR1(KTR_NTB, + "TX: frame too big. returning entry %p to tx_free_q", + entry); + return (0); + } + CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset); + ntb_tx_copy_task(qp, entry, offset); + + qp->tx_index++; + qp->tx_index %= qp->tx_max_entry; + + qp->tx_pkts++; + + return (0); +} + +static void +ntb_tx_copy_task(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, + void *offset) +{ + struct ntb_payload_header *hdr; + + CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset); + if (entry->buf != NULL) + m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset); + + hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame - + sizeof(struct ntb_payload_header)); + hdr->len = entry->len; /* TODO: replace with bus_space_write */ + hdr->ver = qp->tx_pkts; /* TODO: replace with bus_space_write */ + wmb(); + /* TODO: replace with bus_space_write */ + hdr->flags = entry->flags | IF_NTB_DESC_DONE_FLAG; + + ntb_ring_sdb(qp->ntb, qp->qp_num); + + /* + * The entry length can only be zero if the packet is intended to be a + * "link down" or similar. Since no payload is being sent in these + * cases, there is nothing to add to the completion queue. 
+ */ + if (entry->len > 0) { + qp->tx_bytes += entry->len; + + if (qp->tx_handler) + qp->tx_handler(qp, qp->cb_data, entry->cb_data, + entry->len); + } + + CTR2(KTR_NTB, + "TX: entry %p sent. hdr->ver = %d, Returning to tx_free_q", entry, + hdr->ver); + ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); +} + +static void +ntb_qp_full(void *arg) +{ + + CTR0(KTR_NTB, "TX: qp_full callout"); + ntb_start(arg); +} + +/* Transport Rx */ +static void +ntb_transport_rxc_db(void *data, int db_num) +{ + struct ntb_transport_qp *qp = data; + + ntb_transport_rx(qp); +} + +static void +ntb_rx_pendq_full(void *arg) +{ + + CTR0(KTR_NTB, "RX: ntb_rx_pendq_full callout"); + ntb_transport_rx(arg); +} + +static void +ntb_transport_rx(struct ntb_transport_qp *qp) +{ + int rc, i; + + /* + * Limit the number of packets processed in a single interrupt to + * provide fairness to others + */ + mtx_lock(&qp->transport->rx_lock); + CTR0(KTR_NTB, "RX: transport_rx"); + for (i = 0; i < NTB_RX_MAX_PKTS; i++) { + rc = ntb_process_rxc(qp); + if (rc != 0) { + CTR0(KTR_NTB, "RX: process_rxc failed"); + break; + } + } + mtx_unlock(&qp->transport->rx_lock); +} + +static int +ntb_process_rxc(struct ntb_transport_qp *qp) +{ + struct ntb_payload_header *hdr; + struct ntb_queue_entry *entry; + void *offset; + + offset = (void *) + ((char *)qp->rx_buff + qp->rx_max_frame * qp->rx_index); + hdr = (void *) + ((char *)offset + qp->rx_max_frame - + sizeof(struct ntb_payload_header)); + + CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index); + entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); + if (entry == NULL) { + qp->rx_err_no_buf++; + CTR0(KTR_NTB, "RX: No entries in rx_pend_q"); + return (ENOMEM); + } + callout_stop(&qp->rx_full); + CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry); + + if ((hdr->flags & IF_NTB_DESC_DONE_FLAG) == 0) { + CTR1(KTR_NTB, + "RX: hdr not done. 
Returning entry %p to rx_pend_q", entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + qp->rx_ring_empty++; + return (EAGAIN); + } + + if (hdr->ver != (uint32_t) qp->rx_pkts) { + CTR3(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). " + "Returning entry %p to rx_pend_q", hdr->ver, qp->rx_pkts, + entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + qp->rx_err_ver++; + return (EIO); + } + + if ((hdr->flags & IF_NTB_LINK_DOWN_FLAG) != 0) { + ntb_qp_link_down(qp); + CTR1(KTR_NTB, + "RX: link down. adding entry %p back to rx_pend_q", entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + goto out; + } + + if (hdr->len <= entry->len) { + entry->len = hdr->len; + ntb_rx_copy_task(qp, entry, offset); + } else { + CTR1(KTR_NTB, + "RX: len too long. Returning entry %p to rx_pend_q", entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + + qp->rx_err_oflow++; + } + + qp->rx_bytes += hdr->len; + qp->rx_pkts++; + CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts); + + +out: + /* Ensure that the data is globally visible before clearing the flag */ + wmb(); + hdr->flags = 0; + /* TODO: replace with bus_space_write */ + qp->rx_info->entry = qp->rx_index; + + qp->rx_index++; + qp->rx_index %= qp->rx_max_entry; + + return (0); +} + +static void +ntb_rx_copy_task(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, + void *offset) +{ + struct ifnet *ifp = entry->cb_data; + unsigned int len = entry->len; + struct mbuf *m; + + CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset); + m = m_devget(offset, len, 0, ifp, NULL); + m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; + + entry->buf = (void *)m; + + CTR2(KTR_NTB, + "RX: copied entry %p to mbuf %p. 
Adding entry to rx_free_q", entry, + m); + ntb_list_add(&qp->ntb_rx_free_q_lock, entry, &qp->rx_free_q); + + taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task); +} + +static void +ntb_rx_completion_task(void *arg, int pending) +{ + struct ntb_transport_qp *qp = arg; + struct mbuf *m; + struct ntb_queue_entry *entry; + + CTR0(KTR_NTB, "RX: rx_completion_task"); + + while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) { + m = entry->buf; + CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m); + if (qp->rx_handler && qp->client_ready == NTB_LINK_UP) + qp->rx_handler(qp, qp->cb_data, m, entry->len); + + entry->buf = NULL; + entry->len = qp->transport->bufsize; + + CTR1(KTR_NTB,"RX: entry %p removed from rx_free_q " + "and added to rx_pend_q", entry); + ntb_list_add(&qp->ntb_rx_pend_q_lock, entry, &qp->rx_pend_q); + if (qp->rx_err_no_buf > qp->last_rx_no_buf) { + qp->last_rx_no_buf = qp->rx_err_no_buf; + CTR0(KTR_NTB, "RX: could spawn rx task"); + callout_reset(&qp->rx_full, hz / 1000, ntb_rx_pendq_full, + qp); + } + } +} + +/* Link Event handler */ +static void +ntb_transport_event_callback(void *data, enum ntb_hw_event event) +{ + struct ntb_netdev *nt = data; + + switch (event) { + case NTB_EVENT_HW_LINK_UP: + callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); + break; + case NTB_EVENT_HW_LINK_DOWN: + ntb_transport_link_cleanup(nt); + break; + default: + panic("ntb: Unknown NTB event"); + } +} + +/* Link bring up */ +static void +ntb_transport_link_work(void *arg) +{ + struct ntb_netdev *nt = arg; + struct ntb_softc *ntb = nt->ntb; + struct ntb_transport_qp *qp; + uint32_t val; + int rc, i; + + /* send the local info */ + rc = ntb_write_remote_spad(ntb, IF_NTB_VERSION, NTB_TRANSPORT_VERSION); + if (rc != 0) + goto out; + + rc = ntb_write_remote_spad(ntb, IF_NTB_MW0_SZ, ntb_get_mw_size(ntb, 0)); + if (rc != 0) + goto out; + + rc = ntb_write_remote_spad(ntb, IF_NTB_MW1_SZ, ntb_get_mw_size(ntb, 1)); + if (rc != 0) + goto out; 
+ + rc = ntb_write_remote_spad(ntb, IF_NTB_NUM_QPS, nt->max_qps); + if (rc != 0) + goto out; + + rc = ntb_read_remote_spad(ntb, IF_NTB_QP_LINKS, &val); + if (rc != 0) + goto out; + + rc = ntb_write_remote_spad(ntb, IF_NTB_QP_LINKS, val); + if (rc != 0) + goto out; + + /* Query the remote side for its info */ + rc = ntb_read_local_spad(ntb, IF_NTB_VERSION, &val); + if (rc != 0) + goto out; + + if (val != NTB_TRANSPORT_VERSION) + goto out; + + rc = ntb_read_local_spad(ntb, IF_NTB_NUM_QPS, &val); + if (rc != 0) + goto out; + + if (val != nt->max_qps) + goto out; + + rc = ntb_read_local_spad(ntb, IF_NTB_MW0_SZ, &val); + if (rc != 0) + goto out; + + if (val == 0) + goto out; + + rc = ntb_set_mw(nt, 0, val); + if (rc != 0) + return; + + rc = ntb_read_local_spad(ntb, IF_NTB_MW1_SZ, &val); + if (rc != 0) + goto out; + + if (val == 0) + goto out; + + rc = ntb_set_mw(nt, 1, val); + if (rc != 0) + return; + + nt->transport_link = NTB_LINK_UP; + + for (i = 0; i < nt->max_qps; i++) { + qp = &nt->qps[i]; + + ntb_transport_setup_qp_mw(nt, i); + + if (qp->client_ready == NTB_LINK_UP) + callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); + } + + return; + +out: + if (ntb_query_link_status(ntb)) + callout_reset(&nt->link_work, + NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt); +} + +static int +ntb_set_mw(struct ntb_netdev *nt, int num_mw, unsigned int size) +{ + struct ntb_transport_mw *mw = &nt->mw[num_mw]; + + /* Alloc memory for receiving data. 
/*
 * Carve this queue pair's slice out of its memory window and initialize
 * the receive-side ring bookkeeping.
 *
 * Queue pairs are striped across the NTB_NUM_MW memory windows; windows
 * with index < (max_qps % NTB_NUM_MW) serve one extra qp and therefore
 * split their space one more way.  The head of the slice holds the
 * ntb_rx_info block (consumed-entry index exchanged with the peer); the
 * remainder is divided into rx_max_entry frames of rx_max_frame bytes,
 * each frame ending with an ntb_payload_header.
 */
static void
ntb_transport_setup_qp_mw(struct ntb_netdev *nt, unsigned int qp_num)
{
	struct ntb_transport_qp *qp = &nt->qps[qp_num];
	void *offset;
	unsigned int rx_size, num_qps_mw;
	uint8_t mw_num = QP_TO_MW(qp_num);
	unsigned int i;

	/* Windows with a low index may carry one extra queue pair. */
	if (nt->max_qps % NTB_NUM_MW && mw_num < nt->max_qps % NTB_NUM_MW)
		num_qps_mw = nt->max_qps / NTB_NUM_MW + 1;
	else
		num_qps_mw = nt->max_qps / NTB_NUM_MW;

	rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw;
	qp->remote_rx_info = (void *)((uint8_t *)nt->mw[mw_num].virt_addr +
	    (qp_num / NTB_NUM_MW * rx_size));
	rx_size -= sizeof(struct ntb_rx_info);

	/*
	 * NOTE(review): if remote_rx_info is a typed
	 * (struct ntb_rx_info *) pointer, the addition below scales by the
	 * element size, placing rx_buff sizeof(struct ntb_rx_info)^2 bytes
	 * past remote_rx_info instead of immediately after it -- confirm
	 * the intended offset (the declaration is outside this view; a
	 * symmetric computation on the peer would keep both sides
	 * consistent either way).
	 */
	qp->rx_buff = qp->remote_rx_info + sizeof(struct ntb_rx_info);
	qp->rx_max_frame = min(transport_mtu + sizeof(struct ntb_payload_header),
	    rx_size);
	qp->rx_max_entry = rx_size / qp->rx_max_frame;
	qp->rx_index = 0;
	qp->tx_index = 0;

	/* Publish the ring depth so the peer can bound its tx index. */
	qp->remote_rx_info->entry = qp->rx_max_entry;

	/* setup the hdr offsets with 0's */
	for (i = 0; i < qp->rx_max_entry; i++) {
		offset = (void *)((uint8_t *)qp->rx_buff +
		    qp->rx_max_frame * (i + 1) -
		    sizeof(struct ntb_payload_header));
		memset(offset, 0, sizeof(struct ntb_payload_header));
	}

	qp->rx_pkts = 0;
	qp->tx_pkts = 0;
}
/*
 * Bring the transport link down in response to a hardware link-down
 * event: mark the transport down (or, if it already was, drain any
 * pending bring-up callout), notify the active queue pairs, and zero the
 * local scratchpad registers.
 */
static void
ntb_transport_link_cleanup(struct ntb_netdev *nt)
{
	int i;

	if (nt->transport_link == NTB_LINK_DOWN)
		callout_drain(&nt->link_work);
	else
		nt->transport_link = NTB_LINK_DOWN;

	/* Pass along the info to any clients */
	for (i = 0; i < nt->max_qps; i++)
		/* A clear bit appears to mean "qp in use" -- verify against
		 * the qp_bitmap setup (outside this view). */
		if (!test_bit(i, &nt->qp_bitmap))
			ntb_qp_link_down(&nt->qps[i]);

	/*
	 * The scratchpad registers keep the values if the remote side
	 * goes down, blast them now to give them a sane value the next
	 * time they are accessed
	 */
	for (i = 0; i < IF_NTB_MAX_SPAD; i++)
		ntb_write_local_spad(nt->ntb, i, 0);
}
It is the client's responsibility to ensure all + * entries on queue are purged or otherwise handled appropriately.
+create_random_local_eui48(u_char *eaddr) +{ + static uint8_t counter = 0; + uint32_t seed = ticks; + + eaddr[0] = EUI48_LOCALLY_ADMINISTERED; + memcpy(&eaddr[1], &seed, sizeof(uint32_t)); + eaddr[5] = counter++; +} + +/** + * ntb_transport_max_size - Query the max payload size of a qp + * @qp: NTB transport layer queue to be queried + * + * Query the maximum payload size permissible on the given qp + * + * RETURNS: the max payload size of a qp + */ +static unsigned int +ntb_transport_max_size(struct ntb_transport_qp *qp) +{ + + if (qp == NULL) + return (0); + + return (qp->tx_max_frame - sizeof(struct ntb_payload_header)); +} diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.c b/sys/dev/ntb/ntb_hw/ntb_hw.c new file mode 100644 index 0000000..72314dd --- /dev/null +++ b/sys/dev/ntb/ntb_hw/ntb_hw.c @@ -0,0 +1,1288 @@ +/*- + * Copyright (C) 2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ntb_regs.h" +#include "ntb_hw.h" + +/* + * The Non-Transparent Bridge (NTB) is a device on some Intel processors that + * allows you to connect two systems using a PCI-e link. + * + * This module contains the hardware abstraction layer for the NTB. It allows + * you to send and recieve interrupts, map the memory windows and send and + * receive messages in the scratch-pad registers. + * + * NOTE: Much of the code in this module is shared with Linux. Any patches may + * be picked up and redistributed in Linux with a dual GPL/BSD license. 
+ */ + +#define NTB_CONFIG_BAR 0 +#define NTB_B2B_BAR_1 1 +#define NTB_B2B_BAR_2 2 +#define NTB_MAX_BARS 3 +#define NTB_MW_TO_BAR(mw) ((mw) + 1) + +#define MAX_MSIX_INTERRUPTS MAX(XEON_MAX_DB_BITS, SOC_MAX_DB_BITS) + +#define NTB_HB_TIMEOUT 1 /* second */ +#define SOC_LINK_RECOVERY_TIME 500 + +#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev)) + +enum ntb_device_type { + NTB_XEON, + NTB_SOC +}; + +struct ntb_hw_info { + uint32_t device_id; + enum ntb_device_type type; + const char *desc; +}; + +struct ntb_pci_bar_info { + bus_space_tag_t pci_bus_tag; + bus_space_handle_t pci_bus_handle; + int pci_resource_id; + struct resource *pci_resource; + vm_paddr_t pbase; + void *vbase; + u_long size; +}; + +struct ntb_int_info { + struct resource *res; + int rid; + void *tag; +}; + +struct ntb_db_cb { + ntb_db_callback callback; + unsigned int db_num; + void *data; + struct ntb_softc *ntb; +}; + +struct ntb_softc { + device_t device; + enum ntb_device_type type; + + struct ntb_pci_bar_info bar_info[NTB_MAX_BARS]; + struct ntb_int_info int_info[MAX_MSIX_INTERRUPTS]; + uint32_t allocated_interrupts; + + struct callout heartbeat_timer; + struct callout lr_timer; + + void *ntb_transport; + ntb_event_callback event_cb; + struct ntb_db_cb *db_cb; + + struct { + uint32_t max_spads; + uint32_t max_db_bits; + uint32_t msix_cnt; + } limits; + struct { + uint32_t pdb; + uint32_t pdb_mask; + uint32_t sdb; + uint32_t sbar2_xlat; + uint32_t sbar4_xlat; + uint32_t spad_remote; + uint32_t spad_local; + uint32_t lnk_cntl; + uint32_t lnk_stat; + uint32_t spci_cmd; + } reg_ofs; + uint8_t conn_type; + uint8_t dev_type; + uint8_t bits_per_vector; + uint8_t link_status; + uint8_t link_width; + uint8_t link_speed; +}; + +#define ntb_reg_read(SIZE, offset) \ + bus_space_read_ ## SIZE (ntb->bar_info[NTB_CONFIG_BAR].pci_bus_tag, \ + ntb->bar_info[NTB_CONFIG_BAR].pci_bus_handle, (offset)) +#define ntb_reg_write(SIZE, offset, val) \ + bus_space_write_ ## SIZE 
(ntb->bar_info[NTB_CONFIG_BAR].pci_bus_tag, \ + ntb->bar_info[NTB_CONFIG_BAR].pci_bus_handle, (offset), (val)) + +#define ntb_read_1(offset) ntb_reg_read(1, (offset)) +#define ntb_read_2(offset) ntb_reg_read(2, (offset)) +#define ntb_read_4(offset) ntb_reg_read(4, (offset)) +#define ntb_read_8(offset) ntb_reg_read(8, (offset)) +#define ntb_write_1(offset, val) ntb_reg_write(1, (offset), (val)) +#define ntb_write_2(offset, val) ntb_reg_write(2, (offset), (val)) +#define ntb_write_4(offset, val) ntb_reg_write(4, (offset), (val)) +#define ntb_write_8(offset, val) ntb_reg_write(8, (offset), (val)) + +static int ntb_probe(device_t device); +static int ntb_attach(device_t device); +static int ntb_detach(device_t device); +static int ntb_map_pci_bar(struct ntb_softc *ntb); +static void ntb_unmap_pci_bar(struct ntb_softc *ntb); +static int ntb_setup_interrupts(struct ntb_softc *ntb); +static void ntb_teardown_interrupts(struct ntb_softc *ntb); +static void handle_soc_irq(void *arg); +static void handle_xeon_irq(void *arg); +static void handle_xeon_event_irq(void *arg); +static void ntb_handle_legacy_interrupt(void *arg); +static int ntb_create_callbacks(struct ntb_softc *ntb, int num_vectors); +static void ntb_free_callbacks(struct ntb_softc *ntb); +static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id); +static int ntb_initialize_hw(struct ntb_softc *ntb); +static int ntb_setup_xeon(struct ntb_softc *ntb); +static int ntb_setup_soc(struct ntb_softc *ntb); +static void ntb_handle_heartbeat(void *arg); +static void ntb_handle_link_event(struct ntb_softc *ntb, int link_state); +static void recover_soc_link(void *arg); +static int ntb_check_link_status(struct ntb_softc *ntb); +static bool is_bar_for_data_transfer(int bar_num); + +static struct ntb_hw_info pci_ids[] = { + { 0x3C0D8086, NTB_XEON, "Xeon E5/Core i7 Non-Transparent Bridge B2B" }, + { 0x0C4E8086, NTB_SOC, "Atom Processor S1200 NTB Primary B2B" }, + { 0x0E0D8086, NTB_XEON, "Xeon E5 V2 Non-Transparent 
Bridge B2B" }, + { 0x00000000, NTB_SOC, NULL } +}; + +/* + * OS <-> Driver interface structures + */ +MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations"); + +static device_method_t ntb_pci_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ntb_probe), + DEVMETHOD(device_attach, ntb_attach), + DEVMETHOD(device_detach, ntb_detach), + DEVMETHOD_END +}; + +static driver_t ntb_pci_driver = { + "ntb_hw", + ntb_pci_methods, + sizeof(struct ntb_softc), +}; + +static devclass_t ntb_devclass; +DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL); +MODULE_VERSION(ntb_hw, 1); + +/* + * OS <-> Driver linkage functions + */ +static int +ntb_probe(device_t device) +{ + struct ntb_hw_info *p = ntb_get_device_info(pci_get_devid(device)); + + if (p != NULL) { + device_set_desc(device, p->desc); + return (0); + } else + return (ENXIO); +} + +#define DETACH_ON_ERROR(func) \ + error = func; \ + if (error < 0) { \ + ntb_detach(device); \ + return (error); \ + } + +static int +ntb_attach(device_t device) +{ + struct ntb_softc *ntb = DEVICE2SOFTC(device); + struct ntb_hw_info *p = ntb_get_device_info(pci_get_devid(device)); + int error; + + ntb->device = device; + ntb->type = p->type; + + /* Heartbeat timer for NTB_SOC since there is no link interrupt */ + callout_init(&ntb->heartbeat_timer, CALLOUT_MPSAFE); + callout_init(&ntb->lr_timer, CALLOUT_MPSAFE); + + DETACH_ON_ERROR(ntb_map_pci_bar(ntb)); + DETACH_ON_ERROR(ntb_initialize_hw(ntb)); + DETACH_ON_ERROR(ntb_setup_interrupts(ntb)); + + pci_enable_busmaster(ntb->device); + + return (error); +} + +static int +ntb_detach(device_t device) +{ + struct ntb_softc *ntb = DEVICE2SOFTC(device); + + callout_drain(&ntb->heartbeat_timer); + callout_drain(&ntb->lr_timer); + ntb_teardown_interrupts(ntb); + ntb_unmap_pci_bar(ntb); + + return (0); +} + +static int +ntb_map_pci_bar(struct ntb_softc *ntb) +{ + struct ntb_pci_bar_info *current_bar; + int rc, i; + + ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id 
= PCIR_BAR(0); + ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2); + ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4); + + for (i = 0; i< NTB_MAX_BARS; i++) { + current_bar = &ntb->bar_info[i]; + current_bar->pci_resource = + bus_alloc_resource(ntb->device, + SYS_RES_MEMORY, + ¤t_bar->pci_resource_id, 0, ~0, 1, + RF_ACTIVE); + + if (current_bar->pci_resource == NULL) { + device_printf(ntb->device, + "unable to allocate pci resource\n"); + return (ENXIO); + } + else { + current_bar->pci_bus_tag = + rman_get_bustag(current_bar->pci_resource); + current_bar->pci_bus_handle = + rman_get_bushandle(current_bar->pci_resource); + current_bar->pbase = + rman_get_start(current_bar->pci_resource); + current_bar->size = + rman_get_size(current_bar->pci_resource); + current_bar->vbase = + rman_get_virtual(current_bar->pci_resource); + if (is_bar_for_data_transfer(i)) { + /* + * Mark bar region as write combining to improve + * performance. + */ + rc = pmap_change_attr( + (vm_offset_t)current_bar->vbase, + current_bar->size, + VM_MEMATTR_WRITE_COMBINING); + if (rc != 0) { + device_printf(ntb->device, + "Couldn't mark bar as" + " WRITE_COMBINING\n"); + return (rc); + } + } + device_printf(ntb->device, + "Bar size = %lx, v %p, p %p\n", + current_bar->size, current_bar->vbase, + (void *)(current_bar->pbase)); + } + } + return (0); +} + +static void +ntb_unmap_pci_bar(struct ntb_softc *ntb) +{ + struct ntb_pci_bar_info *current_bar; + int i; + + for (i = 0; i< NTB_MAX_BARS; i++) { + current_bar = &ntb->bar_info[i]; + if (current_bar->pci_resource != NULL) + bus_release_resource(ntb->device, SYS_RES_MEMORY, + current_bar->pci_resource_id, + current_bar->pci_resource); + } +} + +static int +ntb_setup_interrupts(struct ntb_softc *ntb) +{ + void (*interrupt_handler)(void *); + void *int_arg; + bool use_msix = 0; + uint32_t num_vectors; + int i; + + ntb->allocated_interrupts = 0; + /* + * On SOC, disable all interrupts. On XEON, disable all but Link + * Interrupt. 
The rest will be unmasked as callbacks are registered. + */ + if (ntb->type == NTB_SOC) + ntb_write_8(ntb->reg_ofs.pdb_mask, ~0); + else + ntb_write_2(ntb->reg_ofs.pdb_mask, + ~(1 << ntb->limits.max_db_bits)); + + num_vectors = MIN(pci_msix_count(ntb->device), + ntb->limits.max_db_bits); + if (num_vectors >= 1) { + pci_alloc_msix(ntb->device, &num_vectors); + if (num_vectors >= 4) + use_msix = TRUE; + } + + ntb_create_callbacks(ntb, num_vectors); + if (use_msix == TRUE) { + for (i = 0; i < num_vectors; i++) { + ntb->int_info[i].rid = i + 1; + ntb->int_info[i].res = bus_alloc_resource_any( + ntb->device, SYS_RES_IRQ, &ntb->int_info[i].rid, + RF_ACTIVE); + if (ntb->int_info[i].res == NULL) { + device_printf(ntb->device, + "bus_alloc_resource failed\n"); + return (-1); + } + ntb->int_info[i].tag = NULL; + ntb->allocated_interrupts++; + if (ntb->type == NTB_SOC) { + interrupt_handler = handle_soc_irq; + int_arg = &ntb->db_cb[i]; + } else { + if (i == num_vectors - 1) { + interrupt_handler = handle_xeon_event_irq; + int_arg = ntb; + } else { + interrupt_handler = + handle_xeon_irq; + int_arg = &ntb->db_cb[i]; + } + } + if (bus_setup_intr(ntb->device, ntb->int_info[i].res, + INTR_MPSAFE | INTR_TYPE_MISC, NULL, + interrupt_handler, int_arg, + &ntb->int_info[i].tag) != 0) { + device_printf(ntb->device, + "bus_setup_intr failed\n"); + return (ENXIO); + } + } + } + else { + ntb->int_info[0].rid = 0; + ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, + &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE); + interrupt_handler = ntb_handle_legacy_interrupt; + if (ntb->int_info[0].res == NULL) { + device_printf(ntb->device, + "bus_alloc_resource failed\n"); + return (-1); + } + ntb->int_info[0].tag = NULL; + ntb->allocated_interrupts = 1; + + if (bus_setup_intr(ntb->device, ntb->int_info[0].res, + INTR_MPSAFE | INTR_TYPE_MISC, NULL, + interrupt_handler, ntb, &ntb->int_info[0].tag) != 0) { + + device_printf(ntb->device, "bus_setup_intr failed\n"); + return 
(ENXIO); + } + } + + return (0); +} + +static void +ntb_teardown_interrupts(struct ntb_softc *ntb) +{ + struct ntb_int_info *current_int; + int i; + + for (i=0; iallocated_interrupts; i++) { + current_int = &ntb->int_info[i]; + if (current_int->tag != NULL) + bus_teardown_intr(ntb->device, current_int->res, + current_int->tag); + + if (current_int->res != NULL) + bus_release_resource(ntb->device, SYS_RES_IRQ, + rman_get_rid(current_int->res), current_int->res); + } + + ntb_free_callbacks(ntb); + pci_release_msi(ntb->device); +} + +static void +handle_soc_irq(void *arg) +{ + struct ntb_db_cb *db_cb = arg; + struct ntb_softc *ntb = db_cb->ntb; + + ntb_write_8(ntb->reg_ofs.pdb, (uint64_t) 1 << db_cb->db_num); + + if (db_cb->callback != NULL) + db_cb->callback(db_cb->data, db_cb->db_num); +} + +static void +handle_xeon_irq(void *arg) +{ + struct ntb_db_cb *db_cb = arg; + struct ntb_softc *ntb = db_cb->ntb; + + /* + * On Xeon, there are 16 bits in the interrupt register + * but only 4 vectors. So, 5 bits are assigned to the first 3 + * vectors, with the 4th having a single bit for link + * interrupts. 
+ */ + ntb_write_2(ntb->reg_ofs.pdb, + ((1 << ntb->bits_per_vector) - 1) << + (db_cb->db_num * ntb->bits_per_vector)); + + if (db_cb->callback != NULL) + db_cb->callback(db_cb->data, db_cb->db_num); +} + +/* Since we do not have a HW doorbell in SOC, this is only used in JF/JT */ +static void +handle_xeon_event_irq(void *arg) +{ + struct ntb_softc *ntb = arg; + int rc; + + rc = ntb_check_link_status(ntb); + if (rc != 0) + device_printf(ntb->device, "Error determining link status\n"); + + /* bit 15 is always the link bit */ + ntb_write_2(ntb->reg_ofs.pdb, 1 << ntb->limits.max_db_bits); +} + +static void +ntb_handle_legacy_interrupt(void *arg) +{ + struct ntb_softc *ntb = arg; + unsigned int i = 0; + uint64_t pdb64; + uint16_t pdb16; + + if (ntb->type == NTB_SOC) { + pdb64 = ntb_read_8(ntb->reg_ofs.pdb); + + while (pdb64) { + i = ffs(pdb64); + pdb64 &= pdb64 - 1; + handle_soc_irq(&ntb->db_cb[i]); + } + } else { + pdb16 = ntb_read_2(ntb->reg_ofs.pdb); + + if ((pdb16 & XEON_DB_HW_LINK) != 0) { + handle_xeon_event_irq(ntb); + pdb16 &= ~XEON_DB_HW_LINK; + } + + while (pdb16 != 0) { + i = ffs(pdb16); + pdb16 &= pdb16 - 1; + handle_xeon_irq(&ntb->db_cb[i]); + } + } + +} + +static int +ntb_create_callbacks(struct ntb_softc *ntb, int num_vectors) +{ + int i; + + ntb->db_cb = malloc(num_vectors * sizeof(struct ntb_db_cb), M_NTB, + M_ZERO | M_WAITOK); + for (i = 0; i < num_vectors; i++) { + ntb->db_cb[i].db_num = i; + ntb->db_cb[i].ntb = ntb; + } + + return (0); +} + +static void +ntb_free_callbacks(struct ntb_softc *ntb) +{ + int i; + + for (i = 0; i < ntb->limits.max_db_bits; i++) + ntb_unregister_db_callback(ntb, i); + + free(ntb->db_cb, M_NTB); +} + +static struct ntb_hw_info * +ntb_get_device_info(uint32_t device_id) +{ + struct ntb_hw_info *ep = pci_ids; + + while (ep->device_id) { + if (ep->device_id == device_id) + return (ep); + ++ep; + } + return (NULL); +} + +static int +ntb_initialize_hw(struct ntb_softc *ntb) +{ + + if (ntb->type == NTB_SOC) + return 
/*
 * Program the register-offset table and limits for a Xeon NTB and enable
 * the secondary side.  Only back-to-back (B2B) topology is supported;
 * classic and root-port connections are rejected with ENXIO.
 */
static int
ntb_setup_xeon(struct ntb_softc *ntb)
{
	uint8_t val, connection_type;

	/* The PPD config byte encodes connection type and device type. */
	val = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);

	connection_type = val & XEON_PPD_CONN_TYPE;
	switch (connection_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = NTB_CONN_B2B;
		break;
	case NTB_CONN_CLASSIC:
	case NTB_CONN_RP:
	default:
		device_printf(ntb->device, "Connection type %d not supported\n",
		    connection_type);
		return (ENXIO);
	}

	/* DSD/USD orientation of this side of the bridge. */
	if ((val & XEON_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	/* Register offsets common to both connection types. */
	ntb->reg_ofs.pdb = XEON_PDOORBELL_OFFSET;
	ntb->reg_ofs.pdb_mask = XEON_PDBMSK_OFFSET;
	ntb->reg_ofs.sbar2_xlat = XEON_SBAR2XLAT_OFFSET;
	ntb->reg_ofs.sbar4_xlat = XEON_SBAR4XLAT_OFFSET;
	ntb->reg_ofs.lnk_cntl = XEON_NTBCNTL_OFFSET;
	ntb->reg_ofs.lnk_stat = XEON_LINK_STATUS_OFFSET;
	ntb->reg_ofs.spad_local = XEON_SPAD_OFFSET;
	ntb->reg_ofs.spci_cmd = XEON_PCICMD_OFFSET;

	/* B2B uses dedicated doorbell/scratchpad registers for the peer. */
	if (ntb->conn_type == NTB_CONN_B2B) {
		ntb->reg_ofs.sdb = XEON_B2B_DOORBELL_OFFSET;
		ntb->reg_ofs.spad_remote = XEON_B2B_SPAD_OFFSET;
		ntb->limits.max_spads = XEON_MAX_SPADS;
	} else {
		ntb->reg_ofs.sdb = XEON_SDOORBELL_OFFSET;
		ntb->reg_ofs.spad_remote = XEON_SPAD_OFFSET;
		ntb->limits.max_spads = XEON_MAX_COMPAT_SPADS;
	}

	ntb->limits.max_db_bits = XEON_MAX_DB_BITS;
	ntb->limits.msix_cnt = XEON_MSIX_CNT;
	ntb->bits_per_vector = XEON_DB_BITS_PER_VEC;

	/* Enable Bus Master and Memory Space on the secondary side */
	ntb_write_2(ntb->reg_ofs.spci_cmd,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	return (0);
}
"Connection type %d not supported\n", + connection_type); + return (ENXIO); + } + + if ((val & SOC_PPD_DEV_TYPE) != 0) + ntb->dev_type = NTB_DEV_DSD; + else + ntb->dev_type = NTB_DEV_USD; + + /* Initiate PCI-E link training */ + pci_write_config(ntb->device, NTB_PPD_OFFSET, val | SOC_PPD_INIT_LINK, + 4); + + ntb->reg_ofs.pdb = SOC_PDOORBELL_OFFSET; + ntb->reg_ofs.pdb_mask = SOC_PDBMSK_OFFSET; + ntb->reg_ofs.sbar2_xlat = SOC_SBAR2XLAT_OFFSET; + ntb->reg_ofs.sbar4_xlat = SOC_SBAR4XLAT_OFFSET; + ntb->reg_ofs.lnk_cntl = SOC_NTBCNTL_OFFSET; + ntb->reg_ofs.lnk_stat = SOC_LINK_STATUS_OFFSET; + ntb->reg_ofs.spad_local = SOC_SPAD_OFFSET; + ntb->reg_ofs.spci_cmd = SOC_PCICMD_OFFSET; + + if (ntb->conn_type == NTB_CONN_B2B) { + ntb->reg_ofs.sdb = SOC_B2B_DOORBELL_OFFSET; + ntb->reg_ofs.spad_remote = SOC_B2B_SPAD_OFFSET; + ntb->limits.max_spads = SOC_MAX_SPADS; + } else { + ntb->reg_ofs.sdb = SOC_PDOORBELL_OFFSET; + ntb->reg_ofs.spad_remote = SOC_SPAD_OFFSET; + ntb->limits.max_spads = SOC_MAX_COMPAT_SPADS; + } + + ntb->limits.max_db_bits = SOC_MAX_DB_BITS; + ntb->limits.msix_cnt = SOC_MSIX_CNT; + ntb->bits_per_vector = SOC_DB_BITS_PER_VEC; + + /* + * FIXME - MSI-X bug on early SOC HW, remove once internal issue is + * resolved. Mask transaction layer internal parity errors. + */ + pci_write_config(ntb->device, 0xFC, 0x4, 4); + + /* + * Some BIOSes aren't filling out the XLAT offsets. + * Check and correct the issue. 
/* SOC doesn't have link status interrupt, poll on that platform */
/*
 * Periodic (NTB_HB_TIMEOUT) callout: poll the link state and, when the
 * link is down because the LTSSM is stuck in force-detect, hand off to
 * the recovery callout instead of rescheduling the heartbeat (recovery
 * reschedules us once the link is healthy again).
 */
static void
ntb_handle_heartbeat(void *arg)
{
	struct ntb_softc *ntb = arg;
	uint32_t status32;
	int rc = ntb_check_link_status(ntb);

	if (rc != 0)
		device_printf(ntb->device,
		    "Error determining link status\n");
	/* Check to see if a link error is the cause of the link down */
	if (ntb->link_status == NTB_LINK_DOWN) {
		status32 = ntb_read_4(SOC_LTSSMSTATEJMP_OFFSET);
		if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0) {
			/* Start link recovery; do not re-arm the heartbeat. */
			callout_reset(&ntb->lr_timer, 0, recover_soc_link,
			    ntb);
			return;
		}
	}

	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
	    ntb_handle_heartbeat, ntb);
}
*/ + ntb_write_1(SOC_MODPHY_PCSREG6, 0xe0); + ntb_write_1(SOC_MODPHY_PCSREG4, 0x40); + ntb_write_1(SOC_MODPHY_PCSREG4, 0x60); + ntb_write_1(SOC_MODPHY_PCSREG6, 0x60); + + /* Driver waits 100ms to allow the NTB ModPhy to settle */ + pause("ModPhy", hz / 10); + + /* Clear AER Errors, write to clear */ + status = ntb_read_4(SOC_ERRCORSTS_OFFSET); + status &= PCIM_AER_COR_REPLAY_ROLLOVER; + ntb_write_4(SOC_ERRCORSTS_OFFSET, status); + + /* Clear unexpected electrical idle event in LTSSM, write to clear */ + status = ntb_read_4(SOC_LTSSMERRSTS0_OFFSET); + status |= SOC_LTSSMERRSTS0_UNEXPECTEDEI; + ntb_write_4(SOC_LTSSMERRSTS0_OFFSET, status); + + /* Clear DeSkew Buffer error, write to clear */ + status = ntb_read_4(SOC_DESKEWSTS_OFFSET); + status |= SOC_DESKEWSTS_DBERR; + ntb_write_4(SOC_DESKEWSTS_OFFSET, status); + + status = ntb_read_4(SOC_IBSTERRRCRVSTS0_OFFSET); + status &= SOC_IBIST_ERR_OFLOW; + ntb_write_4(SOC_IBSTERRRCRVSTS0_OFFSET, status); + + /* Releases the NTB state machine to allow the link to retrain */ + status = ntb_read_4(SOC_LTSSMSTATEJMP_OFFSET); + status &= ~SOC_LTSSMSTATEJMP_FORCEDETECT; + ntb_write_4(SOC_LTSSMSTATEJMP_OFFSET, status); +} + +static void +ntb_handle_link_event(struct ntb_softc *ntb, int link_state) +{ + enum ntb_hw_event event; + uint16_t status; + + if (ntb->link_status == link_state) + return; + + if (link_state == NTB_LINK_UP) { + device_printf(ntb->device, "Link Up\n"); + ntb->link_status = NTB_LINK_UP; + event = NTB_EVENT_HW_LINK_UP; + + if (ntb->type == NTB_SOC) + status = ntb_read_2(ntb->reg_ofs.lnk_stat); + else + status = pci_read_config(ntb->device, + XEON_LINK_STATUS_OFFSET, 2); + ntb->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4; + ntb->link_speed = (status & NTB_LINK_SPEED_MASK); + device_printf(ntb->device, "Link Width %d, Link Speed %d\n", + ntb->link_width, ntb->link_speed); + callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, + ntb_handle_heartbeat, ntb); + } else { + device_printf(ntb->device, "Link 
Down\n"); + ntb->link_status = NTB_LINK_DOWN; + event = NTB_EVENT_HW_LINK_DOWN; + /* Don't modify link width/speed, we need it in link recovery */ + } + + /* notify the upper layer if we have an event change */ + if (ntb->event_cb != NULL) + ntb->event_cb(ntb->ntb_transport, event); +} + +static void +recover_soc_link(void *arg) +{ + struct ntb_softc *ntb = arg; + uint8_t speed, width; + uint32_t status32; + uint16_t status16; + + soc_perform_link_restart(ntb); + pause("Link", SOC_LINK_RECOVERY_TIME * hz / 1000); + + status32 = ntb_read_4(SOC_LTSSMSTATEJMP_OFFSET); + if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0) + goto retry; + + status32 = ntb_read_4(SOC_IBSTERRRCRVSTS0_OFFSET); + if ((status32 & SOC_IBIST_ERR_OFLOW) != 0) + goto retry; + + status16 = ntb_read_2(ntb->reg_ofs.lnk_stat); + width = (status16 & NTB_LINK_WIDTH_MASK) >> 4; + speed = (status16 & NTB_LINK_SPEED_MASK); + if (ntb->link_width != width || ntb->link_speed != speed) + goto retry; + + callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, + ntb_handle_heartbeat, ntb); + return; + +retry: + callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_soc_link, + ntb); +} + +static int +ntb_check_link_status(struct ntb_softc *ntb) +{ + int link_state; + uint32_t ntb_cntl; + uint16_t status; + + if (ntb->type == NTB_SOC) { + ntb_cntl = ntb_read_4(ntb->reg_ofs.lnk_cntl); + if ((ntb_cntl & SOC_CNTL_LINK_DOWN) != 0) + link_state = NTB_LINK_DOWN; + else + link_state = NTB_LINK_UP; + } else { + status = pci_read_config(ntb->device, XEON_LINK_STATUS_OFFSET, + 2); + + if ((status & NTB_LINK_STATUS_ACTIVE) != 0) + link_state = NTB_LINK_UP; + else + link_state = NTB_LINK_DOWN; + } + + ntb_handle_link_event(ntb, link_state); + + return (0); +} + +/** + * ntb_register_event_callback() - register event callback + * @ntb: pointer to ntb_softc instance + * @func: callback function to register + * + * This function registers a callback for any HW driver events such as link + * up/down, power management 
notices and etc. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +int +ntb_register_event_callback(struct ntb_softc *ntb, ntb_event_callback func) +{ + + if (ntb->event_cb != NULL) + return (EINVAL); + + ntb->event_cb = func; + + return (0); +} + +/** + * ntb_unregister_event_callback() - unregisters the event callback + * @ntb: pointer to ntb_softc instance + * + * This function unregisters the existing callback from transport + */ +void +ntb_unregister_event_callback(struct ntb_softc *ntb) +{ + + ntb->event_cb = NULL; +} + +/** + * ntb_register_db_callback() - register a callback for doorbell interrupt + * @ntb: pointer to ntb_softc instance + * @idx: doorbell index to register callback, zero based + * @func: callback function to register + * + * This function registers a callback function for the doorbell interrupt + * on the primary side. The function will unmask the doorbell as well to + * allow interrupt. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +int +ntb_register_db_callback(struct ntb_softc *ntb, unsigned int idx, void *data, + ntb_db_callback func) +{ + uint16_t mask; + + if (idx >= ntb->allocated_interrupts || ntb->db_cb[idx].callback) { + device_printf(ntb->device, "Invalid Index.\n"); + return (EINVAL); + } + + ntb->db_cb[idx].callback = func; + ntb->db_cb[idx].data = data; + + /* unmask interrupt */ + mask = ntb_read_2(ntb->reg_ofs.pdb_mask); + mask &= ~(1 << (idx * ntb->bits_per_vector)); + ntb_write_2(ntb->reg_ofs.pdb_mask, mask); + + return (0); +} + +/** + * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt + * @ntb: pointer to ntb_softc instance + * @idx: doorbell index to register callback, zero based + * + * This function unregisters a callback function for the doorbell interrupt + * on the primary side. The function will also mask the said doorbell. 
+ */ +void +ntb_unregister_db_callback(struct ntb_softc *ntb, unsigned int idx) +{ + unsigned long mask; + + if (idx >= ntb->allocated_interrupts || !ntb->db_cb[idx].callback) + return; + + mask = ntb_read_2(ntb->reg_ofs.pdb_mask); + mask |= 1 << (idx * ntb->bits_per_vector); + ntb_write_2(ntb->reg_ofs.pdb_mask, mask); + + ntb->db_cb[idx].callback = NULL; +} + +/** + * ntb_find_transport() - find the transport pointer + * @transport: pointer to pci device + * + * Given the pci device pointer, return the transport pointer passed in when + * the transport attached when it was inited. + * + * RETURNS: pointer to transport. + */ +void * +ntb_find_transport(struct ntb_softc *ntb) +{ + + return (ntb->ntb_transport); +} + +/** + * ntb_register_transport() - Register NTB transport with NTB HW driver + * @transport: transport identifier + * + * This function allows a transport to reserve the hardware driver for + * NTB usage. + * + * RETURNS: pointer to ntb_softc, NULL on error. + */ +struct ntb_softc * +ntb_register_transport(struct ntb_softc *ntb, void *transport) +{ + + /* + * TODO: when we have more than one transport, we will need to rewrite + * this to prevent race conditions + */ + if (ntb->ntb_transport != NULL) + return (NULL); + + ntb->ntb_transport = transport; + return (ntb); +} + +/** + * ntb_unregister_transport() - Unregister the transport with the NTB HW driver + * @ntb - ntb_softc of the transport to be freed + * + * This function unregisters the transport from the HW driver and performs any + * necessary cleanups. 
+ */ +void +ntb_unregister_transport(struct ntb_softc *ntb) +{ + int i; + + if (ntb->ntb_transport == NULL) + return; + + for (i = 0; i < ntb->allocated_interrupts; i++) + ntb_unregister_db_callback(ntb, i); + + ntb_unregister_event_callback(ntb); + ntb->ntb_transport = NULL; +} + +/** + * ntb_get_max_spads() - get the total scratch regs usable + * @ntb: pointer to ntb_softc instance + * + * This function returns the max 32bit scratchpad registers usable by the + * upper layer. + * + * RETURNS: total number of scratch pad registers available + */ +int +ntb_get_max_spads(struct ntb_softc *ntb) +{ + + return (ntb->limits.max_spads); +} + +/** + * ntb_write_local_spad() - write to the secondary scratchpad register + * @ntb: pointer to ntb_softc instance + * @idx: index to the scratchpad register, 0 based + * @val: the data value to put into the register + * + * This function allows writing of a 32bit value to the indexed scratchpad + * register. The register resides on the secondary (external) side. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +int +ntb_write_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val) +{ + + if (idx >= ntb->limits.max_spads) + return (EINVAL); + + ntb_write_4(ntb->reg_ofs.spad_local + idx * 4, val); + + return (0); +} + +/** + * ntb_read_local_spad() - read from the primary scratchpad register + * @ntb: pointer to ntb_softc instance + * @idx: index to scratchpad register, 0 based + * @val: pointer to 32bit integer for storing the register value + * + * This function allows reading of the 32bit scratchpad register on + * the primary (internal) side. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. 
+ */ +int +ntb_read_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val) +{ + + if (idx >= ntb->limits.max_spads) + return (EINVAL); + + *val = ntb_read_4(ntb->reg_ofs.spad_local + idx * 4); + + return (0); +} + +/** + * ntb_write_remote_spad() - write to the secondary scratchpad register + * @ntb: pointer to ntb_softc instance + * @idx: index to the scratchpad register, 0 based + * @val: the data value to put into the register + * + * This function allows writing of a 32bit value to the indexed scratchpad + * register. The register resides on the secondary (external) side. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +int +ntb_write_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val) +{ + + if (idx >= ntb->limits.max_spads) + return (EINVAL); + + ntb_write_4(ntb->reg_ofs.spad_remote + idx * 4, val); + + return (0); +} + +/** + * ntb_read_remote_spad() - read from the primary scratchpad register + * @ntb: pointer to ntb_softc instance + * @idx: index to scratchpad register, 0 based + * @val: pointer to 32bit integer for storing the register value + * + * This function allows reading of the 32bit scratchpad register on + * the primary (internal) side. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +int +ntb_read_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val) +{ + + if (idx >= ntb->limits.max_spads) + return (EINVAL); + + *val = ntb_read_4(ntb->reg_ofs.spad_remote + idx * 4); + + return (0); +} + +/** + * ntb_get_mw_vbase() - get virtual addr for the NTB memory window + * @ntb: pointer to ntb_softc instance + * @mw: memory window number + * + * This function provides the base virtual address of the memory window + * specified. + * + * RETURNS: pointer to virtual address, or NULL on error. 
+ */ +void * +ntb_get_mw_vbase(struct ntb_softc *ntb, unsigned int mw) +{ + + if (mw >= NTB_NUM_MW) + return (NULL); + + return (ntb->bar_info[NTB_MW_TO_BAR(mw)].vbase); +} + +vm_paddr_t +ntb_get_mw_pbase(struct ntb_softc *ntb, unsigned int mw) +{ + + if (mw >= NTB_NUM_MW) + return (0); + + return (ntb->bar_info[NTB_MW_TO_BAR(mw)].pbase); +} + +/** + * ntb_get_mw_size() - return size of NTB memory window + * @ntb: pointer to ntb_softc instance + * @mw: memory window number + * + * This function provides the physical size of the memory window specified + * + * RETURNS: the size of the memory window or zero on error + */ +u_long +ntb_get_mw_size(struct ntb_softc *ntb, unsigned int mw) +{ + + if (mw >= NTB_NUM_MW) + return (0); + + return (ntb->bar_info[NTB_MW_TO_BAR(mw)].size); +} + +/** + * ntb_set_mw_addr - set the memory window address + * @ntb: pointer to ntb_softc instance + * @mw: memory window number + * @addr: base address for data + * + * This function sets the base physical address of the memory window. This + * memory address is where data from the remote system will be transfered into + * or out of depending on how the transport is configured. + */ +void +ntb_set_mw_addr(struct ntb_softc *ntb, unsigned int mw, uint64_t addr) +{ + + if (mw >= NTB_NUM_MW) + return; + + switch (NTB_MW_TO_BAR(mw)) { + case NTB_B2B_BAR_1: + ntb_write_8(ntb->reg_ofs.sbar2_xlat, addr); + break; + case NTB_B2B_BAR_2: + ntb_write_8(ntb->reg_ofs.sbar4_xlat, addr); + break; + } +} + +/** + * ntb_ring_sdb() - Set the doorbell on the secondary/external side + * @ntb: pointer to ntb_softc instance + * @db: doorbell to ring + * + * This function allows triggering of a doorbell on the secondary/external + * side that will initiate an interrupt on the remote host + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. 
+ */ +void +ntb_ring_sdb(struct ntb_softc *ntb, unsigned int db) +{ + + if (ntb->type == NTB_SOC) + ntb_write_8(ntb->reg_ofs.sdb, (uint64_t) 1 << db); + else + ntb_write_2(ntb->reg_ofs.sdb, + ((1 << ntb->bits_per_vector) - 1) << + (db * ntb->bits_per_vector)); +} + +/** + * ntb_query_link_status() - return the hardware link status + * @ndev: pointer to ntb_device instance + * + * Returns true if the hardware is connected to the remote system + * + * RETURNS: true or false based on the hardware link state + */ +bool +ntb_query_link_status(struct ntb_softc *ntb) +{ + + return (ntb->link_status == NTB_LINK_UP); +} + +static bool +is_bar_for_data_transfer(int bar_num) +{ + if ((bar_num > NTB_CONFIG_BAR) && (bar_num < NTB_MAX_BARS)) + return true; + else + return false; +} diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.h b/sys/dev/ntb/ntb_hw/ntb_hw.h new file mode 100644 index 0000000..4f44031 --- /dev/null +++ b/sys/dev/ntb/ntb_hw/ntb_hw.h @@ -0,0 +1,73 @@ +/*- + * Copyright (C) 2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NTB_HW_H_ +#define _NTB_HW_H_ + +struct ntb_softc; + +#define NTB_NUM_MW 2 +#define NTB_LINK_DOWN 0 +#define NTB_LINK_UP 1 + +enum ntb_hw_event { + NTB_EVENT_SW_EVENT0 = 0, + NTB_EVENT_SW_EVENT1, + NTB_EVENT_SW_EVENT2, + NTB_EVENT_HW_ERROR, + NTB_EVENT_HW_LINK_UP, + NTB_EVENT_HW_LINK_DOWN, +}; + +typedef void (*ntb_db_callback)(void *data, int db_num); +typedef void (*ntb_event_callback)(void *data, enum ntb_hw_event event); + +int ntb_register_event_callback(struct ntb_softc *ntb, ntb_event_callback func); +void ntb_unregister_event_callback(struct ntb_softc *ntb); +int ntb_register_db_callback(struct ntb_softc *ntb, unsigned int idx, + void *data, ntb_db_callback func); +void ntb_unregister_db_callback(struct ntb_softc *ntb, unsigned int idx); +void *ntb_find_transport(struct ntb_softc *ntb); +struct ntb_softc *ntb_register_transport(struct ntb_softc *ntb, + void *transport); +void ntb_unregister_transport(struct ntb_softc *ntb); +int ntb_get_max_spads(struct ntb_softc *ntb); +int ntb_write_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val); +int ntb_read_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val); +int ntb_write_remote_spad(struct ntb_softc *ntb, unsigned int idx, + uint32_t val); +int ntb_read_remote_spad(struct ntb_softc *ntb, unsigned int idx, + uint32_t *val); +void *ntb_get_mw_vbase(struct ntb_softc *ntb, unsigned int mw); +vm_paddr_t ntb_get_mw_pbase(struct 
ntb_softc *ntb, unsigned int mw); +u_long ntb_get_mw_size(struct ntb_softc *ntb, unsigned int mw); +void ntb_set_mw_addr(struct ntb_softc *ntb, unsigned int mw, uint64_t addr); +void ntb_ring_sdb(struct ntb_softc *ntb, unsigned int db); +bool ntb_query_link_status(struct ntb_softc *ntb); + +#endif /* _NTB_HW_H_ */ diff --git a/sys/dev/ntb/ntb_hw/ntb_regs.h b/sys/dev/ntb/ntb_hw/ntb_regs.h new file mode 100644 index 0000000..34ad779 --- /dev/null +++ b/sys/dev/ntb/ntb_hw/ntb_regs.h @@ -0,0 +1,146 @@ +/*- + * Copyright (C) 2013 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NTB_REGS_H_ +#define _NTB_REGS_H_ + +#define NTB_LINK_ENABLE 0x0000 +#define NTB_LINK_DISABLE 0x0002 +#define NTB_LINK_STATUS_ACTIVE 0x2000 +#define NTB_LINK_SPEED_MASK 0x000f +#define NTB_LINK_WIDTH_MASK 0x03f0 + +#define XEON_MSIX_CNT 4 +#define XEON_MAX_SPADS 16 +#define XEON_MAX_COMPAT_SPADS 8 +/* Reserve the uppermost bit for link interrupt */ +#define XEON_MAX_DB_BITS 15 +#define XEON_DB_BITS_PER_VEC 5 + +#define XEON_DB_HW_LINK 0x8000 + +#define XEON_PCICMD_OFFSET 0x0504 +#define XEON_DEVCTRL_OFFSET 0x0598 +#define XEON_LINK_STATUS_OFFSET 0x01A2 + +#define XEON_PBAR2LMT_OFFSET 0x0000 +#define XEON_PBAR4LMT_OFFSET 0x0008 +#define XEON_PBAR2XLAT_OFFSET 0x0010 +#define XEON_PBAR4XLAT_OFFSET 0x0018 +#define XEON_SBAR2LMT_OFFSET 0x0020 +#define XEON_SBAR4LMT_OFFSET 0x0028 +#define XEON_SBAR2XLAT_OFFSET 0x0030 +#define XEON_SBAR4XLAT_OFFSET 0x0038 +#define XEON_SBAR0BASE_OFFSET 0x0040 +#define XEON_SBAR2BASE_OFFSET 0x0048 +#define XEON_SBAR4BASE_OFFSET 0x0050 +#define XEON_NTBCNTL_OFFSET 0x0058 +#define XEON_SBDF_OFFSET 0x005C +#define XEON_PDOORBELL_OFFSET 0x0060 +#define XEON_PDBMSK_OFFSET 0x0062 +#define XEON_SDOORBELL_OFFSET 0x0064 +#define XEON_SDBMSK_OFFSET 0x0066 +#define XEON_USMEMMISS 0x0070 +#define XEON_SPAD_OFFSET 0x0080 +#define XEON_SPADSEMA4_OFFSET 0x00c0 +#define XEON_WCCNTRL_OFFSET 0x00e0 +#define XEON_B2B_SPAD_OFFSET 0x0100 +#define XEON_B2B_DOORBELL_OFFSET 0x0140 +#define XEON_B2B_XLAT_OFFSET 0x0144 + +#define SOC_MSIX_CNT 34 +#define SOC_MAX_SPADS 16 +#define SOC_MAX_COMPAT_SPADS 16 +#define SOC_MAX_DB_BITS 34 +#define SOC_DB_BITS_PER_VEC 1 + +#define SOC_PCICMD_OFFSET 0xb004 +#define SOC_MBAR23_OFFSET 0xb018 +#define SOC_MBAR45_OFFSET 0xb020 +#define SOC_DEVCTRL_OFFSET 0xb048 +#define SOC_LINK_STATUS_OFFSET 0xb052 +#define SOC_ERRCORSTS_OFFSET 0xb110 + +#define SOC_SBAR2XLAT_OFFSET 0x0008 +#define SOC_SBAR4XLAT_OFFSET 0x0010 +#define SOC_PDOORBELL_OFFSET 0x0020 +#define SOC_PDBMSK_OFFSET 0x0028 +#define 
SOC_NTBCNTL_OFFSET 0x0060 +#define SOC_EBDF_OFFSET 0x0064 +#define SOC_SPAD_OFFSET 0x0080 +#define SOC_SPADSEMA_OFFSET 0x00c0 +#define SOC_STKYSPAD_OFFSET 0x00c4 +#define SOC_PBAR2XLAT_OFFSET 0x8008 +#define SOC_PBAR4XLAT_OFFSET 0x8010 +#define SOC_B2B_DOORBELL_OFFSET 0x8020 +#define SOC_B2B_SPAD_OFFSET 0x8080 +#define SOC_B2B_SPADSEMA_OFFSET 0x80c0 +#define SOC_B2B_STKYSPAD_OFFSET 0x80c4 + +#define SOC_MODPHY_PCSREG4 0x1c004 +#define SOC_MODPHY_PCSREG6 0x1c006 + +#define SOC_IP_BASE 0xC000 +#define SOC_DESKEWSTS_OFFSET (SOC_IP_BASE + 0x3024) +#define SOC_LTSSMERRSTS0_OFFSET (SOC_IP_BASE + 0x3180) +#define SOC_LTSSMSTATEJMP_OFFSET (SOC_IP_BASE + 0x3040) +#define SOC_IBSTERRRCRVSTS0_OFFSET (SOC_IP_BASE + 0x3324) + +#define SOC_DESKEWSTS_DBERR (1 << 15) +#define SOC_LTSSMERRSTS0_UNEXPECTEDEI (1 << 20) +#define SOC_LTSSMSTATEJMP_FORCEDETECT (1 << 2) +#define SOC_IBIST_ERR_OFLOW 0x7FFF7FFF + +#define NTB_CNTL_BAR23_SNOOP (1 << 2) +#define NTB_CNTL_BAR45_SNOOP (1 << 6) +#define SOC_CNTL_LINK_DOWN (1 << 16) + +#define NTB_PPD_OFFSET 0x00D4 +#define XEON_PPD_CONN_TYPE 0x0003 +#define XEON_PPD_DEV_TYPE 0x0010 +#define SOC_PPD_INIT_LINK 0x0008 +#define SOC_PPD_CONN_TYPE 0x0300 +#define SOC_PPD_DEV_TYPE 0x1000 + +#define NTB_CONN_CLASSIC 0 +#define NTB_CONN_B2B 1 +#define NTB_CONN_RP 2 + +#define NTB_DEV_DSD 1 +#define NTB_DEV_USD 0 + +#define SOC_PBAR2XLAT_USD_ADDR 0x0000004000000000 +#define SOC_PBAR4XLAT_USD_ADDR 0x0000008000000000 +#define SOC_MBAR23_USD_ADDR 0x000000410000000C +#define SOC_MBAR45_USD_ADDR 0x000000810000000C +#define SOC_PBAR2XLAT_DSD_ADDR 0x0000004100000000 +#define SOC_PBAR4XLAT_DSD_ADDR 0x0000008100000000 +#define SOC_MBAR23_DSD_ADDR 0x000000400000000C +#define SOC_MBAR45_DSD_ADDR 0x000000800000000C + +#endif /* _NTB_REGS_H_ */ diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 55af93a..c2dc799 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -243,6 +243,7 @@ SUBDIR= \ nmdm \ ${_nsp} \ nullfs \ + ${_ntb} \ ${_nvd} \ ${_nve} 
\ ${_nvme} \ @@ -693,6 +694,7 @@ _mthca= mthca .endif _ndis= ndis _nfe= nfe +_ntb= ntb _nvd= nvd .if ${MK_SOURCELESS_HOST} != "no" _nve= nve diff --git a/sys/modules/ntb/Makefile b/sys/modules/ntb/Makefile new file mode 100644 index 0000000..a5169a0 --- /dev/null +++ b/sys/modules/ntb/Makefile @@ -0,0 +1,5 @@ +# $FreeBSD$ + +SUBDIR= ntb_hw if_ntb + +.include diff --git a/sys/modules/ntb/if_ntb/Makefile b/sys/modules/ntb/if_ntb/Makefile new file mode 100644 index 0000000..468593d --- /dev/null +++ b/sys/modules/ntb/if_ntb/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/ntb/if_ntb + +KMOD = if_ntb +SRCS = if_ntb.c +SRCS += device_if.h bus_if.h pci_if.h + +.include diff --git a/sys/modules/ntb/ntb_hw/Makefile b/sys/modules/ntb/ntb_hw/Makefile new file mode 100644 index 0000000..fc46b46 --- /dev/null +++ b/sys/modules/ntb/ntb_hw/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/ntb/ntb_hw + +KMOD = ntb_hw +SRCS = ntb_hw.c +SRCS += device_if.h bus_if.h pci_if.h + +.include -- cgit v1.1