summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1/user_sdma.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-03 17:49:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-03 17:49:17 -0700
commitaa9d4648c2fbb455df7750ade1b73dd9ad9b3690 (patch)
treebc4590c27e6f30ec0612b28f3f38a539535b9930 /drivers/infiniband/hw/hfi1/user_sdma.h
parent906dde0f355bd97c080c215811ae7db1137c4af8 (diff)
parent8eb19e8e7c8658226d8b7e75728e6dfa2ef32717 (diff)
downloadop-kernel-dev-aa9d4648c2fbb455df7750ade1b73dd9ad9b3690.zip
op-kernel-dev-aa9d4648c2fbb455df7750ade1b73dd9ad9b3690.tar.gz
Merge tag 'for-linus-ioctl' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford: "This is a big pull request. Of note is that I'm sending you the new ioctl API for the rdma subsystem. We put it up on linux-api@, but didn't get much response. The API is complex, but it solves two different problems in one go: 1) The bi-directional nature of the RDMA file write calls, which created the security hole we had to handle (and for which the fix is now causing problems for systems in production, we were a bit over zealous in the fix and the ability to open a device, then fork, then create new queue pairs on the device and use them is broken). 2) The bloat caused by different vendors implementing extensions to the base verbs API. Each vendor's hardware is slightly different, and the hardware might be suitable for one extension but not another. By the time we add generic extensions for all the different ways that the different hardware can offload things, the API becomes bloated. Things like our completion structs have started to exceed a cache line in size because of all the elements needed to support this. That in turn shows up heavily in the performance graphs with a noticable drop in performance on 100Gigabit links as our completion structs go from occupying one cache line to 1+. This API makes things like the completion structs modular in a very similar way to netlink so that your structs can only include the items needed for the offloads/features you are actually using on a given queue pair. In that way we support everything, but only use what we need, and our structs stay smaller. The ioctl API is better explained by the posting on linux-api@ than I can explain it here, so I'll just leave it at that. The rest of the pull request is typical stuff. Updates for 4.14 kernel merge window - Lots of hfi1 driver updates (mixed with a few qib and core updates as well) - rxe updates - various mlx updates - Set default roce type to RoCEv2 - Several larger fixes for bnxt_re that were too big for -rc - Several larger fixes for qedr that, likewise, were too big for -rc - Misc core changes - Make the hns_roce driver compilable on arches other than aarch64 so we can more easily debug build issues related to it - Add rdma-netlink infrastructure updates - Add automatic IRQ affinity infrastructure - Add 32bit lid support - Lots of misc fixes across the subsystem from random people - Autoloading of RDMA netlink modules - PCI pool cleanups from Romain Perier - mlx5 driver feature additions and fixes - Hardware tag matchine feature - Fix sleeping in atomic when resolving roce ah - Add experimental ioctl interface as posted to linux-api@" * tag 'for-linus-ioctl' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (328 commits) IB/core: Expose ioctl interface through experimental Kconfig IB/core: Assign root to all drivers IB/core: Add completion queue (cq) object actions IB/core: Add legacy driver's user-data IB/core: Export ioctl enum types to user-space IB/core: Explicitly destroy an object while keeping uobject IB/core: Add macros for declaring methods and attributes IB/core: Add uverbs merge trees functionality IB/core: Add DEVICE object and root tree structure IB/core: Declare an object instead of declaring only type attributes IB/core: Add new ioctl interface RDMA/vmw_pvrdma: Fix a signedness RDMA/vmw_pvrdma: Report network header type in WC IB/core: Add might_sleep() annotation to ib_init_ah_from_wc() IB/cm: Fix sleeping in atomic when RoCE is used IB/core: Add support to finalize objects in one transaction IB/core: Add a generic way to execute an operation on a uobject Documentation: Hardware tag matching IB/mlx5: Support IB_SRQT_TM net/mlx5: Add XRQ support ...
Diffstat (limited to 'drivers/infiniband/hw/hfi1/user_sdma.h')
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h169
1 files changed, 166 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index e5b10ae..9b8bb56 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -53,11 +53,68 @@
#include "iowait.h"
#include "user_exp_rcv.h"
+/* The maximum number of Data io vectors per message/request */
+#define MAX_VECTORS_PER_REQ 8
+/*
+ * Maximum number of packet to send from each message/request
+ * before moving to the next one.
+ */
+#define MAX_PKTS_PER_QUEUE 16
+
+#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT))
+
+#define req_opcode(x) \
+ (((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK)
+#define req_version(x) \
+ (((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK)
+#define req_iovcnt(x) \
+ (((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK)
+
+/* Number of BTH.PSN bits used for sequence number in expected rcvs */
+#define BTH_SEQ_MASK 0x7ffull
+
+#define AHG_KDETH_INTR_SHIFT 12
+#define AHG_KDETH_SH_SHIFT 13
+#define AHG_KDETH_ARRAY_SIZE 9
+
+#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
+#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
+
+#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \
+ do { \
+ if ((idx) < ARRAY_SIZE((arr))) \
+ (arr)[(idx++)] = sdma_build_ahg_descriptor( \
+ (__force u16)(value), (dw), (bit), \
+ (width)); \
+ else \
+ return -ERANGE; \
+ } while (0)
+
+/* Tx request flag bits */
+#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
+#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
+
+#define SDMA_PKT_Q_INACTIVE BIT(0)
+#define SDMA_PKT_Q_ACTIVE BIT(1)
+#define SDMA_PKT_Q_DEFERRED BIT(2)
+
+/*
+ * Maximum retry attempts to submit a TX request
+ * before putting the process to sleep.
+ */
+#define MAX_DEFER_RETRY_COUNT 1
+
+#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
+
+#define SDMA_DBG(req, fmt, ...) \
+ hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \
+ (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \
+ ##__VA_ARGS__)
+
extern uint extended_psn;
struct hfi1_user_sdma_pkt_q {
- struct list_head list;
- unsigned ctxt;
+ u16 ctxt;
u16 subctxt;
u16 n_max_reqs;
atomic_t n_reqs;
@@ -80,9 +137,115 @@ struct hfi1_user_sdma_comp_q {
struct hfi1_sdma_comp_entry *comps;
};
+struct sdma_mmu_node {
+ struct mmu_rb_node rb;
+ struct hfi1_user_sdma_pkt_q *pq;
+ atomic_t refcount;
+ struct page **pages;
+ unsigned int npages;
+};
+
+struct user_sdma_iovec {
+ struct list_head list;
+ struct iovec iov;
+ /* number of pages in this vector */
+ unsigned int npages;
+ /* array of pinned pages for this vector */
+ struct page **pages;
+ /*
+ * offset into the virtual address space of the vector at
+ * which we last left off.
+ */
+ u64 offset;
+ struct sdma_mmu_node *node;
+};
+
+/* evict operation argument */
+struct evict_data {
+ u32 cleared; /* count evicted so far */
+ u32 target; /* target count to evict */
+};
+
+struct user_sdma_request {
+ /* This is the original header from user space */
+ struct hfi1_pkt_header hdr;
+
+ /* Read mostly fields */
+ struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp;
+ struct hfi1_user_sdma_comp_q *cq;
+ /*
+ * Pointer to the SDMA engine for this request.
+ * Since different request could be on different VLs,
+ * each request will need it's own engine pointer.
+ */
+ struct sdma_engine *sde;
+ struct sdma_req_info info;
+ /* TID array values copied from the tid_iov vector */
+ u32 *tids;
+ /* total length of the data in the request */
+ u32 data_len;
+ /* number of elements copied to the tids array */
+ u16 n_tids;
+ /*
+ * We copy the iovs for this request (based on
+ * info.iovcnt). These are only the data vectors
+ */
+ u8 data_iovs;
+ s8 ahg_idx;
+
+ /* Writeable fields shared with interrupt */
+ u64 seqcomp ____cacheline_aligned_in_smp;
+ u64 seqsubmitted;
+ /* status of the last txreq completed */
+ int status;
+
+ /* Send side fields */
+ struct list_head txps ____cacheline_aligned_in_smp;
+ u64 seqnum;
+ /*
+ * KDETH.OFFSET (TID) field
+ * The offset can cover multiple packets, depending on the
+ * size of the TID entry.
+ */
+ u32 tidoffset;
+ /*
+ * KDETH.Offset (Eager) field
+ * We need to remember the initial value so the headers
+ * can be updated properly.
+ */
+ u32 koffset;
+ u32 sent;
+ /* TID index copied from the tid_iov vector */
+ u16 tididx;
+ /* progress index moving along the iovs array */
+ u8 iov_idx;
+ u8 done;
+ u8 has_error;
+
+ struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
+} ____cacheline_aligned_in_smp;
+
+/*
+ * A single txreq could span up to 3 physical pages when the MTU
+ * is sufficiently large (> 4K). Each of the IOV pointers also
+ * needs it's own set of flags so the vector has been handled
+ * independently of each other.
+ */
+struct user_sdma_txreq {
+ /* Packet header for the txreq */
+ struct hfi1_pkt_header hdr;
+ struct sdma_txreq txreq;
+ struct list_head list;
+ struct user_sdma_request *req;
+ u16 flags;
+ unsigned int busycount;
+ u64 seqnum;
+};
+
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
struct hfi1_filedata *fd);
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd);
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count);
OpenPOWER on IntegriCloud