diff options
Diffstat (limited to 'drivers/staging/lustre/lnet')
27 files changed, 400 insertions, 729 deletions
diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig index 2b59301..13b4327 100644 --- a/drivers/staging/lustre/lnet/Kconfig +++ b/drivers/staging/lustre/lnet/Kconfig @@ -35,6 +35,7 @@ config LNET_SELFTEST config LNET_XPRT_IB tristate "LNET infiniband support" depends on LNET && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on BROKEN default LNET && INFINIBAND help This option allows the LNET users to use infiniband as an diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 4f5978b..9e88021 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -128,6 +128,7 @@ static int kiblnd_msgtype2size(int type) static int kiblnd_unpack_rd(struct kib_msg *msg, int flip) { struct kib_rdma_desc *rd; + int msg_size; int nob; int n; int i; @@ -146,12 +147,6 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip) n = rd->rd_nfrags; - if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) { - CERROR("Bad nfrags: %d, should be 0 < n <= %d\n", - n, IBLND_MAX_RDMA_FRAGS); - return 1; - } - nob = offsetof(struct kib_msg, ibm_u) + kiblnd_rd_msg_size(rd, msg->ibm_type, n); @@ -161,6 +156,13 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip) return 1; } + msg_size = kiblnd_rd_size(rd); + if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) { + CERROR("Bad msg_size: %d, should be 0 < n <= %d\n", + msg_size, LNET_MAX_PAYLOAD); + return 1; + } + if (!flip) return 0; @@ -618,7 +620,7 @@ static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt) } struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid, - int state, int version) + int state, int version) { /* * CAVEAT EMPTOR: @@ -2465,7 +2467,7 @@ int kiblnd_dev_failover(struct kib_dev *dev) hdev->ibh_cmid = cmid; hdev->ibh_ibdev = cmid->device; - pd = ib_alloc_pd(cmid->device); + pd = ib_alloc_pd(cmid->device, 0); if (IS_ERR(pd)) { rc = PTR_ERR(pd); CERROR("Can't allocate PD: %d\n", rc); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index 078a0c3..1457697 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -113,8 +113,9 @@ extern struct kib_tunables kiblnd_tunables; #define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1) #define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0) -#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */ -#define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */ +#define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */ +#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */ +#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */ /************************/ /* derived constants... */ @@ -133,8 +134,8 @@ extern struct kib_tunables kiblnd_tunables; /* WRs and CQEs (per connection) */ #define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c) #define IBLND_SEND_WRS(c) \ - ((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \ - c->ibc_peer->ibp_ni)) + (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \ + kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni)) #define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c)) struct kib_hca_dev; @@ -582,6 +583,8 @@ struct kib_peer { unsigned short ibp_connecting; /* reconnect this peer later */ unsigned short ibp_reconnecting:1; + /* counter of how many times we triggered a conn race */ + unsigned char ibp_races; /* # consecutive reconnection attempts to this peer */ unsigned int ibp_reconnected; /* errno on closing this peer */ @@ -607,14 +610,14 @@ kiblnd_cfg_rdma_frags(struct lnet_ni *ni) tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; mod = tunables->lnd_map_on_demand; - return mod ? mod : IBLND_MAX_RDMA_FRAGS; + return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT; } static inline int kiblnd_rdma_frags(int version, struct lnet_ni *ni) { return version == IBLND_MSG_VERSION_1 ? - IBLND_MAX_RDMA_FRAGS : + (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) : kiblnd_cfg_rdma_frags(ni); } @@ -1034,5 +1037,4 @@ int kiblnd_post_rx(struct kib_rx *rx, int credit); int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); + struct iov_iter *to, unsigned int rlen); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 596a697..b27de88 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -36,16 +36,19 @@ #include "o2iblnd.h" +#define MAX_CONN_RACES_BEFORE_ABORT 20 + static void kiblnd_peer_alive(struct kib_peer *peer); static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error); -static void kiblnd_check_sends(struct kib_conn *conn); static void kiblnd_init_tx_msg(lnet_ni_t *ni, struct kib_tx *tx, - int type, int body_nob); + int type, int body_nob); static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, - int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie); + int resid, struct kib_rdma_desc *dstrd, + __u64 dstcookie); static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn); static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn); static void kiblnd_unmap_tx(lnet_ni_t *ni, struct kib_tx *tx); +static void kiblnd_check_sends_locked(struct kib_conn *conn); static void kiblnd_tx_done(lnet_ni_t *ni, struct kib_tx *tx) @@ -211,9 +214,9 @@ kiblnd_post_rx(struct kib_rx *rx, int credit) conn->ibc_outstanding_credits++; else conn->ibc_reserved_credits++; + kiblnd_check_sends_locked(conn); spin_unlock(&conn->ibc_lock); - kiblnd_check_sends(conn); out: kiblnd_conn_decref(conn); return rc; @@ -344,8 +347,8 @@ kiblnd_handle_rx(struct kib_rx *rx) !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */ conn->ibc_outstanding_credits++; + kiblnd_check_sends_locked(conn); spin_unlock(&conn->ibc_lock); - kiblnd_check_sends(conn); } switch (msg->ibm_type) { @@ -648,7 +651,7 @@ static int kiblnd_map_tx(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc static int kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd, - unsigned int niov, struct kvec *iov, int offset, int nob) + unsigned int niov, const struct kvec *iov, int offset, int nob) { struct kib_net *net = ni->ni_data; struct page *page; @@ -705,7 +708,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd, static int kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) + int nkiov, const lnet_kiov_t *kiov, int offset, int nob) { struct kib_net *net = ni->ni_data; struct scatterlist *sg; @@ -717,8 +720,8 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd, LASSERT(nkiov > 0); LASSERT(net); - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; + while (offset >= kiov->bv_len) { + offset -= kiov->bv_len; nkiov--; kiov++; LASSERT(nkiov > 0); @@ -728,10 +731,10 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd, do { LASSERT(nkiov > 0); - fragnob = min((int)(kiov->kiov_len - offset), nob); + fragnob = min((int)(kiov->bv_len - offset), nob); - sg_set_page(sg, kiov->kiov_page, fragnob, - kiov->kiov_offset + offset); + sg_set_page(sg, kiov->bv_page, fragnob, + kiov->bv_offset + offset); sg = sg_next(sg); if (!sg) { CERROR("lacking enough sg entries to map tx\n"); @@ -761,7 +764,6 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit) LASSERT(tx->tx_queued); /* We rely on this for QP sizing */ LASSERT(tx->tx_nwrq > 0); - LASSERT(tx->tx_nwrq <= 1 + conn->ibc_max_frags); LASSERT(!credit || credit == 1); LASSERT(conn->ibc_outstanding_credits >= 0); @@ -800,7 +802,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit) conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) { /* * OK to drop when posted enough NOOPs, since - * kiblnd_check_sends will queue NOOP again when + * kiblnd_check_sends_locked will queue NOOP again when * posted NOOPs complete */ spin_unlock(&conn->ibc_lock); @@ -905,7 +907,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit) } static void -kiblnd_check_sends(struct kib_conn *conn) +kiblnd_check_sends_locked(struct kib_conn *conn) { int ver = conn->ibc_version; lnet_ni_t *ni = conn->ibc_peer->ibp_ni; @@ -918,8 +920,6 @@ kiblnd_check_sends(struct kib_conn *conn) return; } - spin_lock(&conn->ibc_lock); - LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni)); LASSERT(!IBLND_OOB_CAPABLE(ver) || conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver)); @@ -969,8 +969,6 @@ kiblnd_check_sends(struct kib_conn *conn) if (kiblnd_post_tx_locked(conn, tx, credit)) break; } - - spin_unlock(&conn->ibc_lock); } static void @@ -1016,16 +1014,11 @@ kiblnd_tx_complete(struct kib_tx *tx, int status) if (idle) list_del(&tx->tx_list); - kiblnd_conn_addref(conn); /* 1 ref for me.... */ - + kiblnd_check_sends_locked(conn); spin_unlock(&conn->ibc_lock); if (idle) kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx); - - kiblnd_check_sends(conn); - - kiblnd_conn_decref(conn); /* ...until here */ } static void @@ -1078,6 +1071,15 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, LASSERT(type == IBLND_MSG_GET_DONE || type == IBLND_MSG_PUT_DONE); + if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) { + CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid), + conn->ibc_max_frags << PAGE_SHIFT, + kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd)); + rc = -EMSGSIZE; + goto too_big; + } + while (resid > 0) { if (srcidx >= srcrd->rd_nfrags) { CERROR("Src buffer exhausted: %d frags\n", srcidx); @@ -1091,10 +1093,10 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, break; } - if (tx->tx_nwrq >= conn->ibc_max_frags) { + if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) { CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), - conn->ibc_max_frags, + IBLND_MAX_RDMA_FRAGS, srcidx, srcrd->rd_nfrags, dstidx, dstrd->rd_nfrags); rc = -EMSGSIZE; @@ -1132,7 +1134,7 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, wrq++; sge++; } - +too_big: if (rc < 0) /* no RDMA if completing with failure */ tx->tx_nwrq = 0; @@ -1204,9 +1206,8 @@ kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn) { spin_lock(&conn->ibc_lock); kiblnd_queue_tx_locked(tx, conn); + kiblnd_check_sends_locked(conn); spin_unlock(&conn->ibc_lock); - - kiblnd_check_sends(conn); } static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, @@ -1499,6 +1500,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; unsigned int payload_offset = lntmsg->msg_offset; unsigned int payload_nob = lntmsg->msg_len; + struct iov_iter from; struct kib_msg *ibmsg; struct kib_rdma_desc *rd; struct kib_tx *tx; @@ -1518,6 +1520,17 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) /* payload is either all vaddrs or all pages */ LASSERT(!(payload_kiov && payload_iov)); + if (payload_kiov) + iov_iter_bvec(&from, ITER_BVEC | WRITE, + payload_kiov, payload_niov, + payload_nob + payload_offset); + else + iov_iter_kvec(&from, ITER_KVEC | WRITE, + payload_iov, payload_niov, + payload_nob + payload_offset); + + iov_iter_advance(&from, payload_offset); + switch (type) { default: LBUG(); @@ -1637,17 +1650,8 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) ibmsg = tx->tx_msg; ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - if (payload_kiov) - lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg, - offsetof(struct kib_msg, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg, - offsetof(struct kib_msg, ibm_u.immediate.ibim_payload), - payload_niov, payload_iov, - payload_offset, payload_nob); - + copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, IBLND_MSG_SIZE, + &from); nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]); kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob); @@ -1719,8 +1723,7 @@ kiblnd_reply(lnet_ni_t *ni, struct kib_rx *rx, lnet_msg_t *lntmsg) int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) + struct iov_iter *to, unsigned int rlen) { struct kib_rx *rx = private; struct kib_msg *rxmsg = rx->rx_msg; @@ -1730,10 +1733,9 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, int post_credit = IBLND_POSTRX_PEER_CREDIT; int rc = 0; - LASSERT(mlen <= rlen); + LASSERT(iov_iter_count(to) <= rlen); LASSERT(!in_interrupt()); /* Either all pages or all vaddrs */ - LASSERT(!(kiov && iov)); switch (rxmsg->ibm_type) { default: @@ -1749,16 +1751,8 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, break; } - if (kiov) - lnet_copy_flat2kiov(niov, kiov, offset, - IBLND_MSG_SIZE, rxmsg, - offsetof(struct kib_msg, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - IBLND_MSG_SIZE, rxmsg, - offsetof(struct kib_msg, ibm_u.immediate.ibim_payload), - mlen); + copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, + IBLND_MSG_SIZE, to); lnet_finalize(ni, lntmsg, 0); break; @@ -1766,7 +1760,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, struct kib_msg *txmsg; struct kib_rdma_desc *rd; - if (!mlen) { + if (!iov_iter_count(to)) { lnet_finalize(ni, lntmsg, 0); kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0, rxmsg->ibm_u.putreq.ibprm_cookie); @@ -1784,12 +1778,16 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, txmsg = tx->tx_msg; rd = &txmsg->ibm_u.putack.ibpam_rd; - if (!kiov) + if (!(to->type & ITER_BVEC)) rc = kiblnd_setup_rd_iov(ni, tx, rd, - niov, iov, offset, mlen); + to->nr_segs, to->kvec, + to->iov_offset, + iov_iter_count(to)); else rc = kiblnd_setup_rd_kiov(ni, tx, rd, - niov, kiov, offset, mlen); + to->nr_segs, to->bvec, + to->iov_offset, + iov_iter_count(to)); if (rc) { CERROR("Can't setup PUT sink for %s: %d\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); @@ -2183,14 +2181,11 @@ kiblnd_connreq_done(struct kib_conn *conn, int status) return; } - /** - * refcount taken by cmid is not reliable after I released the glock - * because this connection is visible to other threads now, another - * thread can find and close this connection right after I released - * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is - * called, it can release the connection refcount taken by cmid. - * It means the connection could be destroyed before I finish my - * operations on it. + /* + * +1 ref for myself, this connection is visible to other threads + * now, refcount of peer:ibp_conns can be released by connection + * close from either a different thread, or the calling of + * kiblnd_check_sends_locked() below. See bz21911 for details. */ kiblnd_conn_addref(conn); write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); @@ -2202,10 +2197,9 @@ kiblnd_connreq_done(struct kib_conn *conn, int status) kiblnd_queue_tx_locked(tx, conn); } + kiblnd_check_sends_locked(conn); spin_unlock(&conn->ibc_lock); - kiblnd_check_sends(conn); - /* schedule blocked rxs */ kiblnd_handle_early_rxs(conn); @@ -2240,6 +2234,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) struct kib_rej rej; int version = IBLND_MSG_VERSION; unsigned long flags; + int max_frags; int rc; struct sockaddr_in *peer_addr; @@ -2346,22 +2341,20 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) goto failed; } - if (reqmsg->ibm_u.connparams.ibcp_max_frags > - kiblnd_rdma_frags(version, ni)) { - CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n", - libcfs_nid2str(nid), version, - reqmsg->ibm_u.connparams.ibcp_max_frags, + max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT; + if (max_frags > kiblnd_rdma_frags(version, ni)) { + CWARN("Can't accept conn from %s (version %x): max message size %d is too large (%d wanted)\n", + libcfs_nid2str(nid), version, max_frags, kiblnd_rdma_frags(version, ni)); if (version >= IBLND_MSG_VERSION) rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; goto failed; - } else if (reqmsg->ibm_u.connparams.ibcp_max_frags < - kiblnd_rdma_frags(version, ni) && !net->ibn_fmr_ps) { - CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n", - libcfs_nid2str(nid), version, - reqmsg->ibm_u.connparams.ibcp_max_frags, + } else if (max_frags < kiblnd_rdma_frags(version, ni) && + !net->ibn_fmr_ps) { + CWARN("Can't accept conn from %s (version %x): max message size %d incompatible without FMR pool (%d wanted)\n", + libcfs_nid2str(nid), version, max_frags, kiblnd_rdma_frags(version, ni)); if (version == IBLND_MSG_VERSION) @@ -2387,7 +2380,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) } /* We have validated the peer's parameters so use those */ - peer->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags; + peer->ibp_max_frags = max_frags; peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth; write_lock_irqsave(g_lock, flags); @@ -2419,23 +2412,37 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) goto failed; } - /* tie-break connection race in favour of the higher NID */ + /* + * Tie-break connection race in favour of the higher NID. + * If we keep running into a race condition multiple times, + * we have to assume that the connection attempt with the + * higher NID is stuck in a connecting state and will never + * recover. As such, we pass through this if-block and let + * the lower NID connection win so we can move forward. + */ if (peer2->ibp_connecting && - nid < ni->ni_nid) { + nid < ni->ni_nid && peer2->ibp_races < + MAX_CONN_RACES_BEFORE_ABORT) { + peer2->ibp_races++; write_unlock_irqrestore(g_lock, flags); - CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid)); + CDEBUG(D_NET, "Conn race %s\n", + libcfs_nid2str(peer2->ibp_nid)); kiblnd_peer_decref(peer); rej.ibr_why = IBLND_REJECT_CONN_RACE; goto failed; } - + if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT) + CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n", + libcfs_nid2str(peer2->ibp_nid), + MAX_CONN_RACES_BEFORE_ABORT); /** * passive connection is allowed even this peer is waiting for * reconnection. */ peer2->ibp_reconnecting = 0; + peer2->ibp_races = 0; peer2->ibp_accepting++; kiblnd_peer_addref(peer2); @@ -2494,7 +2501,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, sizeof(ackmsg->ibm_u.connparams)); ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; - ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags; + ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT; ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp); @@ -2526,9 +2533,9 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) failed: if (ni) { - lnet_ni_decref(ni); rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni); rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni); + lnet_ni_decref(ni); } rej.ibr_version = version; @@ -2556,7 +2563,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version, if (cp) { msg_size = cp->ibcp_max_msg_size; - frag_num = cp->ibcp_max_frags; + frag_num = cp->ibcp_max_frags << IBLND_FRAG_SHIFT; queue_dep = cp->ibcp_queue_depth; } @@ -2821,11 +2828,11 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob) goto failed; } - if (msg->ibm_u.connparams.ibcp_max_frags > + if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) > conn->ibc_max_frags) { CERROR("%s has incompatible max_frags %d (<=%d wanted)\n", libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_max_frags, + msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT, conn->ibc_max_frags); rc = -EPROTO; goto failed; @@ -2859,7 +2866,7 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob) conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth; conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth; conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth; - conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags; + conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT; LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn)); @@ -2916,7 +2923,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid) memset(msg, 0, sizeof(*msg)); kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams)); msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; - msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags; + msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT; msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; kiblnd_pack_msg(peer->ibp_ni, msg, version, @@ -3233,7 +3240,11 @@ kiblnd_check_conns(int idx) */ list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) { list_del(&conn->ibc_connd_list); - kiblnd_check_sends(conn); + + spin_lock(&conn->ibc_lock); + kiblnd_check_sends_locked(conn); + spin_unlock(&conn->ibc_lock); + kiblnd_conn_decref(conn); } } @@ -3419,6 +3430,12 @@ kiblnd_qp_event(struct ib_event *event, void *arg) case IB_EVENT_COMM_EST: CDEBUG(D_NET, "%s established\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); + /* + * We received a packet but connection isn't established + * probably handshake packet was lost, so free to + * force make connection established + */ + rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST); return; default: diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index 07ec540..cbc9a9c 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -1468,11 +1468,6 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) conn->ksnc_route = NULL; -#if 0 /* irrelevant with only eager routes */ - /* make route least favourite */ - list_del(&route->ksnr_list); - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); -#endif ksocknal_route_decref(route); /* drop conn's ref on route */ } diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h index a56632b..e6ca0cf 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h @@ -86,8 +86,6 @@ struct ksock_sched { /* per scheduler state */ int kss_nconns; /* # connections assigned to * this scheduler */ struct ksock_sched_info *kss_info; /* owner of it */ - struct page *kss_rx_scratch_pgs[LNET_MAX_IOV]; - struct kvec kss_scratch_iov[LNET_MAX_IOV]; }; struct ksock_sched_info { @@ -616,9 +614,7 @@ void ksocknal_shutdown(lnet_ni_t *ni); int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); int ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct kvec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); + int delayed, struct iov_iter *to, unsigned int rlen); int ksocknal_accept(lnet_ni_t *ni, struct socket *sock); int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port); @@ -635,7 +631,7 @@ int ksocknal_close_peer_conns_locked(struct ksock_peer *peer, int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why); int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr); struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer, - struct ksock_tx *tx, int nonblk); + struct ksock_tx *tx, int nonblk); int ksocknal_launch_packet(lnet_ni_t *ni, struct ksock_tx *tx, lnet_process_id_t id); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c index 303576d..c1c6f60 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c @@ -35,8 +35,8 @@ ksocknal_alloc_tx(int type, int size) spin_lock(&ksocknal_data.ksnd_tx_lock); if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - tx = list_entry(ksocknal_data.ksnd_idle_noop_txs. \ - next, struct ksock_tx, tx_list); + tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next, + struct ksock_tx, tx_list); LASSERT(tx->tx_desc_size == size); list_del(&tx->tx_list); } @@ -164,13 +164,13 @@ ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx) do { LASSERT(tx->tx_nkiov > 0); - if (nob < (int)kiov->kiov_len) { - kiov->kiov_offset += nob; - kiov->kiov_len -= nob; + if (nob < (int)kiov->bv_len) { + kiov->bv_offset += nob; + kiov->bv_len -= nob; return rc; } - nob -= (int)kiov->kiov_len; + nob -= (int)kiov->bv_len; tx->tx_kiov = ++kiov; tx->tx_nkiov--; } while (nob); @@ -326,13 +326,13 @@ ksocknal_recv_kiov(struct ksock_conn *conn) do { LASSERT(conn->ksnc_rx_nkiov > 0); - if (nob < (int)kiov->kiov_len) { - kiov->kiov_offset += nob; - kiov->kiov_len -= nob; + if (nob < (int)kiov->bv_len) { + kiov->bv_offset += nob; + kiov->bv_len -= nob; return -EAGAIN; } - nob -= kiov->kiov_len; + nob -= kiov->bv_len; conn->ksnc_rx_kiov = ++kiov; conn->ksnc_rx_nkiov--; } while (nob); @@ -1325,39 +1325,36 @@ ksocknal_process_receive(struct ksock_conn *conn) int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) + struct iov_iter *to, unsigned int rlen) { struct ksock_conn *conn = private; struct ksock_sched *sched = conn->ksnc_scheduler; - LASSERT(mlen <= rlen); - LASSERT(niov <= LNET_MAX_IOV); + LASSERT(iov_iter_count(to) <= rlen); + LASSERT(to->nr_segs <= LNET_MAX_IOV); conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = mlen; + conn->ksnc_rx_nob_wanted = iov_iter_count(to); conn->ksnc_rx_nob_left = rlen; - if (!mlen || iov) { + if (to->type & ITER_KVEC) { conn->ksnc_rx_nkiov = 0; conn->ksnc_rx_kiov = NULL; conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; conn->ksnc_rx_niov = lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov, - niov, iov, offset, mlen); + to->nr_segs, to->kvec, + to->iov_offset, iov_iter_count(to)); } else { conn->ksnc_rx_niov = 0; conn->ksnc_rx_iov = NULL; conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; conn->ksnc_rx_nkiov = lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov, - niov, kiov, offset, mlen); + to->nr_segs, to->bvec, + to->iov_offset, iov_iter_count(to)); } - LASSERT(mlen == - lnet_iov_nob(conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lnet_kiov_nob(conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); - LASSERT(conn->ksnc_rx_scheduled); spin_lock_bh(&sched->kss_lock); @@ -2008,13 +2005,6 @@ ksocknal_connect(struct ksock_route *route) list_splice_init(&peer->ksnp_tx_queue, &zombies); } -#if 0 /* irrelevant with only eager routes */ - if (!route->ksnr_deleted) { - /* make this route least-favourite for re-selection */ - list_del(&route->ksnr_list); - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - } -#endif write_unlock_bh(&ksocknal_data.ksnd_global_lock); ksocknal_peer_failed(peer); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 6a17757..6c95e98 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -73,9 +73,9 @@ ksocknal_lib_zc_capable(struct ksock_conn *conn) int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx) { + struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; struct socket *sock = conn->ksnc_sock; - int nob; - int rc; + int nob, i; if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */ conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */ @@ -83,34 +83,16 @@ ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx) !tx->tx_msg.ksm_csum) /* not checksummed */ ksocknal_lib_csum_tx(tx); - /* - * NB we can't trust socket ops to either consume our iovs - * or leave them alone. - */ - { -#if SOCKNAL_SINGLE_FRAG_TX - struct kvec scratch; - struct kvec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; - unsigned int niov = tx->tx_niov; -#endif - struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; - int i; + for (nob = i = 0; i < tx->tx_niov; i++) + nob += tx->tx_iov[i].iov_len; - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = tx->tx_iov[i]; - nob += scratchiov[i].iov_len; - } + if (!list_empty(&conn->ksnc_tx_queue) || + nob < tx->tx_resid) + msg.msg_flags |= MSG_MORE; - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - rc = kernel_sendmsg(sock, &msg, scratchiov, niov, nob); - } - return rc; + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, + tx->tx_iov, tx->tx_niov, nob); + return sock_sendmsg(sock, &msg); } int @@ -124,20 +106,16 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx) /* Not NOOP message */ LASSERT(tx->tx_lnetmsg); - /* - * NB we can't trust socket ops to either consume our iovs - * or leave them alone. - */ if (tx->tx_msg.ksm_zc_cookies[0]) { /* Zero copy is enabled */ struct sock *sk = sock->sk; - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int fragsize = kiov->kiov_len; + struct page *page = kiov->bv_page; + int offset = kiov->bv_offset; + int fragsize = kiov->bv_len; int msgflg = MSG_DONTWAIT; CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->kiov_len); + page, offset, kiov->bv_len); if (!list_empty(&conn->ksnc_tx_queue) || fragsize < tx->tx_resid) @@ -150,34 +128,19 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx) rc = tcp_sendpage(sk, page, offset, fragsize, msgflg); } } else { -#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct kvec scratch; - struct kvec *scratchiov = &scratch; - unsigned int niov = 1; -#else -#ifdef CONFIG_HIGHMEM -#warning "XXX risk of kmap deadlock on multiple frags..." -#endif - struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; - unsigned int niov = tx->tx_nkiov; -#endif struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; int i; - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } + for (nob = i = 0; i < tx->tx_nkiov; i++) + nob += kiov[i].bv_len; if (!list_empty(&conn->ksnc_tx_queue) || nob < tx->tx_resid) msg.msg_flags |= MSG_MORE; - rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob); - - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); + iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, + kiov, tx->tx_nkiov, nob); + rc = sock_sendmsg(sock, &msg); } return rc; } @@ -201,14 +164,7 @@ ksocknal_lib_eager_ack(struct ksock_conn *conn) int ksocknal_lib_recv_iov(struct ksock_conn *conn) { -#if SOCKNAL_SINGLE_FRAG_RX - struct kvec scratch; - struct kvec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_niov; -#endif struct kvec *iov = conn->ksnc_rx_iov; struct msghdr msg = { .msg_flags = 0 @@ -220,20 +176,15 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn) int sum; __u32 saved_csum; - /* - * NB we can't trust socket ops to either consume our iovs - * or leave them alone. - */ LASSERT(niov > 0); - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = iov[i]; - nob += scratchiov[i].iov_len; - } + for (nob = i = 0; i < niov; i++) + nob += iov[i].iov_len; + LASSERT(nob <= conn->ksnc_rx_nob_wanted); - rc = kernel_recvmsg(conn->ksnc_sock, &msg, scratchiov, niov, nob, - MSG_DONTWAIT); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob); + rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); saved_csum = 0; if (conn->ksnc_proto == &ksocknal_protocol_v2x) { @@ -259,67 +210,10 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn) return rc; } -static void -ksocknal_lib_kiov_vunmap(void *addr) -{ - if (!addr) - return; - - vunmap(addr); -} - -static void * -ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov, - struct kvec *iov, struct page **pages) -{ - void *addr; - int nob; - int i; - - if (!*ksocknal_tunables.ksnd_zc_recv || !pages) - return NULL; - - LASSERT(niov <= LNET_MAX_IOV); - - if (niov < 2 || - niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags) - return NULL; - - for (nob = i = 0; i < niov; i++) { - if ((kiov[i].kiov_offset && i > 0) || - (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_SIZE && i < niov - 1)) - return NULL; - - pages[i] = kiov[i].kiov_page; - nob += kiov[i].kiov_len; - } - - addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL); - if (!addr) - return NULL; - - iov->iov_base = addr + kiov[0].kiov_offset; - iov->iov_len = nob; - - return addr; -} - int ksocknal_lib_recv_kiov(struct ksock_conn *conn) { -#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct kvec scratch; - struct kvec *scratchiov = &scratch; - struct page **pages = NULL; - unsigned int niov = 1; -#else -#ifdef CONFIG_HIGHMEM -#warning "XXX risk of kmap deadlock on multiple frags..." -#endif - struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; - struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs; unsigned int niov = conn->ksnc_rx_nkiov; -#endif lnet_kiov_t *kiov = conn->ksnc_rx_kiov; struct msghdr msg = { .msg_flags = 0 @@ -328,63 +222,32 @@ ksocknal_lib_recv_kiov(struct ksock_conn *conn) int i; int rc; void *base; - void *addr; int sum; int fragnob; - int n; - - /* - * NB we can't trust socket ops to either consume our iovs - * or leave them alone. - */ - addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages); - if (addr) { - nob = scratchiov[0].iov_len; - n = 1; - } else { - for (nob = i = 0; i < niov; i++) { - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - } - n = niov; - } + for (nob = i = 0; i < niov; i++) + nob += kiov[i].bv_len; LASSERT(nob <= conn->ksnc_rx_nob_wanted); - rc = kernel_recvmsg(conn->ksnc_sock, &msg, (struct kvec *)scratchiov, - n, nob, MSG_DONTWAIT); + iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob); + rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); if (conn->ksnc_msg.ksm_csum) { for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { LASSERT(i < niov); - /* - * Dang! have to kmap again because I have nowhere to - * stash the mapped address. But by doing it while the - * page is still mapped, the kernel just bumps the map - * count and returns me the address it stashed. - */ - base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - fragnob = kiov[i].kiov_len; + base = kmap(kiov[i].bv_page) + kiov[i].bv_offset; + fragnob = kiov[i].bv_len; if (fragnob > sum) fragnob = sum; conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, base, fragnob); - kunmap(kiov[i].kiov_page); + kunmap(kiov[i].bv_page); } } - - if (addr) { - ksocknal_lib_kiov_vunmap(addr); - } else { - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); - } - return rc; } @@ -406,12 +269,12 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx) if (tx->tx_kiov) { for (i = 0; i < tx->tx_nkiov; i++) { - base = kmap(tx->tx_kiov[i].kiov_page) + - tx->tx_kiov[i].kiov_offset; + base = kmap(tx->tx_kiov[i].bv_page) + + tx->tx_kiov[i].bv_offset; - csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len); + csum = ksocknal_csum(csum, base, tx->tx_kiov[i].bv_len); - kunmap(tx->tx_kiov[i].kiov_page); + kunmap(tx->tx_kiov[i].bv_page); } } else { for (i = 1; i < tx->tx_niov; i++) diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c index 42b15a7..23b36b8 100644 --- a/drivers/staging/lustre/lnet/libcfs/debug.c +++ b/drivers/staging/lustre/lnet/libcfs/debug.c @@ -328,15 +328,20 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) */ void libcfs_debug_dumplog_internal(void *arg) { + static time64_t last_dump_time; + time64_t current_time; void *journal_info; journal_info = current->journal_info; current->journal_info = NULL; + current_time = ktime_get_real_seconds(); - if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) { + if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) && + current_time > last_dump_time) { + last_dump_time = current_time; snprintf(debug_file_name, sizeof(debug_file_name) - 1, "%s.%lld.%ld", libcfs_debug_file_path_arr, - (s64)ktime_get_real_seconds(), (long_ptr_t)arg); + (s64)current_time, (long_ptr_t)arg); pr_alert("LustreError: dumping log to %s\n", debug_file_name); cfs_tracefile_dump_all_pages(debug_file_name); libcfs_run_debug_log_upcall(debug_file_name); diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c index 9288ee0..e4b1a0a 100644 --- a/drivers/staging/lustre/lnet/libcfs/fail.c +++ b/drivers/staging/lustre/lnet/libcfs/fail.c @@ -90,8 +90,10 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set) } } - if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) && - (value & CFS_FAIL_ONCE)) + /* Take into account the current call for FAIL_ONCE for ORSET only, + * as RESET is a new fail_loc, it does not change the current call + */ + if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE)) set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); /* Lost race to set CFS_FAILED_BIT. */ if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) { diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c index fc697cd..56a614d 100644 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c @@ -229,8 +229,6 @@ cfs_str2num_check(char *str, int nob, unsigned *num, char *endp, cache; int rc; - str = cfs_trimwhite(str); - /** * kstrouint can only handle strings composed * of only numbers. We need to scan the string diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c index b52518c5..e8b1a61 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c @@ -74,6 +74,17 @@ struct cfs_cpt_data { static struct cfs_cpt_data cpt_data; +static void +cfs_node_to_cpumask(int node, cpumask_t *mask) +{ + const cpumask_t *tmp = cpumask_of_node(node); + + if (tmp) + cpumask_copy(mask, tmp); + else + cpumask_clear(mask); +} + void cfs_cpt_table_free(struct cfs_cpt_table *cptab) { @@ -403,7 +414,7 @@ cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) mutex_lock(&cpt_data.cpt_mutex); mask = cpt_data.cpt_cpumask; - cpumask_copy(mask, cpumask_of_node(node)); + cfs_node_to_cpumask(node, mask); rc = cfs_cpt_set_cpumask(cptab, cpt, mask); @@ -427,7 +438,7 @@ cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) mutex_lock(&cpt_data.cpt_mutex); mask = cpt_data.cpt_cpumask; - cpumask_copy(mask, cpumask_of_node(node)); + cfs_node_to_cpumask(node, mask); cfs_cpt_unset_cpumask(cptab, cpt, mask); @@ -749,7 +760,7 @@ cfs_cpt_table_create(int ncpt) } for_each_online_node(i) { - cpumask_copy(mask, cpumask_of_node(i)); + cfs_node_to_cpumask(i, mask); while (!cpumask_empty(mask)) { struct cfs_cpu_partition *part; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c index 5c0116a..7f56d2c 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c @@ -95,8 +95,8 @@ static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg, err = crypto_ahash_setkey(tfm, key, key_len); else if ((*type)->cht_key != 0) err = crypto_ahash_setkey(tfm, - (unsigned char *)&((*type)->cht_key), - (*type)->cht_size); + (unsigned char *)&((*type)->cht_key), + (*type)->cht_size); if (err != 0) { ahash_request_free(*req); diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c index 346db89..4daf828 100644 --- a/drivers/staging/lustre/lnet/lnet/api-ni.c +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c @@ -1286,6 +1286,25 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) sizeof(*ni->ni_lnd_tunables)); } + /* + * If given some LND tunable parameters, parse those now to + * override the values in the NI structure. + */ + if (conf) { + if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) + ni->ni_peerrtrcredits = + conf->cfg_config_u.cfg_net.net_peer_rtr_credits; + if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) + ni->ni_peertimeout = + conf->cfg_config_u.cfg_net.net_peer_timeout; + if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1) + ni->ni_peertxcredits = + conf->cfg_config_u.cfg_net.net_peer_tx_credits; + if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) + ni->ni_maxtxcredits = + conf->cfg_config_u.cfg_net.net_max_tx_credits; + } + rc = lnd->lnd_startup(ni); mutex_unlock(&the_lnet.ln_lnd_mutex); @@ -1299,33 +1318,6 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) goto failed0; } - /* - * If given some LND tunable parameters, parse those now to - * override the values in the NI structure. - */ - if (conf && conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) { - ni->ni_peerrtrcredits = - conf->cfg_config_u.cfg_net.net_peer_rtr_credits; - } - if (conf && conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) { - ni->ni_peertimeout = - conf->cfg_config_u.cfg_net.net_peer_timeout; - } - /* - * TODO - * Note: For now, don't allow the user to change - * peertxcredits as this number is used in the - * IB LND to control queue depth. - * - * if (conf && conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1) - * ni->ni_peertxcredits = - * conf->cfg_config_u.cfg_net.net_peer_tx_credits; - */ - if (conf && conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) { - ni->ni_maxtxcredits = - conf->cfg_config_u.cfg_net.net_max_tx_credits; - } - LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query); lnet_net_lock(LNET_LOCK_EX); diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c index a72afdf..9e2183f 100644 --- a/drivers/staging/lustre/lnet/lnet/config.c +++ b/drivers/staging/lustre/lnet/lnet/config.c @@ -31,6 +31,8 @@ */ #define DEBUG_SUBSYSTEM S_LNET +#include <linux/nsproxy.h> +#include <net/net_namespace.h> #include "../../include/linux/lnet/lib-lnet.h" struct lnet_text_buf { /* tmp struct for parsing routes */ @@ -110,6 +112,11 @@ lnet_ni_free(struct lnet_ni *ni) LIBCFS_FREE(ni->ni_interfaces[i], strlen(ni->ni_interfaces[i]) + 1); } + + /* release reference to net namespace */ + if (ni->ni_net_ns) + put_net(ni->ni_net_ns); + LIBCFS_FREE(ni, sizeof(*ni)); } @@ -171,6 +178,13 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) /* LND will fill in the address part of the NID */ ni->ni_nid = LNET_MKNID(net, 0); + + /* Store net namespace in which current ni is being created */ + if (current->nsproxy->net_ns) + ni->ni_net_ns = get_net(current->nsproxy->net_ns); + else + ni->ni_net_ns = NULL; + ni->ni_last_alive = ktime_get_real_seconds(); list_add_tail(&ni->ni_list, nilist); return ni; diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c index 1834bf7..eab53cd 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-md.c +++ b/drivers/staging/lustre/lnet/lnet/lib-md.c @@ -134,11 +134,11 @@ lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) for (i = 0; i < (int)niov; i++) { /* We take the page pointer on trust */ - if (lmd->md_iov.kiov[i].kiov_offset + - lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE) + if (lmd->md_iov.kiov[i].bv_offset + + lmd->md_iov.kiov[i].bv_len > PAGE_SIZE) return -EINVAL; /* invalid length */ - total_length += lmd->md_iov.kiov[i].kiov_len; + total_length += lmd->md_iov.kiov[i].bv_len; } lmd->md_length = total_length; @@ -292,11 +292,12 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, return -ENOMEM; rc = lnet_md_build(md, &umd, unlink); + if (rc) + goto out_free; + cpt = lnet_cpt_of_cookie(meh.cookie); lnet_res_lock(cpt); - if (rc) - goto failed; me = lnet_handle2me(&meh); if (!me) @@ -307,7 +308,7 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, rc = lnet_md_link(md, umd.eq_handle, cpt); if (rc) - goto failed; + goto out_unlock; /* * attach this MD to portal of ME and check if it matches any @@ -324,10 +325,10 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, return 0; - failed: - lnet_md_free(md); - +out_unlock: lnet_res_unlock(cpt); +out_free: + lnet_md_free(md); return rc; } EXPORT_SYMBOL(LNetMDAttach); @@ -370,24 +371,25 @@ LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle) return -ENOMEM; rc = lnet_md_build(md, &umd, unlink); + if (rc) + goto out_free; cpt = lnet_res_lock_current(); - if (rc) - goto failed; rc = lnet_md_link(md, umd.eq_handle, cpt); if (rc) - goto failed; + goto out_unlock; lnet_md2handle(handle, md); lnet_res_unlock(cpt); return 0; - failed: +out_unlock: + lnet_res_unlock(cpt); +out_free: lnet_md_free(md); - lnet_res_unlock(cpt); return rc; } EXPORT_SYMBOL(LNetMDBind); diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index e6d3b80..48e6f8f 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -37,6 +37,8 @@ #define DEBUG_SUBSYSTEM S_LNET #include "../../include/linux/lnet/lib-lnet.h" +#include <linux/nsproxy.h> +#include <net/net_namespace.h> static int local_nid_dist_zero = 1; module_param(local_nid_dist_zero, int, 0444); @@ -166,25 +168,17 @@ lnet_iov_nob(unsigned int niov, struct kvec *iov) EXPORT_SYMBOL(lnet_iov_nob); void -lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset, - unsigned int nsiov, struct kvec *siov, unsigned int soffset, - unsigned int nob) +lnet_copy_iov2iter(struct iov_iter *to, + unsigned int nsiov, const struct kvec *siov, + unsigned int soffset, unsigned int nob) { /* NB diov, siov are READ-ONLY */ - unsigned int this_nob; + const char *s; + size_t left; if (!nob) return; - /* skip complete frags before 'doffset' */ - LASSERT(ndiov > 0); - while (doffset >= diov->iov_len) { - doffset -= diov->iov_len; - diov++; - ndiov--; - LASSERT(ndiov > 0); - } - /* skip complete frags before 'soffset' */ LASSERT(nsiov > 0); while (soffset >= siov->iov_len) { @@ -194,39 +188,68 @@ lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset, LASSERT(nsiov > 0); } + s = (char *)siov->iov_base + soffset; + left = siov->iov_len - soffset; do { - LASSERT(ndiov > 0); + size_t n, copy = left; LASSERT(nsiov > 0); - this_nob = min(diov->iov_len - doffset, - siov->iov_len - soffset); - this_nob = min(this_nob, nob); - memcpy((char *)diov->iov_base + doffset, - (char *)siov->iov_base + soffset, this_nob); - nob -= this_nob; + if (copy > nob) + copy = nob; + n = copy_to_iter(s, copy, to); + if (n != copy) + return; + nob -= n; - if (diov->iov_len > doffset + this_nob) { - doffset += this_nob; - } else { - diov++; - ndiov--; - doffset = 0; - } + siov++; + s = (char *)siov->iov_base; + left = siov->iov_len; + nsiov--; + } while (nob > 0); +} +EXPORT_SYMBOL(lnet_copy_iov2iter); - if (siov->iov_len > soffset + this_nob) { - soffset += this_nob; - } else { - siov++; - nsiov--; - soffset = 0; - } +void +lnet_copy_kiov2iter(struct iov_iter *to, + unsigned int nsiov, const lnet_kiov_t *siov, + unsigned int soffset, unsigned int nob) +{ + if (!nob) + return; + + LASSERT(!in_interrupt()); + + LASSERT(nsiov > 0); + while (soffset >= siov->bv_len) { + soffset -= siov->bv_len; + siov++; + nsiov--; + LASSERT(nsiov > 0); + } + + do { + size_t copy = siov->bv_len - soffset, n; + + LASSERT(nsiov > 0); + + if (copy > nob) + copy = nob; + n = copy_page_to_iter(siov->bv_page, + siov->bv_offset + soffset, + copy, to); + if (n != copy) + return; + nob -= n; + siov++; + nsiov--; + soffset = 0; } while (nob > 0); } -EXPORT_SYMBOL(lnet_copy_iov2iov); +EXPORT_SYMBOL(lnet_copy_kiov2iter); int lnet_extract_iov(int dst_niov, struct kvec *dst, - int src_niov, struct kvec *src, + int src_niov, const struct kvec *src, unsigned int offset, unsigned int len) { /* @@ -280,238 +303,15 @@ lnet_kiov_nob(unsigned int niov, lnet_kiov_t *kiov) LASSERT(!niov || kiov); while (niov-- > 0) - nob += (kiov++)->kiov_len; + nob += (kiov++)->bv_len; return nob; } EXPORT_SYMBOL(lnet_kiov_nob); -void -lnet_copy_kiov2kiov(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset, - unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset, - unsigned int nob) -{ - /* NB diov, siov are READ-ONLY */ - unsigned int this_nob; - char *daddr = NULL; - char *saddr = NULL; - - if (!nob) - return; - - LASSERT(!in_interrupt()); - - LASSERT(ndiov > 0); - while (doffset >= diov->kiov_len) { - doffset -= diov->kiov_len; - diov++; - ndiov--; - LASSERT(ndiov > 0); - } - - LASSERT(nsiov > 0); - while (soffset >= siov->kiov_len) { - soffset -= siov->kiov_len; - siov++; - nsiov--; - LASSERT(nsiov > 0); - } - - do { - LASSERT(ndiov > 0); - LASSERT(nsiov > 0); - this_nob = min(diov->kiov_len - doffset, - siov->kiov_len - soffset); - this_nob = min(this_nob, nob); - - if (!daddr) - daddr = ((char *)kmap(diov->kiov_page)) + - diov->kiov_offset + doffset; - if (!saddr) - saddr = ((char *)kmap(siov->kiov_page)) + - siov->kiov_offset + soffset; - - /* - * Vanishing risk of kmap deadlock when mapping 2 pages. - * However in practice at least one of the kiovs will be mapped - * kernel pages and the map/unmap will be NOOPs - */ - memcpy(daddr, saddr, this_nob); - nob -= this_nob; - - if (diov->kiov_len > doffset + this_nob) { - daddr += this_nob; - doffset += this_nob; - } else { - kunmap(diov->kiov_page); - daddr = NULL; - diov++; - ndiov--; - doffset = 0; - } - - if (siov->kiov_len > soffset + this_nob) { - saddr += this_nob; - soffset += this_nob; - } else { - kunmap(siov->kiov_page); - saddr = NULL; - siov++; - nsiov--; - soffset = 0; - } - } while (nob > 0); - - if (daddr) - kunmap(diov->kiov_page); - if (saddr) - kunmap(siov->kiov_page); -} -EXPORT_SYMBOL(lnet_copy_kiov2kiov); - -void -lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset, - unsigned int nkiov, lnet_kiov_t *kiov, - unsigned int kiovoffset, unsigned int nob) -{ - /* NB iov, kiov are READ-ONLY */ - unsigned int this_nob; - char *addr = NULL; - - if (!nob) - return; - - LASSERT(!in_interrupt()); - - LASSERT(niov > 0); - while (iovoffset >= iov->iov_len) { - iovoffset -= iov->iov_len; - iov++; - niov--; - LASSERT(niov > 0); - } - - LASSERT(nkiov > 0); - while (kiovoffset >= kiov->kiov_len) { - kiovoffset -= kiov->kiov_len; - kiov++; - nkiov--; - LASSERT(nkiov > 0); - } - - do { - LASSERT(niov > 0); - LASSERT(nkiov > 0); - this_nob = min(iov->iov_len - iovoffset, - (__kernel_size_t)kiov->kiov_len - kiovoffset); - this_nob = min(this_nob, nob); - - if (!addr) - addr = ((char *)kmap(kiov->kiov_page)) + - kiov->kiov_offset + kiovoffset; - - memcpy((char *)iov->iov_base + iovoffset, addr, this_nob); - nob -= this_nob; - - if (iov->iov_len > iovoffset + this_nob) { - iovoffset += this_nob; - } else { - iov++; - niov--; - iovoffset = 0; - } - - if (kiov->kiov_len > kiovoffset + this_nob) { - addr += this_nob; - kiovoffset += this_nob; - } else { - kunmap(kiov->kiov_page); - addr = NULL; - kiov++; - nkiov--; - kiovoffset = 0; - } - - } while (nob > 0); - - if (addr) - kunmap(kiov->kiov_page); -} -EXPORT_SYMBOL(lnet_copy_kiov2iov); - -void -lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov, - unsigned int kiovoffset, unsigned int niov, - struct kvec *iov, unsigned int iovoffset, - unsigned int nob) -{ - /* NB kiov, iov are READ-ONLY */ - unsigned int this_nob; - char *addr = NULL; - - if (!nob) - return; - - LASSERT(!in_interrupt()); - - LASSERT(nkiov > 0); - while (kiovoffset >= kiov->kiov_len) { - kiovoffset -= kiov->kiov_len; - kiov++; - nkiov--; - LASSERT(nkiov > 0); - } - - LASSERT(niov > 0); - while (iovoffset >= iov->iov_len) { - iovoffset -= iov->iov_len; - iov++; - niov--; - LASSERT(niov > 0); - } - - do { - LASSERT(nkiov > 0); - LASSERT(niov > 0); - this_nob = min((__kernel_size_t)kiov->kiov_len - kiovoffset, - iov->iov_len - iovoffset); - this_nob = min(this_nob, nob); - - if (!addr) - addr = ((char *)kmap(kiov->kiov_page)) + - kiov->kiov_offset + kiovoffset; - - memcpy(addr, (char *)iov->iov_base + iovoffset, this_nob); - nob -= this_nob; - - if (kiov->kiov_len > kiovoffset + this_nob) { - addr += this_nob; - kiovoffset += this_nob; - } else { - kunmap(kiov->kiov_page); - addr = NULL; - kiov++; - nkiov--; - kiovoffset = 0; - } - - if (iov->iov_len > iovoffset + this_nob) { - iovoffset += this_nob; - } else { - iov++; - niov--; - iovoffset = 0; - } - } while (nob > 0); - - if (addr) - kunmap(kiov->kiov_page); -} -EXPORT_SYMBOL(lnet_copy_iov2kiov); - int lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst, - int src_niov, lnet_kiov_t *src, + int src_niov, const lnet_kiov_t *src, unsigned int offset, unsigned int len) { /* @@ -526,8 +326,8 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst, return 0; /* no frags */ LASSERT(src_niov > 0); - while (offset >= src->kiov_len) { /* skip initial frags */ - offset -= src->kiov_len; + while (offset >= src->bv_len) { /* skip initial frags */ + offset -= src->bv_len; src_niov--; src++; LASSERT(src_niov > 0); @@ -538,19 +338,19 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst, LASSERT(src_niov > 0); LASSERT((int)niov <= dst_niov); - frag_len = src->kiov_len - offset; - dst->kiov_page = src->kiov_page; - dst->kiov_offset = src->kiov_offset + offset; + frag_len = src->bv_len - offset; + dst->bv_page = src->bv_page; + dst->bv_offset = src->bv_offset + offset; if (len <= frag_len) { - dst->kiov_len = len; - LASSERT(dst->kiov_offset + dst->kiov_len + dst->bv_len = len; + LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE); return niov; } - dst->kiov_len = frag_len; - LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); + dst->bv_len = frag_len; + LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE); len -= frag_len; dst++; @@ -569,6 +369,7 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, unsigned int niov = 0; struct kvec *iov = NULL; lnet_kiov_t *kiov = NULL; + struct iov_iter to; int rc; LASSERT(!in_interrupt()); @@ -594,8 +395,14 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, } } - rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, - niov, iov, kiov, offset, mlen, rlen); + if (iov) { + iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset); + iov_iter_advance(&to, offset); + } else { + iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset); + iov_iter_advance(&to, offset); + } + rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen); if (rc < 0) lnet_finalize(ni, msg, rc); } @@ -2002,6 +1809,9 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), lnet_msgtyp2str(type), rc); lnet_msg_free(msg); + if (rc == -ESHUTDOWN) + /* We are shutting down. Don't do anything more */ + return 0; goto drop; } @@ -2512,6 +2322,15 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) } if (LNET_NIDNET(ni->ni_nid) == dstnet) { + /* + * Check if ni was originally created in + * current net namespace. + * If not, assign order above 0xffff0000, + * to make this ni not a priority. + */ + if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns)) + order += 0xffff0000; + if (srcnidp) *srcnidp = ni->ni_nid; if (orderp) diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c index 910e106..0897e58 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-msg.c +++ b/drivers/staging/lustre/lnet/lnet/lib-msg.c @@ -449,23 +449,7 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status) if (!msg) return; -#if 0 - CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n", - lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target), - msg->msg_target_is_router ? "t" : "", - msg->msg_routing ? "X" : "", - msg->msg_ack ? "A" : "", - msg->msg_sending ? "S" : "", - msg->msg_receiving ? "R" : "", - msg->msg_delayed ? "d" : "", - msg->msg_txcredit ? "C" : "", - msg->msg_peertxcredit ? "c" : "", - msg->msg_rtrcredit ? "F" : "", - msg->msg_peerrtrcredit ? "f" : "", - msg->msg_onactivelist ? "!" : "", - !msg->msg_txpeer ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid), - !msg->msg_rxpeer ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid)); -#endif + msg->msg_ev.status = status; if (msg->msg_md) { diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index 891fd59..4e6dd51 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -265,21 +265,17 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); unsigned long then; struct timeval tv; + struct kvec iov = { .iov_base = buffer, .iov_len = nob }; + struct msghdr msg = {NULL,}; LASSERT(nob > 0); /* * Caller may pass a zero timeout if she thinks the socket buffer is * empty enough to take the whole message immediately */ + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob); for (;;) { - struct kvec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_flags = !timeout ? MSG_DONTWAIT : 0 - }; - + msg.msg_flags = !timeout ? MSG_DONTWAIT : 0; if (timeout) { /* Set send timeout to remaining time */ jiffies_to_timeval(jiffies_left, &tv); @@ -296,9 +292,6 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) rc = kernel_sendmsg(sock, &msg, &iov, 1, nob); jiffies_left -= jiffies - then; - if (rc == nob) - return 0; - if (rc < 0) return rc; @@ -307,11 +300,11 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) return -ECONNABORTED; } + if (!msg_data_left(&msg)) + break; + if (jiffies_left <= 0) return -EAGAIN; - - buffer = ((char *)buffer) + rc; - nob -= rc; } return 0; } diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c index 08402712..cb213b8 100644 --- a/drivers/staging/lustre/lnet/lnet/lo.c +++ b/drivers/staging/lustre/lnet/lnet/lo.c @@ -42,36 +42,23 @@ lolnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) static int lolnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct kvec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) + int delayed, struct iov_iter *to, unsigned int rlen) { lnet_msg_t *sendmsg = private; if (lntmsg) { /* not discarding */ - if (sendmsg->msg_iov) { - if (iov) - lnet_copy_iov2iov(niov, iov, offset, - sendmsg->msg_niov, - sendmsg->msg_iov, - sendmsg->msg_offset, mlen); - else - lnet_copy_iov2kiov(niov, kiov, offset, - sendmsg->msg_niov, - sendmsg->msg_iov, - sendmsg->msg_offset, mlen); - } else { - if (iov) - lnet_copy_kiov2iov(niov, iov, offset, - sendmsg->msg_niov, - sendmsg->msg_kiov, - sendmsg->msg_offset, mlen); - else - lnet_copy_kiov2kiov(niov, kiov, offset, - sendmsg->msg_niov, - sendmsg->msg_kiov, - sendmsg->msg_offset, mlen); - } + if (sendmsg->msg_iov) + lnet_copy_iov2iter(to, + sendmsg->msg_niov, + sendmsg->msg_iov, + sendmsg->msg_offset, + iov_iter_count(to)); + else + lnet_copy_kiov2iter(to, + sendmsg->msg_niov, + sendmsg->msg_kiov, + sendmsg->msg_offset, + iov_iter_count(to)); lnet_finalize(ni, lntmsg, 0); } diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c index 0635432..063ad55 100644 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ b/drivers/staging/lustre/lnet/lnet/router.c @@ -1307,7 +1307,7 @@ lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages) int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]); while (--npages >= 0) - __free_page(rb->rb_kiov[npages].kiov_page); + __free_page(rb->rb_kiov[npages].bv_page); LIBCFS_FREE(rb, sz); } @@ -1333,15 +1333,15 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) GFP_KERNEL | __GFP_ZERO, 0); if (!page) { while (--i >= 0) - __free_page(rb->rb_kiov[i].kiov_page); + __free_page(rb->rb_kiov[i].bv_page); LIBCFS_FREE(rb, sz); return NULL; } - rb->rb_kiov[i].kiov_len = PAGE_SIZE; - rb->rb_kiov[i].kiov_offset = 0; - rb->rb_kiov[i].kiov_page = page; + rb->rb_kiov[i].bv_len = PAGE_SIZE; + rb->rb_kiov[i].bv_offset = 0; + rb->rb_kiov[i].bv_page = page; } return rb; @@ -1693,7 +1693,7 @@ lnet_rtrpools_adjust(int tiny, int small, int large) int lnet_rtrpools_enable(void) { - int rc; + int rc = 0; if (the_lnet.ln_routing) return 0; @@ -1706,9 +1706,9 @@ lnet_rtrpools_enable(void) * if we are just configuring this for the first * time. */ - return lnet_rtrpools_alloc(1); - - rc = lnet_rtrpools_adjust_helper(0, 0, 0); + rc = lnet_rtrpools_alloc(1); + else + rc = lnet_rtrpools_adjust_helper(0, 0, 0); if (rc) return rc; @@ -1718,7 +1718,7 @@ lnet_rtrpools_enable(void) the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED; lnet_net_unlock(LNET_LOCK_EX); - return 0; + return rc; } void diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c index 13d0454..b20c5d3 100644 --- a/drivers/staging/lustre/lnet/selftest/brw_test.c +++ b/drivers/staging/lustre/lnet/selftest/brw_test.c @@ -226,7 +226,7 @@ brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) struct page *pg; for (i = 0; i < bk->bk_niov; i++) { - pg = bk->bk_iovs[i].kiov_page; + pg = bk->bk_iovs[i].bv_page; brw_fill_page(pg, pattern, magic); } } @@ -238,7 +238,7 @@ brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) struct page *pg; for (i = 0; i < bk->bk_niov; i++) { - pg = bk->bk_iovs[i].kiov_page; + pg = bk->bk_iovs[i].bv_page; if (brw_check_page(pg, pattern, magic)) { CERROR("Bulk page %p (%d/%d) is corrupted!\n", pg, i, bk->bk_niov); diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c index 1be3cad..55afb53 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ b/drivers/staging/lustre/lnet/selftest/conrpc.c @@ -152,10 +152,10 @@ lstcon_rpc_put(struct lstcon_rpc *crpc) LASSERT(list_empty(&crpc->crp_link)); for (i = 0; i < bulk->bk_niov; i++) { - if (!bulk->bk_iovs[i].kiov_page) + if (!bulk->bk_iovs[i].bv_page) continue; - __free_page(bulk->bk_iovs[i].kiov_page); + __free_page(bulk->bk_iovs[i].bv_page); } srpc_client_rpc_decref(crpc->crp_rpc); @@ -705,7 +705,7 @@ lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov) LASSERT(i < nkiov); - pid = (lnet_process_id_packed_t *)page_address(kiov[i].kiov_page); + pid = (lnet_process_id_packed_t *)page_address(kiov[i].bv_page); return &pid[idx % SFW_ID_PER_PAGE]; } @@ -849,12 +849,11 @@ lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats, min_t(int, nob, PAGE_SIZE); nob -= len; - bulk->bk_iovs[i].kiov_offset = 0; - bulk->bk_iovs[i].kiov_len = len; - bulk->bk_iovs[i].kiov_page = - alloc_page(GFP_KERNEL); + bulk->bk_iovs[i].bv_offset = 0; + bulk->bk_iovs[i].bv_len = len; + bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL); - if (!bulk->bk_iovs[i].kiov_page) { + if (!bulk->bk_iovs[i].bv_page) { lstcon_rpc_put(*crpc); return -ENOMEM; } diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c index 4c33621..a0fcbf3 100644 --- a/drivers/staging/lustre/lnet/selftest/console.c +++ b/drivers/staging/lustre/lnet/selftest/console.c @@ -1993,8 +1993,6 @@ static void lstcon_init_acceptor_service(void) lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX; } -extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr); - static DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry); /* initialize console */ diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h index 78b1477..78388a6 100644 --- a/drivers/staging/lustre/lnet/selftest/console.h +++ b/drivers/staging/lustre/lnet/selftest/console.h @@ -184,6 +184,7 @@ lstcon_id2hash(lnet_process_id_t id, struct list_head *hash) return &hash[idx]; } +int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr); int lstcon_console_init(void); int lstcon_console_fini(void); int lstcon_session_match(lst_sid_t sid); diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c index c2f121f..abbd628 100644 --- a/drivers/staging/lustre/lnet/selftest/framework.c +++ b/drivers/staging/lustre/lnet/selftest/framework.c @@ -784,8 +784,8 @@ sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc) lnet_process_id_packed_t id; int j; - dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page); - LASSERT(dests); /* my pages are within KVM always */ + dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page); + LASSERT(dests); /* my pages are within KVM always */ id = dests[i % SFW_ID_PER_PAGE]; if (msg->msg_magic != SRPC_MSG_MAGIC) sfw_unpack_id(id); diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index 3b26d6e..f5619d8 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -91,9 +91,9 @@ srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int nob) LASSERT(nob > 0); LASSERT(i >= 0 && i < bk->bk_niov); - bk->bk_iovs[i].kiov_offset = 0; - bk->bk_iovs[i].kiov_page = pg; - bk->bk_iovs[i].kiov_len = nob; + bk->bk_iovs[i].bv_offset = 0; + bk->bk_iovs[i].bv_page = pg; + bk->bk_iovs[i].bv_len = nob; return nob; } @@ -106,7 +106,7 @@ srpc_free_bulk(struct srpc_bulk *bk) LASSERT(bk); for (i = 0; i < bk->bk_niov; i++) { - pg = bk->bk_iovs[i].kiov_page; + pg = bk->bk_iovs[i].bv_page; if (!pg) break; |