From ceb26569af5bc33a8f47b755fd64f12c81801ce8 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 12 May 2017 09:19:36 -0700 Subject: IB/hfi1: Remove unnecessary initialization from tx request The tx request is unnecessarily initialized in the hot code path with memset(), however, there's no need to do this as most fields are initialized later on. this initialization shows to be costly in the profile. Remove unnecessary initialization from tx request and make sure all variables are initialized properly. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index d55339f..16fd519 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -607,12 +607,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); req = pq->reqs + info.comp_idx; - memset(req, 0, sizeof(*req)); req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ + req->data_len = 0; req->pq = pq; req->cq = cq; req->status = -1; req->ahg_idx = -1; + req->iov_idx = 0; + req->sent = 0; + req->seqnum = 0; + req->seqcomp = 0; + req->seqsubmitted = 0; + req->flags = 0; + req->tids = NULL; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -701,12 +708,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, /* Save all the IO vector structures */ for (i = 0; i < req->data_iovs; i++) { + req->iovs[i].offset = 0; INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(req->iovs[i].iov)); ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { + req->data_iovs = i; req->status = ret; goto free_req; } @@ -749,6 +758,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } req->tids = tmp; req->n_tids = ntids; + req->tididx = 0; idx++; } -- cgit v1.1 From aa560df381f7199fafa4e7f71382de28f0400270 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 12 May 2017 09:19:47 -0700 Subject: IB/hfi1: Remove unused mk_qpn function Leftover function that is not used. Remove it. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 650305c..e91be05 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -73,12 +73,6 @@ static void iowait_wakeup(struct iowait *wait, int reason); static void iowait_sdma_drained(struct iowait *wait); static void qp_pio_drain(struct rvt_qp *qp); -static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, - struct rvt_qpn_map *map, unsigned off) -{ - return (map - qpt->map) * RVT_BITS_PER_PAGE + off; -} - const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { [IB_WR_RDMA_WRITE] = { .length = sizeof(struct ib_rdma_wr), -- cgit v1.1 From 7dafbab3753fcf59bc81748e5b2c5bf04e1c62c7 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:19:55 -0700 Subject: IB/hfi1: Add functions to parse BTH/IB headers Improve code readablity by adding inline functions to read specific BTH/IB fields without knowledge of byte offsets. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 6 +++--- drivers/infiniband/hw/hfi1/rc.c | 8 ++++---- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 6 +++--- drivers/infiniband/hw/hfi1/verbs.c | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a50870e..0583479 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -286,7 +286,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, goto drop; } /* Get the destination QP number. */ - qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + qp_num = ib_bth_get_qpn(ohdr); if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { struct rvt_qp *qp; unsigned long flags; @@ -438,7 +438,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, case IB_QPT_GSI: case IB_QPT_UD: rlid = ib_get_slid(hdr); - rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; + rqpn = ib_get_sqpn(ohdr); svc_type = IB_CC_SVCTYPE_UD; is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); @@ -461,7 +461,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & IB_FECN_SMASK)) { - u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); + u16 pkey = ib_bth_get_pkey(ohdr); return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1080778..66e6843 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -765,7 +765,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, ohdr->u.aeth = rvt_compute_aeth(qp); sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); + pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; hdr.lrh[0] = cpu_to_be16(lrh0); @@ -1009,7 +1009,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) return; } - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); reset_sending_psn(qp, psn); /* @@ -1943,7 +1943,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) is_fecn = process_ecn(qp, packet, false); - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* @@ -2388,7 +2388,7 @@ void hfi1_rc_hdrerr( if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) return; - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* Only deal with RDMA Writes for now */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 5da1e45..2a5650f 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -319,7 +319,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) process_ecn(qp, packet, true); - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* Compare the PSN verses the expected PSN. */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 6a4e95c..49fe179 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -549,7 +549,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, hdr.lrh[3] = cpu_to_be16(slid); plen = 2 /* PBC */ + hwords; - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { @@ -689,8 +689,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u16 slid; u8 extra_bytes; - qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; + qkey = ib_get_qkey(ohdr); + src_qp = ib_get_sqpn(ohdr); dlid = ib_get_dlid(hdr); bth1 = be32_to_cpu(ohdr->bth[1]); slid = ib_get_slid(hdr); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 90e7b77..128d291 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -595,7 +595,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ - qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; + qp_num = ib_bth_get_qpn(packet->ohdr); lid = ib_get_dlid(hdr); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { @@ -863,7 +863,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /* No vl15 here */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) pbc = hfi1_fault_tx(qp, opcode, pbc); @@ -999,7 +999,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u8 opcode = get_opcode(&tx->phdr.hdr); /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) pbc = hfi1_fault_tx(qp, opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); -- cgit v1.1 From 228d2af1b723deedee38f03d144b7d25b39f6f86 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:20:08 -0700 Subject: IB/hfi1: Separate input/output header tracing Calls to trace incoming packets will now receive the packet context as parameter. This enables trace support for future packet types. Header trace output is in the format : which makes parsing easier. input_ibhdr trace before change: -0 [001] d.h. 5904.250925: input_ibhdr: [0000:05:00.0] vl 0 lver 0 sl 0 lnh 2,LRH_BTH dlid 0002 len 18 slid 0001 op 0x64,UD_SEND_ONLY se 0 m 0 pad 0 tver 0 pkey 0xffff f 0 b 0 qpn 0x000001 a 0 psn 0x000001b2 deth qkey 0x80010000 sqpn 0x000001 input_ibhdr trace after change: -0 [001] d.h. 6655.714488: input_ibhdr: [0000:05:00.0] (IB) len:124 sc:0 dlid:0x0001 slid:0x0002 lnh:2,LRH_BTH lver:0 sl:0 age:0 becn:0 fecn:0 l4:0 rc:0 entropy:0 op:0x64,UD_SEND_ONLY se:0 m:0 pad:0 tver:0 pkey:0x7fff f:0 b:0 qpn:0x000001 a:0 psn:0x00000036 hlen:8 deth qkey:0x80010000 sqpn:0x000001 Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 49 ----- drivers/infiniband/hw/hfi1/rc.c | 3 +- drivers/infiniband/hw/hfi1/trace.c | 58 +++++- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 322 ++++++++++++++++++++---------- drivers/infiniband/hw/hfi1/trace_rx.h | 9 + drivers/infiniband/hw/hfi1/verbs.c | 7 +- 6 files changed, 279 insertions(+), 169 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 414a04a..3b76631 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2086,53 +2086,4 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) - -#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } -#define show_packettype(etype) \ -__print_symbolic(etype, \ - packettype_name(EXPECTED), \ - packettype_name(EAGER), \ - packettype_name(IB), \ - packettype_name(ERROR), \ - packettype_name(BYPASS)) - -#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } -#define show_ib_opcode(opcode) \ -__print_symbolic(opcode, \ - ib_opcode_name(RC_SEND_FIRST), \ - ib_opcode_name(RC_SEND_MIDDLE), \ - ib_opcode_name(RC_SEND_LAST), \ - ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_SEND_ONLY), \ - ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_FIRST), \ - ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(RC_RDMA_WRITE_LAST), \ - ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_READ_REQUEST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ - ib_opcode_name(RC_ACKNOWLEDGE), \ - ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ - ib_opcode_name(RC_COMPARE_SWAP), \ - ib_opcode_name(RC_FETCH_ADD), \ - ib_opcode_name(UC_SEND_FIRST), \ - ib_opcode_name(UC_SEND_MIDDLE), \ - ib_opcode_name(UC_SEND_LAST), \ - ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_SEND_ONLY), \ - ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_FIRST), \ - ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(UC_RDMA_WRITE_LAST), \ - ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UD_SEND_ONLY), \ - ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(CNP)) #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 66e6843..b443c1e 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -798,7 +798,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, goto queue_ack; } - trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr); + trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), + &hdr, ib_is_sc5(sc5)); /* write the pbc and data */ ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index eafae48..b80b74d 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,7 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 ibhdr_exhdr_len(struct ib_header *hdr) +u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; @@ -61,13 +61,18 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr) 0 : hdr_len_by_opcode[opcode] - (12 + 8); } -#define IMM_PRN "imm %d" -#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" -#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" -#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" -#define IETH_PRN "ieth rkey 0x%.8x" -#define ATOMICACKETH_PRN "origdata %llx" -#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx" +const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) +{ + return "IB"; +} + +#define IMM_PRN "imm:%d" +#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" +#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" +#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x" +#define IETH_PRN "ieth rkey:0x%.8x" +#define ATOMICACKETH_PRN "origdata:%llx" +#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" #define OP(transport, op) IB_OPCODE_## transport ## _ ## op @@ -84,6 +89,43 @@ static const char *parse_syndrome(u8 syndrome) return ""; } +void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn) +{ + *ack = ib_bth_get_ackreq(ohdr); + *becn = ib_bth_get_becn(ohdr); + *fecn = ib_bth_get_fecn(ohdr); + *mig = ib_bth_get_migreq(ohdr); + *se = ib_bth_get_se(ohdr); + *pad = ib_bth_get_pad(ohdr); + *opcode = ib_bth_get_opcode(ohdr); + *tver = ib_bth_get_tver(ohdr); + *pkey = ib_bth_get_pkey(ohdr); + *psn = ib_bth_get_psn(ohdr); + *qpn = ib_bth_get_qpn(ohdr); +} + +void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, + struct ib_other_headers **ohdr, + u8 *lnh, u8 *lver, u8 *sl, u8 *sc, + u16 *len, u32 *dlid, u32 *slid) +{ + *lnh = ib_get_lnh(hdr); + *lver = ib_get_lver(hdr); + *sl = ib_get_sl(hdr); + *sc = ib_get_sc(hdr) | (sc5 << 4); + *len = ib_get_len(hdr); + *dlid = ib_get_dlid(hdr); + *slid = ib_get_slid(hdr); + + if (*lnh == HFI1_LRH_BTH) + *ohdr = &hdr->u.oth; + else + *ohdr = &hdr->u.l.oth; +} + const char *parse_everbs_hdrs( struct trace_seq *p, u8 opcode, diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 090f6b5..0f2d2da 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -55,8 +55,57 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs -u8 ibhdr_exhdr_len(struct ib_header *hdr); +#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } +#define show_ib_opcode(opcode) \ +__print_symbolic(opcode, \ + ib_opcode_name(RC_SEND_FIRST), \ + ib_opcode_name(RC_SEND_MIDDLE), \ + ib_opcode_name(RC_SEND_LAST), \ + ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_SEND_ONLY), \ + ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_FIRST), \ + ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(RC_RDMA_WRITE_LAST), \ + ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_READ_REQUEST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ + ib_opcode_name(RC_ACKNOWLEDGE), \ + ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ + ib_opcode_name(RC_COMPARE_SWAP), \ + ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(UC_SEND_FIRST), \ + ib_opcode_name(UC_SEND_MIDDLE), \ + ib_opcode_name(UC_SEND_LAST), \ + ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_SEND_ONLY), \ + ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_FIRST), \ + ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(UC_RDMA_WRITE_LAST), \ + ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UD_SEND_ONLY), \ + ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(CNP)) + const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); +u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr); +const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet); +void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn); +void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, + struct ib_other_headers **ohdr, + u8 *lnh, u8 *lver, u8 *sl, u8 *sc, + u16 *len, u32 *dlid, u32 *slid); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -66,139 +115,198 @@ __print_symbolic(lrh, \ lrh_name(LRH_BTH), \ lrh_name(LRH_GRH)) -#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" +#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x" +#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d " #define BTH_PRN \ - "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ - "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" -#define EHDR_PRN "%s" + "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \ + "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x" +#define EHDR_PRN "hlen:%d %s" -DECLARE_EVENT_CLASS(hfi1_ibhdr_template, +DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr), - TP_ARGS(dd, hdr), + struct hfi1_packet *packet, + bool sc5), + TP_ARGS(dd, packet, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) - /* LRH */ - __field(u8, vl) + __field(u8, lnh) __field(u8, lver) __field(u8, sl) + __field(u16, len) + __field(u32, dlid) + __field(u8, sc) + __field(u32, slid) + __field(u8, opcode) + __field(u8, se) + __field(u8, mig) + __field(u8, pad) + __field(u8, tver) + __field(u16, pkey) + __field(u8, fecn) + __field(u8, becn) + __field(u32, qpn) + __field(u8, ack) + __field(u32, psn) + /* extended headers */ + __dynamic_array(u8, ehdrs, + hfi1_trace_ib_hdr_len(packet->hdr)) + ), + TP_fast_assign( + struct ib_other_headers *ohdr; + + DD_DEV_ASSIGN(dd); + + hfi1_trace_parse_9b_hdr(packet->hdr, sc5, + &ohdr, + &__entry->lnh, + &__entry->lver, + &__entry->sl, + &__entry->sc, + &__entry->len, + &__entry->dlid, + &__entry->slid); + + hfi1_trace_parse_bth(ohdr, &__entry->ack, + &__entry->becn, &__entry->fecn, + &__entry->mig, &__entry->se, + &__entry->pad, &__entry->opcode, + &__entry->tver, &__entry->pkey, + &__entry->psn, &__entry->qpn); + /* extended headers */ + memcpy(__get_dynamic_array(ehdrs), &ohdr->u, + __get_dynamic_array_len(ehdrs)); + ), + TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " + BTH_PRN " " EHDR_PRN, + __get_str(dev), + __entry->len, + __entry->sc, + __entry->dlid, + __entry->slid, + __entry->lnh, show_lnh(__entry->lnh), + __entry->lver, + __entry->sl, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->mig, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->fecn, + __entry->becn, + __entry->qpn, + __entry->ack, + __entry->psn, + /* extended headers */ + __get_dynamic_array_len(ehdrs), + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) +); + +DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct hfi1_packet *packet, bool sc5), + TP_ARGS(dd, packet, sc5)); + +DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, + bool sc5), + TP_ARGS(dd, hdr, sc5), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) __field(u8, lnh) - __field(u16, dlid) + __field(u8, lver) + __field(u8, sl) __field(u16, len) - __field(u16, slid) - /* BTH */ + __field(u32, dlid) + __field(u8, sc) + __field(u32, slid) __field(u8, opcode) __field(u8, se) - __field(u8, m) + __field(u8, mig) __field(u8, pad) __field(u8, tver) __field(u16, pkey) - __field(u8, f) - __field(u8, b) + __field(u8, fecn) + __field(u8, becn) __field(u32, qpn) - __field(u8, a) + __field(u8, ack) __field(u32, psn) /* extended headers */ - __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) + __dynamic_array(u8, ehdrs, + hfi1_trace_ib_hdr_len(hdr)) ), - TP_fast_assign( + TP_fast_assign( struct ib_other_headers *ohdr; DD_DEV_ASSIGN(dd); - /* LRH */ - __entry->vl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); - __entry->lver = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; - __entry->sl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->lnh = - (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - __entry->dlid = - be16_to_cpu(hdr->lrh[1]); - /* allow for larger len */ - __entry->len = - be16_to_cpu(hdr->lrh[2]); - __entry->slid = - be16_to_cpu(hdr->lrh[3]); - /* BTH */ - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - __entry->opcode = - (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->se = - (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; - __entry->m = - (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; - __entry->pad = - (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - __entry->tver = - (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; - __entry->pkey = - be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & - IB_FECN_MASK; - __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & - IB_BECN_MASK; - __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->a = - (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; - /* allow for larger PSN */ - __entry->psn = - be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; + + hfi1_trace_parse_9b_hdr(hdr, sc5, + &ohdr, &__entry->lnh, + &__entry->lver, &__entry->sl, + &__entry->sc, &__entry->len, + &__entry->dlid, &__entry->slid); + + hfi1_trace_parse_bth(ohdr, &__entry->ack, + &__entry->becn, &__entry->fecn, + &__entry->mig, &__entry->se, + &__entry->pad, &__entry->opcode, + &__entry->tver, &__entry->pkey, + &__entry->psn, &__entry->qpn); + /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, - ibhdr_exhdr_len(hdr)); - ), - TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, - __get_str(dev), - /* LRH */ - __entry->vl, - __entry->lver, - __entry->sl, - __entry->lnh, show_lnh(__entry->lnh), - __entry->dlid, - __entry->len, - __entry->slid, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->m, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->f, - __entry->b, - __entry->qpn, - __entry->a, - __entry->psn, - /* extended headers */ - __parse_ib_ehdrs( - __entry->opcode, - (void *)__get_dynamic_array(ehdrs)) - ) + memcpy(__get_dynamic_array(ehdrs), + &ohdr->u, __get_dynamic_array_len(ehdrs)); + ), + TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " + BTH_PRN " " EHDR_PRN, + __get_str(dev), + __entry->len, + __entry->sc, + __entry->dlid, + __entry->slid, + __entry->lnh, show_lnh(__entry->lnh), + __entry->lver, + __entry->sl, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->mig, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->fecn, + __entry->becn, + __entry->qpn, + __entry->ack, + __entry->psn, + /* extended headers */ + __get_dynamic_array_len(ehdrs), + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) ); -DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index f77e59f..05fc6d6 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -55,6 +55,15 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx +#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } +#define show_packettype(etype) \ +__print_symbolic(etype, \ + packettype_name(EXPECTED), \ + packettype_name(EAGER), \ + packettype_name(IB), \ + packettype_name(ERROR), \ + packettype_name(BYPASS)) + TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, u32 ctxt, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 128d291..5f4be35 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -589,8 +589,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) goto drop; } - trace_input_ibhdr(rcd->dd, hdr); - + trace_input_ibhdr(rcd->dd, packet, !!(packet->rhf & RHF_DC_INFO_SMASK)); opcode = ib_bth_get_opcode(packet->ohdr); inc_opstats(tlen, &rcd->opstats->stats[opcode]); @@ -885,7 +884,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, return ret; } trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); return ret; bail_ecomm: @@ -1058,7 +1057,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); pio_bail: if (qp->s_wqe) { -- cgit v1.1 From 9039746cdf39dcbf2ddfcc4a68f729cbbbc853df Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:20:20 -0700 Subject: IB/hfi1: Setup common IB fields in hfi1_packet struct We move many common IB fields into the hfi1_packet structure and set them up in a single function. This allows us to set the fields in a single place and not deal with them throughout the driver. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 9 -- drivers/infiniband/hw/hfi1/chip.h | 2 - drivers/infiniband/hw/hfi1/common.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 158 +++++++++++++++++++++++++----------- drivers/infiniband/hw/hfi1/hfi.h | 21 ++++- drivers/infiniband/hw/hfi1/rc.c | 33 +++----- drivers/infiniband/hw/hfi1/ruc.c | 89 ++++++++++---------- drivers/infiniband/hw/hfi1/uc.c | 16 +--- drivers/infiniband/hw/hfi1/ud.c | 23 ++---- drivers/infiniband/hw/hfi1/verbs.c | 89 +++++++++----------- drivers/infiniband/hw/hfi1/verbs.h | 6 +- 11 files changed, 236 insertions(+), 211 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2ba00b8..5dbee3c 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9810,15 +9810,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -struct ib_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr) -{ - u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - - return (struct ib_header *) - (rhf_addr - dd->rhf_offset + offset); -} - static const char * const ib_cfg_name_strings[] = { "HFI1_IB_CFG_LIDLMC", "HFI1_IB_CFG_LWID_DG_ENB", diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index cbe455d..0b4f418 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1347,8 +1347,6 @@ enum { u64 get_all_cpu_total(u64 __percpu *cntr); void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); -struct ib_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr); void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 995d62c..ba9ab97 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -325,6 +325,7 @@ struct diag_pkt { #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ /* misc. */ +#define SC15_PACKET 0xF #define SIZE_OF_CRC 1 #define LIM_MGMT_P_KEY 0x7FFF diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 0583479..2a1022e 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -224,6 +224,20 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf, (offset * RCV_BUF_BLOCK_SIZE)); } +static inline void *hfi1_get_header(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); + + return (void *)(rhf_addr - dd->rhf_offset + offset); +} + +static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + return (struct ib_header *)hfi1_get_header(dd, rhf_addr); +} + /* * Validate and encode the a given RcvArray Buffer size. * The function will check whether the given size falls within @@ -249,7 +263,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - int lnh = ib_get_lnh(rhdr); + u8 lnh = ib_get_lnh(rhdr); + bool has_grh = false; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -257,37 +272,42 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &rhdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + has_grh = true; + packet->ohdr = &rhdr->u.l.oth; + packet->grh = &rhdr->u.l.grh; + } else { + goto drop; + } + if (packet->rhf & RHF_TID_ERR) { /* For TIDERR and RC QPs preemptively schedule a NAK */ - struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = ib_get_dlid(rhdr); + u32 dlid = ib_get_dlid(rhdr); u32 qp_num; - u32 rcv_flags = 0; + u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE); /* Sanity check packet */ if (tlen < 24) goto drop; /* Check for GRH */ - if (lnh == HFI1_LRH_BTH) { - ohdr = &rhdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { + if (has_grh) { u32 vtf; + struct ib_grh *grh = packet->grh; - ohdr = &rhdr->u.l.oth; - if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + if (grh->next_hdr != IB_GRH_NEXT_HDR) goto drop; - vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow); + vtf = be32_to_cpu(grh->version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; - rcv_flags |= HFI1_HAS_GRH; - } else { - goto drop; } + /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(ohdr); - if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { + qp_num = ib_bth_get_qpn(packet->ohdr); + if (dlid < mlid_base) { struct rvt_qp *qp; unsigned long flags; @@ -312,11 +332,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, switch (qp->ibqp.qp_type) { case IB_QPT_RC: - hfi1_rc_hdrerr( - rcd, - rhdr, - rcv_flags, - qp); + hfi1_rc_hdrerr(rcd, packet, qp); break; default: /* For now don't handle any other QP types */ @@ -332,9 +348,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, switch (rte) { case RHF_RTE_ERROR_OP_CODE_ERR: { - u32 opcode; void *ebuf = NULL; - __be32 *bth = NULL; + u8 opcode; if (rhf_use_egr_bfr(packet->rhf)) ebuf = packet->ebuf; @@ -342,16 +357,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (!ebuf) goto drop; /* this should never happen */ - if (lnh == HFI1_LRH_BTH) - bth = (__be32 *)ebuf; - else if (lnh == HFI1_LRH_GRH) - bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh)); - else - goto drop; - - opcode = be32_to_cpu(bth[0]) >> 24; - opcode &= 0xff; - + opcode = ib_bth_get_opcode(packet->ohdr); if (opcode == IB_OPCODE_CNP) { /* * Only in pre-B0 h/w is the CNP_OPCODE handled @@ -365,7 +371,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; - lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; + lqpn = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn); if (!qp) { @@ -415,7 +421,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->rhf = rhf_to_cpu(packet->rhf_addr); packet->rhqoff = rcd->head; packet->numpkt = 0; - packet->rcv_flags = 0; } void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, @@ -424,15 +429,12 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct ib_header *hdr = pkt->hdr; struct ib_other_headers *ohdr = pkt->ohdr; - struct ib_grh *grh = NULL; + struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; u16 rlid, dlid = ib_get_dlid(hdr); u8 sc, svc_type; bool is_mcast = false; - if (pkt->rcv_flags & HFI1_HAS_GRH) - grh = &hdr->u.l.grh; - switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: @@ -591,9 +593,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; + packet->grh = NULL; } else if (lnh == HFI1_LRH_GRH) { packet->ohdr = &hdr->u.l.oth; - packet->rcv_flags |= HFI1_HAS_GRH; + packet->grh = &hdr->u.l.grh; } else { goto next; /* just in case */ } @@ -698,10 +701,9 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; - packet->hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; packet->etype = rhf_rcv_type(packet->rhf); + + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; /* total length */ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ /* retrieve eager buffer details */ @@ -759,7 +761,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet) packet->etail, 0, 0); packet->updegr = 0; } - packet->rcv_flags = 0; + packet->grh = NULL; } static inline void finish_packet(struct hfi1_packet *packet) @@ -896,12 +898,15 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); + u8 sc = SC15_PACKET; - if (etype == RHF_RCV_TYPE_IB && - hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB) { + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + sc = hfi1_9B_get_sc5(hdr, packet->rhf); + } + if (sc != SC15_PACKET) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -1321,6 +1326,58 @@ bail: return ret; } +static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) +{ + packet->hdr = (struct hfi1_ib_message_header *) + hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; +} + +static int hfi1_setup_9B_packet(struct hfi1_packet *packet) +{ + struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); + struct ib_header *hdr; + u8 lnh; + + hfi1_setup_ib_header(packet); + hdr = packet->hdr; + + lnh = ib_get_lnh(hdr); + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &hdr->u.oth; + packet->grh = NULL; + } else if (lnh == HFI1_LRH_GRH) { + u32 vtf; + + packet->ohdr = &hdr->u.l.oth; + packet->grh = &hdr->u.l.grh; + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(packet->grh->version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else { + goto drop; + } + + /* Query commonly used fields from packet header */ + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->slid = ib_get_slid(hdr); + packet->dlid = ib_get_dlid(hdr); + packet->sl = ib_get_sl(hdr); + packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf); + packet->pad = ib_bth_get_pad(packet->ohdr); + packet->extra_byte = 0; + packet->fecn = ib_bth_get_fecn(packet->ohdr); + packet->becn = ib_bth_get_becn(packet->ohdr); + + return 0; +drop: + ibp->rvp.n_pkt_drops++; + return -EINVAL; +} + void handle_eflags(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; @@ -1351,6 +1408,9 @@ int process_receive_ib(struct hfi1_packet *packet) if (unlikely(hfi1_dbg_fault_packet(packet))) return RHF_RCV_CONTINUE; + if (hfi1_setup_9B_packet(packet)) + return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet->rcd->ppd->dd, packet->rcd->ctxt, rhf_err_flags(packet->rhf), @@ -1422,6 +1482,7 @@ int process_receive_error(struct hfi1_packet *packet) rhf_rcv_type_err(packet->rhf) == 3)) return RHF_RCV_CONTINUE; + hfi1_setup_ib_header(packet); handle_eflags(packet); if (unlikely(rhf_err_flags(packet->rhf))) @@ -1435,6 +1496,8 @@ int kdeth_process_expected(struct hfi1_packet *packet) { if (unlikely(hfi1_dbg_fault_packet(packet))) return RHF_RCV_CONTINUE; + + hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1445,6 +1508,7 @@ int kdeth_process_expected(struct hfi1_packet *packet) int kdeth_process_eager(struct hfi1_packet *packet) { + hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); if (unlikely(hfi1_dbg_fault_packet(packet))) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3b76631..9c6c734 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -356,17 +356,26 @@ struct hfi1_packet { __le32 *rhf_addr; struct rvt_qp *qp; struct ib_other_headers *ohdr; + struct ib_grh *grh; u64 rhf; u32 maxcnt; u32 rhqoff; + u32 dlid; + u32 slid; u16 tlen; s16 etail; u8 hlen; u8 numpkt; u8 rsize; u8 updegr; - u8 rcv_flags; u8 etype; + u8 extra_byte; + u8 pad; + u8 sc; + u8 sl; + u8 opcode; + bool becn; + bool fecn; }; struct rvt_sge_state; @@ -2086,4 +2095,14 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) + +/* + * hfi1_check_mcast- Check if the given lid is + * in the IB multicast range. + */ +static inline bool hfi1_check_mcast(u16 lid) +{ + return ((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (lid != be16_to_cpu(IB_LID_PERMISSIVE))); +} #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index b443c1e..baa67bf 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1916,17 +1916,16 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0, opcode; + u32 bth0; + u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn; - u32 pad; + u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; int diff; @@ -1938,14 +1937,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) u32 rkey; lockdep_assert_held(&qp->r_lock); + bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; is_fecn = process_ecn(qp, packet, false); - psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -2075,8 +2073,6 @@ no_immediate_data: wc.wc_flags = 0; wc.ex.imm_data = 0; send_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -2369,28 +2365,19 @@ send_ack: void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct ib_header *hdr, - u32 rcv_flags, + struct hfi1_packet *packet, struct rvt_qp *qp) { - int has_grh = rcv_flags & HFI1_HAS_GRH; - struct ib_other_headers *ohdr; struct hfi1_ibport *ibp = rcd_to_iport(rcd); int diff; u32 opcode; - u32 psn, bth0; - - /* Check for GRH */ - ohdr = &hdr->u.oth; - if (has_grh) - ohdr = &hdr->u.l.oth; + u32 psn; - bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; - psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); + psn = ib_bth_get_psn(packet->ohdr); + opcode = ib_bth_get_opcode(packet->ohdr); /* Only deal with RDMA Writes for now */ if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 3a17dab..9cc9c7b 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -214,100 +214,95 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the hfi1_migrate_qp() call. */ -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, - int has_grh, struct rvt_qp *qp, u32 bth0) +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) { __be64 guid; unsigned long flags; + struct rvt_qp *qp = packet->qp; u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; - - if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { - if (!has_grh) { + u32 dlid = packet->dlid; + u32 slid = packet->slid; + u32 sl = packet->sl; + int migrated; + u32 bth0, bth1; + + bth0 = be32_to_cpu(packet->ohdr->bth[0]); + bth1 = be32_to_cpu(packet->ohdr->bth[1]); + migrated = bth0 & IB_BTH_MIG_REQ; + + if (qp->s_mig_state == IB_MIG_ARMED && migrated) { + if (!packet->grh) { if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) & IB_AH_GRH) - goto err; + return 1; } else { const struct ib_global_route *grh; if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) & IB_AH_GRH)) - goto err; + return 1; grh = rdma_ah_read_grh(&qp->alt_ah_attr); guid = get_sguid(ibp, grh->sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, guid)) - goto err; + return 1; if (!gid_ok( - &hdr->u.l.grh.sgid, + &packet->grh->sgid, grh->dgid.global.subnet_prefix, grh->dgid.global.interface_id)) - goto err; + return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, - ib_get_slid(hdr)))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - (u16)bth0, - ib_get_sl(hdr), - 0, qp->ibqp.qp_num, - ib_get_slid(hdr), - ib_get_dlid(hdr)); - goto err; + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + sc5, slid))) { + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); + return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ - if (ib_get_slid(hdr) != - rdma_ah_get_dlid(&qp->alt_ah_attr) || + if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) || ppd_from_ibp(ibp)->port != rdma_ah_get_port_num(&qp->alt_ah_attr)) - goto err; + return 1; spin_lock_irqsave(&qp->s_lock, flags); hfi1_migrate_qp(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } else { - if (!has_grh) { + if (!packet->grh) { if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - goto err; + return 1; } else { const struct ib_global_route *grh; if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) - goto err; + return 1; grh = rdma_ah_read_grh(&qp->remote_ah_attr); guid = get_sguid(ibp, grh->sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, guid)) - goto err; + return 1; if (!gid_ok( - &hdr->u.l.grh.sgid, + &packet->grh->sgid, grh->dgid.global.subnet_prefix, grh->dgid.global.interface_id)) - goto err; + return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, - ib_get_slid(hdr)))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - (u16)bth0, - ib_get_sl(hdr), - 0, qp->ibqp.qp_num, - ib_get_slid(hdr), - ib_get_dlid(hdr)); - goto err; + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + sc5, slid))) { + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); + return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ - if (ib_get_slid(hdr) != - rdma_ah_get_dlid(&qp->remote_ah_attr) || + if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) || ppd_from_ibp(ibp)->port != qp->port_num) - goto err; - if (qp->s_mig_state == IB_MIG_REARM && - !(bth0 & IB_BTH_MIG_REQ)) + return 1; + if (qp->s_mig_state == IB_MIG_REARM && !migrated) qp->s_mig_state = IB_MIG_ARMED; } return 0; - -err: - return 1; } /** diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 2a5650f..76c2451 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -297,31 +297,25 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); - struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0, opcode; + u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn; - u32 pad; + u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; struct ib_reth *reth; - int has_grh = rcv_flags & HFI1_HAS_GRH; int ret; - bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; process_ecn(qp, packet, true); psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); - /* Compare the PSN verses the expected PSN. */ if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) { /* @@ -432,8 +426,6 @@ no_immediate_data: wc.ex.imm_data = 0; wc.wc_flags = 0; send_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -527,8 +519,6 @@ rdma_first: rdma_last_imm: wc.wc_flags = IB_WC_WITH_IMM; - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 49fe179..c995aa5 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -668,36 +668,31 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, void hfi1_ud_rcv(struct hfi1_packet *packet) { struct ib_other_headers *ohdr = packet->ohdr; - int opcode; u32 hdrsize = packet->hlen; struct ib_wc wc; u32 qkey; u32 src_qp; - u16 dlid, pkey; + u16 pkey; int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - bool has_grh = rcv_flags & HFI1_HAS_GRH; u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); u32 bth1; - u8 sl_from_sc, sl; - u16 slid; - u8 extra_bytes; + u8 sl_from_sc; + u8 extra_bytes = packet->pad; + u8 opcode = packet->opcode; + u8 sl = packet->sl; + u32 dlid = packet->dlid; + u32 slid = packet->slid; + bth1 = be32_to_cpu(ohdr->bth[1]); qkey = ib_get_qkey(ohdr); src_qp = ib_get_sqpn(ohdr); - dlid = ib_get_dlid(hdr); - bth1 = be32_to_cpu(ohdr->bth[1]); - slid = ib_get_slid(hdr); pkey = ib_bth_get_pkey(ohdr); - opcode = ib_bth_get_opcode(ohdr); - sl = ib_get_sl(hdr); - extra_bytes = ib_bth_get_pad(ohdr); extra_bytes += (SIZE_OF_CRC << 2); sl_from_sc = ibp->sc_to_sl[sc5]; @@ -811,7 +806,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qp->r_flags |= RVT_R_REUSE_SGE; goto drop; } - if (has_grh) { + if (packet->grh) { hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, sizeof(struct ib_grh), true, false); wc.wc_flags |= IB_WC_GRH; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 5f4be35..af54d3f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -508,13 +508,14 @@ again: /* * Make sure the QP is ready and able to accept the given opcode. */ -static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) +static inline opcode_handler qp_ok(struct hfi1_packet *packet) { if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) return NULL; - if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || - (opcode == IB_OPCODE_CNP)) - return opcode_handler_tbl[opcode]; + if (((packet->opcode & RVT_OPCODE_QP_MASK) == + packet->qp->allowed_ops) || + (packet->opcode == IB_OPCODE_CNP)) + return opcode_handler_tbl[packet->opcode]; return NULL; } @@ -548,68 +549,34 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) return pbc; } -/** - * hfi1_ib_rcv - process an incoming packet - * @packet: data packet information - * - * This is called to process an incoming packet at interrupt level. - * - * Tlen is the length of the header + data + CRC in bytes. - */ -void hfi1_ib_rcv(struct hfi1_packet *packet) +static inline void hfi1_handle_packet(struct hfi1_packet *packet, + bool is_mcast) { + u32 qp_num; struct hfi1_ctxtdata *rcd = packet->rcd; - struct ib_header *hdr = packet->hdr; - u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; opcode_handler packet_handler; unsigned long flags; - u32 qp_num; - int lnh; - u8 opcode; - u16 lid; - - /* Check for GRH */ - lnh = ib_get_lnh(hdr); - if (lnh == HFI1_LRH_BTH) { - packet->ohdr = &hdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { - u32 vtf; - - packet->ohdr = &hdr->u.l.oth; - if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) - goto drop; - vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); - if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) - goto drop; - packet->rcv_flags |= HFI1_HAS_GRH; - } else { - goto drop; - } - trace_input_ibhdr(rcd->dd, packet, !!(packet->rhf & RHF_DC_INFO_SMASK)); - opcode = ib_bth_get_opcode(packet->ohdr); - inc_opstats(tlen, &rcd->opstats->stats[opcode]); + inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]); - /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(packet->ohdr); - lid = ib_get_dlid(hdr); - if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { + if (unlikely(is_mcast)) { struct rvt_mcast *mcast; struct rvt_mcast_qp *p; - if (lnh != HFI1_LRH_GRH) + if (!packet->grh) goto drop; - mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid); + mcast = rvt_mcast_find(&ibp->rvp, + &packet->grh->dgid, + packet->dlid); if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; spin_lock_irqsave(&packet->qp->r_lock, flags); - packet_handler = qp_ok(opcode, packet); + packet_handler = qp_ok(packet); if (likely(packet_handler)) packet_handler(packet); else @@ -623,19 +590,21 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) if (atomic_dec_return(&mcast->refcount) <= 1) wake_up(&mcast->wait); } else { + /* Get the destination QP number. */ + qp_num = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) { rcu_read_unlock(); goto drop; } - if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode, + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, true))) { rcu_read_unlock(); goto drop; } spin_lock_irqsave(&packet->qp->r_lock, flags); - packet_handler = qp_ok(opcode, packet); + packet_handler = qp_ok(packet); if (likely(packet_handler)) packet_handler(packet); else @@ -644,11 +613,29 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) rcu_read_unlock(); } return; - drop: ibp->rvp.n_pkt_drops++; } +/** + * hfi1_ib_rcv - process an incoming packet + * @packet: data packet information + * + * This is called to process an incoming packet at interrupt level. + */ +void hfi1_ib_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + bool is_mcast = false; + + if (unlikely(hfi1_check_mcast(packet->dlid))) + is_mcast = true; + + trace_input_ibhdr(rcd->dd, packet, + !!(packet->rhf & RHF_DC_INFO_SMASK)); + hfi1_handle_packet(packet, is_mcast); +} + /* * This is called from a timer to check for QPs * which need kernel memory in order to send a packet. diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index cd635d0..17b38cd 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -307,8 +307,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet); void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct ib_header *hdr, - u32 rcv_flags, + struct hfi1_packet *packet, struct rvt_qp *qp); u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); @@ -346,8 +345,7 @@ static inline u8 get_opcode(struct ib_header *h) return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; } -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, - int has_grh, struct rvt_qp *qp, u32 bth0); +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, const struct ib_global_route *grh, u32 hwords, u32 nwords); -- cgit v1.1 From 14fe13fcd3afb96b06809f280b586be1c998332c Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 May 2017 09:20:31 -0700 Subject: IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok() SGEs that are contiguous needlessly consume driver dependent TX resources. The lkey validation logic is enhanced to compress the SGE that ends up in the send wqe when consecutive addresses are detected. The lkey validation API used to return 1 (success) or 0 (fail). The return value is now an -errno, 0 (compressed), or 1 (uncompressed). A additional argument is added to pass the last SQE for the compression. Loopback callers always pass a NULL to last_sge since the optimization is of little benefit in that situation. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/ruc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 9cc9c7b..476fe5d 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -75,7 +75,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; -- cgit v1.1 From 7be85676f1d13c77a7e0c72e04903bfd39580d4f Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:12 -0700 Subject: IB/hfi1: Don't remove RB entry when not needed. An RB tree is used for the SDMA pinning cache. Cache entries are extracted and reinserted from the tree in case the address range for it changes. However, if the address range for the entry doesn't change, deleting the entry from the RB tree is not necessary. This affects performance since the tree needs to be rebalanced for each insertion, and this happens in the hot path. Optimize RB search by not removing entries when it's not needed. Reviewed-by: Mike Marciniszyn Reviewed-by: Mitko Haralanov Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mmu_rb.c | 14 ++++++++++---- drivers/infiniband/hw/hfi1/mmu_rb.h | 5 +++-- drivers/infiniband/hw/hfi1/user_sdma.c | 23 ++++++++++++++++------- 3 files changed, 29 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index ccbf52c..d41fd87 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -217,21 +217,27 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } -struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, - unsigned long addr, unsigned long len) +bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len, + struct mmu_rb_node **rb_node) { struct mmu_rb_node *node; unsigned long flags; + bool ret = false; spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, addr, len); if (node) { + if (node->addr == addr && node->len == len) + goto unlock; __mmu_int_rb_remove(node, &handler->root); list_del(&node->list); /* remove from LRU list */ + ret = true; } +unlock: spin_unlock_irqrestore(&handler->lock, flags); - - return node; + *rb_node = node; + return ret; } void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 754f6eb..f04cec1 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -81,7 +81,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, struct mmu_rb_node *mnode); -struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, - unsigned long addr, unsigned long len); +bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len, + struct mmu_rb_node **rb_node); #endif /* _HFI1_MMU_RB_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 16fd519..79450cf 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1165,14 +1165,23 @@ static int pin_vector_pages(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; - - rb_node = hfi1_mmu_rb_extract(pq->handler, - (unsigned long)iovec->iov.iov_base, - iovec->iov.iov_len); - if (rb_node) + bool extracted; + + extracted = + hfi1_mmu_rb_remove_unless_exact(pq->handler, + (unsigned long) + iovec->iov.iov_base, + iovec->iov.iov_len, &rb_node); + if (rb_node) { node = container_of(rb_node, struct sdma_mmu_node, rb); - else - rb_node = NULL; + if (!extracted) { + atomic_inc(&node->refcount); + iovec->pages = node->pages; + iovec->npages = node->npages; + iovec->node = node; + return 0; + } + } if (!node) { node = kzalloc(sizeof(*node), GFP_KERNEL); -- cgit v1.1 From e3304b7cc4f14d365f46d6847a35563ae8b017f7 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:18 -0700 Subject: IB/hfi1: Optimize cachelines for user SDMA request structure The current user SDMA request structure layout has holes. The cachelines can be reduced to improve cacheline trading. Separate fields in the following categories: mostly read, writable and shared with interrupt. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 106 ++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 48 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 79450cf..92517ce 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -117,6 +117,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define AHG_KDETH_INTR_SHIFT 12 #define AHG_KDETH_SH_SHIFT 13 +#define AHG_KDETH_ARRAY_SIZE 9 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -204,25 +205,42 @@ struct evict_data { }; struct user_sdma_request { - struct sdma_req_info info; - struct hfi1_user_sdma_pkt_q *pq; - struct hfi1_user_sdma_comp_q *cq; /* This is the original header from user space */ struct hfi1_pkt_header hdr; + + /* Read mostly fields */ + struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp; + struct hfi1_user_sdma_comp_q *cq; /* * Pointer to the SDMA engine for this request. * Since different request could be on different VLs, * each request will need it's own engine pointer. */ struct sdma_engine *sde; - s8 ahg_idx; - u32 ahg[9]; + struct sdma_req_info info; + /* TID array values copied from the tid_iov vector */ + u32 *tids; + /* total length of the data in the request */ + u32 data_len; + /* number of elements copied to the tids array */ + u16 n_tids; /* - * KDETH.Offset (Eager) field - * We need to remember the initial value so the headers - * can be updated properly. + * We copy the iovs for this request (based on + * info.iovcnt). These are only the data vectors */ - u32 koffset; + u8 data_iovs; + s8 ahg_idx; + + /* Writeable fields shared with interrupt */ + u64 seqcomp ____cacheline_aligned_in_smp; + u64 seqsubmitted; + unsigned long flags; + /* status of the last txreq completed */ + int status; + + /* Send side fields */ + struct list_head txps ____cacheline_aligned_in_smp; + u64 seqnum; /* * KDETH.OFFSET (TID) field * The offset can cover multiple packets, depending on the @@ -230,29 +248,19 @@ struct user_sdma_request { */ u32 tidoffset; /* - * We copy the iovs for this request (based on - * info.iovcnt). These are only the data vectors + * KDETH.Offset (Eager) field + * We need to remember the initial value so the headers + * can be updated properly. */ - unsigned data_iovs; - /* total length of the data in the request */ - u32 data_len; + u32 koffset; + u32 sent; + /* TID index copied from the tid_iov vector */ + u16 tididx; /* progress index moving along the iovs array */ - unsigned iov_idx; + u8 iov_idx; + struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; - /* number of elements copied to the tids array */ - u16 n_tids; - /* TID array values copied from the tid_iov vector */ - u32 *tids; - u16 tididx; - u32 sent; - u64 seqnum; - u64 seqcomp; - u64 seqsubmitted; - struct list_head txps; - unsigned long flags; - /* status of the last txreq completed */ - int status; -}; +} ____cacheline_aligned_in_smp; /* * A single txreq could span up to 3 physical pages when the MTU @@ -1034,11 +1042,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) datalen); if (changes < 0) goto free_tx; - sdma_txinit_ahg(&tx->txreq, - SDMA_TXREQ_F_USE_AHG, - datalen, req->ahg_idx, changes, - req->ahg, sizeof(req->hdr), - user_sdma_txreq_cb); } } else { ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + @@ -1442,21 +1445,22 @@ done: } static int set_txreq_header_ahg(struct user_sdma_request *req, - struct user_sdma_txreq *tx, u32 len) + struct user_sdma_txreq *tx, u32 datalen) { + u32 ahg[AHG_KDETH_ARRAY_SIZE]; int diff = 0; u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); - u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len)); + u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ - AHG_HEADER_SET(req->ahg, diff, 0, 0, 12, + AHG_HEADER_SET(ahg, diff, 0, 0, 12, cpu_to_le16(LRH2PBC(lrhlen))); /* LRH.PktLen (we need the full 16 bits due to byte swap) */ - AHG_HEADER_SET(req->ahg, diff, 3, 0, 16, + AHG_HEADER_SET(ahg, diff, 3, 0, 16, cpu_to_be16(lrhlen >> 2)); } @@ -1468,13 +1472,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; - AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); - AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); + AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); + AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); /* KDETH.Offset */ - AHG_HEADER_SET(req->ahg, diff, 15, 0, 16, + AHG_HEADER_SET(ahg, diff, 15, 0, 16, cpu_to_le16(req->koffset & 0xffff)); - AHG_HEADER_SET(req->ahg, diff, 15, 16, 16, - cpu_to_le16(req->koffset >> 16)); + AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16)); if (req_opcode(req->info.ctrl) == EXPECTED) { __le16 val; @@ -1492,9 +1495,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, * we have to check again. */ if (++req->tididx > req->n_tids - 1 || - !req->tids[req->tididx]) { + !req->tids[req->tididx]) return -EINVAL; - } tidval = req->tids[req->tididx]; } omfactor = ((EXP_TID_GET(tidval, LEN) * @@ -1502,7 +1504,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ - AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, + AHG_HEADER_SET(ahg, diff, 7, 0, 16, ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | ((req->tidoffset >> omfactor) & 0x7fff))); @@ -1522,12 +1524,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_KDETH_INTR_SHIFT)); } - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + AHG_HEADER_SET(ahg, diff, 7, 16, 14, val); } + if (diff < 0) + return diff; trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, req->sde->this_idx, - req->ahg_idx, req->ahg, diff, tidval); + req->ahg_idx, ahg, diff, tidval); + sdma_txinit_ahg(&tx->txreq, + SDMA_TXREQ_F_USE_AHG, + datalen, req->ahg_idx, diff, + ahg, sizeof(req->hdr), + user_sdma_txreq_cb); + return diff; } -- cgit v1.1 From 721c462123b4b53745c1ccd99619b16e8f4e091b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 26 May 2017 05:35:25 -0700 Subject: IB/hfi1: Name function prototype parameters for affinity module To improve the readability of function prototypes, give the parameters names in the affinity module. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index e78c7aa..2a1e374 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -75,24 +75,26 @@ struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ void init_real_cpu_mask(void); /* Initialize driver affinity data */ -int hfi1_dev_affinity_init(struct hfi1_devdata *); +int hfi1_dev_affinity_init(struct hfi1_devdata *dd); /* * Set IRQ affinity to a CPU. The function will determine the * CPU and set the affinity to it. */ -int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix); /* * Remove the IRQ's CPU affinity. This function also updates * any internal CPU tracking data */ -void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +void hfi1_put_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix); /* * Determine a CPU affinity for a user process, if the process does not * have an affinity set yet. */ -int hfi1_get_proc_affinity(int); +int hfi1_get_proc_affinity(int node); /* Release a CPU used by a user process. */ -void hfi1_put_proc_affinity(int); +void hfi1_put_proc_affinity(int cpu); struct hfi1_affinity_node { int node; -- cgit v1.1 From bb7dde8784913c06ccd1456bed6dcc5ebd0b3c24 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 26 May 2017 05:35:31 -0700 Subject: IB/hfi1: Replace deprecated pci functions with new API pci_enable_msix_range() and pci_disable_msix() have been deprecated. Updating to the new pci_alloc_irq_vectors() interface. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 18 +++--- drivers/infiniband/hw/hfi1/chip.c | 105 +++++++++++++++++----------------- drivers/infiniband/hw/hfi1/hfi.h | 6 +- drivers/infiniband/hw/hfi1/pcie.c | 80 ++++++-------------------- 4 files changed, 78 insertions(+), 131 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index e2cd2cd..a97055d 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -335,10 +335,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) sde->cpu = cpu; cpumask_clear(&msix->mask); cpumask_set_cpu(cpu, &msix->mask); - dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n", - msix->msix.vector, irq_type_names[msix->type], + dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n", + msix->irq, irq_type_names[msix->type], sde->this_idx, cpu); - irq_set_affinity_hint(msix->msix.vector, &msix->mask); + irq_set_affinity_hint(msix->irq, &msix->mask); /* * Set the new cpu in the hfi1_affinity_node and clean @@ -387,7 +387,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix) { struct irq_affinity_notify *notify = &msix->notify; - notify->irq = msix->msix.vector; + notify->irq = msix->irq; notify->notify = hfi1_irq_notifier_notify; notify->release = hfi1_irq_notifier_release; @@ -472,10 +472,10 @@ static int get_irq_affinity(struct hfi1_devdata *dd, } cpumask_set_cpu(cpu, &msix->mask); - dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n", - msix->msix.vector, irq_type_names[msix->type], + dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n", + msix->irq, irq_type_names[msix->type], extra, cpu); - irq_set_affinity_hint(msix->msix.vector, &msix->mask); + irq_set_affinity_hint(msix->irq, &msix->mask); if (msix->type == IRQ_SDMA) { sde->cpu = cpu; @@ -533,7 +533,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, } } - irq_set_affinity_hint(msix->msix.vector, NULL); + irq_set_affinity_hint(msix->irq, NULL); cpumask_clear(&msix->mask); mutex_unlock(&node_affinity.lock); } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 5dbee3c..9118618d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12800,30 +12800,24 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) for (i = 0; i < dd->num_msix_entries; i++, me++) { if (!me->arg) /* => no irq, no affinity */ continue; - hfi1_put_irq_affinity(dd, &dd->msix_entries[i]); - free_irq(me->msix.vector, me->arg); + hfi1_put_irq_affinity(dd, me); + free_irq(me->irq, me->arg); } + + /* clean structures */ + kfree(dd->msix_entries); + dd->msix_entries = NULL; + dd->num_msix_entries = 0; } else { /* INTx */ if (dd->requested_intx_irq) { free_irq(dd->pcidev->irq, dd); dd->requested_intx_irq = 0; } - } - - /* turn off interrupts */ - if (dd->num_msix_entries) { - /* MSI-X */ - pci_disable_msix(dd->pcidev); - } else { - /* INTx */ disable_intx(dd->pcidev); } - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; + pci_free_irq_vectors(dd->pcidev); } /* @@ -12972,13 +12966,21 @@ static int request_msix_irqs(struct hfi1_devdata *dd) continue; /* make sure the name is terminated */ me->name[sizeof(me->name) - 1] = 0; + me->irq = pci_irq_vector(dd->pcidev, i); + /* + * On err return me->irq. Don't need to clear this + * because 'arg' has not been set, and cleanup will + * do the right thing. + */ + if (me->irq < 0) + return me->irq; - ret = request_threaded_irq(me->msix.vector, handler, thread, 0, + ret = request_threaded_irq(me->irq, handler, thread, 0, me->name, arg); if (ret) { dd_dev_err(dd, - "unable to allocate %s interrupt, vector %d, index %d, err %d\n", - err_info, me->msix.vector, idx, ret); + "unable to allocate %s interrupt, irq %d, index %d, err %d\n", + err_info, me->irq, idx, ret); return ret; } /* @@ -12989,8 +12991,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) ret = hfi1_get_irq_affinity(dd, me); if (ret) - dd_dev_err(dd, - "unable to pin IRQ %d\n", ret); + dd_dev_err(dd, "unable to pin IRQ %d\n", ret); } return ret; @@ -13009,7 +13010,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; - synchronize_irq(me->msix.vector); + synchronize_irq(me->irq); } } @@ -13022,7 +13023,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) return; hfi1_put_irq_affinity(dd, me); - free_irq(me->msix.vector, me->arg); + free_irq(me->irq, me->arg); me->arg = NULL; } @@ -13050,14 +13051,19 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) DRIVER_NAME "_%d kctxt%d", dd->unit, idx); me->name[sizeof(me->name) - 1] = 0; me->type = IRQ_RCVCTXT; - + me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr); + if (me->irq < 0) { + dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n", + idx, me->irq); + return; + } remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); - ret = request_threaded_irq(me->msix.vector, receive_context_interrupt, + ret = request_threaded_irq(me->irq, receive_context_interrupt, receive_context_thread, 0, me->name, arg); if (ret) { - dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n", - me->msix.vector, idx, ret); + dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n", + me->irq, idx, ret); return; } /* @@ -13070,7 +13076,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) if (ret) { dd_dev_err(dd, "unable to pin IRQ %d\n", ret); - free_irq(me->msix.vector, me->arg); + free_irq(me->irq, me->arg); } } @@ -13093,9 +13099,8 @@ static void reset_interrupts(struct hfi1_devdata *dd) static int set_up_interrupts(struct hfi1_devdata *dd) { - struct hfi1_msix_entry *entries; - u32 total, request; - int i, ret; + u32 total; + int ret, request; int single_interrupt = 0; /* we expect to have all the interrupts */ /* @@ -13107,39 +13112,31 @@ static int set_up_interrupts(struct hfi1_devdata *dd) */ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; - entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); - if (!entries) { - ret = -ENOMEM; - goto fail; - } - /* 1-1 MSI-X entry assignment */ - for (i = 0; i < total; i++) - entries[i].msix.entry = i; - /* ask for MSI-X interrupts */ - request = total; - request_msix(dd, &request, entries); - - if (request == 0) { + request = request_msix(dd, total); + if (request < 0) { + ret = request; + goto fail; + } else if (request == 0) { /* using INTx */ /* dd->num_msix_entries already zero */ - kfree(entries); single_interrupt = 1; dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n"); + } else if (request < total) { + /* using MSI-X, with reduced interrupts */ + dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n", + total, request); + ret = -EINVAL; + goto fail; } else { - /* using MSI-X */ - dd->num_msix_entries = request; - dd->msix_entries = entries; - - if (request != total) { - /* using MSI-X, with reduced interrupts */ - dd_dev_err( - dd, - "cannot handle reduced interrupt case, want %u, got %u\n", - total, request); - ret = -EINVAL; + dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), + GFP_KERNEL); + if (!dd->msix_entries) { + ret = -ENOMEM; goto fail; } + /* using MSI-X */ + dd->num_msix_entries = total; dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total); } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 9c6c734..8f74cf6 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -521,7 +521,7 @@ static inline void incr_cntr32(u32 *cntr) #define MAX_NAME_SIZE 64 struct hfi1_msix_entry { enum irq_type type; - struct msix_entry msix; + int irq; void *arg; char name[MAX_NAME_SIZE]; cpumask_t mask; @@ -1838,9 +1838,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev); int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); -void request_msix(struct hfi1_devdata *dd, u32 *nent, - struct hfi1_msix_entry *entry); -void hfi1_enable_intx(struct pci_dev *pdev); +int request_msix(struct hfi1_devdata *dd, u32 msireq); void restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 6a9f6f9..f01841b 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -240,50 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) iounmap(dd->piobase); } -static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt, - struct hfi1_msix_entry *hfi1_msix_entry) -{ - int ret; - int nvec = *msixcnt; - struct msix_entry *msix_entry; - int i; - - /* - * We can't pass hfi1_msix_entry array to msix_setup - * so use a dummy msix_entry array and copy the allocated - * irq back to the hfi1_msix_entry array. - */ - msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL); - if (!msix_entry) { - ret = -ENOMEM; - goto do_intx; - } - - for (i = 0; i < nvec; i++) - msix_entry[i] = hfi1_msix_entry[i].msix; - - ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); - if (ret < 0) - goto free_msix_entry; - nvec = ret; - - for (i = 0; i < nvec; i++) - hfi1_msix_entry[i].msix = msix_entry[i]; - - kfree(msix_entry); - *msixcnt = nvec; - return; - -free_msix_entry: - kfree(msix_entry); - -do_intx: - dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n", - nvec, ret); - *msixcnt = 0; - hfi1_enable_intx(dd->pcidev); -} - /* return the PCIe link speed from the given link status */ static u32 extract_speed(u16 linkstat) { @@ -364,33 +320,29 @@ int pcie_speeds(struct hfi1_devdata *dd) } /* - * Returns in *nent: - * - actual number of interrupts allocated + * Returns: + * - actual number of interrupts allocated or * - 0 if fell back to INTx. + * - error */ -void request_msix(struct hfi1_devdata *dd, u32 *nent, - struct hfi1_msix_entry *entry) +int request_msix(struct hfi1_devdata *dd, u32 msireq) { - int pos; + int nvec; - pos = dd->pcidev->msix_cap; - if (*nent && pos) { - msix_setup(dd, pos, nent, entry); - /* did it, either MSI-X or INTx */ - } else { - *nent = 0; - hfi1_enable_intx(dd->pcidev); + nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, + PCI_IRQ_MSIX | PCI_IRQ_LEGACY); + if (nvec < 0) { + dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); + return nvec; } tune_pcie_caps(dd); -} -void hfi1_enable_intx(struct pci_dev *pdev) -{ - /* first, turn on INTx */ - pci_intx(pdev, 1); - /* then turn off MSI-X */ - pci_disable_msix(pdev); + /* check for legacy IRQ */ + if (nvec == 1 && !dd->pcidev->msix_enabled) + return 0; + + return nvec; } /* restore command and BARs after a reset has wiped them out */ -- cgit v1.1 From cb49366f3616fdf197893c24a5b2677b8c26ce29 Mon Sep 17 00:00:00 2001 From: "Vishwanathapura, Niranjana" Date: Thu, 1 Jun 2017 17:04:02 -0700 Subject: IB/core,rdmavt,hfi1,opa-vnic: Send OPA cap_mask3 in trap Provide the ability for IB clients to modify the OPA specific capability mask and include this mask in the subsequent trap data. Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Michael N. Henry Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 7 ++----- drivers/infiniband/hw/hfi1/mad.h | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 6 +++++- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 5977673..70831ad 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -260,6 +260,7 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) data.issuer_lid = cpu_to_be32(lid); data.ntc_144.lid = data.issuer_lid; data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); + data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); send_trap(ibp, &data, sizeof(data)); } @@ -704,11 +705,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT; pi->buffer_units = cpu_to_be32(buffer_units); - pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported | - OPA_CAP_MASK3_IsEthOnFabricSupported); - /* Driver does not support mcast/collective configuration */ - pi->opa_cap_mask &= - cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported); + pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags); pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7) << 3 | (HFI1_MCAST_NR & 0x7)); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 5aa3fd1..a4e2506 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -115,7 +115,7 @@ struct opa_mad_notice_attr { __be32 lid; /* LID where change occurred */ __be32 new_cap_mask; /* new capability mask */ __be16 reserved2; - __be16 cap_mask; + __be16 cap_mask3; __be16 change_flags; /* low 4 bits only */ } __packed ntc_144; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index af54d3f..2d7759f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1537,9 +1537,13 @@ static void init_ibport(struct hfi1_pportdata *ppd) /* Set the prefix to the default value (see ch. 4.1.1) */ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; ibp->rvp.sm_lid = 0; - /* Below should only set bits defined in OPA PortInfo.CapabilityMask */ + /* + * Below should only set bits defined in OPA PortInfo.CapabilityMask + * and PortInfo.CapabilityMask3 + */ ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | IB_PORT_CAP_MASK_NOTICE_SUP; + ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported; ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; -- cgit v1.1 From b888429c202197916c8c549811a2dd62f090280d Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:44 -0700 Subject: IB/hfi1: Remove atomic SDMA_REQ_SEND_DONE bit operation The atomic SDMA_REQ_SEND_DONE bit is set by the process-level code, and then the same process-level code uses the bit to test that all packets have been submitted incurring a costly atomic read. Use a bool type with a READ_ONCE/WRITE_ONCE pairing for this bit, and use the same condition that is used to set the bit to test that all packets have been submitted. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 92517ce..6fb70f0 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -154,10 +154,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ /* SDMA request flag bits */ -#define SDMA_REQ_FOR_THREAD 1 -#define SDMA_REQ_SEND_DONE 2 -#define SDMA_REQ_HAS_ERROR 3 -#define SDMA_REQ_DONE_ERROR 4 +#define SDMA_REQ_HAS_ERROR 1 +#define SDMA_REQ_DONE_ERROR 2 #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) @@ -258,6 +256,7 @@ struct user_sdma_request { u16 tididx; /* progress index moving along the iovs array */ u8 iov_idx; + u8 done; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; } ____cacheline_aligned_in_smp; @@ -628,6 +627,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqsubmitted = 0; req->flags = 0; req->tids = NULL; + req->done = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -809,7 +809,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, * request have been submitted to the SDMA engine. However, it * will not wait for send completions. */ - while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { + while (req->seqsubmitted != req->info.npkts) { ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { if (ret != -EBUSY) { @@ -1118,7 +1118,7 @@ dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); + WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not @@ -1585,7 +1585,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && - (test_bit(SDMA_REQ_SEND_DONE, &req->flags) || + (READ_ONCE(req->done) || test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { user_sdma_free_request(req, false); pq_update(pq); -- cgit v1.1 From e9c48ebd0cb4d31bbaf60ddec2a2fa40227b8cb5 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:50 -0700 Subject: IB/hfi1: Remove atomic SDMA_REQ_HAS_ERROR bit operation Atomic bit tests are used to single errors and the completion of request submissions. These operations don't need to be atomic and show to be expensive on the profile. Replace each atomic bit operation with a bool type and a READ_ONCE/WRITE_ONCE pairing. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6fb70f0..fcadbb9 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -153,10 +153,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -/* SDMA request flag bits */ -#define SDMA_REQ_HAS_ERROR 1 -#define SDMA_REQ_DONE_ERROR 2 - #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) #define SDMA_PKT_Q_DEFERRED BIT(2) @@ -232,7 +228,6 @@ struct user_sdma_request { /* Writeable fields shared with interrupt */ u64 seqcomp ____cacheline_aligned_in_smp; u64 seqsubmitted; - unsigned long flags; /* status of the last txreq completed */ int status; @@ -257,6 +252,7 @@ struct user_sdma_request { /* progress index moving along the iovs array */ u8 iov_idx; u8 done; + u8 has_error; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; } ____cacheline_aligned_in_smp; @@ -625,9 +621,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqnum = 0; req->seqcomp = 0; req->seqsubmitted = 0; - req->flags = 0; req->tids = NULL; req->done = 0; + req->has_error = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -814,7 +810,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret < 0) { if (ret != -EBUSY) { req->status = ret; - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); if (ACCESS_ONCE(req->seqcomp) == req->seqsubmitted - 1) goto free_req; @@ -916,10 +912,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) pq = req->pq; /* If tx completion has reported an error, we are done. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } /* * Check if we might have sent the entire request already @@ -942,10 +936,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) * with errors. If so, we are not going to process any * more packets from this request. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL); if (!tx) @@ -1566,7 +1558,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) { SDMA_DBG(req, "SDMA completion with error %d", status); - set_bit(SDMA_REQ_HAS_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); } req->seqcomp = tx->seqnum; @@ -1586,7 +1578,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && (READ_ONCE(req->done) || - test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { + READ_ONCE(req->has_error))) { user_sdma_free_request(req, false); pq_update(pq); set_comp_state(pq, cq, idx, ERROR, req->status); -- cgit v1.1 From b4e9e2f0fc87b876a4e2162733c1940e861785b1 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:57 -0700 Subject: IB/hfi1: Reclassify type of messages printed for platform config logic Reclassify messages printed out to /var/log/messages into warnings and errors to facilitate debugging in the future for issues related to the platform config logic. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 838fe84..cbda9b1 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -242,7 +242,7 @@ static int qual_power(struct hfi1_pportdata *ppd) if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) { - dd_dev_info( + dd_dev_err( ppd->dd, "%s: Port disabled due to system power restrictions\n", __func__); @@ -268,7 +268,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd) if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) { - dd_dev_info( + dd_dev_err( ppd->dd, "%s: Cable failed bitrate check, disabling port\n", __func__); @@ -709,15 +709,15 @@ static void apply_tunings( ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) - dd_dev_info(ppd->dd, - "%s: Failed set ext device config params\n", - __func__); + dd_dev_err(ppd->dd, + "%s: Failed set ext device config params\n", + __func__); } if (tx_preset_index == OPA_INVALID_INDEX) { if (ppd->port_type == PORT_TYPE_QSFP && limiting_active) - dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n", - __func__); + dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n", + __func__); return; } @@ -900,7 +900,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, case 0xD: /* fallthrough */ case 0xF: default: - dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n", + dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n", __func__); break; } @@ -942,7 +942,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) case PORT_TYPE_DISCONNECTED: ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED); - dd_dev_info(dd, "%s: Port disconnected, disabling port\n", + dd_dev_warn(dd, "%s: Port disconnected, disabling port\n", __func__); goto bail; case PORT_TYPE_FIXED: @@ -1027,7 +1027,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) } break; default: - dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); + dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__); ppd->port_type = PORT_TYPE_UNKNOWN; tuning_method = OPA_UNKNOWN_TUNING; total_atten = 0; -- cgit v1.1 From 90dba23e1e30af72dbf4379842a5161e811b26b8 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 26 May 2017 05:36:04 -0700 Subject: IB/hfi1: Fix up sdma_init function comment sdma_init does not take a number of sdma engine parameters, rather it initializes all of the sdma engines. Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index bfd0d51..d82ff57 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1340,10 +1340,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) * @dd: hfi1_devdata * @port: port number (currently only zero) * - * sdma_init initializes the specified number of engines. - * - * The code initializes each sde, its csrs. Interrupts - * are not required to be enabled. + * Initializes each sde and its csrs. + * Interrupts are not required to be enabled. * * Returns: * 0 - success, -errno on failure -- cgit v1.1 From bc54f6714c3a5d1f7ac6e7e5a5f7c390b1a01285 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:14 -0700 Subject: IB/hfi1: Ensure dd->gi_mask can not be overflowed As the code stands today the array access in remap_intr() is OK. To future proof the code though we should explicitly check to ensure the index value is not outside of the valid range. This is not a straight forward calculation so err on the side of caution. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9118618d..d6af715 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12832,7 +12832,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) /* clear from the handled mask of the general interrupt */ m = isrc / 64; n = isrc % 64; - dd->gi_mask[m] &= ~((u64)1 << n); + if (likely(m < CCE_NUM_INT_CSRS)) { + dd->gi_mask[m] &= ~((u64)1 << n); + } else { + dd_dev_err(dd, "remap interrupt err\n"); + return; + } /* direct the chip source to the given MSI-X interrupt */ m = isrc / 8; -- cgit v1.1 From 67838e64fa63415fe4e0da7149bcb123ed9f5612 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:46 -0700 Subject: IB/hfi1: Fix spelling mistake in linkdown reason Spell receive correctly in OPA_LINKDOWN_REASON_RCV_ERROR Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index d6af715..99c29dd 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6906,7 +6906,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd) static const char * const link_down_reason_strs[] = { [OPA_LINKDOWN_REASON_NONE] = "None", - [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0", + [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0", [OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length", [OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long", [OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short", -- cgit v1.1 From 52d86e72c515ebd4fb0d329470aa50698e28fb36 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:19:46 -0700 Subject: IB/hfi1: Remove subtraction of uninitialized value In process_receive_packet the packet header field is used to calculate the length of the packet. However this is not necessarily setup. In fact only if the ECN prescan is enabled will the packet header be valid at this point. The code works as is because we do not do anything with the packet length at this point in the packet processing. The length and header are setup correctly in hfi1_setup_ib_header which is called by the following sequence: process_receive_packet() -> rhf_receieve_function_map[]() --> process_receive_ib() ---> hfi1_setup_9B_packet() ----> hfi1_setup_ib_header() Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a1022e..9e59430 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -703,7 +703,6 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) packet->etype = rhf_rcv_type(packet->rhf); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; /* total length */ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ /* retrieve eager buffer details */ -- cgit v1.1 From f1685179820bb7f9d9c4f5d0ac9bfb269529a919 Mon Sep 17 00:00:00 2001 From: Neel Desai Date: Mon, 29 May 2017 17:20:27 -0700 Subject: IB/hfi1: Add error checking for buffer overrun in OPA aggregate Improve safety of code by checking the size of the data buffer and prevent buffer overrun Reviewed-by: Dennis Dalessandro Reviewed-by: Brian Welty Signed-off-by: Neel Desai Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 296 ++++++++++++++++++++++++--------------- 1 file changed, 185 insertions(+), 111 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 70831ad..b180dff 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -59,6 +59,14 @@ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff #define OPA_LINK_WIDTH_RESET 0xffff +static int smp_length_check(u32 data_size, u32 request_len) +{ + if (unlikely(request_len < data_size)) + return -EINVAL; + + return 0; +} + static int reply(struct ib_mad_hdr *smp) { /* @@ -308,11 +316,11 @@ void hfi1_node_desc_chg(struct hfi1_ibport *ibp) static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { struct opa_node_description *nd; - if (am) { + if (am || smp_length_check(sizeof(*nd), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -329,7 +337,7 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_node_info *ni; struct hfi1_devdata *dd = dd_from_ibdev(ibdev); @@ -339,6 +347,7 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, /* GUID 0 is illegal */ if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 || + smp_length_check(sizeof(*ni), max_len) || get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -520,7 +529,7 @@ void read_ltp_rtt(struct hfi1_devdata *dd) static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int i; struct hfi1_devdata *dd; @@ -536,7 +545,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, u32 buffer_units; u64 tmp = 0; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -745,7 +754,7 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_req = OPA_AM_NBLK(am); @@ -768,6 +777,11 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16); + if (smp_length_check(size, max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + if (start_block + n_blocks_req > n_blocks_avail || n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) { pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; " @@ -1071,7 +1085,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, */ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_port_info *pi = (struct opa_port_info *)data; struct ib_event event; @@ -1092,7 +1106,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, int ret, i, invalid = 0, call_set_mtu = 0; int call_link_downgrade_policy = 0; - if (num_ports != 1) { + if (num_ports != 1 || + smp_length_check(sizeof(*pi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1343,7 +1358,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, if (ret) return ret; - ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len); + ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, + max_len); /* restore re-reg bit per o14-12.2.1 */ pi->clientrereg_subnettimeout |= clientrereg; @@ -1360,7 +1376,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, return ret; get_only: - return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, + max_len); } /** @@ -1421,7 +1438,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_sent = OPA_AM_NBLK(am); @@ -1431,6 +1448,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, int i; u16 n_blocks_avail; unsigned npkeys = hfi1_get_npkeys(dd); + u32 size = 0; if (n_blocks_sent == 0) { pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n", @@ -1441,6 +1459,13 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1; + size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE); + + if (smp_length_check(size, max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + if (start_block + n_blocks_sent > n_blocks_avail || n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) { pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n", @@ -1458,7 +1483,8 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len, + max_len); } #define ILLEGAL_VL 12 @@ -1519,14 +1545,14 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */ unsigned i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1542,14 +1568,15 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; + size_t size = ARRAY_SIZE(ibp->sl_to_sc); int i; u8 sc; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1564,19 +1591,20 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, } } - return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */ unsigned i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1592,13 +1620,14 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); + size_t size = ARRAY_SIZE(ibp->sc_to_sl); u8 *p = data; int i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1606,19 +1635,20 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++) ibp->sc_to_sl[i] = *p++; - return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NBLK(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); void *vp = (void *)data; size_t size = 4 * sizeof(u64); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1633,7 +1663,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NBLK(am); int async_update = OPA_AM_ASYNC(am); @@ -1641,8 +1671,15 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, void *vp = (void *)data; struct hfi1_pportdata *ppd; int lstate; + /* + * set_sc2vlt_tables writes the information contained in *data + * to four 64-bit registers SendSC2VLt[0-3]. We need to make + * sure *max_len is not greater than the total size of the four + * SendSC2VLt[0-3] registers. + */ + size_t size = 4 * sizeof(u64); - if (n_blocks != 1 || async_update) { + if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1662,27 +1699,28 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, set_sc2vlt_tables(dd, vp); - return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; void *vp = (void *)data; - int size; + int size = sizeof(struct sc2vlnt); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } ppd = dd->pport + (port - 1); - size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp); + fm_get_table(ppd, FM_TBL_SC2VLNT, vp); if (resp_len) *resp_len += size; @@ -1692,15 +1730,16 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; void *vp = (void *)data; int lstate; + int size = sizeof(struct sc2vlnt); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1718,12 +1757,12 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, fm_set_table(ppd, FM_TBL_SC2VLNT, vp); return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); } static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -1732,7 +1771,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct hfi1_pportdata *ppd; struct opa_port_state_info *psi = (struct opa_port_state_info *)data; - if (nports != 1) { + if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1765,7 +1804,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -1776,7 +1815,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct opa_port_state_info *psi = (struct opa_port_state_info *)data; int ret, invalid = 0; - if (nports != 1) { + if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1806,19 +1845,21 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, if (invalid) smp->status |= IB_SMP_INVALID_FIELD; - return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 addr = OPA_AM_CI_ADDR(am); u32 len = OPA_AM_CI_LEN(am) + 1; int ret; - if (dd->pport->port_type != PORT_TYPE_QSFP) { + if (dd->pport->port_type != PORT_TYPE_QSFP || + smp_length_check(len, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1861,21 +1902,22 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, u8 port, u32 *resp_len, + u32 max_len) { u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; struct buffer_control *p = (struct buffer_control *)data; - int size; + int size = sizeof(struct buffer_control); - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } ppd = dd->pport + (port - 1); - size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p); + fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p); trace_bct_get(dd, p); if (resp_len) *resp_len += size; @@ -1884,14 +1926,15 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, u8 port, u32 *resp_len, + u32 max_len) { u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; struct buffer_control *p = (struct buffer_control *)data; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1902,41 +1945,43 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); u32 num_ports = OPA_AM_NPORT(am); u8 section = (am & 0x00ff0000) >> 16; u8 *p = data; - int size = 0; + int size = 256; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } switch (section) { case OPA_VLARB_LOW_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p); + fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p); break; case OPA_VLARB_HIGH_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p); + fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p); break; case OPA_VLARB_PREEMPT_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p); + fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p); break; case OPA_VLARB_PREEMPT_MATRIX: - size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p); + fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p); break; default: pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n", be32_to_cpu(smp->attr_mod)); smp->status |= IB_SMP_INVALID_FIELD; + size = 0; break; } @@ -1948,14 +1993,15 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); u32 num_ports = OPA_AM_NPORT(am); u8 section = (am & 0x00ff0000) >> 16; u8 *p = data; + int size = 256; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1983,7 +2029,8 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, break; } - return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len, + max_len); } struct opa_pma_mad { @@ -3279,13 +3326,18 @@ struct opa_congestion_info_attr { static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_congestion_info_attr *p = (struct opa_congestion_info_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + p->congestion_info = 0; p->control_table_cap = ppd->cc_max_table_entries; p->congestion_log_length = OPA_CONG_LOG_ELEMS; @@ -3298,7 +3350,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { int i; struct opa_congestion_setting_attr *p = @@ -3308,6 +3360,11 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, struct opa_congestion_setting_entry_shadow *entries; struct cc_state *cc_state; + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + rcu_read_lock(); cc_state = get_cc_state(ppd); @@ -3382,7 +3439,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_congestion_setting_attr *p = (struct opa_congestion_setting_attr *)data; @@ -3391,6 +3448,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct opa_congestion_setting_entry_shadow *entries; int i; + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + /* * Save details from packet into the ppd. Hold the cc_state_lock so * our information is consistent with anyone trying to apply the state. @@ -3412,12 +3474,12 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, apply_cc_state(ppd); return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); } static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -3425,7 +3487,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, s64 ts; int i; - if (am != 0) { + if (am || smp_length_check(sizeof(*cong_log), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3483,7 +3545,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct ib_cc_table_attr *cc_table_attr = (struct ib_cc_table_attr *)data; @@ -3495,9 +3557,10 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; struct cc_state *cc_state; + u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); /* sanity check n_blocks, start_block */ - if (n_blocks == 0 || + if (n_blocks == 0 || smp_length_check(size, max_len) || start_block + n_blocks > ppd->cc_max_table_entries) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -3527,14 +3590,14 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, rcu_read_unlock(); if (resp_len) - *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); + *resp_len += size; return reply((struct ib_mad_hdr *)smp); } static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3545,9 +3608,10 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; u16 ccti_limit; + u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); /* sanity check n_blocks, start_block */ - if (n_blocks == 0 || + if (n_blocks == 0 || smp_length_check(size, max_len) || start_block + n_blocks > ppd->cc_max_table_entries) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -3578,7 +3642,8 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, /* now apply the information */ apply_cc_state(ppd); - return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len, + max_len); } struct opa_led_info { @@ -3591,7 +3656,7 @@ struct opa_led_info { static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd = dd->pport; @@ -3599,7 +3664,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, u32 nport = OPA_AM_NPORT(am); u32 is_beaconing_active; - if (nport != 1) { + if (nport != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3621,14 +3686,14 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct opa_led_info *p = (struct opa_led_info *)data; u32 nport = OPA_AM_NPORT(am); int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK); - if (nport != 1) { + if (nport != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3638,12 +3703,13 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, else shutdown_led_override(dd->pport); - return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len, + max_len); } static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3651,71 +3717,71 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_NODE_DESC: ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_NODE_INFO: ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PORT_INFO: ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SL_TO_SC_MAP: ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_SL_MAP: ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLT_MAP: ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_get_opa_psi(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_get_opa_bct(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_CABLE_INFO: ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_VL_ARB_TABLE: ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_INFO: ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING: ret = __subn_get_opa_cong_setting(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_LOG: ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE: ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_LED_INFO: ret = __subn_get_opa_led_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_SM_INFO: if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) @@ -3733,7 +3799,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3741,51 +3807,51 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_PORT_INFO: ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SL_TO_SC_MAP: ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_SL_MAP: ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLT_MAP: ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_set_opa_psi(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_set_opa_bct(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_VL_ARB_TABLE: ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING: ret = __subn_set_opa_cong_setting(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE: ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_LED_INFO: ret = __subn_set_opa_led_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_SM_INFO: if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) @@ -3841,7 +3907,10 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, memset(next_smp + sizeof(*agg), 0, agg_data_len); (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL); + ibdev, port, NULL, (u32)agg_data_len); + + if (smp->status & IB_SMP_INVALID_FIELD) + break; if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); return reply((struct ib_mad_hdr *)smp); @@ -3884,7 +3953,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, } (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL); + ibdev, port, NULL, (u32)agg_data_len); + if (smp->status & IB_SMP_INVALID_FIELD) + break; if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); return reply((struct ib_mad_hdr *)smp); @@ -3994,12 +4065,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, struct opa_smp *smp = (struct opa_smp *)out_mad; struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *data; - u32 am; + u32 am, data_size; __be16 attr_id; int ret; *out_mad = *in_mad; data = opa_get_smp_data(smp); + data_size = (u32)opa_get_smp_data_size(smp); am = be32_to_cpu(smp->attr_mod); attr_id = smp->attr_id; @@ -4043,7 +4115,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, default: clear_opa_smp_data(smp); ret = subn_get_opa_sma(attr_id, smp, am, data, - ibdev, port, resp_len); + ibdev, port, resp_len, + data_size); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_get_opa_aggregate(smp, ibdev, port, @@ -4055,7 +4128,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, switch (attr_id) { default: ret = subn_set_opa_sma(attr_id, smp, am, data, - ibdev, port, resp_len); + ibdev, port, resp_len, + data_size); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_set_opa_aggregate(smp, ibdev, port, -- cgit v1.1 From bec7c79cd8f764ba84c8ec6d8c402b8a7cd3a54f Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Mon, 29 May 2017 17:21:32 -0700 Subject: IB/hfi1: Modify handling of physical link state by Host Driver Ensure states returned to the Fabric Manager are consistent with the OPA specification by caching the physical state along with the logical state. Reviewed-by: Stuart Summers Reviewed-by: Ira Weiny Reviewed-by: Andrzej Kotlowski Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 102 +++++++++++++++++++++++++----------- drivers/infiniband/hw/hfi1/chip.h | 2 +- drivers/infiniband/hw/hfi1/driver.c | 8 +-- drivers/infiniband/hw/hfi1/hfi.h | 18 ++++++- drivers/infiniband/hw/hfi1/mad.c | 6 +-- drivers/infiniband/hw/hfi1/verbs.c | 2 +- 6 files changed, 97 insertions(+), 41 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 99c29dd..3fae98d0 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1066,6 +1066,8 @@ static int thermal_init(struct hfi1_devdata *dd); static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10028,28 +10030,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) sdma_update_lmc(dd, mask, ppd->lid); } -static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs) -{ - unsigned long timeout; - u32 curr_state; - - timeout = jiffies + msecs_to_jiffies(msecs); - while (1) { - curr_state = read_physical_state(dd); - if (curr_state == state) - break; - if (time_after(jiffies, timeout)) { - dd_dev_err(dd, - "timeout waiting for phy link state 0x%x, current state is 0x%x\n", - state, curr_state); - return -ETIMEDOUT; - } - usleep_range(1950, 2050); /* sleep 2ms-ish */ - } - - return 0; -} - static const char *state_completed_string(u32 completed) { static const char * const state_completed[] = { @@ -10283,7 +10263,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (do_wait) { /* it can take a while for the link to go down */ - ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000); + ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); if (ret < 0) return ret; } @@ -10536,6 +10516,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) goto unexpected; } + /* + * Wait for Link_Up physical state. + * Physical and Logical states should already be + * be transitioned to LinkUp and LinkInit respectively. + */ + ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000); + if (ret) { + dd_dev_err(dd, + "%s: physical state did not change to LINK-UP\n", + __func__); + break; + } + ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000); if (ret) { dd_dev_err(dd, @@ -10649,6 +10642,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) */ if (ret) goto_offline(ppd, 0); + else + cache_physical_state(ppd); break; case HLS_DN_DISABLE: /* link is disabled */ @@ -10673,6 +10668,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; break; } + ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000); + if (ret) { + dd_dev_err(dd, + "%s: physical state did not change to DISABLED\n", + __func__); + break; + } dc_shutdown(dd); } ppd->host_link_state = HLS_DN_DISABLE; @@ -10690,6 +10692,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_DN_POLL) goto unexpected; ppd->host_link_state = HLS_VERIFY_CAP; + cache_physical_state(ppd); break; case HLS_GOING_UP: if (ppd->host_link_state != HLS_VERIFY_CAP) @@ -12663,21 +12666,56 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, return -ETIMEDOUT; } -u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) +/* + * Read the physical hardware link state and set the driver's cached value + * of it. + */ +void cache_physical_state(struct hfi1_pportdata *ppd) { - u32 pstate; + u32 read_pstate; u32 ib_pstate; - pstate = read_physical_state(ppd->dd); - ib_pstate = chip_to_opa_pstate(ppd->dd, pstate); - if (ppd->last_pstate != ib_pstate) { + read_pstate = read_physical_state(ppd->dd); + ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate); + /* check if OPA pstate changed */ + if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) { dd_dev_info(ppd->dd, "%s: physical state changed to %s (0x%x), phy 0x%x\n", __func__, opa_pstate_name(ib_pstate), ib_pstate, - pstate); - ppd->last_pstate = ib_pstate; + read_pstate); + } + ppd->pstate = read_pstate; +} + +/* + * wait_physical_linkstate - wait for an physical link state change to occur + * @ppd: port device + * @state: the state to wait for + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for physical link state change to occur. + * Returns 0 if state reached, otherwise -ETIMEDOUT. + */ +static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, + int msecs) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + cache_physical_state(ppd); + if (ppd->pstate == state) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link state 0x%x, current state is 0x%x\n", + state, ppd->pstate); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ } - return ib_pstate; + + return 0; } #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ @@ -14781,7 +14819,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* start in offline */ ppd->host_link_state = HLS_DN_OFFLINE; init_vl_arb_caches(ppd); - ppd->last_pstate = 0xff; /* invalid value */ + ppd->pstate = PLS_OFFLINE; } dd->link_default = HLS_DN_POLL; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 0b4f418..3dab315 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -744,6 +744,7 @@ int is_bx(struct hfi1_devdata *dd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); u32 get_logical_state(struct hfi1_pportdata *ppd); +void cache_physical_state(struct hfi1_pportdata *ppd); const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); u32 driver_physical_state(struct hfi1_pportdata *ppd); @@ -1354,7 +1355,6 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); -u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 9e59430..e64e9e2 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -906,10 +906,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, sc = hfi1_9B_get_sc5(hdr, packet->rhf); } if (sc != SC15_PACKET) { - int hwstate = read_logical_state(dd); + int hwstate = driver_lstate(rcd->ppd); - if (hwstate != LSTATE_ACTIVE) { - dd_dev_info(dd, "Unexpected link state %d\n", hwstate); + if (hwstate != IB_PORT_ACTIVE) { + dd_dev_info(dd, + "Unexpected link state %s\n", + opa_lstate_name(hwstate)); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 8f74cf6..bca781c 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -663,7 +663,7 @@ struct hfi1_pportdata { u8 link_enabled; /* link enabled? */ u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ - u8 last_pstate; /* info only */ + u8 pstate; /* info only */ u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ @@ -1330,6 +1330,22 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) return ppd->lstate; } +/* return the driver's idea of the physical OPA port state */ +static inline u32 driver_pstate(struct hfi1_pportdata *ppd) +{ + /* + * The driver does some processing from the time the physical + * link state is at LINKUP to the time the SM can be notified + * as such. Return IB_PORTPHYSSTATE_TRAINING until the software + * state is ready. + */ + if (ppd->pstate == PLS_LINKUP && + !(ppd->host_link_state & HLS_UP)) + return IB_PORTPHYSSTATE_TRAINING; + else + return chip_to_opa_pstate(ppd->dd, ppd->pstate); +} + void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index b180dff..c8daf63 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -113,7 +113,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) return; /* o14-3.2.1 */ - if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE) + if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) return; /* o14-2 */ @@ -615,7 +615,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ppd->offline_disabled_reason; pi->port_states.portphysstate_portstate = - (hfi1_ibphys_portstate(ppd) << 4) | state; + (driver_pstate(ppd) << 4) | state; pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc; @@ -1791,7 +1791,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, ppd->offline_disabled_reason; psi->port_states.portphysstate_portstate = - (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); + (driver_pstate(ppd) << 4) | (lstate & 0xf); psi->link_width_downgrade_tx_active = cpu_to_be16(ppd->link_width_downgrade_tx_active); psi->link_width_downgrade_rx_active = diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2d7759f..5b53faf 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1354,7 +1354,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->lmc = ppd->lmc; /* OPA logical states match IB logical states */ props->state = driver_lstate(ppd); - props->phys_state = hfi1_ibphys_portstate(ppd); + props->phys_state = driver_pstate(ppd); props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ -- cgit v1.1 From 13d84914db56c1afd1c9bf4f41e9bf91f061a7dd Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:22:01 -0700 Subject: IB/hfi1,qib: Do not send QKey trap for UD qps According to IBTA spec a QKey violation should not result in a bad qkey trap being triggered for UD queue pairs. Also since it is a silent error we do not increment the q_key violation or the dropped packet counters. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 14 +++++--------- drivers/infiniband/hw/hfi1/ruc.c | 8 ++++---- drivers/infiniband/hw/hfi1/ud.c | 37 ++++++++++++------------------------- drivers/infiniband/hw/hfi1/verbs.h | 4 ++-- 4 files changed, 23 insertions(+), 40 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index c8daf63..a081a98 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -180,10 +180,10 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) } /* - * Send a bad [PQ]_Key trap (ch. 14.3.8). + * Send a bad P_Key trap (ch. 14.3.8). */ -void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2) +void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, u16 lid1, u16 lid2) { struct opa_mad_notice_attr data; u32 lid = ppd_from_ibp(ibp)->lid; @@ -191,17 +191,13 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, u32 _lid2 = lid2; memset(&data, 0, sizeof(data)); - - if (trap_num == OPA_TRAP_BAD_P_KEY) - ibp->rvp.pkey_violations++; - else - ibp->rvp.qkey_violations++; ibp->rvp.n_pkt_drops++; + ibp->rvp.pkey_violations++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = trap_num; + data.trap_num = OPA_TRAP_BAD_P_KEY; data.issuer_lid = cpu_to_be32(lid); data.ntc_257_258.lid1 = cpu_to_be32(_lid1); data.ntc_257_258.lid2 = cpu_to_be32(_lid2); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 476fe5d..9cf506a 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -254,8 +254,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ @@ -290,8 +290,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index c995aa5..6bf7a1b 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -110,10 +110,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ((1 << ppd->lmc) - 1)); if (unlikely(ingress_pkey_check(ppd, pkey, sc5, qp->s_pkey_index, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - slid, rdma_ah_get_dlid(ah_attr)); + hfi1_bad_pkey(ibp, pkey, + rdma_ah_get_sl(ah_attr), + sqp->ibqp.qp_num, qp->ibqp.qp_num, + slid, rdma_ah_get_dlid(ah_attr)); goto drop; } } @@ -128,18 +128,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qkey = (int)swqe->ud_wr.remote_qkey < 0 ? sqp->qkey : swqe->ud_wr.remote_qkey; - if (unlikely(qkey != qp->qkey)) { - u16 lid; - - lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - lid, - rdma_ah_get_dlid(ah_attr)); - goto drop; - } + if (unlikely(qkey != qp->qkey)) + goto drop; /* silently drop per IBTA spec */ } /* @@ -722,10 +712,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * for invalid pkeys is optional according to * IB spec (release 1.3, section 10.9.4) */ - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - pkey, sl, - src_qp, qp->ibqp.qp_num, - slid, dlid); + hfi1_bad_pkey(ibp, + pkey, sl, + src_qp, qp->ibqp.qp_num, + slid, dlid); return; } } else { @@ -734,12 +724,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (mgmt_pkey_idx < 0) goto drop; } - if (unlikely(qkey != qp->qkey)) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl, - src_qp, qp->ibqp.qp_num, - slid, dlid); + if (unlikely(qkey != qp->qkey)) /* Silent drop */ return; - } + /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && (tlen > 2048 || (sc5 == 0xF)))) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 17b38cd..fdf1e1f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -236,8 +236,8 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) /* * This must be called with s_lock held. */ -void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2); +void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, u16 lid1, u16 lid2); void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); -- cgit v1.1 From ddbf2efff4ad85135b9fd1cb53b10069a160bd58 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 9 Jun 2017 15:59:26 -0700 Subject: IB/hfi1: Fix DC 8051 host info flag array Fix info array of host message flags by adding entry for link width downgrade and reverse values for BC SMA and BC PWR_MSG messages Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3fae98d0..ebcb2c4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1012,14 +1012,15 @@ static struct flag_table dc8051_info_err_flags[] = { */ static struct flag_table dc8051_info_host_msg_flags[] = { FLAG_ENTRY0("Host request done", 0x0001), - FLAG_ENTRY0("BC SMA message", 0x0002), - FLAG_ENTRY0("BC PWR_MGM message", 0x0004), + FLAG_ENTRY0("BC PWR_MGM message", 0x0002), + FLAG_ENTRY0("BC SMA message", 0x0004), FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008), FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010), FLAG_ENTRY0("External device config request", 0x0020), FLAG_ENTRY0("VerifyCap all frames received", 0x0040), FLAG_ENTRY0("LinkUp achieved", 0x0080), FLAG_ENTRY0("Link going down", 0x0100), + FLAG_ENTRY0("Link width downgraded", 0x0200), }; static u32 encoded_size(u32 size); -- cgit v1.1 From 702265fc0002497fd0ddaae4a5bec00a8a40da3e Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 15:59:33 -0700 Subject: IB/hfi1: Set proper logging levels on QSFP cable error events Change QSFP cable error events logging levels from info to error. Reviewed-by: Jakub Byczkowski Reviewed-by: Dennis Dalessandro Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 64 +++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ebcb2c4..b8ee0e2 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9376,13 +9376,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable temperature too high\n", - __func__); + dd_dev_err(dd, "%s: QSFP cable temperature too high\n", + __func__); if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable temperature too low\n", - __func__); + dd_dev_err(dd, "%s: QSFP cable temperature too low\n", + __func__); /* * The remaining alarms/warnings don't matter if the link is down. @@ -9392,75 +9392,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) - dd_dev_info(dd, "%s: QSFP supply voltage too high\n", - __func__); + dd_dev_err(dd, "%s: QSFP supply voltage too high\n", + __func__); if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING)) - dd_dev_info(dd, "%s: QSFP supply voltage too low\n", - __func__); + dd_dev_err(dd, "%s: QSFP supply voltage too low\n", + __func__); /* Byte 2 is vendor specific */ if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) || (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) || (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) || (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) || (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n", + __func__); /* Bytes 9-10 and 11-12 are reserved */ /* Bytes 13-15 are vendor specific */ -- cgit v1.1 From 9c1a99c3882beb9e88ed41d914e75bab2d593926 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:40 -0700 Subject: IB/hfi1: Create common expected receive verbs/PSM code Declarations and code in common between verbs and PSM are now moved to exp_rcv.[ch]. Reviewed-by: Michael J. Ruhl Reviewed-by: Mitko Haralanov Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Makefile | 2 +- drivers/infiniband/hw/hfi1/exp_rcv.c | 118 +++++++++++++++++++ drivers/infiniband/hw/hfi1/exp_rcv.h | 187 ++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/file_ops.c | 4 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 121 ------------------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 23 +--- drivers/infiniband/hw/hfi1/user_sdma.c | 38 ------ 7 files changed, 309 insertions(+), 184 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/exp_rcv.c create mode 100644 drivers/infiniband/hw/hfi1/exp_rcv.h (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 88085f6..66d538c 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ - eprom.o file_ops.o firmware.o \ + eprom.o exp_rcv.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c new file mode 100644 index 0000000..08d13ed --- /dev/null +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -0,0 +1,118 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "exp_rcv.h" +#include "trace.h" + +/** + * exp_tid_group_init - initialize exp_tid_set + * @set - the set + */ +void hfi1_exp_tid_group_init(struct exp_tid_set *set) +{ + INIT_LIST_HEAD(&set->list); + set->count = 0; +} + +/** + * alloc_ctxt_rcv_groups - initialize expected receive groups + * @rcd - the context to add the groupings to + */ +int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + u32 tidbase; + struct tid_group *grp; + int i; + + hfi1_exp_tid_group_init(&rcd->tid_group_list); + hfi1_exp_tid_group_init(&rcd->tid_used_list); + hfi1_exp_tid_group_init(&rcd->tid_full_list); + + tidbase = rcd->expected_base; + for (i = 0; i < rcd->expected_count / + dd->rcv_entries.group_size; i++) { + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) + goto bail; + grp->size = dd->rcv_entries.group_size; + grp->base = tidbase; + tid_group_add_tail(grp, &rcd->tid_group_list); + tidbase += dd->rcv_entries.group_size; + } + + return 0; +bail: + hfi1_free_ctxt_rcv_groups(rcd); + return -ENOMEM; +} + +/** + * free_ctxt_rcv_groups - free expected receive groups + * @rcd - the context to free + * + * The routine dismantles the expect receive linked + * list and clears any tids associated with the receive + * context. + * + * This should only be called for kernel contexts and the + * a base user context. + */ +void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) +{ + struct tid_group *grp, *gptr; + + WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); + WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); + + list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) { + tid_group_remove(grp, &rcd->tid_group_list); + kfree(grp); + } + + hfi1_clear_tids(rcd); +} diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h new file mode 100644 index 0000000..c7d02bcd --- /dev/null +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -0,0 +1,187 @@ +#ifndef _HFI1_EXP_RCV_H +#define _HFI1_EXP_RCV_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "hfi.h" + +#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) + +#define EXP_TID_TIDLEN_MASK 0x7FFULL +#define EXP_TID_TIDLEN_SHIFT 0 +#define EXP_TID_TIDCTRL_MASK 0x3ULL +#define EXP_TID_TIDCTRL_SHIFT 20 +#define EXP_TID_TIDIDX_MASK 0x3FFULL +#define EXP_TID_TIDIDX_SHIFT 22 +#define EXP_TID_GET(tid, field) \ + (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK) + +#define EXP_TID_SET(field, value) \ + (((value) & EXP_TID_TID##field##_MASK) << \ + EXP_TID_TID##field##_SHIFT) +#define EXP_TID_CLEAR(tid, field) ({ \ + (tid) &= ~(EXP_TID_TID##field##_MASK << \ + EXP_TID_TID##field##_SHIFT); \ + }) +#define EXP_TID_RESET(tid, field, value) do { \ + EXP_TID_CLEAR(tid, field); \ + (tid) |= EXP_TID_SET(field, (value)); \ + } while (0) + +/* + * Define fields in the KDETH header so we can update the header + * template. + */ +#define KDETH_OFFSET_SHIFT 0 +#define KDETH_OFFSET_MASK 0x7fff +#define KDETH_OM_SHIFT 15 +#define KDETH_OM_MASK 0x1 +#define KDETH_TID_SHIFT 16 +#define KDETH_TID_MASK 0x3ff +#define KDETH_TIDCTRL_SHIFT 26 +#define KDETH_TIDCTRL_MASK 0x3 +#define KDETH_INTR_SHIFT 28 +#define KDETH_INTR_MASK 0x1 +#define KDETH_SH_SHIFT 29 +#define KDETH_SH_MASK 0x1 +#define KDETH_KVER_SHIFT 30 +#define KDETH_KVER_MASK 0x3 +#define KDETH_JKEY_SHIFT 0x0 +#define KDETH_JKEY_MASK 0xff +#define KDETH_HCRC_UPPER_SHIFT 16 +#define KDETH_HCRC_UPPER_MASK 0xff +#define KDETH_HCRC_LOWER_SHIFT 24 +#define KDETH_HCRC_LOWER_MASK 0xff + +#define KDETH_GET(val, field) \ + (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) +#define KDETH_SET(dw, field, val) do { \ + u32 dwval = le32_to_cpu(dw); \ + dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ + dwval |= (((val) & KDETH_##field##_MASK) << \ + KDETH_##field##_SHIFT); \ + dw = cpu_to_le32(dwval); \ + } while (0) + +#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); }) + +/* KDETH OM multipliers and switch over point */ +#define KDETH_OM_SMALL 4 +#define KDETH_OM_SMALL_SHIFT 2 +#define KDETH_OM_LARGE 64 +#define KDETH_OM_LARGE_SHIFT 6 +#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) + +struct tid_group { + struct list_head list; + u32 base; + u8 size; + u8 used; + u8 map; +}; + +/* + * Write an "empty" RcvArray entry. + * This function exists so the TID registaration code can use it + * to write to unused/unneeded entries and still take advantage + * of the WC performance improvements. The HFI will ignore this + * write to the RcvArray entry. + */ +static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) +{ + /* + * Doing the WC fill writes only makes sense if the device is + * present and the RcvArray has been mapped as WC memory. + */ + if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) + writeq(0, dd->rcvarray_wc + (index * 8)); +} + +static inline void tid_group_add_tail(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_add_tail(&grp->list, &set->list); + set->count++; +} + +static inline void tid_group_remove(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_del_init(&grp->list); + set->count--; +} + +static inline void tid_group_move(struct tid_group *group, + struct exp_tid_set *s1, + struct exp_tid_set *s2) +{ + tid_group_remove(group, s1); + tid_group_add_tail(group, s2); +} + +static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) +{ + struct tid_group *grp = + list_first_entry(&set->list, struct tid_group, list); + list_del_init(&grp->list); + set->count--; + return grp; +} + +static inline u32 rcventry2tidinfo(u32 rcventry) +{ + u32 pair = rcventry & ~0x1; + + return EXP_TID_SET(IDX, pair >> 1) | + EXP_TID_SET(CTRL, 1 << (rcventry - pair)); +} + +int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); +void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); +void hfi1_exp_tid_group_init(struct exp_tid_set *set); + +#endif /* _HFI1_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3158128..2dd8758 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -804,7 +804,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; - hfi1_user_exp_rcv_grp_free(uctxt); + hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->rcvwait_to = 0; @@ -1260,7 +1260,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) if (ret) goto setup_failed; - ret = hfi1_user_exp_rcv_grp_init(fd); + ret = hfi1_alloc_ctxt_rcv_groups(uctxt); if (ret) goto setup_failed; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index a8f0aa4..6318e6c 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -51,14 +51,6 @@ #include "trace.h" #include "mmu_rb.h" -struct tid_group { - struct list_head list; - u32 base; - u8 size; - u8 used; - u8 map; -}; - struct tid_rb_node { struct mmu_rb_node mmu; unsigned long phys; @@ -75,8 +67,6 @@ struct tid_pageset { u16 count; }; -#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) - #define num_user_pages(vaddr, len) \ (1 + (((((unsigned long)(vaddr) + \ (unsigned long)(len) - 1) & PAGE_MASK) - \ @@ -109,88 +99,6 @@ static struct mmu_rb_ops tid_rb_ops = { .invalidate = tid_rb_invalidate }; -static inline u32 rcventry2tidinfo(u32 rcventry) -{ - u32 pair = rcventry & ~0x1; - - return EXP_TID_SET(IDX, pair >> 1) | - EXP_TID_SET(CTRL, 1 << (rcventry - pair)); -} - -static inline void exp_tid_group_init(struct exp_tid_set *set) -{ - INIT_LIST_HEAD(&set->list); - set->count = 0; -} - -static inline void tid_group_remove(struct tid_group *grp, - struct exp_tid_set *set) -{ - list_del_init(&grp->list); - set->count--; -} - -static inline void tid_group_add_tail(struct tid_group *grp, - struct exp_tid_set *set) -{ - list_add_tail(&grp->list, &set->list); - set->count++; -} - -static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) -{ - struct tid_group *grp = - list_first_entry(&set->list, struct tid_group, list); - list_del_init(&grp->list); - set->count--; - return grp; -} - -static inline void tid_group_move(struct tid_group *group, - struct exp_tid_set *s1, - struct exp_tid_set *s2) -{ - tid_group_remove(group, s1); - tid_group_add_tail(group, s2); -} - -int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) -{ - struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_devdata *dd = fd->dd; - u32 tidbase; - u32 i; - struct tid_group *grp, *gptr; - - exp_tid_group_init(&uctxt->tid_group_list); - exp_tid_group_init(&uctxt->tid_used_list); - exp_tid_group_init(&uctxt->tid_full_list); - - tidbase = uctxt->expected_base; - for (i = 0; i < uctxt->expected_count / - dd->rcv_entries.group_size; i++) { - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) - goto grp_failed; - - grp->size = dd->rcv_entries.group_size; - grp->base = tidbase; - tid_group_add_tail(grp, &uctxt->tid_group_list); - tidbase += dd->rcv_entries.group_size; - } - - return 0; - -grp_failed: - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - - return -ENOMEM; -} - /* * Initialize context and file private data needed for Expected * receive caching. This needs to be done after the context has @@ -266,18 +174,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) return ret; } -void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) -{ - struct tid_group *grp, *gptr; - - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - hfi1_clear_tids(uctxt); -} - void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; @@ -303,23 +199,6 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) } /* - * Write an "empty" RcvArray entry. - * This function exists so the TID registaration code can use it - * to write to unused/unneeded entries and still take advantage - * of the WC performance improvements. The HFI will ignore this - * write to the RcvArray entry. - */ -static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) -{ - /* - * Doing the WC fill writes only makes sense if the device is - * present and the RcvArray has been mapped as WC memory. - */ - if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) - writeq(0, dd->rcvarray_wc + (index * 8)); -} - -/* * RcvArray entry allocation for Expected Receives is done by the * following algorithm: * diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 5250c89..1bdc61b 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -49,29 +49,8 @@ #include "hfi.h" -#define EXP_TID_TIDLEN_MASK 0x7FFULL -#define EXP_TID_TIDLEN_SHIFT 0 -#define EXP_TID_TIDCTRL_MASK 0x3ULL -#define EXP_TID_TIDCTRL_SHIFT 20 -#define EXP_TID_TIDIDX_MASK 0x3FFULL -#define EXP_TID_TIDIDX_SHIFT 22 -#define EXP_TID_GET(tid, field) \ - (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK) +#include "exp_rcv.h" -#define EXP_TID_SET(field, value) \ - (((value) & EXP_TID_TID##field##_MASK) << \ - EXP_TID_TID##field##_SHIFT) -#define EXP_TID_CLEAR(tid, field) ({ \ - (tid) &= ~(EXP_TID_TID##field##_MASK << \ - EXP_TID_TID##field##_SHIFT); \ - }) -#define EXP_TID_RESET(tid, field, value) do { \ - EXP_TID_CLEAR(tid, field); \ - (tid) |= EXP_TID_SET(field, (value)); \ - } while (0) - -void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); -int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index fcadbb9..8f7cfdd 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -94,27 +94,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* Number of BTH.PSN bits used for sequence number in expected rcvs */ #define BTH_SEQ_MASK 0x7ffull -/* - * Define fields in the KDETH header so we can update the header - * template. - */ -#define KDETH_OFFSET_SHIFT 0 -#define KDETH_OFFSET_MASK 0x7fff -#define KDETH_OM_SHIFT 15 -#define KDETH_OM_MASK 0x1 -#define KDETH_TID_SHIFT 16 -#define KDETH_TID_MASK 0x3ff -#define KDETH_TIDCTRL_SHIFT 26 -#define KDETH_TIDCTRL_MASK 0x3 -#define KDETH_INTR_SHIFT 28 -#define KDETH_INTR_MASK 0x1 -#define KDETH_SH_SHIFT 29 -#define KDETH_SH_MASK 0x1 -#define KDETH_HCRC_UPPER_SHIFT 16 -#define KDETH_HCRC_UPPER_MASK 0xff -#define KDETH_HCRC_LOWER_SHIFT 24 -#define KDETH_HCRC_LOWER_MASK 0xff - #define AHG_KDETH_INTR_SHIFT 12 #define AHG_KDETH_SH_SHIFT 13 #define AHG_KDETH_ARRAY_SIZE 9 @@ -122,16 +101,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) -#define KDETH_GET(val, field) \ - (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) -#define KDETH_SET(dw, field, val) do { \ - u32 dwval = le32_to_cpu(dw); \ - dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ - dwval |= (((val) & KDETH_##field##_MASK) << \ - KDETH_##field##_SHIFT); \ - dw = cpu_to_le32(dwval); \ - } while (0) - #define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ do { \ if ((idx) < ARRAY_SIZE((arr))) \ @@ -142,13 +111,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 return -ERANGE; \ } while (0) -/* KDETH OM multipliers and switch over point */ -#define KDETH_OM_SMALL 4 -#define KDETH_OM_SMALL_SHIFT 2 -#define KDETH_OM_LARGE 64 -#define KDETH_OM_LARGE_SHIFT 6 -#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) - /* Tx request flag bits */ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -- cgit v1.1 From f5114440c5491d4737b061c3a77195088bbaca52 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 15:59:46 -0700 Subject: IB/hfi1: Remove reading platform configuration from EFI variable Currently, platform configuration can be read from EFI variable for discrete cards. It will happen when reading from EPROM fails. EFI variables should not be queried for platform configuration in any scenario. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbda9b1..41307e4 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -136,7 +136,6 @@ static void save_platform_config_fields(struct hfi1_devdata *dd) void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; - unsigned long size = 0; u8 *temp_platform_config = NULL; u32 esize; @@ -160,15 +159,6 @@ void get_platform_config(struct hfi1_devdata *dd) dd->platform_config.size = esize; return; } - /* fail, try EFI variable */ - - ret = read_hfi1_efi_var(dd, "configuration", &size, - (void **)&temp_platform_config); - if (!ret) { - dd->platform_config.data = temp_platform_config; - dd->platform_config.size = size; - return; - } } dd_dev_err(dd, "%s: Failed to get platform config, falling back to sub-optimal default file\n", -- cgit v1.1 From f523984fb85d16e098aac94642cc16803a4cc61f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:53 -0700 Subject: IB/hfi1: Use a template for tid reg/unreg This is the preferred way to add a duplicate trace call. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace_rx.h | 46 ++++++++++------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 05fc6d6..7af5938 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -138,7 +138,8 @@ TRACE_EVENT(hfi1_receive_interrupt, ) ); -TRACE_EVENT(hfi1_exp_tid_reg, +DECLARE_EVENT_CLASS( + hfi1_exp_tid_reg_unreg, TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, unsigned long va, unsigned long pa, dma_addr_t dma), @@ -172,38 +173,17 @@ TRACE_EVENT(hfi1_exp_tid_reg, ) ); -TRACE_EVENT(hfi1_exp_tid_unreg, - TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, - unsigned long va, unsigned long pa, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned int, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); +DEFINE_EVENT( + hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); + +DEFINE_EVENT( + hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); TRACE_EVENT(hfi1_exp_tid_inval, TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, -- cgit v1.1 From 8cb1021b806bda3f5fda6f3699e1f98df14245df Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:59 -0700 Subject: IB/hfi1: Add traces for TID operations This patch adds a trace for putting a TID and for writing the RcvArray CSR. The CSR access template can be easily extended for additional CSR readq/writeq calls. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 16 ++------------- drivers/infiniband/hw/hfi1/trace_misc.h | 20 +++++++++++++++++++ drivers/infiniband/hw/hfi1/trace_rx.h | 35 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b8ee0e2..937350d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9745,17 +9745,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd) return 0; } -static const char * const pt_names[] = { - "expected", - "eager", - "invalid" -}; - -static const char *pt_name(u32 type) -{ - return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type]; -} - /* * index is the index into the receive array */ @@ -9777,15 +9766,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, type, index); goto done; } - - hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx", - pt_name(type), index, pa, (unsigned long)order); + trace_hfi1_put_tid(dd, index, type, pa, order); #define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */ reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) << RCV_ARRAY_RT_ADDR_SHIFT; + trace_hfi1_write_rcvarray(base + (index * 8), reg); writeq(reg, base + (index * 8)); if (type == PT_EAGER) diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h index deac77d..8db2253 100644 --- a/drivers/infiniband/hw/hfi1/trace_misc.h +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -72,6 +72,26 @@ TRACE_EVENT(hfi1_interrupt, __entry->src) ); +DECLARE_EVENT_CLASS( + hfi1_csr_template, + TP_PROTO(void __iomem *addr, u64 value), + TP_ARGS(addr, value), + TP_STRUCT__entry( + __field(void __iomem *, addr) + __field(u64, value) + ), + TP_fast_assign( + __entry->addr = addr; + __entry->value = value; + ), + TP_printk("addr %p value %llx", __entry->addr, __entry->value) +); + +DEFINE_EVENT( + hfi1_csr_template, hfi1_write_rcvarray, + TP_PROTO(void __iomem *addr, u64 value), + TP_ARGS(addr, value)); + #ifdef CONFIG_FAULT_INJECTION TRACE_EVENT(hfi1_fault_opcode, TP_PROTO(struct rvt_qp *qp, u8 opcode), diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 7af5938..8492957 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -52,6 +52,13 @@ #include "hfi.h" +#define tidtype_name(type) { PT_##type, #type } +#define show_tidtype(type) \ +__print_symbolic(type, \ + tidtype_name(EXPECTED), \ + tidtype_name(EAGER), \ + tidtype_name(INVALID)) \ + #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx @@ -185,6 +192,34 @@ DEFINE_EVENT( unsigned long va, unsigned long pa, dma_addr_t dma), TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); +TRACE_EVENT( + hfi1_put_tid, + TP_PROTO(struct hfi1_devdata *dd, + u32 index, u32 type, unsigned long pa, u16 order), + TP_ARGS(dd, index, type, pa, order), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(unsigned long, pa); + __field(u32, index); + __field(u32, type); + __field(u16, order); + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->pa = pa; + __entry->index = index; + __entry->type = type; + __entry->order = order; + ), + TP_printk("[%s] type %s pa %lx index %u order %u", + __get_str(dev), + show_tidtype(__entry->type), + __entry->pa, + __entry->index, + __entry->order + ) +); + TRACE_EVENT(hfi1_exp_tid_inval, TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, u32 npages, dma_addr_t dma), -- cgit v1.1 From fe4e74eeb24286c730672e776ac4c2c3caa19137 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:12 -0700 Subject: IB/hfi1: Initialize TID lists to avoid crash on cleanup The expected receive lists (tid_xxx_list) are not initialized until late in the receive context initialization. If an error happens before the initialization, a NULL pointer access will occur during cleanup. Initialized the lists sooner rather than later to avoid this Oops: IP: unlock_exp_tids.isra.11+0x26/0xd0 [hfi1] RIP: 0010:unlock_exp_tids.isra.11+0x26/0xd0 [hfi1] Call Trace: hfi1_user_exp_rcv_free+0x79/0xb0 [hfi1] hfi1_file_close+0x87/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/exp_rcv.c | 4 ---- drivers/infiniband/hw/hfi1/init.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 08d13ed..0af9167 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -69,10 +69,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) struct tid_group *grp; int i; - hfi1_exp_tid_group_init(&rcd->tid_group_list); - hfi1_exp_tid_group_init(&rcd->tid_used_list); - hfi1_exp_tid_group_init(&rcd->tid_full_list); - tidbase = rcd->expected_base; for (i = 0; i < rcd->expected_count / dd->rcv_entries.group_size; i++) { diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4a11d4d..a00308c 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -67,6 +67,7 @@ #include "aspm.h" #include "affinity.h" #include "vnic.h" +#include "exp_rcv.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -221,6 +222,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, hfi1_cdbg(PROC, "setting up context %u\n", ctxt); INIT_LIST_HEAD(&rcd->qp_wait_list); + hfi1_exp_tid_group_init(&rcd->tid_group_list); + hfi1_exp_tid_group_init(&rcd->tid_used_list); + hfi1_exp_tid_group_init(&rcd->tid_full_list); rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); -- cgit v1.1 From f683c80ca68e087b55c6f9ab6ca6beb88ebc6d69 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:19 -0700 Subject: IB/hfi1: Resolve kernel panics by reference counting receive contexts Base receive contexts can be used by sub contexts. Because of this, resources for the context cannot be completely freed until all sub contexts are done using the base context. Introduce a reference count so that the base receive context can be freed only when all sub contexts are done with it. Use the provided function call for setting default send context integrity rather than the manual method. The cleanup path does not set all variables back to NULL after freeing resources. Since the clean up code can get called more than once, (e.g. during context close and on the error path), it is necessary to make sure that all the variables are NULLed. Possible crash are: BUG: unable to handle kernel paging request at 0000000001908900 IP: read_csr+0x24/0x30 [hfi1] RIP: 0010:read_csr+0x24/0x30 [hfi1] Call Trace: sc_disable+0x40/0x110 [hfi1] hfi1_file_close+0x16f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 or kernel BUG at mm/slub.c:3877! RIP: 0010:kfree+0x14f/0x170 Call Trace: hfi1_free_ctxtdata+0x19a/0x2b0 [hfi1] ? hfi1_user_exp_rcv_grp_free+0x73/0x80 [hfi1] hfi1_file_close+0x20f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Fixes: Commit 62239fc6e554 ("IB/hfi1: Clean up on context initialization failure") Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 39 +++++++++++++++-------- drivers/infiniband/hw/hfi1/hfi.h | 11 +++---- drivers/infiniband/hw/hfi1/init.c | 58 ++++++++++++++++++++++++++++------ drivers/infiniband/hw/hfi1/vnic_main.c | 11 ++++--- 4 files changed, 85 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2dd8758..bbf80b1 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -774,6 +774,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) *ev = 0; __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); + fdata->uctxt = NULL; + hfi1_rcd_put(uctxt); /* fdata reference */ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { mutex_unlock(&hfi1_mutex); goto done; @@ -794,16 +796,15 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* Clear the context's J_KEY */ hfi1_clear_ctxt_jkey(dd, uctxt->ctxt); /* - * Reset context integrity checks to default. - * (writes to CSRs probably belong in chip.c) + * If a send context is allocated, reset context integrity + * checks to default and disable the send context. */ - write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, - hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type)); - sc_disable(uctxt->sc); + if (uctxt->sc) { + set_pio_integrity(uctxt->sc); + sc_disable(uctxt->sc); + } spin_unlock_irqrestore(&dd->uctxt_lock, flags); - dd->rcd[uctxt->ctxt] = NULL; - hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); @@ -816,8 +817,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_stats.sps_ctxts--; if (++dd->freectxts == dd->num_user_contexts) aspm_enable_all(dd); + + /* _rcd_put() should be done after releasing mutex */ + dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); /* dd reference */ done: mmdrop(fdata->mm); kobject_put(&dd->kobj); @@ -887,16 +891,17 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags)); - if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) { - clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - return -ENOMEM; - } + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) + ret = -ENOMEM; + /* The only thing a sub context needs is the user_xxx stuff */ if (!ret) ret = init_user_ctxt(fd); - if (ret) + if (ret) { clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + hfi1_rcd_put(fd->uctxt); + } } else if (!ret) { ret = setup_base_ctxt(fd); if (fd->uctxt->subctxt_cnt) { @@ -961,6 +966,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, fd->uctxt = uctxt; fd->subctxt = subctxt; + + hfi1_rcd_get(uctxt); __set_bit(fd->subctxt, uctxt->in_use_ctxts); return 1; @@ -1069,11 +1076,14 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, aspm_disable_all(dd); fd->uctxt = uctxt; + /* Count the reference for the fd */ + hfi1_rcd_get(uctxt); + return 0; ctxdata_free: dd->rcd[ctxt] = NULL; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); return ret; } @@ -1273,6 +1283,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) return 0; setup_failed: + /* Call _free_ctxtdata, not _rcd_put(). We still need the context. */ hfi1_free_ctxtdata(dd, uctxt); return ret; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index bca781c..1a33a50 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -213,11 +213,9 @@ struct hfi1_ctxtdata { /* dynamic receive available interrupt timeout */ u32 rcvavail_timeout; - /* - * number of opens (including slave sub-contexts) on this instance - * (ignoring forks, dup, etc. for now) - */ - int cnt; + /* Reference count the base context usage */ + struct kref kref; + /* Device context index */ unsigned ctxt; /* @@ -1291,7 +1289,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); - +int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); +void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a00308c..dfdb412 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -191,16 +191,46 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) nomem: ret = -ENOMEM; - if (dd->rcd) { - for (i = 0; i < dd->num_rcv_contexts; ++i) - hfi1_free_ctxtdata(dd, dd->rcd[i]); - } + for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) + hfi1_rcd_put(dd->rcd[i]); + + /* All the contexts should be freed, free the array */ kfree(dd->rcd); dd->rcd = NULL; return ret; } /* + * Helper routines for the receive context reference count (rcd and uctxt) + */ +static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) +{ + kref_init(&rcd->kref); +} + +static void hfi1_rcd_free(struct kref *kref) +{ + struct hfi1_ctxtdata *rcd = + container_of(kref, struct hfi1_ctxtdata, kref); + + hfi1_free_ctxtdata(rcd->dd, rcd); + kfree(rcd); +} + +int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) +{ + if (rcd) + return kref_put(&rcd->kref, hfi1_rcd_free); + + return 0; +} + +void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) +{ + kref_get(&rcd->kref); +} + +/* * Common code for user and kernel context setup. */ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, @@ -332,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, if (!rcd->opstats) goto bail; } + + hfi1_rcd_init(rcd); } return rcd; bail: @@ -931,14 +963,11 @@ static void shutdown_device(struct hfi1_devdata *dd) * @rcd: the ctxtdata structure * * free up any allocated data for a context - * This should not touch anything that would affect a simultaneous - * re-allocation of context data, because it is called after hfi1_mutex - * is released (and can be called from reinit as well). * It should never change any chip state, or global driver state. */ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - unsigned e; + u32 e; if (!rcd) return; @@ -957,6 +986,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* all the RcvArray entries should have been cleared by now */ kfree(rcd->egrbufs.rcvtids); + rcd->egrbufs.rcvtids = NULL; for (e = 0; e < rcd->egrbufs.alloced; e++) { if (rcd->egrbufs.buffers[e].dma) @@ -966,13 +996,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[e].dma); } kfree(rcd->egrbufs.buffers); + rcd->egrbufs.alloced = 0; + rcd->egrbufs.buffers = NULL; sc_free(rcd->sc); + rcd->sc = NULL; + vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); kfree(rcd->opstats); - kfree(rcd); + + rcd->subctxt_uregbase = NULL; + rcd->subctxt_rcvegrbuf = NULL; + rcd->subctxt_rcvhdr_base = NULL; + rcd->opstats = NULL; } /* @@ -1366,7 +1404,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) tmp[ctxt] = NULL; /* debugging paranoia */ if (rcd) { hfi1_clear_tids(rcd); - hfi1_free_ctxtdata(dd, rcd); + hfi1_rcd_put(rcd); } } kfree(tmp); diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index b601c29..950c1b4 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -156,11 +156,11 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, return ret; bail: /* - * hfi1_free_ctxtdata() also releases send_context - * structure if uctxt->sc is not null + * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will + * release send_context structure if uctxt->sc is not null */ dd->rcd[uctxt->ctxt] = NULL; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); return ret; } @@ -208,7 +208,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, hfi1_clear_ctxt_pkey(dd, uctxt); hfi1_stats.sps_ctxts--; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); } void hfi1_vnic_setup(struct hfi1_devdata *dd) @@ -751,6 +751,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); if (rc) break; + hfi1_rcd_get(dd->vnic.ctxt[i]); dd->vnic.ctxt[i]->vnic_q_idx = i; } @@ -762,6 +763,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) */ while (i-- > dd->vnic.num_ctxt) { deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + hfi1_rcd_put(dd->vnic.ctxt[i]); dd->vnic.ctxt[i] = NULL; } goto alloc_fail; @@ -791,6 +793,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) if (--dd->vnic.num_vports == 0) { for (i = 0; i < dd->vnic.num_ctxt; i++) { deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + hfi1_rcd_put(dd->vnic.ctxt[i]); dd->vnic.ctxt[i] = NULL; } hfi1_deinit_vnic_rsm(dd); -- cgit v1.1 From bc5214ee29220251e5507882696ded5ca183b169 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 16:00:26 -0700 Subject: IB/hfi1: Handle missing magic values in config file Driver does not check whether proper configuration file exist in EPROM, and treats empty partition as possible valid configuration, preventing fallback to default firmware. Change EPROM read function to treat missing magic number as read error. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/eprom.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index 26da124..d46b171 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -250,7 +250,6 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, { void *buffer; void *p; - u32 length; int ret; buffer = kmalloc(P1_SIZE, GFP_KERNEL); @@ -265,13 +264,13 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, /* scan for image magic that may trail the actual data */ p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); - if (p) - length = p - buffer; - else - length = P1_SIZE; + if (!p) { + kfree(buffer); + return -ENOENT; + } *data = buffer; - *size = length; + *size = p - buffer; return 0; } -- cgit v1.1 From e1267b01240ab031a9c9dd84c1ffeb23670b590f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 26 Jun 2017 08:58:22 +0300 Subject: RDMA: Remove useless MODULE_VERSION All modules in drivers/infiniband defined and used MODULE_VERSION, which was pointless because the kernel version describes their state more accurate then those arbitrary numbers. Signed-off-by: Leon Romanovsky Acked-by: Sagi Grimbrg Reviewed-by: Sagi Grimberg Acked-by: Dennis Dalessandro Reviewed-by: Dennis Dalessandro Acked-by: Selvin Xavier Acked-by: Ram Amrani Reviewed-by: Johannes Thumshirn Acked-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a50870e..79a8b05 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -96,7 +96,6 @@ MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Intel Omni-Path Architecture driver"); -MODULE_VERSION(HFI1_DRIVER_VERSION); /* * MAX_PKT_RCV is the max # if packets processed per receive interrupt. -- cgit v1.1 From ff8d836efe0629ad473b06a94a379682baf43cb9 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sat, 17 Jun 2017 10:37:34 -0700 Subject: IB/hfi1: Add receiving queue info to qp_stats This patch adds qp->s_ack_queue indices and size to qp_stats printout. This information will provide information about the receiving side. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a7af9f..ee2c74d 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -607,7 +607,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", + "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -630,6 +630,10 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_last, qp->s_acked, qp->s_cur, qp->s_tail, qp->s_head, qp->s_size, qp->s_avail, + /* ack_queue ring pointers, size */ + qp->s_tail_ack_queue, qp->r_head_ack_queue, + HFI1_MAX_RDMA_ATOMIC, + /* remote QP info */ qp->remote_qpn, rdma_ah_get_dlid(&qp->remote_ah_attr), rdma_ah_get_sl(&qp->remote_ah_attr), -- cgit v1.1 From c53df62c7a9a82c7375308af270f90a08e94f6b6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 30 Jun 2017 13:14:40 -0700 Subject: IB/hfi1: Check return values from PCI config API calls Ensure that return values from kernel PCI config access API calls in HFI driver are checked and react properly if they are not expected (i.e. not successful). Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 22 +++- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/pcie.c | 238 +++++++++++++++++++++++++++++++------- 3 files changed, 215 insertions(+), 47 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 937350d..efc8481 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13846,9 +13846,10 @@ static void init_sc2vl_tables(struct hfi1_devdata *dd) * a reset following the (possible) FLR in this routine. * */ -static void init_chip(struct hfi1_devdata *dd) +static int init_chip(struct hfi1_devdata *dd) { int i; + int ret = 0; /* * Put the HFI CSRs in a known state. @@ -13896,12 +13897,22 @@ static void init_chip(struct hfi1_devdata *dd) pcie_flr(dd->pcidev); /* restore command and BARs */ - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return ret; + } if (is_ax(dd)) { dd_dev_info(dd, "Resetting CSRs with FLR\n"); pcie_flr(dd->pcidev); - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return ret; + } } } else { dd_dev_info(dd, "Resetting CSRs with writes\n"); @@ -13929,6 +13940,7 @@ static void init_chip(struct hfi1_devdata *dd) write_csr(dd, ASIC_QSFP1_OUT, 0x1f); write_csr(dd, ASIC_QSFP2_OUT, 0x1f); init_chip_resources(dd); + return ret; } static void init_early_variables(struct hfi1_devdata *dd) @@ -14914,7 +14926,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, goto bail_cleanup; /* obtain chip sizes, reset chip CSRs */ - init_chip(dd); + ret = init_chip(dd); + if (ret) + goto bail_cleanup; /* read in the PCIe link speed information */ ret = pcie_speeds(dd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1a33a50..62f843d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1854,7 +1854,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); int request_msix(struct hfi1_devdata *dd, u32 msireq); -void restore_pci_variables(struct hfi1_devdata *dd); +int restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); int get_platform_config_field(struct hfi1_devdata *dd, diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index f01841b..903ea45 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -68,7 +68,7 @@ /* * Code to adjust PCIe capabilities. */ -static void tune_pcie_caps(struct hfi1_devdata *); +static int tune_pcie_caps(struct hfi1_devdata *); /* * Do all the common PCIe setup and initialization. @@ -161,6 +161,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) { unsigned long len; resource_size_t addr; + int ret = 0; dd->pcidev = pdev; pci_set_drvdata(pdev, dd); @@ -207,19 +208,56 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) /* * Save BARs and command to rewrite after device reset. */ - pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); - pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); - pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); - pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl); - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl); - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, - &dd->pcie_devctl2); - pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); - pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3); - pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); + if (ret) + goto read_error; + + ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, + &dd->pcie_devctl); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, + &dd->pcie_lnkctl); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, + &dd->pcie_devctl2); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + &dd->pci_lnkctl3); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + if (ret) + goto read_error; return 0; + +read_error: + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; } /* @@ -270,8 +308,14 @@ static u32 extract_width(u16 linkstat) static void update_lbus_info(struct hfi1_devdata *dd) { u16 linkstat; + int ret; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return; + } - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); dd->lbus_width = extract_width(linkstat); dd->lbus_speed = extract_speed(linkstat); snprintf(dd->lbus_info, sizeof(dd->lbus_info), @@ -286,6 +330,7 @@ int pcie_speeds(struct hfi1_devdata *dd) { u32 linkcap; struct pci_dev *parent = dd->pcidev->bus->self; + int ret; if (!pci_is_pcie(dd->pcidev)) { dd_dev_err(dd, "Can't find PCI Express capability!\n"); @@ -295,7 +340,12 @@ int pcie_speeds(struct hfi1_devdata *dd) /* find if our max speed is Gen3 and parent supports Gen3 speeds */ dd->link_gen3_capable = 1; - pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); + ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; + } + if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) { dd_dev_info(dd, "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n", @@ -327,7 +377,7 @@ int pcie_speeds(struct hfi1_devdata *dd) */ int request_msix(struct hfi1_devdata *dd, u32 msireq) { - int nvec; + int nvec, ret; nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, PCI_IRQ_MSIX | PCI_IRQ_LEGACY); @@ -336,7 +386,12 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) return nvec; } - tune_pcie_caps(dd); + ret = tune_pcie_caps(dd); + if (ret) { + dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret); + pci_free_irq_vectors(dd->pcidev); + return ret; + } /* check for legacy IRQ */ if (nvec == 1 && !dd->pcidev->msix_enabled) @@ -346,19 +401,61 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) } /* restore command and BARs after a reset has wiped them out */ -void restore_pci_variables(struct hfi1_devdata *dd) +int restore_pci_variables(struct hfi1_devdata *dd) { - pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); - pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0); - pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1); - pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom); - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl); - pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl); - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2, - dd->pcie_devctl2); - pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0); - pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3); - pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); + int ret = 0; + + ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, + dd->pcibar0); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, + dd->pcibar1); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, + dd->pcie_devctl); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, + dd->pcie_lnkctl); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2, + dd->pcie_devctl2); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + dd->pci_lnkctl3); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); + if (ret) + goto error; + + return 0; + +error: + dd_dev_err(dd, "Unable to write to PCI config\n"); + return ret; } /* @@ -373,21 +470,33 @@ uint aspm_mode = ASPM_MODE_DISABLED; module_param_named(aspm, aspm_mode, uint, S_IRUGO); MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); -static void tune_pcie_caps(struct hfi1_devdata *dd) +static int tune_pcie_caps(struct hfi1_devdata *dd) { struct pci_dev *parent; u16 rc_mpss, rc_mps, ep_mpss, ep_mps; u16 rc_mrrs, ep_mrrs, max_mrrs, ectl; + int ret; /* * Turn on extended tags in DevCtl in case the BIOS has turned it off * to improve WFR SDMA bandwidth */ - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl); + ret = pcie_capability_read_word(dd->pcidev, + PCI_EXP_DEVCTL, &ectl); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; + } + if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) { dd_dev_info(dd, "Enabling PCIe extended tags\n"); ectl |= PCI_EXP_DEVCTL_EXT_TAG; - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); + ret = pcie_capability_write_word(dd->pcidev, + PCI_EXP_DEVCTL, ectl); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return ret; + } } /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; @@ -396,14 +505,14 @@ static void tune_pcie_caps(struct hfi1_devdata *dd) * access to the upstream component. */ if (!parent) - return; + return -EINVAL; if (!pci_is_root_bus(parent->bus)) { dd_dev_info(dd, "Parent not root\n"); - return; + return -EINVAL; } if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) - return; + return -EINVAL; rc_mpss = parent->pcie_mpss; rc_mps = ffs(pcie_get_mps(parent)) - 8; /* Find out supported and configured values for endpoint (us) */ @@ -449,6 +558,8 @@ static void tune_pcie_caps(struct hfi1_devdata *dd) ep_mrrs = max_mrrs; pcie_set_readrq(dd->pcidev, ep_mrrs); } + + return 0; } /* End of PCIe capability tuning */ @@ -680,6 +791,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs, u32 violation; u32 i; u8 c_minus1, c0, c_plus1; + int ret; for (i = 0; i < 11; i++) { /* set index */ @@ -691,8 +803,14 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs, pci_write_config_dword(pdev, PCIE_CFG_REG_PL102, eq_value(c_minus1, c0, c_plus1)); /* check if these coefficients violate EQ rules */ - pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105, - &violation); + ret = pci_read_config_dword(dd->pcidev, + PCIE_CFG_REG_PL105, &violation); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + hit_error = 1; + break; + } + if (violation & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){ if (hit_error == 0) { @@ -1146,7 +1264,13 @@ retry: * that it is Gen3 capable earlier. */ dd_dev_info(dd, "%s: setting parent target link speed\n", __func__); - pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2); + ret = pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, (u32)lnkctl2); /* only write to parent if target is not as high as ours */ @@ -1155,20 +1279,37 @@ retry: lnkctl2 |= target_vector; dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, (u32)lnkctl2); - pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2); + ret = pcie_capability_write_word(parent, + PCI_EXP_LNKCTL2, lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return_error = 1; + goto done; + } } else { dd_dev_info(dd, "%s: ..target speed is OK\n", __func__); } dd_dev_info(dd, "%s: setting target link speed\n", __func__); - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2); + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, (u32)lnkctl2); lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK; lnkctl2 |= target_vector; dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, (u32)lnkctl2); - pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2); + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return_error = 1; + goto done; + } /* step 5h: arm gasket logic */ /* hold DC in reset across the SBR */ @@ -1218,7 +1359,14 @@ retry: /* restore PCI space registers we know were reset */ dd_dev_info(dd, "%s: calling restore_pci_variables\n", __func__); - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return_error = 1; + goto done; + } + /* restore firmware control */ write_csr(dd, MISC_CFG_FW_CTRL, fw_ctrl); @@ -1248,7 +1396,13 @@ retry: setextled(dd, 0); /* check for any per-lane errors */ - pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, ®32); + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, ®32); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: per-lane errors: 0x%x\n", __func__, reg32); /* extract status, look for our HFI */ -- cgit v1.1 From cb51c5d2cda855302910ab352f3d391c1a00aba0 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 24 Jul 2017 07:45:31 -0700 Subject: IB/hfi1: Fix bar0 mapping to use write combining When the debugpat kernel boot flag is turned on the following traces are printed: [ 1884.793168] x86/PAT: Overlap at 0x90000000-0x92000000 [ 1884.803510] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff], track uncached-minus, req write-combining, ret uncached-minus [ 1884.818167] hfi1 0000:05:00.0: hfi1_0: WC Remapped RcvArray: ffffc9000a980000 The ioremap_wc() clearly is not returning a write combining mapping due to an overlap where the RcvArray is mapped in a uncached mapping prior to creating the proposed write combining mapping. The patch replaces the single base register for uncached CSRs that used to overlap the RcvArray with two mappings. One, kregbase1, from the bar0 up to the RcvArray and another, kregbase2, from the end of the RcvArray to the pio send buffer space. A new dd field, base2_start, is used to convert the zero-based offset in the CSR routines to the correct kregbase1/kregbase2 mapping. A single direct write of the RcvArray CSRs is replaced with hfi1_put_tid() to insure correct access using the new disjoint mapping. Additionally, the kregend field is deleted since it is only ever written. patdebug now shows the RcvArray as write combining: [ 35.688990] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff], track write-combining, req write-combining, ret write-combining To insulate from any potential issues with write combining, all writeq are now flushed in hfi1_put_tid() and rcv_array_wc_fill(). Reviewed-by: Mitko Haralanov Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 79 ++++++++++++++++++++++++------- drivers/infiniband/hw/hfi1/chip.h | 4 +- drivers/infiniband/hw/hfi1/driver.c | 4 +- drivers/infiniband/hw/hfi1/exp_rcv.h | 5 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 20 ++++---- drivers/infiniband/hw/hfi1/pcie.c | 58 ++++++++++++++++------- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 3 +- 8 files changed, 126 insertions(+), 49 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index efc8481..4fce173 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1297,25 +1297,71 @@ CNTR_ELEM(#name, \ CNTR_SYNTH, \ access_ibp_##cntr) +/** + * hfi_addr_from_offset - return addr for readq/writeq + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * This routine selects the appropriate base address + * based on the indicated offset. + */ +static inline void __iomem *hfi1_addr_from_offset( + const struct hfi1_devdata *dd, + u32 offset) +{ + if (offset >= dd->base2_start) + return dd->kregbase2 + (offset - dd->base2_start); + return dd->kregbase1 + offset; +} + +/** + * read_csr - read CSR at the indicated offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * Return: the value read or all FF's if there + * is no mapping + */ u64 read_csr(const struct hfi1_devdata *dd, u32 offset) { - if (dd->flags & HFI1_PRESENT) { - return readq((void __iomem *)dd->kregbase + offset); - } + if (dd->flags & HFI1_PRESENT) + return readq(hfi1_addr_from_offset(dd, offset)); return -1; } +/** + * write_csr - write CSR at the indicated offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * @value - value to write + */ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) { - if (dd->flags & HFI1_PRESENT) - writeq(value, (void __iomem *)dd->kregbase + offset); + if (dd->flags & HFI1_PRESENT) { + void __iomem *base = hfi1_addr_from_offset(dd, offset); + + /* avoid write to RcvArray */ + if (WARN_ON(offset >= RCV_ARRAY && offset < dd->base2_start)) + return; + writeq(value, base); + } } +/** + * get_csr_addr - return te iomem address for offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * Return: The iomem address to use in subsequent + * writeq/readq operations. + */ void __iomem *get_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, u32 offset) { - return (void __iomem *)dd->kregbase + offset; + if (dd->flags & HFI1_PRESENT) + return hfi1_addr_from_offset(dd, offset); + return NULL; } static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, @@ -9752,14 +9798,13 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order) { u64 reg; - void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc : - (dd->kregbase + RCV_ARRAY)); if (!(dd->flags & HFI1_PRESENT)) goto done; - if (type == PT_INVALID) { + if (type == PT_INVALID || type == PT_INVALID_FLUSH) { pa = 0; + order = 0; } else if (type > PT_INVALID) { dd_dev_err(dd, "unexpected receive array type %u for index %u, not handled\n", @@ -9773,13 +9818,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) << RCV_ARRAY_RT_ADDR_SHIFT; - trace_hfi1_write_rcvarray(base + (index * 8), reg); - writeq(reg, base + (index * 8)); + trace_hfi1_write_rcvarray(dd->rcvarray_wc + (index * 8), reg); + writeq(reg, dd->rcvarray_wc + (index * 8)); - if (type == PT_EAGER) + if (type == PT_EAGER || type == PT_INVALID_FLUSH || (index & 3) == 3) /* - * Eager entries are written one-by-one so we have to push them - * after we write the entry. + * Eager entries are written and flushed + * + * Expected entries are flushed every 4 writes */ flush_wc(); done: @@ -13411,8 +13457,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* RcvArray */ for (i = 0; i < dd->chip_rcv_array_count; i++) - write_csr(dd, RCV_ARRAY + (8 * i), - RCV_ARRAY_RT_WRITE_ENABLE_SMASK); + hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0); /* RcvQPMapTable */ for (i = 0; i < 32; i++) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 3dab315..d3622cb 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -605,11 +605,11 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 *data); int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data); void __iomem *get_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, u32 offset); static inline void __iomem *get_kctxt_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, int ctxt, u32 offset0) { diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 26628cb..4a149cc 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -195,7 +195,7 @@ int hfi1_count_active_units(void) spin_lock_irqsave(&hfi1_devs_lock, flags); list_for_each_entry(dd, &hfi1_dev_list, list) { - if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase) + if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1282,7 +1282,7 @@ int hfi1_reset_device(int unit) dd_dev_info(dd, "Reset on unit %u requested\n", unit); - if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) { + if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) { dd_dev_info(dd, "Invalid unit number %u or not initialized or not present\n", unit); diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h index c7d02bcd..0871904 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -137,8 +137,11 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) * Doing the WC fill writes only makes sense if the device is * present and the RcvArray has been mapped as WC memory. */ - if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) + if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) { writeq(0, dd->rcvarray_wc + (index * 8)); + if ((index & 3) == 3) + flush_wc(); + } } static inline void tid_group_add_tail(struct tid_group *grp, diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index bbf80b1..fbf5284 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -181,7 +181,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); - if (!((dd->flags & HFI1_PRESENT) && dd->kregbase)) + if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1)) return -EINVAL; if (!atomic_inc_not_zero(&dd->user_refcount)) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 62f843d..2e5137b3 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -867,12 +867,15 @@ struct hfi1_devdata { struct device *diag_device; struct device *ui_device; - /* mem-mapped pointer to base of chip regs */ - u8 __iomem *kregbase; - /* end of mem-mapped chip space excluding sendbuf and user regs */ - u8 __iomem *kregend; - /* physical address of chip for io_remap, etc. */ + /* first mapping up to RcvArray */ + u8 __iomem *kregbase1; resource_size_t physaddr; + + /* second uncached mapping from RcvArray to pio send buffers */ + u8 __iomem *kregbase2; + /* for detecting offset above kregbase2 address */ + u32 base2_start; + /* Per VL data. Enough for all VLs but not all elements are set/used. */ struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* send context data */ @@ -1236,9 +1239,10 @@ static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) #define dc8051_ver_patch(a) ((a) & 0x0000ff) /* f_put_tid types */ -#define PT_EXPECTED 0 -#define PT_EAGER 1 -#define PT_INVALID 2 +#define PT_EXPECTED 0 +#define PT_EAGER 1 +#define PT_INVALID_FLUSH 2 +#define PT_INVALID 3 struct tid_rb_node; struct mmu_rb_node; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 903ea45..cc7be22 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -180,31 +180,47 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) return -EINVAL; } - dd->kregbase = ioremap_nocache(addr, TXE_PIO_SEND); - if (!dd->kregbase) + dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY); + if (!dd->kregbase1) { + dd_dev_err(dd, "UC mapping of kregbase1 failed\n"); return -ENOMEM; + } + dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY); + dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); + dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); + dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; + + dd->kregbase2 = ioremap_nocache( + addr + dd->base2_start, + TXE_PIO_SEND - dd->base2_start); + if (!dd->kregbase2) { + dd_dev_err(dd, "UC mapping of kregbase2 failed\n"); + goto nomem; + } + dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2, + TXE_PIO_SEND - dd->base2_start); dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE); if (!dd->piobase) { - iounmap(dd->kregbase); - return -ENOMEM; + dd_dev_err(dd, "WC mapping of send buffers failed\n"); + goto nomem; } + dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE); - dd->flags |= HFI1_PRESENT; /* now register routines work */ - - dd->kregend = dd->kregbase + TXE_PIO_SEND; dd->physaddr = addr; /* used for io_remap, etc. */ /* - * Re-map the chip's RcvArray as write-combining to allow us + * Map the chip's RcvArray as write-combining to allow us * to write an entire cacheline worth of entries in one shot. - * If this re-map fails, just continue - the RcvArray programming - * function will handle both cases. */ - dd->chip_rcv_array_count = read_csr(dd, RCV_ARRAY_CNT); dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, dd->chip_rcv_array_count * 8); - dd_dev_info(dd, "WC Remapped RcvArray: %p\n", dd->rcvarray_wc); + if (!dd->rcvarray_wc) { + dd_dev_err(dd, "WC mapping of receive array failed\n"); + goto nomem; + } + dd_dev_info(dd, "WC RcvArray: %p for %x\n", + dd->rcvarray_wc, dd->chip_rcv_array_count * 8); /* * Save BARs and command to rewrite after device reset. */ @@ -253,10 +269,16 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) if (ret) goto read_error; + dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; read_error: dd_dev_err(dd, "Unable to read from PCI config\n"); + goto bail_error; +nomem: + ret = -ENOMEM; +bail_error: + hfi1_pcie_ddcleanup(dd); return ret; } @@ -267,15 +289,19 @@ read_error: */ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) { - u64 __iomem *base = (void __iomem *)dd->kregbase; - dd->flags &= ~HFI1_PRESENT; - dd->kregbase = NULL; - iounmap(base); + if (dd->kregbase1) + iounmap(dd->kregbase1); + dd->kregbase1 = NULL; + if (dd->kregbase2) + iounmap(dd->kregbase2); + dd->kregbase2 = NULL; if (dd->rcvarray_wc) iounmap(dd->rcvarray_wc); + dd->rcvarray_wc = NULL; if (dd->piobase) iounmap(dd->piobase); + dd->piobase = NULL; } /* return the PCIe link speed from the given link status */ diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 6318e6c..3baf719 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -814,12 +814,11 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) node->npages, node->mmu.addr, node->phys, node->dma_addr); - hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0); /* * Make sure device has seen the write before we unpin the * pages. */ - flush_wc(); + hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, PCI_DMA_FROMDEVICE); -- cgit v1.1 From bcad29137a9731bfa5e16d64bf8e8a71a268ac88 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 24 Jul 2017 07:45:37 -0700 Subject: IB/hfi1: Serve the most starved iowait entry first When an egress resource(SDMA descriptors, pio credits) is not available, a sending thread will be put on the resource's wait queue. When the resource becomes available again, up to a fixed number of sending threads can be awakened sequentially and removed from the wait queue, depending on the number of waiting threads and the number of free resources. Since each awakened sending thread will send as many packets as possible, it is highly likely that the first sending thread will consume all the egress resources. Subsequently, it will be put back to the end of the wait queue. Depending on the timing when the later sending threads wake up, they may not be able to send any packet and be again put back to the end of the wait queue sequentially, right behind the first sending thread. This starvation cycle continues until some sending threads exceed their retry limit and consequently fail. This patch fixes the issue by two simple approaches: (1) Any starved sending thread will be put to the head of the wait queue while a served sending thread will be put to the tail; (2) The most starved sending thread will be served first. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/iowait.h | 70 +++++++++++++++++++++++++++++++++- drivers/infiniband/hw/hfi1/pio.c | 13 +++++-- drivers/infiniband/hw/hfi1/qp.c | 9 +++-- drivers/infiniband/hw/hfi1/ruc.c | 5 ++- drivers/infiniband/hw/hfi1/sdma.c | 34 ++++++++++++----- drivers/infiniband/hw/hfi1/sdma.h | 3 +- drivers/infiniband/hw/hfi1/user_sdma.c | 8 ++-- drivers/infiniband/hw/hfi1/verbs.c | 6 ++- drivers/infiniband/hw/hfi1/verbs.h | 1 + drivers/infiniband/hw/hfi1/vnic.h | 1 + drivers/infiniband/hw/hfi1/vnic_sdma.c | 14 +++++-- 11 files changed, 136 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index d9740dd..591697d 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -106,7 +106,9 @@ struct iowait { struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *tx, - unsigned seq); + uint seq, + bool pkts_sent + ); void (*wakeup)(struct iowait *wait, int reason); void (*sdma_drained)(struct iowait *wait); seqlock_t *lock; @@ -118,6 +120,7 @@ struct iowait { u32 count; u32 tx_limit; u32 tx_count; + u8 starved_cnt; }; #define SDMA_AVAIL_REASON 0 @@ -143,7 +146,8 @@ static inline void iowait_init( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *tx, - unsigned seq), + uint seq, + bool pkts_sent), void (*wakeup)(struct iowait *wait, int reason), void (*sdma_drained)(struct iowait *wait)) { @@ -305,4 +309,66 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait) return tx; } +/** + * iowait_queue - Put the iowait on a wait queue + * @pkts_sent: have some packets been sent before queuing? + * @w: the iowait struct + * @wait_head: the wait queue + * + * This function is called to insert an iowait struct into a + * wait queue after a resource (eg, sdma decriptor or pio + * buffer) is run out. + */ +static inline void iowait_queue(bool pkts_sent, struct iowait *w, + struct list_head *wait_head) +{ + /* + * To play fair, insert the iowait at the tail of the wait queue if it + * has already sent some packets; Otherwise, put it at the head. + */ + if (pkts_sent) { + list_add_tail(&w->list, wait_head); + w->starved_cnt = 0; + } else { + list_add(&w->list, wait_head); + w->starved_cnt++; + } +} + +/** + * iowait_starve_clear - clear the wait queue's starve count + * @pkts_sent: have some packets been sent? + * @w: the iowait struct + * + * This function is called to clear the starve count. If no + * packets have been sent, the starve count will not be cleared. + */ +static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w) +{ + if (pkts_sent) + w->starved_cnt = 0; +} + +/** + * iowait_starve_find_max - Find the maximum of the starve count + * @w: the iowait struct + * @max: a variable containing the max starve count + * @idx: the index of the current iowait in an array + * @max_idx: a variable containing the array index for the + * iowait entry that has the max starve count + * + * This function is called to compare the starve count of a + * given iowait with the given max starve count. The max starve + * count and the index will be updated if the iowait's start + * count is larger. + */ +static inline void iowait_starve_find_max(struct iowait *w, u8 *max, + uint idx, uint *max_idx) +{ + if (w->starved_cnt > *max) { + *max = w->starved_cnt; + *max_idx = idx; + } +} + #endif diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index ed72b5a..adb6a4d 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1568,7 +1568,8 @@ static void sc_piobufavail(struct send_context *sc) struct rvt_qp *qp; struct hfi1_qp_priv *priv; unsigned long flags; - unsigned i, n = 0; + uint i, n = 0, max_idx = 0; + u8 max_starved_cnt = 0; if (dd->send_contexts[sc->sw_index].type != SC_KERNEL && dd->send_contexts[sc->sw_index].type != SC_VL15) @@ -1591,6 +1592,7 @@ static void sc_piobufavail(struct send_context *sc) priv = qp->priv; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; + iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx); /* refcount held until actual wake up */ qps[n++] = qp; } @@ -1605,9 +1607,14 @@ static void sc_piobufavail(struct send_context *sc) } write_sequnlock_irqrestore(&dev->iowait_lock, flags); - for (i = 0; i < n; i++) - hfi1_qp_wakeup(qps[i], + /* Wake up the most starved one first */ + if (n) + hfi1_qp_wakeup(qps[max_idx], RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + for (i = 0; i < n; i++) + if (i != max_idx) + hfi1_qp_wakeup(qps[i], + RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 806d166..b801d84 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -68,7 +68,8 @@ static int iowait_sleep( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *stx, - unsigned seq); + unsigned int seq, + bool pkts_sent); static void iowait_wakeup(struct iowait *wait, int reason); static void iowait_sdma_drained(struct iowait *wait); static void qp_pio_drain(struct rvt_qp *qp); @@ -371,7 +372,8 @@ static int iowait_sleep( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *stx, - unsigned seq) + uint seq, + bool pkts_sent) { struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq); struct rvt_qp *qp; @@ -402,7 +404,8 @@ static int iowait_sleep( ibp->rvp.n_dmawait++; qp->s_flags |= RVT_S_WAIT_DMA_DESC; - list_add_tail(&priv->s_iowait.list, &sde->dmawait); + iowait_queue(pkts_sent, &priv->s_iowait, + &sde->dmawait); priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 9cf506a..f13ddb27 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -811,6 +811,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, static bool schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { + ps->pkts_sent = true; + if (unlikely(time_after(jiffies, ps->timeout))) { if (!ps->in_thread || workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) { @@ -907,6 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) ps.timeout = jiffies + ps.timeout_int; ps.cpu = priv->s_sde ? priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); + ps.pkts_sent = false; /* insure a pre-built packet is handled */ ps.s_txreq = get_waiting_verbs_txreq(qp); @@ -929,7 +932,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) spin_lock_irqsave(&qp->s_lock, ps.flags); } } while (make_req(qp, &ps)); - + iowait_starve_clear(ps.pkts_sent, &priv->s_iowait); spin_unlock_irqrestore(&qp->s_lock, ps.flags); } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index d82ff57..71b4258 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -246,7 +246,7 @@ static void __sdma_process_event( enum sdma_events event); static void dump_sdma_state(struct sdma_engine *sde); static void sdma_make_progress(struct sdma_engine *sde, u64 status); -static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail); +static void sdma_desc_avail(struct sdma_engine *sde, uint avail); static void sdma_flush_descq(struct sdma_engine *sde); /** @@ -1762,13 +1762,14 @@ retry: * * This is called with head_lock held. */ -static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) +static void sdma_desc_avail(struct sdma_engine *sde, uint avail) { struct iowait *wait, *nw; struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; - unsigned i, n = 0, seq; + uint i, n = 0, seq, max_idx = 0; struct sdma_txreq *stx; struct hfi1_ibdev *dev = &sde->dd->verbs_dev; + u8 max_starved_cnt = 0; #ifdef CONFIG_SDMA_VERBOSITY dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx, @@ -1803,6 +1804,9 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) if (num_desc > avail) break; avail -= num_desc; + /* Find the most starved wait memeber */ + iowait_starve_find_max(wait, &max_starved_cnt, + n, &max_idx); list_del_init(&wait->list); waits[n++] = wait; } @@ -1811,8 +1815,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) } } while (read_seqretry(&dev->iowait_lock, seq)); + /* Schedule the most starved one first */ + if (n) + waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON); + for (i = 0; i < n; i++) - waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON); + if (i != max_idx) + waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON); } /* head_lock must be held */ @@ -2349,7 +2358,8 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx) static int sdma_check_progress( struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx) + struct sdma_txreq *tx, + bool pkts_sent) { int ret; @@ -2362,7 +2372,7 @@ static int sdma_check_progress( seq = raw_seqcount_begin( (const seqcount_t *)&sde->head_lock.seqcount); - ret = wait->sleep(sde, wait, tx, seq); + ret = wait->sleep(sde, wait, tx, seq, pkts_sent); if (ret == -EAGAIN) sde->desc_avail = sdma_descq_freecnt(sde); } else { @@ -2376,6 +2386,7 @@ static int sdma_check_progress( * @sde: sdma engine to use * @wait: wait structure to use when full (may be NULL) * @tx: sdma_txreq to submit + * @pkts_sent: has any packet been sent yet? * * The call submits the tx into the ring. If a iowait structure is non-NULL * the packet will be queued to the list in wait. @@ -2387,7 +2398,8 @@ static int sdma_check_progress( */ int sdma_send_txreq(struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx) + struct sdma_txreq *tx, + bool pkts_sent) { int ret = 0; u16 tail; @@ -2429,7 +2441,7 @@ unlock_noconn: ret = -ECOMM; goto unlock; nodesc: - ret = sdma_check_progress(sde, wait, tx); + ret = sdma_check_progress(sde, wait, tx, pkts_sent); if (ret == -EAGAIN) { ret = 0; goto retry; @@ -2498,8 +2510,10 @@ retry: } update_tail: total_count = submit_count + flush_count; - if (wait) + if (wait) { iowait_sdma_add(wait, total_count); + iowait_starve_clear(submit_count > 0, wait); + } if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); @@ -2527,7 +2541,7 @@ unlock_noconn: ret = -ECOMM; goto update_tail; nodesc: - ret = sdma_check_progress(sde, wait, tx); + ret = sdma_check_progress(sde, wait, tx, submit_count > 0); if (ret == -EAGAIN) { ret = 0; goto retry; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 64f10b8..107011d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -852,7 +852,8 @@ struct iowait; int sdma_send_txreq(struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx); + struct sdma_txreq *tx, + bool pkts_sent); int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, struct list_head *tx_list, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8f7cfdd..ea2993f 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -272,7 +272,8 @@ static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned int seq); + uint seq, + bool pkts_sent); static void activate_packet_queue(struct iowait *wait, int reason); static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, unsigned long len); @@ -294,7 +295,8 @@ static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned seq) + uint seq, + bool pkts_sent) { struct hfi1_user_sdma_pkt_q *pq = container_of(wait, struct hfi1_user_sdma_pkt_q, busy); @@ -314,7 +316,7 @@ static int defer_packet_queue( xchg(&pq->state, SDMA_PKT_Q_DEFERRED); write_seqlock(&dev->iowait_lock); if (list_empty(&pq->busy.list)) - list_add_tail(&pq->busy.list, &sde->dmawait); + iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); write_sequnlock(&dev->iowait_lock); return -EBUSY; eagain: diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 9c9ded6..3ef6384 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -864,7 +864,8 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (unlikely(ret)) goto bail_build; } - ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); + ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq, + ps->pkts_sent); if (unlikely(ret < 0)) { if (ret == -ECOMM) goto bail_ecomm; @@ -921,7 +922,8 @@ static int pio_wait(struct rvt_qp *qp, dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); - list_add_tail(&priv->s_iowait.list, &sc->piowait); + iowait_queue(ps->pkts_sent, &priv->s_iowait, + &sc->piowait); priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index fdf1e1f..34267c7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -143,6 +143,7 @@ struct hfi1_pkt_state { unsigned long timeout_int; int cpu; bool in_thread; + bool pkts_sent; }; #define HFI1_PSN_CREDIT 16 diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index 4a621cd..eec7c14 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -103,6 +103,7 @@ struct hfi1_vnic_sdma { struct sdma_txreq stx; unsigned int state; u8 q_idx; + bool pkts_sent; }; /** diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 51a817d..7815d74 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -198,11 +198,16 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, goto free_desc; tx->retry_count = 0; - ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq); + ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq, + vnic_sdma->pkts_sent); /* When -ECOMM, sdma callback will be called with ABORT status */ if (unlikely(ret && unlikely(ret != -ECOMM))) goto free_desc; + if (!ret) { + vnic_sdma->pkts_sent = true; + iowait_starve_clear(vnic_sdma->pkts_sent, &vnic_sdma->wait); + } return ret; free_desc: @@ -211,6 +216,8 @@ free_desc: tx_err: if (ret != -EBUSY) dev_kfree_skb_any(skb); + else + vnic_sdma->pkts_sent = false; return ret; } @@ -225,7 +232,8 @@ tx_err: static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned int seq) + uint seq, + bool pkts_sent) { struct hfi1_vnic_sdma *vnic_sdma = container_of(wait, struct hfi1_vnic_sdma, wait); @@ -239,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) - list_add_tail(&vnic_sdma->wait.list, &sde->dmawait); + iowait_queue(pkts_sent, wait, &sde->dmawait); write_sequnlock(&dev->iowait_lock); return -EBUSY; } -- cgit v1.1 From 42492011ab23f44c63dad0c7096492313dc207e3 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:43 -0700 Subject: IB/hfi1: Assign context does not clean up file descriptor correctly on error In the error path for context allocation, the file descriptor pointer should not point to a context when an error occurs. Clean up the appropriate references on error. Fixes: Commit 62239fc6e5545b2e59f83dfbc5db231a81f37a45 ("IB/hfi1: Clean up on context initialization failure") Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 37 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index fbf5284..d36e177 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -94,6 +94,7 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo); static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo); +static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, @@ -813,15 +814,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) uctxt->rcvnowait = 0; uctxt->pionowait = 0; uctxt->event_flags = 0; - - hfi1_stats.sps_ctxts--; - if (++dd->freectxts == dd->num_user_contexts) - aspm_enable_all(dd); - - /* _rcd_put() should be done after releasing mutex */ - dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_rcd_put(uctxt); /* dd reference */ + + deallocate_ctxt(uctxt); done: mmdrop(fdata->mm); kobject_put(&dd->kobj); @@ -898,10 +893,9 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) if (!ret) ret = init_user_ctxt(fd); - if (ret) { + if (ret) clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - hfi1_rcd_put(fd->uctxt); - } + } else if (!ret) { ret = setup_base_ctxt(fd); if (fd->uctxt->subctxt_cnt) { @@ -917,6 +911,14 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) &fd->uctxt->event_flags); wake_up(&fd->uctxt->wait); } + if (ret) + deallocate_ctxt(fd->uctxt); + } + + /* If an error occurred, clear the reference */ + if (ret && fd->uctxt) { + hfi1_rcd_put(fd->uctxt); + fd->uctxt = NULL; } return ret; @@ -1087,6 +1089,19 @@ ctxdata_free: return ret; } +static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt) +{ + mutex_lock(&hfi1_mutex); + hfi1_stats.sps_ctxts--; + if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts) + aspm_enable_all(uctxt->dd); + + /* _rcd_put() should be done after releasing mutex */ + uctxt->dd->rcd[uctxt->ctxt] = NULL; + mutex_unlock(&hfi1_mutex); + hfi1_rcd_put(uctxt); /* dd reference */ +} + static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo) { -- cgit v1.1 From 91d970abe8d756843eaac57da903bf27f834b091 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:49 -0700 Subject: IB/hfi1: Remove unused user context data members Several data members of the user context have become unused over time. Cleaning them up. Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 6 ------ drivers/infiniband/hw/hfi1/hfi.h | 24 ------------------------ drivers/infiniband/hw/hfi1/user_sdma.c | 11 ----------- drivers/infiniband/hw/hfi1/user_sdma.h | 1 - 4 files changed, 42 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index d36e177..e10f526 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -809,10 +809,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); - uctxt->rcvwait_to = 0; - uctxt->piowait_to = 0; - uctxt->rcvnowait = 0; - uctxt->pionowait = 0; uctxt->event_flags = 0; mutex_unlock(&hfi1_mutex); @@ -1067,8 +1063,6 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); uctxt->jkey = generate_jkey(current_uid()); - INIT_LIST_HEAD(&uctxt->sdma_queues); - spin_lock_init(&uctxt->sdma_qlock); hfi1_stats.sps_ctxts++; /* * Disable ASPM when there are open user/PSM contexts to avoid diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2e5137b3..7877ebc 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -243,24 +243,10 @@ struct hfi1_ctxtdata { /* lock protecting all Expected TID data */ struct mutex exp_lock; - /* number of pio bufs for this ctxt (all procs, if shared) */ - u32 piocnt; - /* first pio buffer for this ctxt */ - u32 pio_base; - /* chip offset of PIO buffers for this ctxt */ - u32 piobufs; /* per-context configuration flags */ unsigned long flags; /* per-context event flags for fileops/intr communication */ unsigned long event_flags; - /* WAIT_RCV that timed out, no interrupt */ - u32 rcvwait_to; - /* WAIT_PIO that timed out, no interrupt */ - u32 piowait_to; - /* WAIT_RCV already happened, no wait */ - u32 rcvnowait; - /* WAIT_PIO already happened, no wait */ - u32 pionowait; /* total number of polled urgent packets */ u32 urgent; /* saved total number of polled urgent packets for poll edge trigger */ @@ -290,7 +276,6 @@ struct hfi1_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - u32 pkt_count; /* QPs waiting for context processing */ struct list_head qp_wait_list; /* interrupt handling */ @@ -299,15 +284,6 @@ struct hfi1_ctxtdata { unsigned numa_id; /* numa node of this context */ /* verbs stats per CTX */ struct hfi1_opcode_stats_perctx *opstats; - /* - * This is the kernel thread that will keep making - * progress on the user sdma requests behind the scenes. - * There is one per context (shared contexts use the master's). - */ - struct task_struct *progress; - struct list_head sdma_queues; - /* protect sdma queues */ - spinlock_t sdma_qlock; /* Is ASPM interrupt supported for this context */ bool aspm_intr_supported; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index ea2993f..64d31eb 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -346,7 +346,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_devdata *dd; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; - unsigned long flags; if (!uctxt || !fd) return -EBADF; @@ -360,7 +359,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, if (!pq) return -ENOMEM; - INIT_LIST_HEAD(&pq->list); pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -421,10 +419,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, fd->pq = pq; fd->cq = cq; - spin_lock_irqsave(&uctxt->sdma_qlock, flags); - list_add(&pq->list, &uctxt->sdma_queues); - spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); - return 0; pq_mmu_fail: @@ -447,7 +441,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; - unsigned long flags; hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, uctxt->ctxt, fd->subctxt); @@ -455,10 +448,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) if (pq) { if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); - spin_lock_irqsave(&uctxt->sdma_qlock, flags); - if (!list_empty(&pq->list)) - list_del_init(&pq->list); - spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); iowait_sdma_drain(&pq->busy); /* Wait until all requests have been freed. */ wait_event_interruptible( diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index e5b10ae..161fdfd 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -56,7 +56,6 @@ extern uint extended_psn; struct hfi1_user_sdma_pkt_q { - struct list_head list; unsigned ctxt; u16 subctxt; u16 n_max_reqs; -- cgit v1.1 From e6f7622df177d594f11d93343c3dda7637c761e0 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:55 -0700 Subject: IB/hfi1: Size rcd array index correctly and consistently The array index for the rcd array is sized several different ways throughout the code. Use the user interface size (u16) as the standard size and update the necessary code to reflect this. u16 is large enough for the largest amount of supported contexts. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/aspm.h | 6 +++--- drivers/infiniband/hw/hfi1/chip.c | 10 +++++----- drivers/infiniband/hw/hfi1/chip.h | 8 ++++---- drivers/infiniband/hw/hfi1/driver.c | 15 ++++++++------- drivers/infiniband/hw/hfi1/file_ops.c | 6 +++--- drivers/infiniband/hw/hfi1/hfi.h | 4 ++-- drivers/infiniband/hw/hfi1/init.c | 9 +++++---- drivers/infiniband/hw/hfi1/trace_rx.h | 2 +- drivers/infiniband/hw/hfi1/user_sdma.h | 2 +- drivers/infiniband/hw/hfi1/vnic_main.c | 2 +- 10 files changed, 33 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 794e681..3f9a071 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -237,7 +237,7 @@ static inline void aspm_disable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; - unsigned i; + u16 i; for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; @@ -256,7 +256,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; - unsigned i; + u16 i; aspm_enable(dd); @@ -284,7 +284,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) static inline void aspm_init(struct hfi1_devdata *dd) { - unsigned i; + u16 i; spin_lock_init(&dd->aspm_lock); dd->aspm_supported = aspm_hw_l1_supported(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4fce173..06a79a26 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6802,7 +6802,7 @@ static void rxe_freeze(struct hfi1_devdata *dd) static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) { u32 rcvmask; - int i; + u16 i; /* enable all kernel contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) { @@ -11722,7 +11722,7 @@ static u32 encoded_size(u32 size) return 0x1; /* if invalid, go with the minimum size */ } -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt) { struct hfi1_ctxtdata *rcd; u64 rcvctrl, reg; @@ -14542,7 +14542,7 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE); } -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey) +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey) { struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; unsigned sctxt; @@ -14579,7 +14579,7 @@ done: return ret; } -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt) +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt) { struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; unsigned sctxt; @@ -14608,7 +14608,7 @@ done: return ret; } -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey) +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey) { struct hfi1_ctxtdata *rcd; unsigned sctxt; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index d3622cb..008c970 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1352,14 +1352,14 @@ void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt); +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt); -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey); +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt); +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey); int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 4a149cc..01235d4 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -837,9 +837,9 @@ bail: return last; } -static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) +static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) { - int i; + u16 i; /* * For dynamically allocated kernel contexts (like vnic) switch @@ -857,9 +857,9 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) &handle_receive_interrupt_nodma_rtail; } -static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) +static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) { - int i; + u16 i; /* * For dynamically allocated kernel contexts (like vnic) switch @@ -879,7 +879,7 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) void set_all_slowpath(struct hfi1_devdata *dd) { - int i; + u16 i; /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { @@ -1068,7 +1068,7 @@ void receive_interrupt_work(struct work_struct *work) struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, linkstate_active_work); struct hfi1_devdata *dd = ppd->dd; - int i; + u16 i; /* Received non-SC15 packet implies neighbor_normal */ ppd->neighbor_normal = 1; @@ -1269,7 +1269,8 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, */ int hfi1_reset_device(int unit) { - int ret, i; + int ret; + u16 i; struct hfi1_devdata *dd = hfi1_lookup(unit); struct hfi1_pportdata *ppd; unsigned long flags; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e10f526..c8f34bc 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -927,7 +927,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo) { - int i; + u16 i; struct hfi1_devdata *dd = fd->dd; u16 subctxt; @@ -978,7 +978,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo) { struct hfi1_ctxtdata *uctxt; - unsigned int ctxt; + u16 ctxt; int ret, numa; if (dd->flags & HFI1_FROZEN) { @@ -1429,7 +1429,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) { struct hfi1_ctxtdata *uctxt; struct hfi1_devdata *dd = ppd->dd; - unsigned ctxt; + u16 ctxt; int ret = 0; unsigned long flags; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7877ebc..2ce3fc5 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -217,7 +217,7 @@ struct hfi1_ctxtdata { struct kref kref; /* Device context index */ - unsigned ctxt; + u16 ctxt; /* * non-zero if ctxt can be shared, and defines the maximum number of * sub-contexts for this device context. @@ -1264,7 +1264,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd); int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, int numa); void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index dfdb412..818a4b8 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -131,7 +131,7 @@ unsigned long *hfi1_cpulist; */ int hfi1_create_ctxts(struct hfi1_devdata *dd) { - unsigned i; + u16 i; int ret; /* Control context has to be always 0 */ @@ -233,7 +233,7 @@ void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) /* * Common code for user and kernel context setup. */ -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, int numa) { struct hfi1_devdata *dd = ppd->dd; @@ -615,7 +615,7 @@ static int init_after_reset(struct hfi1_devdata *dd) static void enable_chip(struct hfi1_devdata *dd) { u32 rcvmask; - u32 i; + u16 i; /* enable PIO send */ pio_send_control(dd, PSC_GLOBAL_ENABLE); @@ -692,7 +692,8 @@ wq_error: int hfi1_init(struct hfi1_devdata *dd, int reinit) { int ret = 0, pidx, lastfail = 0; - unsigned i, len; + unsigned long len; + u16 i; struct hfi1_ctxtdata *rcd; struct hfi1_pportdata *ppd; diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 8492957..bebf0a8 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -114,7 +114,7 @@ TRACE_EVENT(hfi1_rcvhdr, ); TRACE_EVENT(hfi1_receive_interrupt, - TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt), TP_ARGS(dd, ctxt), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(u32, ctxt) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 161fdfd..73c2455 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -56,7 +56,7 @@ extern uint extended_psn; struct hfi1_user_sdma_pkt_q { - unsigned ctxt; + u16 ctxt; u16 subctxt; u16 n_max_reqs; atomic_t n_reqs; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 5a3f80b..9bd2744 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -106,7 +106,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata **vnic_ctxt) { struct hfi1_ctxtdata *uctxt; - unsigned int ctxt; + u16 ctxt; int ret; if (dd->flags & HFI1_FROZEN) -- cgit v1.1 From 17573972f44d6293ed4fe561816b701241cb0847 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:01 -0700 Subject: IB/hfi1: Use context pointer rather than context index The hfi1__ctxt_key functions take a context index and look up the context based on that index. Since the context index is being retrieved from the context, this doesn't seem optimal. Pass the context pointer for use, rather than the context index. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 90 ++++++++++++++--------------------- drivers/infiniband/hw/hfi1/chip.h | 8 ++-- drivers/infiniband/hw/hfi1/file_ops.c | 6 +-- 3 files changed, 45 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 06a79a26..2f74702 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14542,99 +14542,86 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE); } -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey) +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 jkey) { - struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */ ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) << SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT); /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */ if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY)) reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, reg); /* * Enable send-side J_KEY integrity check, unless this is A0 h/w */ if (!is_ax(dd)) { - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); } /* Enable J_KEY check on receive context. */ reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK | ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) << RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT); - write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg); -done: - return ret; + write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, reg); + + return 0; } -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt) +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0); + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, 0); /* * Disable send-side J_KEY integrity check, unless this is A0 h/w. * This check would not have been enabled for A0 h/w, see * set_ctxt_jkey(). */ if (!is_ax(dd)) { - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); } /* Turn off the J_KEY on the receive side */ - write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0); -done: - return ret; + write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, 0); + + return 0; } -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey) +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 pkey) { - struct hfi1_ctxtdata *rcd; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (ctxt < dd->num_rcv_contexts) { - rcd = dd->rcd[ctxt]; - } else { - ret = -EINVAL; - goto done; - } - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) << SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); -done: - return ret; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); + + return 0; } int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) @@ -14645,9 +14632,6 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) if (!ctxt || !ctxt->sc) return -EINVAL; - if (ctxt->ctxt >= dd->num_rcv_contexts) - return -EINVAL; - hw_ctxt = ctxt->sc->hw_context; reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 008c970..9951052 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1357,9 +1357,11 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey); -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt); -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey); +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 jkey); +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt, + u16 pkey); int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c8f34bc..51e6a6f 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -795,7 +795,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_RCVCTRL_NO_RHQ_DROP_DIS | HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); /* Clear the context's J_KEY */ - hfi1_clear_ctxt_jkey(dd, uctxt->ctxt); + hfi1_clear_ctxt_jkey(dd, uctxt); /* * If a send context is allocated, reset context integrity * checks to default and disable the send context. @@ -1172,7 +1172,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) clear_rcvhdrtail(uctxt); /* Setup J_KEY before enabling the context */ - hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey); + hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) @@ -1545,7 +1545,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey) } if (intable) - ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey); + ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey); done: return ret; } -- cgit v1.1 From 2250563e2c935d6401a2203be4de4ca2cf0db183 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:06 -0700 Subject: IB/hfi1: Pass the context pointer rather than the index The hfi1_rcvctrl() function receives an index which it then converts to an rcd. Since most functions have the rcd, use that instead. Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 14 ++++++++------ drivers/infiniband/hw/hfi1/chip.h | 3 ++- drivers/infiniband/hw/hfi1/file_ops.c | 11 +++++------ drivers/infiniband/hw/hfi1/init.c | 14 +++++++------- drivers/infiniband/hw/hfi1/intr.c | 2 +- drivers/infiniband/hw/hfi1/vnic_main.c | 4 ++-- 6 files changed, 25 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2f74702..a62ef4a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6790,7 +6790,7 @@ static void rxe_freeze(struct hfi1_devdata *dd) /* disable all receive contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, dd->rcd[i]); } /* @@ -6814,9 +6814,9 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ - rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? + rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; - hfi1_rcvctrl(dd, rcvmask, i); + hfi1_rcvctrl(dd, rcvmask, rcd); } /* enable port */ @@ -11722,16 +11722,18 @@ static u32 encoded_size(u32 size) return 0x1; /* if invalid, go with the minimum size */ } -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt) +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, + struct hfi1_ctxtdata *rcd) { - struct hfi1_ctxtdata *rcd; u64 rcvctrl, reg; int did_enable = 0; + u16 ctxt; - rcd = dd->rcd[ctxt]; if (!rcd) return; + ctxt = rcd->ctxt; + hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op); rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 9951052..bef6301 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1352,7 +1352,8 @@ void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt); +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, + struct hfi1_ctxtdata *rcd); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 51e6a6f..7be75e0 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -389,8 +389,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sc_disable(sc); ret = sc_enable(sc); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, - uctxt->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt); } else { ret = sc_restart(sc); } @@ -793,7 +792,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); /* Clear the context's J_KEY */ hfi1_clear_ctxt_jkey(dd, uctxt); /* @@ -1198,7 +1197,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; - hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); } static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, @@ -1410,7 +1409,7 @@ static unsigned int poll_next(struct file *fp, spin_lock_irq(&dd->uctxt_lock); if (hdrqempty(uctxt)) { set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt); pollflag = 0; } else { pollflag = POLLIN | POLLRDNORM; @@ -1495,7 +1494,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, } else { rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS; } - hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt); + hfi1_rcvctrl(dd, rcvctrl_op, uctxt); /* always; new head should be equal to new tail; see above */ bail: return 0; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 818a4b8..7f5e4c7d 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -603,8 +603,8 @@ static int init_after_reset(struct hfi1_devdata *dd) */ for (i = 0; i < dd->num_rcv_contexts; i++) hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | - HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_TAILUPD_DIS, i); + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_TAILUPD_DIS, dd->rcd[i]); pio_send_control(dd, PSC_GLOBAL_DISABLE); for (i = 0; i < dd->num_send_contexts; i++) sc_disable(dd->send_contexts[i].sc); @@ -634,7 +634,7 @@ static void enable_chip(struct hfi1_devdata *dd) rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_EGR_FULL)) rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - hfi1_rcvctrl(dd, rcvmask, i); + hfi1_rcvctrl(dd, rcvmask, dd->rcd[i]); sc_enable(dd->rcd[i]->sc); } } @@ -915,10 +915,10 @@ static void shutdown_device(struct hfi1_devdata *dd) ppd = dd->pport + pidx; for (i = 0; i < dd->num_rcv_contexts; i++) hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS | - HFI1_RCVCTRL_CTXT_DIS | - HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_PKEY_DIS | - HFI1_RCVCTRL_ONE_PKT_EGR_DIS, i); + HFI1_RCVCTRL_CTXT_DIS | + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_PKEY_DIS | + HFI1_RCVCTRL_ONE_PKT_EGR_DIS, dd->rcd[i]); /* * Gracefully stop all sends allowing any in progress to * trickle out first. diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 04a5082d..9469be9 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -196,7 +196,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd) if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) { wake_up_interruptible(&rcd->wait); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd); } else if (test_and_clear_bit(HFI1_CTXT_WAITING_URG, &rcd->event_flags)) { rcd->urgent++; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 9bd2744..89b0564 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -95,7 +95,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; - hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); uctxt->is_vnic = true; done: @@ -186,7 +186,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, HFI1_RCVCTRL_INTRAVAIL_DIS | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); /* * VNIC contexts are allocated from user context pool. * Release them back to user context pool. -- cgit v1.1 From bf90aadd630c2c9f7f965ba1e90d41b5b46db7c9 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:12 -0700 Subject: IB/hfi1: Send MAD traps until repressed A trap should be sent to the FM until the FM sends a repress message. This is in line with the IBTA 13.4.9. Add the ability to resend traps until a repress message is received. Reviewed-by: Dennis Dalessandro Reviewed-by: Michael N. Henry Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 373 ++++++++++++++++++++++++++++--------- drivers/infiniband/hw/hfi1/mad.h | 3 +- drivers/infiniband/hw/hfi1/verbs.c | 5 + 3 files changed, 293 insertions(+), 88 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index a081a98..0a3e2df 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -59,6 +59,16 @@ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff #define OPA_LINK_WIDTH_RESET 0xffff +struct trap_node { + struct list_head list; + struct opa_mad_notice_attr data; + __be64 tid; + int len; + u32 retry; + u8 in_use; + u8 repress; +}; + static int smp_length_check(u32 data_size, u32 request_len) { if (unlikely(request_len < data_size)) @@ -97,28 +107,156 @@ void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port) ib_dispatch_event(&event); } -static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) +/* + * If the port is down, clean up all pending traps. We need to be careful + * with the given trap, because it may be queued. + */ +static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap) +{ + struct trap_node *node, *q; + unsigned long flags; + struct list_head trap_list; + int i; + + for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) { + spin_lock_irqsave(&ibp->rvp.lock, flags); + list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list); + ibp->rvp.trap_lists[i].list_len = 0; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + /* + * Remove all items from the list, freeing all the non-given + * traps. + */ + list_for_each_entry_safe(node, q, &trap_list, list) { + list_del(&node->list); + if (node != trap) + kfree(node); + } + } + + /* + * If this wasn't on one of the lists it would not be freed. If it + * was on the list, it is now safe to free. + */ + kfree(trap); +} + +static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, + struct trap_node *trap) +{ + struct trap_node *node; + struct trap_list *trap_list; + unsigned long flags; + unsigned long timeout; + int found = 0; + + /* + * Since the retry (handle timeout) does not remove a trap request + * from the list, all we have to do is compare the node. + */ + spin_lock_irqsave(&ibp->rvp.lock, flags); + trap_list = &ibp->rvp.trap_lists[trap->data.generic_type & 0x0F]; + + list_for_each_entry(node, &trap_list->list, list) { + if (node == trap) { + node->retry++; + found = 1; + break; + } + } + + /* If it is not on the list, add it, limited to RVT-MAX_TRAP_LEN. */ + if (!found) { + if (trap_list->list_len < RVT_MAX_TRAP_LEN) { + trap_list->list_len++; + list_add_tail(&trap->list, &trap_list->list); + } else { + pr_warn_ratelimited("hfi1: Maximim trap limit reached for 0x%0x traps\n", + trap->data.generic_type); + kfree(trap); + } + } + + /* + * Next check to see if there is a timer pending. If not, set it up + * and get the first trap from the list. + */ + node = NULL; + if (!timer_pending(&ibp->rvp.trap_timer)) { + /* + * o14-2 + * If the time out is set we have to wait until it expires + * before the trap can be sent. + * This should be > RVT_TRAP_TIMEOUT + */ + timeout = (RVT_TRAP_TIMEOUT * + (1UL << ibp->rvp.subnet_timeout)) / 1000; + mod_timer(&ibp->rvp.trap_timer, + jiffies + usecs_to_jiffies(timeout)); + node = list_first_entry(&trap_list->list, struct trap_node, + list); + node->in_use = 1; + } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + return node; +} + +static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp, + struct opa_smp *smp) +{ + struct trap_list *trap_list; + struct trap_node *trap; + unsigned long flags; + int i; + + if (smp->attr_id != IB_SMP_ATTR_NOTICE) + return; + + spin_lock_irqsave(&ibp->rvp.lock, flags); + for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) { + trap_list = &ibp->rvp.trap_lists[i]; + trap = list_first_entry_or_null(&trap_list->list, + struct trap_node, list); + if (trap && trap->tid == smp->tid) { + if (trap->in_use) { + trap->repress = 1; + } else { + trap_list->list_len--; + list_del(&trap->list); + kfree(trap); + } + break; + } + } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); +} + +static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap) { struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent; struct opa_smp *smp; - int ret; unsigned long flags; - unsigned long timeout; int pkey_idx; u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp; agent = ibp->rvp.send_agent; - if (!agent) + if (!agent) { + cleanup_traps(ibp, trap); return; + } /* o14-3.2.1 */ - if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) + if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) { + cleanup_traps(ibp, trap); return; + } - /* o14-2 */ - if (ibp->rvp.trap_timeout && time_before(jiffies, - ibp->rvp.trap_timeout)) + /* Add the trap to the list if necessary and see if we can send it */ + trap = check_and_add_trap(ibp, trap); + if (!trap) return; pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY); @@ -139,11 +277,21 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; smp->class_version = OPA_SM_CLASS_VERSION; smp->method = IB_MGMT_METHOD_TRAP; - ibp->rvp.tid++; - smp->tid = cpu_to_be64(ibp->rvp.tid); + + /* Only update the transaction ID for new traps (o13-5). */ + if (trap->tid == 0) { + ibp->rvp.tid++; + /* make sure that tid != 0 */ + if (ibp->rvp.tid == 0) + ibp->rvp.tid++; + trap->tid = cpu_to_be64(ibp->rvp.tid); + } + smp->tid = trap->tid; + smp->attr_id = IB_SMP_ATTR_NOTICE; /* o14-1: smp->mkey = 0; */ - memcpy(smp->route.lid.data, data, len); + + memcpy(smp->route.lid.data, &trap->data, trap->len); spin_lock_irqsave(&ibp->rvp.lock, flags); if (!ibp->rvp.sm_ah) { @@ -152,31 +300,72 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid); if (IS_ERR(ah)) { - ret = PTR_ERR(ah); - } else { - send_buf->ah = ah; - ibp->rvp.sm_ah = ibah_to_rvtah(ah); - ret = 0; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + return; } + send_buf->ah = ah; + ibp->rvp.sm_ah = ibah_to_rvtah(ah); } else { - ret = -EINVAL; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + return; } } else { send_buf->ah = &ibp->rvp.sm_ah->ibah; - ret = 0; } + + /* + * If the trap was repressed while things were getting set up, don't + * bother sending it. This could happen for a retry. + */ + if (trap->repress) { + list_del(&trap->list); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + kfree(trap); + ib_free_send_mad(send_buf); + return; + } + + trap->in_use = 0; spin_unlock_irqrestore(&ibp->rvp.lock, flags); - if (!ret) - ret = ib_post_send_mad(send_buf, NULL); - if (!ret) { - /* 4.096 usec. */ - timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000; - ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout); - } else { + if (ib_post_send_mad(send_buf, NULL)) ib_free_send_mad(send_buf); - ibp->rvp.trap_timeout = 0; +} + +void hfi1_handle_trap_timer(unsigned long data) +{ + struct hfi1_ibport *ibp = (struct hfi1_ibport *)data; + struct trap_node *trap = NULL; + unsigned long flags; + int i; + + /* Find the trap with the highest priority */ + spin_lock_irqsave(&ibp->rvp.lock, flags); + for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) { + trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list, + struct trap_node, list); } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + if (trap) + send_trap(ibp, trap); +} + +static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid) +{ + struct trap_node *trap; + + trap = kzalloc(sizeof(*trap), GFP_ATOMIC); + if (!trap) + return NULL; + + INIT_LIST_HEAD(&trap->list); + trap->data.generic_type = type; + trap->data.prod_type_lsb = IB_NOTICE_PROD_CA; + trap->data.trap_num = trap_num; + trap->data.issuer_lid = cpu_to_be32(lid); + + return trap; } /* @@ -185,28 +374,29 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, u32 qp1, u32 qp2, u16 lid1, u16 lid2) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; u32 _lid1 = lid1; u32 _lid2 = lid2; - memset(&data, 0, sizeof(data)); ibp->rvp.n_pkt_drops++; ibp->rvp.pkey_violations++; + trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY, + lid); + if (!trap) + return; + /* Send violation trap */ - data.generic_type = IB_NOTICE_TYPE_SECURITY; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_BAD_P_KEY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_257_258.lid1 = cpu_to_be32(_lid1); - data.ntc_257_258.lid2 = cpu_to_be32(_lid2); - data.ntc_257_258.key = cpu_to_be32(key); - data.ntc_257_258.sl = sl << 3; - data.ntc_257_258.qp1 = cpu_to_be32(qp1); - data.ntc_257_258.qp2 = cpu_to_be32(qp2); - - send_trap(ibp, &data, sizeof(data)); + trap->data.ntc_257_258.lid1 = cpu_to_be32(_lid1); + trap->data.ntc_257_258.lid2 = cpu_to_be32(_lid2); + trap->data.ntc_257_258.key = cpu_to_be32(key); + trap->data.ntc_257_258.sl = sl << 3; + trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1); + trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2); + + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -215,34 +405,36 @@ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY, + lid); + if (!trap) + return; + /* Send violation trap */ - data.generic_type = IB_NOTICE_TYPE_SECURITY; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_BAD_M_KEY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_256.lid = data.issuer_lid; - data.ntc_256.method = mad->method; - data.ntc_256.attr_id = mad->attr_id; - data.ntc_256.attr_mod = mad->attr_mod; - data.ntc_256.mkey = mkey; + trap->data.ntc_256.lid = trap->data.issuer_lid; + trap->data.ntc_256.method = mad->method; + trap->data.ntc_256.attr_id = mad->attr_id; + trap->data.ntc_256.attr_mod = mad->attr_mod; + trap->data.ntc_256.mkey = mkey; if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - data.ntc_256.dr_slid = dr_slid; - data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE; - if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) { - data.ntc_256.dr_trunc_hop |= + trap->data.ntc_256.dr_slid = dr_slid; + trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE; + if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) { + trap->data.ntc_256.dr_trunc_hop |= IB_NOTICE_TRAP_DR_TRUNC; - hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path); + hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path); } - data.ntc_256.dr_trunc_hop |= hop_cnt; - memcpy(data.ntc_256.dr_rtn_path, return_path, + trap->data.ntc_256.dr_trunc_hop |= hop_cnt; + memcpy(trap->data.ntc_256.dr_rtn_path, return_path, hop_cnt); } - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + + send_trap(ibp, trap); } /* @@ -250,23 +442,24 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, */ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) { - struct opa_mad_notice_attr data; + struct trap_node *trap; struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); struct hfi1_devdata *dd = dd_from_dev(verbs_dev); struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, + OPA_TRAP_CHANGE_CAPABILITY, + lid); + if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_CAPABILITY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_144.lid = data.issuer_lid; - data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); - data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); + trap->data.ntc_144.lid = trap->data.issuer_lid; + trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); + trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -274,19 +467,19 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) */ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID, + lid); + if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_SYSGUID; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid; - data.ntc_145.lid = data.issuer_lid; + trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid; + trap->data.ntc_145.lid = trap->data.issuer_lid; - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -294,20 +487,21 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp) */ void hfi1_node_desc_chg(struct hfi1_ibport *ibp) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, + OPA_TRAP_CHANGE_CAPABILITY, + lid); + if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_CAPABILITY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_144.lid = data.issuer_lid; - data.ntc_144.change_flags = + trap->data.ntc_144.lid = trap->data.issuer_lid; + trap->data.ntc_144.change_flags = cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG); - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, @@ -4144,6 +4338,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, */ ret = IB_MAD_RESULT_SUCCESS; break; + case IB_MGMT_METHOD_TRAP_REPRESS: + subn_handle_opa_trap_repress(ibp, smp); + /* Always successful */ + ret = IB_MAD_RESULT_SUCCESS; + break; default: smp->status |= IB_SMP_UNSUP_METHOD; ret = reply((struct ib_mad_hdr *)smp); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index a4e2506..4c12450 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -428,5 +428,6 @@ struct sc2vlnt { COUNTER_MASK(1, 4)) void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); +void hfi1_handle_trap_timer(unsigned long data); #endif /* _HFI1_MAD_H */ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 3ef6384..dc51bf2 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1535,6 +1535,11 @@ static void init_ibport(struct hfi1_pportdata *ppd) ibp->sc_to_sl[i] = i; } + for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++) + INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list); + setup_timer(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, + (unsigned long)ibp); + spin_lock_init(&ibp->rvp.lock); /* Set the prefix to the default value (see ch. 4.1.1) */ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; -- cgit v1.1 From 59ec8736956456b7429bbfd613b288928416a0f0 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 24 Jul 2017 07:46:18 -0700 Subject: IB/hfi1: Fix code consistency for if/else blocks in chip.c Code structure is not consistent for if/else blocks and break instructions in set_link_state for case HLS_UP_INIT. Physical state uses break in case of an error and if/else blocks for logical use cases. These blocks should be implemented consistently. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index a62ef4a..789dbc4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10569,18 +10569,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) dd_dev_err(dd, "%s: logical state did not change to INIT\n", __func__); - } else { - /* clear old transient LINKINIT_REASON code */ - if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR) - ppd->linkinit_reason = - OPA_LINKINIT_REASON_LINKUP; + break; + } - /* enable the port */ - add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + /* clear old transient LINKINIT_REASON code */ + if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR) + ppd->linkinit_reason = + OPA_LINKINIT_REASON_LINKUP; - handle_linkup_change(dd, 1); - ppd->host_link_state = HLS_UP_INIT; - } + /* enable the port */ + add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + + handle_linkup_change(dd, 1); + ppd->host_link_state = HLS_UP_INIT; break; case HLS_UP_ARMED: if (ppd->host_link_state != HLS_UP_INIT) -- cgit v1.1 From a156abb3cf14112d3aa3f3a9ba880d48cf064cef Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Mon, 24 Jul 2017 07:46:24 -0700 Subject: IB/hfi1: Fix initialization failure for debug firmware Loading debug signed firmware fails if started immediately after failed attempt to load production firmware. A short delay is required so add about a 100us delay after RSA check failure. Reviewed-by: Dennis Dalessandro Reviewed-by: Easwar Hariharan Reviewed-by: Mike Marciniszyn Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 4042c11..83efd6b 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -615,6 +615,14 @@ retry: fw_fabric_serdes_name = ALT_FW_FABRIC_NAME; fw_sbus_name = ALT_FW_SBUS_NAME; fw_pcie_serdes_name = ALT_FW_PCIE_NAME; + + /* + * Add a delay before obtaining and loading debug firmware. + * Authorization will fail if the delay between firmware + * authorization events is shorter than 50us. Add 100us to + * make a delay time safe. + */ + usleep_range(100, 120); } if (fw_sbus_load) { -- cgit v1.1 From a618b7e40af2b2b751790d602ffa93800b594eca Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Mon, 24 Jul 2017 07:46:30 -0700 Subject: IB/hfi1: Move saving PCI values to a separate function During PCIe initialization some registers' values from PCI config space are saved in order to restore them later (i.e. after reset). Restoring those value is done by a function called restore_pci_variables, while saving them is put directly into function hfi1_pcie_ddinit. Move saving values to a separate function in the image of restoring functionality. Reviewed-by: Jakub Byczkowski Reviewed-by: Mike Marciniszyn Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 ++ drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/pcie.c | 110 ++++++++++++++++++++------------------ 3 files changed, 64 insertions(+), 52 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 789dbc4..4a4405e 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14866,6 +14866,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret < 0) goto bail_free; + /* Save PCI space registers to rewrite after device reset */ + ret = save_pci_variables(dd); + if (ret < 0) + goto bail_cleanup; + /* verify that reads actually work, save revision for reset check */ dd->revision = read_csr(dd, CCE_REVISION); if (dd->revision == ~(u64)0) { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2ce3fc5..2d32c5c 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1835,6 +1835,7 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); int request_msix(struct hfi1_devdata *dd, u32 msireq); int restore_pci_variables(struct hfi1_devdata *dd); +int save_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); int get_platform_config_field(struct hfi1_devdata *dd, diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index cc7be22..82447b7 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -221,63 +221,11 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) } dd_dev_info(dd, "WC RcvArray: %p for %x\n", dd->rcvarray_wc, dd->chip_rcv_array_count * 8); - /* - * Save BARs and command to rewrite after device reset. - */ - - ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); - if (ret) - goto read_error; - - ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, - &dd->pcie_devctl); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, - &dd->pcie_lnkctl); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, - &dd->pcie_devctl2); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - &dd->pci_lnkctl3); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); - if (ret) - goto read_error; dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; - -read_error: - dd_dev_err(dd, "Unable to read from PCI config\n"); - goto bail_error; nomem: ret = -ENOMEM; -bail_error: hfi1_pcie_ddcleanup(dd); return ret; } @@ -484,6 +432,64 @@ error: return ret; } +/* Save BARs and command to rewrite after device reset */ +int save_pci_variables(struct hfi1_devdata *dd) +{ + int ret = 0; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, + &dd->pcibar0); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, + &dd->pcibar1); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); + if (ret) + goto error; + + ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, + &dd->pcie_devctl); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, + &dd->pcie_lnkctl); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, + &dd->pcie_devctl2); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + &dd->pci_lnkctl3); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + if (ret) + goto error; + + return 0; + +error: + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; +} + /* * BIOS may not set PCIe bus-utilization parameters for best performance. * Check and optionally adjust them to maximize our throughput. -- cgit v1.1 From 5e2d6764a729bf8d43894b0368b0ae11a19485c0 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Mon, 24 Jul 2017 07:46:36 -0700 Subject: IB/hfi1: Verify port data VLs credits on transition to Armed There is a window where the FM can read the buffer control table and decide not to program buffers. When a port goes down, the code clears the table and if it is not programmed, posted SDMA descriptors will never complete due to no buffer credits. Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 16 +++++++++++++++- drivers/infiniband/hw/hfi1/intr.c | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4a4405e..8f7ce74 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10490,6 +10490,14 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, } /* + * Verify if BCT for data VLs is non-zero. + */ +static inline bool data_vls_operational(struct hfi1_pportdata *ppd) +{ + return !!ppd->actual_vls_operational; +} + +/* * Change the physical and/or logical link state. * * Do not call this routine while inside an interrupt. It contains @@ -10587,6 +10595,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_UP_INIT) goto unexpected; + if (!data_vls_operational(ppd)) { + dd_dev_err(dd, + "%s: data VLs not operational\n", __func__); + ret = -EINVAL; + break; + } + ppd->host_link_state = HLS_UP_ARMED; set_logical_state(dd, LSTATE_ARMED); ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000); @@ -14832,7 +14847,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, } ppd->vls_supported = num_vls; ppd->vls_operational = ppd->vls_supported; - ppd->actual_vls_operational = ppd->vls_supported; /* Set the default MTU. */ for (vl = 0; vl < num_vls; vl++) dd->vld[vl].mtu = hfi1_max_mtu; diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 9469be9..96845df 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -164,6 +164,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) ppd->linkup = 0; /* clear HW details of the previous connection */ + ppd->actual_vls_operational = 0; reset_link_credits(dd); /* freeze after a link down to guarantee a clean egress */ -- cgit v1.1 From f13a6e5e2e0192737c3bdbdb16c5cc0181cc86e5 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:42 -0700 Subject: IB/hfi1: Split copy_to_user data copy for better security A copy_to_user() call assumes that two members of a data structure are sequential. Since this may not always be true, separate the copies to ensure a safe copy. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7be75e0..650c1e5 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -268,12 +268,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, /* * Copy the number of tidlist entries we used * and the length of the buffer we registered. - * These fields are adjacent in the structure so - * we can copy them at the same time. */ addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt) + + sizeof(tinfo.tidcnt))) + return -EFAULT; + + addr = arg + offsetof(struct hfi1_tid_info, length); + if (copy_to_user((void __user *)addr, &tinfo.length, sizeof(tinfo.length))) ret = -EFAULT; } -- cgit v1.1 From 5efd40cad4bf98f0d1f6b726d67e39decd85edc9 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Sat, 29 Jul 2017 08:43:20 -0700 Subject: IB/hfi1: Harden state transition to Armed and Active There is a window that allows other threads to read state of 'host_link_state' as a new, before the hardware actual state is set. This patch closes the window by indicating a new state only after hardware transition is complete. Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 8f7ce74..eca31dd 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10602,16 +10602,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; } - ppd->host_link_state = HLS_UP_ARMED; set_logical_state(dd, LSTATE_ARMED); ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000); if (ret) { - /* logical state didn't change, stay at init */ - ppd->host_link_state = HLS_UP_INIT; dd_dev_err(dd, "%s: logical state did not change to ARMED\n", __func__); + break; } + ppd->host_link_state = HLS_UP_ARMED; /* * The simulator does not currently implement SMA messages, * so neighbor_normal is not set. Set it here when we first @@ -10624,18 +10623,16 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_UP_ARMED) goto unexpected; - ppd->host_link_state = HLS_UP_ACTIVE; set_logical_state(dd, LSTATE_ACTIVE); ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000); if (ret) { - /* logical state didn't change, stay at armed */ - ppd->host_link_state = HLS_UP_ARMED; dd_dev_err(dd, "%s: logical state did not change to ACTIVE\n", __func__); } else { /* tell all engines to go running */ sdma_all_running(dd); + ppd->host_link_state = HLS_UP_ACTIVE; /* Signal the IB layer that the port has went active */ event.device = &dd->verbs_dev.rdi.ibdev; -- cgit v1.1 From 96603ed865b3c5a9a0d1c9ed434661519d49fde3 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Sat, 29 Jul 2017 08:43:26 -0700 Subject: IB/hfi1: Do not enable disabled port on cable insert Fix issue where a disabled port can be enabled by inserting a cable. The port should be explicitly enabled instead. Reviewed-by: Michael J. Ruhl Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index eca31dd..bd594e7 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9306,12 +9306,6 @@ int start_link(struct hfi1_pportdata *ppd) */ tune_serdes(ppd); - if (!ppd->link_enabled) { - dd_dev_info(ppd->dd, - "%s: stopping link start because link is disabled\n", - __func__); - return 0; - } if (!ppd->driver_link_ready) { dd_dev_info(ppd->dd, "%s: stopping link start because driver is not ready\n", @@ -9529,6 +9523,13 @@ void qsfp_event(struct work_struct *work) if (!qsfp_mod_present(ppd)) return; + if (ppd->host_link_state == HLS_DN_DISABLE) { + dd_dev_info(ppd->dd, + "%s: stopping link start because link is disabled\n", + __func__); + return; + } + /* * Turn DC back on after cable has been re-inserted. Up until * now, the DC has been in reset to save power. -- cgit v1.1 From e87473bc1b6c2cb08f1b760cfc8cd012822241a6 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Sat, 29 Jul 2017 08:43:32 -0700 Subject: IB/hfi1: Only set fd pointer when base context is completely initialized The allocate_ctxt() function adds the context to the fd data structure. Since the context is not completely initialized, this can cause confusion as to whether the context is valid or not. Move the fd reference from allocate_ctxt() to setup_base_ctxt(). Update the necessary functions to be aware of this move. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 55 ++++++++++++++++++------------- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 4 +-- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 3 +- drivers/infiniband/hw/hfi1/user_sdma.c | 4 +-- drivers/infiniband/hw/hfi1/user_sdma.h | 3 +- 5 files changed, 40 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 650c1e5..a0c13fa 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -81,19 +81,22 @@ static u64 kvirt_to_phys(void *addr); static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); -static int init_user_ctxt(struct hfi1_filedata *fd); +static int init_user_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); static void user_init(struct hfi1_ctxtdata *uctxt); static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); -static int setup_base_ctxt(struct hfi1_filedata *fd); +static int setup_base_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo); static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, - struct hfi1_user_info *uinfo); + struct hfi1_user_info *uinfo, + struct hfi1_ctxtdata **cd); static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); @@ -759,7 +762,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) flush_wc(); /* drain user sdma queue */ - hfi1_user_sdma_free_queues(fdata); + hfi1_user_sdma_free_queues(fdata, uctxt); /* release the cpu */ hfi1_put_proc_affinity(fdata->rec_cpu_num); @@ -845,6 +848,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { int ret; unsigned int swmajor, swminor; + struct hfi1_ctxtdata *uctxt = NULL; swmajor = uinfo->userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) @@ -870,7 +874,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) * couldn't find a sub context. */ if (!ret) - ret = allocate_ctxt(fd, fd->dd, uinfo); + ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt); mutex_unlock(&hfi1_mutex); @@ -888,28 +892,27 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) /* The only thing a sub context needs is the user_xxx stuff */ if (!ret) - ret = init_user_ctxt(fd); + ret = init_user_ctxt(fd, fd->uctxt); if (ret) clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); } else if (!ret) { - ret = setup_base_ctxt(fd); - if (fd->uctxt->subctxt_cnt) { + ret = setup_base_ctxt(fd, uctxt); + if (uctxt->subctxt_cnt) { /* If there is an error, set the failed bit. */ if (ret) set_bit(HFI1_CTXT_BASE_FAILED, - &fd->uctxt->event_flags); + &uctxt->event_flags); /* * Base context is done, notify anybody using a * sub-context that is waiting for this completion */ - clear_bit(HFI1_CTXT_BASE_UNINIT, - &fd->uctxt->event_flags); - wake_up(&fd->uctxt->wait); + clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); + wake_up(&uctxt->wait); } if (ret) - deallocate_ctxt(fd->uctxt); + deallocate_ctxt(uctxt); } /* If an error occurred, clear the reference */ @@ -976,7 +979,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, } static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, - struct hfi1_user_info *uinfo) + struct hfi1_user_info *uinfo, + struct hfi1_ctxtdata **cd) { struct hfi1_ctxtdata *uctxt; u16 ctxt; @@ -1071,14 +1075,13 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, */ if (dd->freectxts-- == dd->num_user_contexts) aspm_disable_all(dd); - fd->uctxt = uctxt; - /* Count the reference for the fd */ - hfi1_rcd_get(uctxt); + *cd = uctxt; return 0; ctxdata_free: + *cd = NULL; dd->rcd[ctxt] = NULL; hfi1_rcd_put(uctxt); return ret; @@ -1243,23 +1246,25 @@ static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, return ret; } -static int init_user_ctxt(struct hfi1_filedata *fd) +static int init_user_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; int ret; ret = hfi1_user_sdma_alloc_queues(uctxt, fd); if (ret) return ret; - ret = hfi1_user_exp_rcv_init(fd); + ret = hfi1_user_exp_rcv_init(fd, uctxt); + if (ret) + hfi1_user_sdma_free_queues(fd, uctxt); return ret; } -static int setup_base_ctxt(struct hfi1_filedata *fd) +static int setup_base_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; @@ -1284,12 +1289,16 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) if (ret) goto setup_failed; - ret = init_user_ctxt(fd); + ret = init_user_ctxt(fd, uctxt); if (ret) goto setup_failed; user_init(uctxt); + /* Now that the context is set up, the fd can get a reference. */ + fd->uctxt = uctxt; + hfi1_rcd_get(uctxt); + return 0; setup_failed: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 3baf719..d9036ba 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -104,9 +104,9 @@ static struct mmu_rb_ops tid_rb_ops = { * receive caching. This needs to be done after the context has * been configured with the eager/expected RcvEntry counts. */ -int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 1bdc61b..6cbaa4c 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -51,7 +51,8 @@ #include "exp_rcv.h" -int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 64d31eb..8f02707 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -437,9 +437,9 @@ pq_reqs_nomem: return ret; } -int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 73c2455..84c199d 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -81,7 +81,8 @@ struct hfi1_user_sdma_comp_q { int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd); -int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd); +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct iovec *iovec, unsigned long dim, unsigned long *count); -- cgit v1.1 From c822652ea669f8f46e4e8e2fc0618f2f72103f14 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Sat, 29 Jul 2017 08:43:37 -0700 Subject: IB/hfi1: Disambiguate corruption and uninitialized error cases The error messages when checksum validation of the platform configuration fields populated into the ASIC scratch registers fails are ambiguous. Disambiguate them. Reviewed-by: Jakub Byczkowski Signed-off-by: Easwar Hariharan Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 41307e4..5c38a24 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -58,8 +58,13 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd) version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT; /* Prevent power on default of all zeroes from passing checksum */ - if (!version) + if (!version) { + dd_dev_err(dd, "%s: Config bitmap uninitialized\n", __func__); + dd_dev_err(dd, + "%s: Please update your BIOS to support active channels\n", + __func__); return 0; + } /* * ASIC scratch 0 only contains the checksum and bitmap version as @@ -84,6 +89,8 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd) if (checksum + temp_scratch == 0xFFFF) return 1; + + dd_dev_err(dd, "%s: Configuration bitmap corrupted\n", __func__); return 0; } @@ -144,11 +151,6 @@ void get_platform_config(struct hfi1_devdata *dd) save_platform_config_fields(dd); return; } - dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n", - __func__); - dd_dev_err(dd, - "%s: Please update your BIOS to support active channels\n", - __func__); } else { ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, -- cgit v1.1 From 3ffea7d8cd9e3f8f96514ac499f2510ad2f31d11 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sat, 29 Jul 2017 08:43:43 -0700 Subject: IB/{rdmavt, hfi1, qib}: Fix panic with post receive and SGE compression The server side of qperf panics as follows: [242446.336860] IP: report_bug+0x64/0x10 [242446.341031] PGD 1c0c067 [242446.341032] P4D 1c0c067 [242446.343951] PUD 1c0d063 [242446.346870] PMD 8587ea067 [242446.349788] PTE 800000083e14016 [242446.352901] [242446.358352] Oops: 0003 [#1] SM [242446.437919] CPU: 1 PID: 7442 Comm: irq/92-hfi1_0 k Not tainted 4.12.0-mam-asm #1 [242446.446365] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0018.C4.072020161249 07/20/201 [242446.458397] task: ffff8808392d2b80 task.stack: ffffc9000664000 [242446.465097] RIP: 0010:report_bug+0x64/0x10 [242446.469859] RSP: 0018:ffffc900066439c0 EFLAGS: 0001000 [242446.475784] RAX: ffffffffa06647e4 RBX: ffffffffa06461e1 RCX: 000000000000000 [242446.483840] RDX: 0000000000000907 RSI: ffffffffa0675040 RDI: ffffffffffff740 [242446.491897] RBP: ffffc900066439e0 R08: 0000000000000001 R09: 000000000000025 [242446.499953] R10: ffffffff81a253df R11: 0000000000000133 R12: ffffc90006643b3 [242446.508010] R13: ffffffffa065bbf0 R14: 00000000000001e5 R15: 000000000000000 [242446.516067] FS: 0000000000000000(0000) GS:ffff88085f640000(0000) knlGS:000000000000000 [242446.525191] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003 [242446.531698] CR2: ffffffffa06647ee CR3: 0000000001c09000 CR4: 00000000001406e [242446.539756] Call Trace [242446.542582] fixup_bug+0x2c/0x5 [242446.546277] do_trap+0x12b/0x18 [242446.549972] do_error_trap+0x89/0x11 [242446.554171] ? hfi1_copy_sge+0x271/0x2b0 [hfi1 [242446.559324] ? ttwu_do_wakeup+0x1e/0x14 [242446.563795] ? ttwu_do_activate+0x77/0x8 [242446.568363] do_invalid_op+0x20/0x3 [242446.572448] invalid_op+0x1e/0x3 [242446.576247] RIP: 0010:hfi1_copy_sge+0x271/0x2b0 [hfi1 [242446.582075] RSP: 0018:ffffc90006643be8 EFLAGS: 0001004 [242446.587999] RAX: 0000000000000000 RBX: ffff88083e0fa240 RCX: 000000000000000 [242446.596058] RDX: 0000000000000000 RSI: ffff880842508000 RDI: ffff88083e0fa24 [242446.604116] RBP: ffffc90006643c28 R08: 0000000000000000 R09: 000000000000000 [242446.612172] R10: ffffc90009473640 R11: 0000000000000133 R12: 000000000000000 [242446.620228] R13: 0000000000000000 R14: 0000000000002000 R15: ffff88084250800 [242446.628293] ? hfi1_copy_sge+0x1a1/0x2b0 [hfi1 [242446.633449] hfi1_rc_rcv+0x3da/0x1270 [hfi1 [242446.638312] ? sc_buffer_alloc+0x113/0x150 [hfi1 [242446.643662] hfi1_ib_rcv+0x1c9/0x2e0 [hfi1 [242446.648428] process_receive_ib+0x19a/0x270 [hfi1 [242446.653866] ? process_rcv_qp_work+0xd2/0x160 [hfi1 [242446.659505] handle_receive_interrupt_nodma_rtail+0x184/0x2e0 [hfi1 [242446.666693] ? irq_finalize_oneshot+0x100/0x10 [242446.671846] receive_context_thread+0x1b/0x140 [hfi1 [242446.677576] irq_thread_fn+0x1e/0x4 [242446.681659] irq_thread+0x13c/0x1b [242446.685646] ? irq_forced_thread_fn+0x60/0x6 [242446.690604] kthread+0x112/0x15 [242446.694298] ? irq_thread_check_affinity+0xe0/0xe [242446.699738] ? kthread_park+0x60/0x6 [242446.703919] ? do_syscall_64+0x67/0x15 [242446.708292] ret_from_fork+0x25/0x3 [242446.712374] Code: 63 78 04 44 0f b7 70 08 41 89 d0 4c 8d 2c 38 41 83 e0 01 f6 c2 02 74 17 66 45 85 c0 74 11 f6 c2 04 b9 01 00 00 00 75 bb 83 ca 04 <66> 89 50 0a 66 45 85 c0 74 52 0f b6 48 0b 41 0f b7 f6 4d 89 e0 [242446.733527] RIP: report_bug+0x64/0x100 RSP: ffffc900066439c [242446.739935] CR2: ffffffffa06647e [242446.743763] ---[ end trace 0e90a20d0aa494f7 ]-- The root cause is that the qib/hfi1 post receive call to rvt_lkey_ok() doesn't interpret the new return value from rvt_lkey_ok() properly leading to an mr reference count underrun. Additionally, remove an unused argument in rvt_sge_adjacent() aw well as an unneeded incr local in rvt_post_one_wr(). Fixes: Commit 14fe13fcd3af ("IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok()") Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/ruc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index f13ddb27..4afa00f 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -74,8 +74,10 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ - if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, + NULL, &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE); + if (unlikely(ret <= 0)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; -- cgit v1.1 From 71d47008ca1b2ab10e0432e72e572c7ce5d8d63b Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:43:49 -0700 Subject: IB/hfi1: Create workqueue for link events Currently, link down interrupts queue link entries on a workqueue intended for sending events only. Create a workqueue for queuing link events. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 20 ++++++++++---------- drivers/infiniband/hw/hfi1/driver.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/init.c | 26 ++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/pio.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 6 files changed, 40 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index bd594e7..8443d41 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5545,7 +5545,7 @@ static void update_rcverr_timer(unsigned long opaque) set_link_down_reason( ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); - queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); + queue_work(ppd->link_wq, &ppd->link_bounce_work); } dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt; @@ -6100,7 +6100,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) * will not happen. We have to do it here * before turning the DC off. */ - queue_work(ppd->hfi1_wq, &ppd->link_down_work); + queue_work(ppd->link_wq, &ppd->link_down_work); } } else { dd_dev_info(dd, "%s: QSFP module inserted\n", @@ -6135,7 +6135,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) /* Schedule the QSFP work only if there is a cable attached. */ if (qsfp_mod_present(ppd)) - queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work); + queue_work(ppd->link_wq, &ppd->qsfp_info.qsfp_work); } static int request_host_lcb_access(struct hfi1_devdata *dd) @@ -7738,12 +7738,12 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)HOST_REQ_DONE; } if (host_msg & BC_SMA_MSG) { - queue_work(ppd->hfi1_wq, &ppd->sma_message_work); + queue_work(ppd->link_wq, &ppd->sma_message_work); host_msg &= ~(u64)BC_SMA_MSG; } if (host_msg & LINKUP_ACHIEVED) { dd_dev_info(dd, "8051: Link up\n"); - queue_work(ppd->hfi1_wq, &ppd->link_up_work); + queue_work(ppd->link_wq, &ppd->link_up_work); host_msg &= ~(u64)LINKUP_ACHIEVED; } if (host_msg & EXT_DEVICE_CFG_REQ) { @@ -7751,7 +7751,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)EXT_DEVICE_CFG_REQ; } if (host_msg & VERIFY_CAP_FRAME) { - queue_work(ppd->hfi1_wq, &ppd->link_vc_work); + queue_work(ppd->link_wq, &ppd->link_vc_work); host_msg &= ~(u64)VERIFY_CAP_FRAME; } if (host_msg & LINK_GOING_DOWN) { @@ -7766,7 +7766,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)LINK_GOING_DOWN; } if (host_msg & LINK_WIDTH_DOWNGRADED) { - queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work); + queue_work(ppd->link_wq, &ppd->link_downgrade_work); host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED; } if (host_msg) { @@ -7809,7 +7809,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) dd_dev_info(dd, "%s: not queuing link down\n", __func__); } else { - queue_work(ppd->hfi1_wq, &ppd->link_down_work); + queue_work(ppd->link_wq, &ppd->link_down_work); } } } @@ -8017,7 +8017,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) dd_dev_info_ratelimited(dd, "%s: PortErrorAction bounce\n", __func__); set_link_down_reason(ppd, lcl_reason, 0, lcl_reason); - queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); + queue_work(ppd->link_wq, &ppd->link_bounce_work); } } @@ -9685,7 +9685,7 @@ static void try_start_link(struct hfi1_pportdata *ppd) "QSFP not responding, waiting and retrying %d\n", (int)ppd->qsfp_retry_count); ppd->qsfp_retry_count++; - queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work, + queue_delayed_work(ppd->link_wq, &ppd->start_link_work, msecs_to_jiffies(QSFP_RETRY_WAIT)); return; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 01235d4..0b7ca0e 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -914,7 +914,7 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, return 0; } - queue_work(rcd->ppd->hfi1_wq, lsaw); + queue_work(rcd->ppd->link_wq, lsaw); return 1; } return 0; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2d32c5c..ee6c389 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -576,6 +576,7 @@ struct hfi1_pportdata { /* SendDMA related entries */ struct workqueue_struct *hfi1_wq; + struct workqueue_struct *link_wq; /* move out of interrupt context */ struct work_struct link_vc_work; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 7f5e4c7d..be027c9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -660,6 +660,20 @@ static int create_workqueues(struct hfi1_devdata *dd) if (!ppd->hfi1_wq) goto wq_error; } + if (!ppd->link_wq) { + /* + * Make the link workqueue single-threaded to enforce + * serialization. + */ + ppd->link_wq = + alloc_workqueue( + "hfi_link_%d_%d", + WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, + 1, /* max_active */ + dd->unit, pidx); + if (!ppd->link_wq) + goto wq_error; + } } return 0; wq_error: @@ -670,6 +684,10 @@ wq_error: destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } return -ENOMEM; } @@ -954,6 +972,10 @@ static void shutdown_device(struct hfi1_devdata *dd) destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } sdma_exit(dd); } @@ -1575,6 +1597,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } if (!j) hfi1_device_remove(dd); diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index adb6a4d..7108a4b 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1012,7 +1012,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", __func__, sc->sw_index, sc->hw_context, (u32)reg); - queue_work(dd->pport->hfi1_wq, + queue_work(dd->pport->link_wq, &dd->pport->link_bounce_work); break; } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 71b4258..6781bcd 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -325,7 +325,7 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde, /* timed out - bounce the link */ dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n", __func__, sde->this_idx, (u32)reg); - queue_work(dd->pport->hfi1_wq, + queue_work(dd->pport->link_wq, &dd->pport->link_bounce_work); break; } -- cgit v1.1 From 626c077c025f9da6dce809b5a8300ed44ef02b6f Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:43:55 -0700 Subject: IB/hfi1: Prevent link down request double queuing When link interrupts occur, multiple link down requests could be queued up when only one is needed. This could get the hfi1 out of sync with its link partner during LNI. Only allow one link down request to be queued at any one time. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 +++- drivers/infiniband/hw/hfi1/hfi.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 8443d41..a3af46c 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7045,6 +7045,7 @@ void handle_link_down(struct work_struct *work) /* Go offline first, then deal with reading/writing through 8051 */ was_up = !!(ppd->host_link_state & HLS_UP); set_link_state(ppd, HLS_DN_OFFLINE); + xchg(&ppd->is_link_down_queued, 0); if (was_up) { lcl_reason = 0; @@ -7805,10 +7806,11 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) */ if ((ppd->host_link_state & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || - ppd->link_enabled == 0) { + ppd->link_enabled == 0 || ppd->is_link_down_queued) { dd_dev_info(dd, "%s: not queuing link down\n", __func__); } else { + xchg(&ppd->is_link_down_queued, 1); queue_work(ppd->link_wq, &ppd->link_down_work); } } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ee6c389..fb5f839 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -644,6 +644,7 @@ struct hfi1_pportdata { /* placeholders for IB MAD packet settings */ u8 overrun_threshold; u8 phy_error_threshold; + unsigned int is_link_down_queued; /* Used to override LED behavior for things like maintenance beaconing*/ /* -- cgit v1.1 From 913cc67159bc85a96d94df301ca39c1b2c540dca Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:44:01 -0700 Subject: IB/hfi1: Always perform offline transition Always initiate an offline transition request when a link down occurs. The firmware will use this request to confirm that the driver has seen the link down message. A host version is set to indicate this driver behavior to the firmware. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 66 +++++++++++++++++------------------ drivers/infiniband/hw/hfi1/chip.h | 5 +++ drivers/infiniband/hw/hfi1/firmware.c | 8 +++++ 3 files changed, 46 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index a3af46c..101fbbb 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8832,6 +8832,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, & REMOTE_DEVICE_REV_MASK; } +int write_host_interface_version(struct hfi1_devdata *dd, u8 version) +{ + u32 frame; + u32 mask; + + mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT); + read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame); + /* Clear, then set field */ + frame &= ~mask; + frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT); + return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, + frame); +} + void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, u8 *ver_patch) { @@ -10262,49 +10276,35 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd) static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) { struct hfi1_devdata *dd = ppd->dd; - u32 pstate, previous_state; + u32 previous_state; int ret; - int do_transition; - int do_wait; update_lcb_cache(dd); previous_state = ppd->host_link_state; ppd->host_link_state = HLS_GOING_OFFLINE; - pstate = read_physical_state(dd); - if (pstate == PLS_OFFLINE) { - do_transition = 0; /* in right state */ - do_wait = 0; /* ...no need to wait */ - } else if ((pstate & 0xf0) == PLS_OFFLINE) { - do_transition = 0; /* in an offline transient state */ - do_wait = 1; /* ...wait for it to settle */ - } else { - do_transition = 1; /* need to move to offline */ - do_wait = 1; /* ...will need to wait */ - } - if (do_transition) { - ret = set_physical_link_state(dd, - (rem_reason << 8) | PLS_OFFLINE); + /* start offline transition */ + ret = set_physical_link_state(dd, (rem_reason << 8) | PLS_OFFLINE); - if (ret != HCMD_SUCCESS) { - dd_dev_err(dd, - "Failed to transition to Offline link state, return %d\n", - ret); - return -EINVAL; - } - if (ppd->offline_disabled_reason == - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) - ppd->offline_disabled_reason = - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); + if (ret != HCMD_SUCCESS) { + dd_dev_err(dd, + "Failed to transition to Offline link state, return %d\n", + ret); + return -EINVAL; } + if (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); - if (do_wait) { - /* it can take a while for the link to go down */ - ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); - if (ret < 0) - return ret; - } + /* + * Wait for offline transition. It can take a while for + * the link to go down. + */ + ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); + if (ret < 0) + return ret; /* * Now in charge of LCB - must be after the physical state is diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index bef6301..6a0c691 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -384,6 +384,7 @@ #define VERIFY_CAP_LOCAL_FABRIC 0x08 #define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09 #define LOCAL_DEVICE_ID 0x0a +#define RESERVED_REGISTERS 0x0b #define LOCAL_LNI_INFO 0x0c #define REMOTE_LNI_INFO 0x0d #define MISC_STATUS 0x0e @@ -506,6 +507,9 @@ #define DOWN_REMOTE_REASON_SHIFT 16 #define DOWN_REMOTE_REASON_MASK 0xff +#define HOST_INTERFACE_VERSION_SHIFT 16 +#define HOST_INTERFACE_VERSION_MASK 0xff + /* verify capability PHY power management bits */ #define PWRM_BER_CONTROL 0x1 #define PWRM_BANDWIDTH_CONTROL 0x2 @@ -704,6 +708,7 @@ int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); /* chip.c */ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, u8 *ver_patch); +int write_host_interface_version(struct hfi1_devdata *dd, u8 version); void read_guid(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 83efd6b..885714b 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -69,6 +69,7 @@ #define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw" #define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw" #define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw" +#define HOST_INTERFACE_VERSION 1 static uint fw_8051_load = 1; static uint fw_fabric_serdes_load = 1; @@ -1087,6 +1088,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd, dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", (int)ver_major, (int)ver_minor, (int)ver_patch); dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); + ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION); + if (ret != HCMD_SUCCESS) { + dd_dev_err(dd, + "Failed to set host interface version, return 0x%x\n", + ret); + return -EIO; + } return 0; } -- cgit v1.1 From 9abb0d1bbd9529c574eacd8586e2bf68d17966cd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 27 Jun 2017 16:49:53 +0300 Subject: RDMA: Simplify get firmware interface There is a need to forward FW version to user space application through RDMA netlink. In order to make it safe, there is need to declare nla_policy and limit the size of FW string. The new define IB_FW_VERSION_NAME_MAX will limit the size of FW version string. That define was chosen to be equal to ETHTOOL_FWVERS_LEN, because many drivers anyway are limited by that value indirectly. The introduction of this define allows us to remove the string size from get_fw_str function signature. Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index dc51bf2..c88c03c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1561,14 +1561,13 @@ static void init_ibport(struct hfi1_pportdata *ppd) RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } -static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct hfi1_ibdev *dev = dev_from_rdi(rdi); u32 ver = dd_from_dev(dev)->dc8051_ver; - snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver), + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver), dc8051_ver_min(ver), dc8051_ver_patch(ver)); } -- cgit v1.1