Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller: "Highlights: 1) Steady transitioning of the BPF instructure to a generic spot so all kernel subsystems can make use of it, from Alexei Starovoitov. 2) SFC driver supports busy polling, from Alexandre Rames. 3) Take advantage of hash table in UDP multicast delivery, from David Held. 4) Lighten locking, in particular by getting rid of the LRU lists, in inet frag handling. From Florian Westphal. 5) Add support for various RFC6458 control messages in SCTP, from Geir Ola Vaagland. 6) Allow to filter bridge forwarding database dumps by device, from Jamal Hadi Salim. 7) virtio-net also now supports busy polling, from Jason Wang. 8) Some low level optimization tweaks in pktgen from Jesper Dangaard Brouer. 9) Add support for ipv6 address generation modes, so that userland can have some input into the process. From Jiri Pirko. 10) Consolidate common TCP connection request code in ipv4 and ipv6, from Octavian Purdila. 11) New ARP packet logger in netfilter, from Pablo Neira Ayuso. 12) Generic resizable RCU hash table, with intial users in netlink and nftables. From Thomas Graf. 13) Maintain a name assignment type so that userspace can see where a network device name came from (enumerated by kernel, assigned explicitly by userspace, etc.) From Tom Gundersen. 14) Automatic flow label generation on transmit in ipv6, from Tom Herbert. 15) New packet timestamping facilities from Willem de Bruijn, meant to assist in measuring latencies going into/out-of the packet scheduler, latency from TCP data transmission to ACK, etc" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1536 commits) cxgb4 : Disable recursive mailbox commands when enabling vi net: reduce USB network driver config options. tg3: Modify tg3_tso_bug() to handle multiple TX rings amd-xgbe: Perform phy connect/disconnect at dev open/stop amd-xgbe: Use dma_set_mask_and_coherent to set DMA mask net: sun4i-emac: fix memory leak on bad packet sctp: fix possible seqlock seadlock in sctp_packet_transmit() Revert "net: phy: Set the driver when registering an MDIO bus device" cxgb4vf: Turn off SGE RX/TX Callback Timers and interrupts in PCI shutdown routine team: Simplify return path of team_newlink bridge: Update outdated comment on promiscuous mode net-timestamp: ACK timestamp for bytestreams net-timestamp: TCP timestamping net-timestamp: SCHED timestamp on entering packet scheduler net-timestamp: add key to disambiguate concurrent datagrams net-timestamp: move timestamp flags out of sk_flags net-timestamp: extend SCM_TIMESTAMPING ancillary data struct cxgb4i : Move stray CPL definitions to cxgb4 driver tcp: reduce spurious retransmits due to transient SACK reneging qlcnic: Initialize dcbnl_ops before register_netdev ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-08-06 09:38:14 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-08-06 09:38:14 -0700
commit: ae045e2455429c418a418a3376301a9e5753a0a8 (patch)
tree: b445bdeecd3f38aa0d0a29c9585cee49e4ccb0f1 /drivers/infiniband/hw/cxgb4/device.c
parent: f4f142ed4ef835709c7e6d12eaca10d190bcebed (diff)
parent: d247b6ab3ce6dd43665780865ec5fa145d9ab6bd (diff)
download: op-kernel-dev-ae045e2455429c418a418a3376301a9e5753a0a8.zip
op-kernel-dev-ae045e2455429c418a418a3376301a9e5753a0a8.tar.gz
1 files changed, 188 insertions, 4 deletions
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 7db82b2..f25df52 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -33,6 +33,7 @@
 #include <linux/moduleparam.h>
 #include <linux/debugfs.h>
 #include <linux/vmalloc.h>
+#include <linux/math64.h>
 
 #include <rdma/ib_verbs.h>
 
@@ -55,6 +56,15 @@ module_param(allow_db_coalescing_on_t5, int, 0644);
 MODULE_PARM_DESC(allow_db_coalescing_on_t5,
 		 "Allow DB Coalescing on T5 (default = 0)");
 
+int c4iw_wr_log = 0;
+module_param(c4iw_wr_log, int, 0444);
+MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
+
+int c4iw_wr_log_size_order = 12;
+module_param(c4iw_wr_log_size_order, int, 0444);
+MODULE_PARM_DESC(c4iw_wr_log_size_order,
+		 "Number of entries (log2) in the work request timing log.");
+
 struct uld_ctx {
 	struct list_head entry;
 	struct cxgb4_lld_info lldi;
@@ -103,6 +113,117 @@ static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
 	return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
 }
 
+void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
+{
+	struct wr_log_entry le;
+	int idx;
+
+	if (!wq->rdev->wr_log)
+		return;
+
+	idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
+		(wq->rdev->wr_log_size - 1);
+	le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
+	getnstimeofday(&le.poll_host_ts);
+	le.valid = 1;
+	le.cqe_sge_ts = CQE_TS(cqe);
+	if (SQ_TYPE(cqe)) {
+		le.qid = wq->sq.qid;
+		le.opcode = CQE_OPCODE(cqe);
+		le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts;
+		le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
+		le.wr_id = CQE_WRID_SQ_IDX(cqe);
+	} else {
+		le.qid = wq->rq.qid;
+		le.opcode = FW_RI_RECEIVE;
+		le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts;
+		le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
+		le.wr_id = CQE_WRID_MSN(cqe);
+	}
+	wq->rdev->wr_log[idx] = le;
+}
+
+static int wr_log_show(struct seq_file *seq, void *v)
+{
+	struct c4iw_dev *dev = seq->private;
+	struct timespec prev_ts = {0, 0};
+	struct wr_log_entry *lep;
+	int prev_ts_set = 0;
+	int idx, end;
+
+#define ts2ns(ts) div64_ul((ts) * dev->rdev.lldi.cclk_ps, 1000)
+
+	idx = atomic_read(&dev->rdev.wr_log_idx) &
+		(dev->rdev.wr_log_size - 1);
+	end = idx - 1;
+	if (end < 0)
+		end = dev->rdev.wr_log_size - 1;
+	lep = &dev->rdev.wr_log[idx];
+	while (idx != end) {
+		if (lep->valid) {
+			if (!prev_ts_set) {
+				prev_ts_set = 1;
+				prev_ts = lep->poll_host_ts;
+			}
+			seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode "
+				   "%u %s 0x%x host_wr_delta sec %lu nsec %lu "
+				   "post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
+				   "poll_sge_ts 0x%llx post_poll_delta_ns %llu "
+				   "cqe_poll_delta_ns %llu\n",
+				   idx,
+				   timespec_sub(lep->poll_host_ts,
+						prev_ts).tv_sec,
+				   timespec_sub(lep->poll_host_ts,
+						prev_ts).tv_nsec,
+				   lep->qid, lep->opcode,
+				   lep->opcode == FW_RI_RECEIVE ?
+							"msn" : "wrid",
+				   lep->wr_id,
+				   timespec_sub(lep->poll_host_ts,
+						lep->post_host_ts).tv_sec,
+				   timespec_sub(lep->poll_host_ts,
+						lep->post_host_ts).tv_nsec,
+				   lep->post_sge_ts, lep->cqe_sge_ts,
+				   lep->poll_sge_ts,
+				   ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
+				   ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
+			prev_ts = lep->poll_host_ts;
+		}
+		idx++;
+		if (idx > (dev->rdev.wr_log_size - 1))
+			idx = 0;
+		lep = &dev->rdev.wr_log[idx];
+	}
+#undef ts2ns
+	return 0;
+}
+
+static int wr_log_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, wr_log_show, inode->i_private);
+}
+
+static ssize_t wr_log_clear(struct file *file, const char __user *buf,
+			    size_t count, loff_t *pos)
+{
+	struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
+	int i;
+
+	if (dev->rdev.wr_log)
+		for (i = 0; i < dev->rdev.wr_log_size; i++)
+			dev->rdev.wr_log[i].valid = 0;
+	return count;
+}
+
+static const struct file_operations wr_log_debugfs_fops = {
+	.owner   = THIS_MODULE,
+	.open    = wr_log_open,
+	.release = single_release,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.write   = wr_log_clear,
+};
+
 static int dump_qp(int id, void *p, void *data)
 {
 	struct c4iw_qp *qp = p;
@@ -241,12 +362,32 @@ static int dump_stag(int id, void *p, void *data)
 	struct c4iw_debugfs_data *stagd = data;
 	int space;
 	int cc;
+	struct fw_ri_tpte tpte;
+	int ret;
 
 	space = stagd->bufsize - stagd->pos - 1;
 	if (space == 0)
 		return 1;
 
-	cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8);
+	ret = cxgb4_read_tpte(stagd->devp->rdev.lldi.ports[0], (u32)id<<8,
+			      (__be32 *)&tpte);
+	if (ret) {
+		dev_err(&stagd->devp->rdev.lldi.pdev->dev,
+			"%s cxgb4_read_tpte err %d\n", __func__, ret);
+		return ret;
+	}
+	cc = snprintf(stagd->buf + stagd->pos, space,
+		      "stag: idx 0x%x valid %d key 0x%x state %d pdid %d "
+		      "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
+		      (u32)id<<8,
+		      G_FW_RI_TPTE_VALID(ntohl(tpte.valid_to_pdid)),
+		      G_FW_RI_TPTE_STAGKEY(ntohl(tpte.valid_to_pdid)),
+		      G_FW_RI_TPTE_STAGSTATE(ntohl(tpte.valid_to_pdid)),
+		      G_FW_RI_TPTE_PDID(ntohl(tpte.valid_to_pdid)),
+		      G_FW_RI_TPTE_PERM(ntohl(tpte.locread_to_qpid)),
+		      G_FW_RI_TPTE_PS(ntohl(tpte.locread_to_qpid)),
+		      ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
+		      ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
 	if (cc < space)
 		stagd->pos += cc;
 	return 0;
@@ -259,7 +400,7 @@ static int stag_release(struct inode *inode, struct file *file)
 		printk(KERN_INFO "%s null stagd?\n", __func__);
 		return 0;
 	}
-	kfree(stagd->buf);
+	vfree(stagd->buf);
 	kfree(stagd);
 	return 0;
 }
@@ -282,8 +423,8 @@ static int stag_open(struct inode *inode, struct file *file)
 	idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
 	spin_unlock_irq(&stagd->devp->lock);
 
-	stagd->bufsize = count * sizeof("0x12345678\n");
-	stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL);
+	stagd->bufsize = count * 256;
+	stagd->buf = vmalloc(stagd->bufsize);
 	if (!stagd->buf) {
 		ret = -ENOMEM;
 		goto err1;
@@ -348,6 +489,7 @@ static int stats_show(struct seq_file *seq, void *v)
 		   dev->rdev.stats.act_ofld_conn_fails);
 	seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
 		   dev->rdev.stats.pas_ofld_conn_fails);
+	seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird);
 	return 0;
 }
 
@@ -583,6 +725,12 @@ static int setup_debugfs(struct c4iw_dev *devp)
 	if (de && de->d_inode)
 		de->d_inode->i_size = 4096;
 
+	if (c4iw_wr_log) {
+		de = debugfs_create_file("wr_log", S_IWUSR, devp->debugfs_root,
+					 (void *)devp, &wr_log_debugfs_fops);
+		if (de && de->d_inode)
+			de->d_inode->i_size = 4096;
+	}
 	return 0;
 }
 
@@ -696,7 +844,20 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 		pr_err(MOD "error allocating status page\n");
 		goto err4;
 	}
+
+	if (c4iw_wr_log) {
+		rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
+				       sizeof(*rdev->wr_log), GFP_KERNEL);
+		if (rdev->wr_log) {
+			rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
+			atomic_set(&rdev->wr_log_idx, 0);
+		} else {
+			pr_err(MOD "error allocating wr_log. Logging disabled\n");
+		}
+	}
+
 	rdev->status_page->db_off = 0;
+
 	return 0;
 err4:
 	c4iw_rqtpool_destroy(rdev);
@@ -710,6 +871,7 @@ err1:
 
 static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 {
+	kfree(rdev->wr_log);
 	free_page((unsigned long)rdev->status_page);
 	c4iw_pblpool_destroy(rdev);
 	c4iw_rqtpool_destroy(rdev);
@@ -768,6 +930,27 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 	}
 	devp->rdev.lldi = *infop;
 
+	/* init various hw-queue params based on lld info */
+	PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
+	     __func__, devp->rdev.lldi.sge_ingpadboundary,
+	     devp->rdev.lldi.sge_egrstatuspagesize);
+
+	devp->rdev.hw_queue.t4_eq_status_entries =
+		devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
+	devp->rdev.hw_queue.t4_max_eq_size = 65520;
+	devp->rdev.hw_queue.t4_max_iq_size = 65520;
+	devp->rdev.hw_queue.t4_max_rq_size = 8192 -
+		devp->rdev.hw_queue.t4_eq_status_entries - 1;
+	devp->rdev.hw_queue.t4_max_sq_size =
+		devp->rdev.hw_queue.t4_max_eq_size -
+		devp->rdev.hw_queue.t4_eq_status_entries - 1;
+	devp->rdev.hw_queue.t4_max_qp_depth =
+		devp->rdev.hw_queue.t4_max_rq_size;
+	devp->rdev.hw_queue.t4_max_cq_depth =
+		devp->rdev.hw_queue.t4_max_iq_size - 2;
+	devp->rdev.hw_queue.t4_stat_len =
+		devp->rdev.lldi.sge_egrstatuspagesize;
+
 	/*
 	 * For T5 devices, we map all of BAR2 with WC.
 	 * For T4 devices with onchip qp mem, we map only that part
@@ -818,6 +1001,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 	mutex_init(&devp->rdev.stats.lock);
 	mutex_init(&devp->db_mutex);
 	INIT_LIST_HEAD(&devp->db_fc_list);
+	devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
 
 	if (c4iw_debugfs_root) {
 		devp->debugfs_root = debugfs_create_dir(
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-06 09:38:14 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-06 09:38:14 -0700
commit	ae045e2455429c418a418a3376301a9e5753a0a8 (patch)
tree	b445bdeecd3f38aa0d0a29c9585cee49e4ccb0f1 /drivers/infiniband/hw/cxgb4/device.c
parent	f4f142ed4ef835709c7e6d12eaca10d190bcebed (diff)
parent	d247b6ab3ce6dd43665780865ec5fa145d9ab6bd (diff)
download	op-kernel-dev-ae045e2455429c418a418a3376301a9e5753a0a8.zip op-kernel-dev-ae045e2455429c418a418a3376301a9e5753a0a8.tar.gz