summaryrefslogtreecommitdiffstats
path: root/drivers/net/hyperv/netvsc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-06 14:45:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-06 14:45:08 -0700
commitaae3dbb4776e7916b6cd442d00159bea27a695c1 (patch)
treed074c5d783a81e7e2e084b1eba77f57459da7e37 /drivers/net/hyperv/netvsc.c
parentec3604c7a5aae8953545b0d05495357009a960e5 (diff)
parent66bed8465a808400eb14562510e26c8818082cb8 (diff)
downloadop-kernel-dev-aae3dbb4776e7916b6cd442d00159bea27a695c1.zip
op-kernel-dev-aae3dbb4776e7916b6cd442d00159bea27a695c1.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Support ipv6 checksum offload in sunvnet driver, from Shannon Nelson. 2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric Dumazet. 3) Allow generic XDP to work on virtual devices, from John Fastabend. 4) Add bpf device maps and XDP_REDIRECT, which can be used to build arbitrary switching frameworks using XDP. From John Fastabend. 5) Remove UFO offloads from the tree, gave us little other than bugs. 6) Remove the IPSEC flow cache, from Florian Westphal. 7) Support ipv6 route offload in mlxsw driver. 8) Support VF representors in bnxt_en, from Sathya Perla. 9) Add support for forward error correction modes to ethtool, from Vidya Sagar Ravipati. 10) Add time filter for packet scheduler action dumping, from Jamal Hadi Salim. 11) Extend the zerocopy sendmsg() used by virtio and tap to regular sockets via MSG_ZEROCOPY. From Willem de Bruijn. 12) Significantly rework value tracking in the BPF verifier, from Edward Cree. 13) Add new jump instructions to eBPF, from Daniel Borkmann. 14) Rework rtnetlink plumbing so that operations can be run without taking the RTNL semaphore. From Florian Westphal. 15) Support XDP in tap driver, from Jason Wang. 16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal. 17) Add Huawei hinic ethernet driver. 18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan Delalande. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits) i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq i40e: avoid NVM acquire deadlock during NVM update drivers: net: xgene: Remove return statement from void function drivers: net: xgene: Configure tx/rx delay for ACPI drivers: net: xgene: Read tx/rx delay for ACPI rocker: fix kcalloc parameter order rds: Fix non-atomic operation on shared flag variable net: sched: don't use GFP_KERNEL under spin lock vhost_net: correctly check tx avail during rx busy polling net: mdio-mux: add mdio_mux parameter to mdio_mux_init() rxrpc: Make service connection lookup always check for retry net: stmmac: Delete dead code for MDIO registration gianfar: Fix Tx flow control deactivation cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6 cxgb4: Fix pause frame count in t4_get_port_stats cxgb4: fix memory leak tun: rename generic_xdp to skb_xdp tun: reserve extra headroom only when XDP is set net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping net: dsa: bcm_sf2: Advertise number of egress queues ...
Diffstat (limited to 'drivers/net/hyperv/netvsc.c')
-rw-r--r--drivers/net/hyperv/netvsc.c459
1 files changed, 211 insertions, 248 deletions
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d18c332..0062b80 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -29,6 +29,9 @@
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <linux/prefetch.h>
+
#include <asm/sync_bitops.h>
#include "hyperv_net.h"
@@ -41,7 +44,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct hv_device *dev = net_device_ctx->device_ctx;
- struct netvsc_device *nv_dev = net_device_ctx->nvdev;
+ struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
memset(init_pkt, 0, sizeof(struct nvsp_message));
@@ -57,8 +60,6 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
sizeof(struct nvsp_message),
(unsigned long)init_pkt,
VM_PKT_DATA_INBAND, 0);
-
- net_device_ctx->datapath = vf;
}
static struct netvsc_device *alloc_net_device(void)
@@ -69,14 +70,15 @@ static struct netvsc_device *alloc_net_device(void)
if (!net_device)
return NULL;
- net_device->chan_table[0].mrc.buf
- = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
atomic_set(&net_device->open_cnt, 0);
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
+
+ net_device->recv_section_size = NETVSC_RECV_SECTION_SIZE;
+ net_device->send_section_size = NETVSC_SEND_SECTION_SIZE;
+
init_completion(&net_device->channel_init_wait);
init_waitqueue_head(&net_device->subchan_open);
@@ -90,7 +92,7 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
- vfree(nvdev->chan_table[i].mrc.buf);
+ vfree(nvdev->chan_table[i].mrc.slots);
kfree(nvdev);
}
@@ -104,7 +106,8 @@ static void netvsc_destroy_buf(struct hv_device *device)
{
struct nvsp_message *revoke_packet;
struct net_device *ndev = hv_get_drvdata(device);
- struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
int ret;
/*
@@ -144,6 +147,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
"revoke receive buffer to netvsp\n");
return;
}
+ net_device->recv_section_cnt = 0;
}
/* Teardown the gpadl on the vsp end */
@@ -168,19 +172,13 @@ static void netvsc_destroy_buf(struct hv_device *device)
net_device->recv_buf = NULL;
}
- if (net_device->recv_section) {
- net_device->recv_section_cnt = 0;
- kfree(net_device->recv_section);
- net_device->recv_section = NULL;
- }
-
/* Deal with the send buffer we may have setup.
* If we got a send section size, it means we received a
* NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
* NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
* to send a revoke msg here
*/
- if (net_device->send_section_size) {
+ if (net_device->send_section_cnt) {
/* Send the revoke receive buffer */
revoke_packet = &net_device->revoke_packet;
memset(revoke_packet, 0, sizeof(struct nvsp_message));
@@ -212,6 +210,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
"revoke send buffer to netvsp\n");
return;
}
+ net_device->send_section_cnt = 0;
}
/* Teardown the gpadl on the vsp end */
if (net_device->send_buf_gpadl_handle) {
@@ -236,25 +235,40 @@ static void netvsc_destroy_buf(struct hv_device *device)
kfree(net_device->send_section_map);
}
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+{
+ struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
+ int node = cpu_to_node(nvchan->channel->target_cpu);
+ size_t size;
+
+ size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
+ nvchan->mrc.slots = vzalloc_node(size, node);
+ if (!nvchan->mrc.slots)
+ nvchan->mrc.slots = vzalloc(size);
+
+ return nvchan->mrc.slots ? 0 : -ENOMEM;
+}
+
static int netvsc_init_buf(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
- int ret = 0;
+ struct nvsp_1_message_send_receive_buffer_complete *resp;
+ struct net_device *ndev = hv_get_drvdata(device);
struct nvsp_message *init_packet;
- struct net_device *ndev;
+ unsigned int buf_size;
size_t map_words;
- int node;
-
- ndev = hv_get_drvdata(device);
+ int ret = 0;
- node = cpu_to_node(device->channel->target_cpu);
- net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node);
- if (!net_device->recv_buf)
- net_device->recv_buf = vzalloc(net_device->recv_buf_size);
+ /* Get receive buffer area. */
+ buf_size = device_info->recv_sections * net_device->recv_section_size;
+ buf_size = roundup(buf_size, PAGE_SIZE);
+ net_device->recv_buf = vzalloc(buf_size);
if (!net_device->recv_buf) {
- netdev_err(ndev, "unable to allocate receive "
- "buffer of size %d\n", net_device->recv_buf_size);
+ netdev_err(ndev,
+ "unable to allocate receive buffer of size %u\n",
+ buf_size);
ret = -ENOMEM;
goto cleanup;
}
@@ -265,7 +279,7 @@ static int netvsc_init_buf(struct hv_device *device,
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
- net_device->recv_buf_size,
+ buf_size,
&net_device->recv_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
@@ -297,49 +311,45 @@ static int netvsc_init_buf(struct hv_device *device,
wait_for_completion(&net_device->channel_init_wait);
/* Check the response */
- if (init_packet->msg.v1_msg.
- send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
- netdev_err(ndev, "Unable to complete receive buffer "
- "initialization with NetVsp - status %d\n",
- init_packet->msg.v1_msg.
- send_recv_buf_complete.status);
+ resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
+ if (resp->status != NVSP_STAT_SUCCESS) {
+ netdev_err(ndev,
+ "Unable to complete receive buffer initialization with NetVsp - status %d\n",
+ resp->status);
ret = -EINVAL;
goto cleanup;
}
/* Parse the response */
+ netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
+ resp->num_sections, resp->sections[0].sub_alloc_size,
+ resp->sections[0].num_sub_allocs);
- net_device->recv_section_cnt = init_packet->msg.
- v1_msg.send_recv_buf_complete.num_sections;
-
- net_device->recv_section = kmemdup(
- init_packet->msg.v1_msg.send_recv_buf_complete.sections,
- net_device->recv_section_cnt *
- sizeof(struct nvsp_1_receive_buffer_section),
- GFP_KERNEL);
- if (net_device->recv_section == NULL) {
+ /* There should only be one section for the entire receive buffer */
+ if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
ret = -EINVAL;
goto cleanup;
}
- /*
- * For 1st release, there should only be 1 section that represents the
- * entire receive buffer
- */
- if (net_device->recv_section_cnt != 1 ||
- net_device->recv_section->offset != 0) {
- ret = -EINVAL;
+ net_device->recv_section_size = resp->sections[0].sub_alloc_size;
+ net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
+
+ /* Setup receive completion ring */
+ net_device->recv_completion_cnt
+ = round_up(net_device->recv_section_cnt + 1,
+ PAGE_SIZE / sizeof(u64));
+ ret = netvsc_alloc_recv_comp_ring(net_device, 0);
+ if (ret)
goto cleanup;
- }
- /* Now setup the send buffer.
- */
- net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
- if (!net_device->send_buf)
- net_device->send_buf = vzalloc(net_device->send_buf_size);
+ /* Now setup the send buffer. */
+ buf_size = device_info->send_sections * net_device->send_section_size;
+ buf_size = round_up(buf_size, PAGE_SIZE);
+
+ net_device->send_buf = vzalloc(buf_size);
if (!net_device->send_buf) {
- netdev_err(ndev, "unable to allocate send "
- "buffer of size %d\n", net_device->send_buf_size);
+ netdev_err(ndev, "unable to allocate send buffer of size %u\n",
+ buf_size);
ret = -ENOMEM;
goto cleanup;
}
@@ -349,7 +359,7 @@ static int netvsc_init_buf(struct hv_device *device,
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
- net_device->send_buf_size,
+ buf_size,
&net_device->send_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
@@ -394,10 +404,8 @@ static int netvsc_init_buf(struct hv_device *device,
net_device->send_section_size = init_packet->msg.
v1_msg.send_send_buf_complete.section_size;
- /* Section count is simply the size divided by the section size.
- */
- net_device->send_section_cnt =
- net_device->send_buf_size / net_device->send_section_size;
+ /* Section count is simply the size divided by the section size. */
+ net_device->send_section_cnt = buf_size / net_device->send_section_size;
netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
net_device->send_section_size, net_device->send_section_cnt);
@@ -475,7 +483,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
}
static int netvsc_connect_vsp(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
@@ -525,14 +534,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
if (ret != 0)
goto cleanup;
- /* Post the big receive buffer to NetVSP */
- if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
- else
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
- net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
- ret = netvsc_init_buf(device, net_device);
+ ret = netvsc_init_buf(device, net_device, device_info);
cleanup:
return ret;
@@ -550,7 +553,8 @@ void netvsc_device_remove(struct hv_device *device)
{
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct netvsc_device *net_device = net_device_ctx->nvdev;
+ struct netvsc_device *net_device
+ = rtnl_dereference(net_device_ctx->nvdev);
int i;
netvsc_disconnect_vsp(device);
@@ -693,7 +697,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
u32 pend_size,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
char *start = net_device->send_buf;
@@ -714,9 +718,9 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
}
for (i = 0; i < page_count; i++) {
- char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT);
- u32 offset = (*pb)[i].offset;
- u32 len = (*pb)[i].len;
+ char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
+ u32 offset = pb[i].offset;
+ u32 len = pb[i].len;
memcpy(dest, (src + offset), len);
msg_size += len;
@@ -735,36 +739,32 @@ static inline int netvsc_send_pkt(
struct hv_device *device,
struct hv_netvsc_packet *packet,
struct netvsc_device *net_device,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
struct nvsp_message nvmsg;
- struct netvsc_channel *nvchan
- = &net_device->chan_table[packet->q_idx];
+ struct nvsp_1_message_send_rndis_packet * const rpkt =
+ &nvmsg.msg.v1_msg.send_rndis_pkt;
+ struct netvsc_channel * const nvchan =
+ &net_device->chan_table[packet->q_idx];
struct vmbus_channel *out_channel = nvchan->channel;
struct net_device *ndev = hv_get_drvdata(device);
struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
- struct hv_page_buffer *pgbuf;
u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
- if (skb != NULL) {
- /* 0 is RMC_DATA; */
- nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
- } else {
- /* 1 is RMC_CONTROL; */
- nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
- }
+ if (skb)
+ rpkt->channel_type = 0; /* 0 is RMC_DATA */
+ else
+ rpkt->channel_type = 1; /* 1 is RMC_CONTROL */
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
- packet->send_buf_index;
+ rpkt->send_buf_section_index = packet->send_buf_index;
if (packet->send_buf_index == NETVSC_INVALID_INDEX)
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+ rpkt->send_buf_section_size = 0;
else
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
- packet->total_data_buflen;
+ rpkt->send_buf_section_size = packet->total_data_buflen;
req_id = (ulong)skb;
@@ -772,21 +772,18 @@ static inline int netvsc_send_pkt(
return -ENODEV;
if (packet->page_buf_cnt) {
- pgbuf = packet->cp_partial ? (*pb) +
- packet->rmsg_pgcnt : (*pb);
- ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
- pgbuf,
- packet->page_buf_cnt,
- &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (packet->cp_partial)
+ pb += packet->rmsg_pgcnt;
+
+ ret = vmbus_sendpacket_pagebuffer(out_channel,
+ pb, packet->page_buf_cnt,
+ &nvmsg, sizeof(nvmsg),
+ req_id);
} else {
- ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ ret = vmbus_sendpacket(out_channel,
+ &nvmsg, sizeof(nvmsg),
+ req_id, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
if (ret == 0) {
@@ -801,8 +798,10 @@ static inline int netvsc_send_pkt(
ret = -ENOSPC;
}
} else {
- netdev_err(ndev, "Unable to send packet %p ret %d\n",
- packet, ret);
+ netdev_err(ndev,
+ "Unable to send packet pages %u len %u, ret %d\n",
+ packet->page_buf_cnt, packet->total_data_buflen,
+ ret);
}
return ret;
@@ -820,13 +819,16 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
msdp->count = 0;
}
-int netvsc_send(struct hv_device *device,
+/* RCU already held by caller */
+int netvsc_send(struct net_device_context *ndev_ctx,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
- struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
+ struct netvsc_device *net_device
+ = rcu_dereference_bh(ndev_ctx->nvdev);
+ struct hv_device *device = ndev_ctx->device_ctx;
int ret = 0;
struct netvsc_channel *nvchan;
u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@ -838,7 +840,7 @@ int netvsc_send(struct hv_device *device,
bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
/* If device is rescinded, return error and packet will get dropped. */
- if (unlikely(net_device->destroy))
+ if (unlikely(!net_device || net_device->destroy))
return -ENODEV;
/* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
@@ -878,7 +880,9 @@ int netvsc_send(struct hv_device *device,
} else if (pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = netvsc_get_next_send_section(net_device);
- if (section_index != NETVSC_INVALID_INDEX) {
+ if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
+ ++ndev_ctx->eth_stats.tx_send_full;
+ } else {
move_pkt_msd(&msd_send, &msd_skb, msdp);
msd_len = 0;
}
@@ -943,130 +947,99 @@ send_now:
return ret;
}
-static int netvsc_send_recv_completion(struct vmbus_channel *channel,
- u64 transaction_id, u32 status)
+/* Send pending recv completions */
+static int send_recv_completions(struct net_device *ndev,
+ struct netvsc_device *nvdev,
+ struct netvsc_channel *nvchan)
{
- struct nvsp_message recvcompMessage;
+ struct multi_recv_comp *mrc = &nvchan->mrc;
+ struct recv_comp_msg {
+ struct nvsp_message_header hdr;
+ u32 status;
+ } __packed;
+ struct recv_comp_msg msg = {
+ .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
+ };
int ret;
- recvcompMessage.hdr.msg_type =
- NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
-
- recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
-
- /* Send the completion */
- ret = vmbus_sendpacket(channel, &recvcompMessage,
- sizeof(struct nvsp_message_header) + sizeof(u32),
- transaction_id, VM_PKT_COMP, 0);
-
- return ret;
-}
-
-static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
- u32 *filled, u32 *avail)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 first = mrc->first;
- u32 next = mrc->next;
-
- *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
- next - first;
+ while (mrc->first != mrc->next) {
+ const struct recv_comp_data *rcd
+ = mrc->slots + mrc->first;
- *avail = NETVSC_RECVSLOT_MAX - *filled - 1;
-}
+ msg.status = rcd->status;
+ ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
+ rcd->tid, VM_PKT_COMP, 0);
+ if (unlikely(ret)) {
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
-/* Read the first filled slot, no change to index */
-static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
- *nvdev, u16 q_idx)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 filled, avail;
+ ++ndev_ctx->eth_stats.rx_comp_busy;
+ return ret;
+ }
- if (unlikely(!mrc->buf))
- return NULL;
+ if (++mrc->first == nvdev->recv_completion_cnt)
+ mrc->first = 0;
+ }
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
- if (!filled)
- return NULL;
+ /* receive completion ring has been emptied */
+ if (unlikely(nvdev->destroy))
+ wake_up(&nvdev->wait_drain);
- return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
+ return 0;
}
-/* Put the first filled slot back to available pool */
-static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
+/* Count how many receive completions are outstanding */
+static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
+ const struct multi_recv_comp *mrc,
+ u32 *filled, u32 *avail)
{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- int num_recv;
-
- mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;
+ u32 count = nvdev->recv_completion_cnt;
- num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
+ if (mrc->next >= mrc->first)
+ *filled = mrc->next - mrc->first;
+ else
+ *filled = (count - mrc->first) + mrc->next;
- if (nvdev->destroy && num_recv == 0)
- wake_up(&nvdev->wait_drain);
+ *avail = count - *filled - 1;
}
-/* Check and send pending recv completions */
-static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
- struct vmbus_channel *channel, u16 q_idx)
+/* Add receive complete to ring to send to host. */
+static void enq_receive_complete(struct net_device *ndev,
+ struct netvsc_device *nvdev, u16 q_idx,
+ u64 tid, u32 status)
{
+ struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
+ struct multi_recv_comp *mrc = &nvchan->mrc;
struct recv_comp_data *rcd;
- int ret;
-
- while (true) {
- rcd = read_recv_comp_slot(nvdev, q_idx);
- if (!rcd)
- break;
+ u32 filled, avail;
- ret = netvsc_send_recv_completion(channel, rcd->tid,
- rcd->status);
- if (ret)
- break;
+ recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
- put_recv_comp_slot(nvdev, q_idx);
+ if (unlikely(filled > NAPI_POLL_WEIGHT)) {
+ send_recv_completions(ndev, nvdev, nvchan);
+ recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
}
-}
-
-#define NETVSC_RCD_WATERMARK 80
-
-/* Get next available slot */
-static inline struct recv_comp_data *get_recv_comp_slot(
- struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 filled, avail, next;
- struct recv_comp_data *rcd;
- if (unlikely(!nvdev->recv_section))
- return NULL;
-
- if (unlikely(!mrc->buf))
- return NULL;
-
- if (atomic_read(&nvdev->num_outstanding_recvs) >
- nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
- netvsc_chk_recv_comp(nvdev, channel, q_idx);
-
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
- if (!avail)
- return NULL;
-
- next = mrc->next;
- rcd = mrc->buf + next * sizeof(struct recv_comp_data);
- mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;
+ if (unlikely(!avail)) {
+ netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+ q_idx, tid);
+ return;
+ }
- atomic_inc(&nvdev->num_outstanding_recvs);
+ rcd = mrc->slots + mrc->next;
+ rcd->tid = tid;
+ rcd->status = status;
- return rcd;
+ if (++mrc->next == nvdev->recv_completion_cnt)
+ mrc->next = 0;
}
static int netvsc_receive(struct net_device *ndev,
- struct netvsc_device *net_device,
- struct net_device_context *net_device_ctx,
- struct hv_device *device,
- struct vmbus_channel *channel,
- const struct vmpacket_descriptor *desc,
- struct nvsp_message *nvsp)
+ struct netvsc_device *net_device,
+ struct net_device_context *net_device_ctx,
+ struct hv_device *device,
+ struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *desc,
+ struct nvsp_message *nvsp)
{
const struct vmtransfer_page_packet_header *vmxferpage_packet
= container_of(desc, const struct vmtransfer_page_packet_header, d);
@@ -1075,7 +1048,6 @@ static int netvsc_receive(struct net_device *ndev,
u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
- int ret;
/* Make sure this is a valid nvsp packet */
if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
@@ -1106,25 +1078,9 @@ static int netvsc_receive(struct net_device *ndev,
channel, data, buflen);
}
- if (net_device->chan_table[q_idx].mrc.buf) {
- struct recv_comp_data *rcd;
+ enq_receive_complete(ndev, net_device, q_idx,
+ vmxferpage_packet->d.trans_id, status);
- rcd = get_recv_comp_slot(net_device, channel, q_idx);
- if (rcd) {
- rcd->tid = vmxferpage_packet->d.trans_id;
- rcd->status = status;
- } else {
- netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
- q_idx, vmxferpage_packet->d.trans_id);
- }
- } else {
- ret = netvsc_send_recv_completion(channel,
- vmxferpage_packet->d.trans_id,
- status);
- if (ret)
- netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
- q_idx, vmxferpage_packet->d.trans_id, ret);
- }
return count;
}
@@ -1220,11 +1176,10 @@ int netvsc_poll(struct napi_struct *napi, int budget)
{
struct netvsc_channel *nvchan
= container_of(napi, struct netvsc_channel, napi);
+ struct netvsc_device *net_device = nvchan->net_device;
struct vmbus_channel *channel = nvchan->channel;
struct hv_device *device = netvsc_channel_to_device(channel);
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct net_device *ndev = hv_get_drvdata(device);
- struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
int work_done = 0;
/* If starting a new interval */
@@ -1237,17 +1192,19 @@ int netvsc_poll(struct napi_struct *napi, int budget)
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
}
- /* If receive ring was exhausted
- * and not doing busy poll
+ /* If send of pending receive completions suceeded
+ * and did not exhaust NAPI budget this time
+ * and not doing busy poll
* then re-enable host interrupts
- * and reschedule if ring is not empty.
+ * and reschedule if ring is not empty.
*/
- if (work_done < budget &&
+ if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
+ work_done < budget &&
napi_complete_done(napi, work_done) &&
- hv_end_read(&channel->inbound) != 0)
+ hv_end_read(&channel->inbound)) {
+ hv_begin_read(&channel->inbound);
napi_reschedule(napi);
-
- netvsc_chk_recv_comp(net_device, channel, q_idx);
+ }
/* Driver may overshoot since multiple packets per descriptor */
return min(work_done, budget);
@@ -1259,10 +1216,15 @@ int netvsc_poll(struct napi_struct *napi, int budget)
void netvsc_channel_cb(void *context)
{
struct netvsc_channel *nvchan = context;
+ struct vmbus_channel *channel = nvchan->channel;
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+ /* preload first vmpacket descriptor */
+ prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
if (napi_schedule_prep(&nvchan->napi)) {
/* disable interupts from host */
- hv_begin_read(&nvchan->channel->inbound);
+ hv_begin_read(rbi);
__napi_schedule(&nvchan->napi);
}
@@ -1272,8 +1234,8 @@ void netvsc_channel_cb(void *context)
* netvsc_device_add - Callback when the device belonging to this
* driver is added
*/
-int netvsc_device_add(struct hv_device *device,
- const struct netvsc_device_info *device_info)
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+ const struct netvsc_device_info *device_info)
{
int i, ret = 0;
int ring_size = device_info->ring_size;
@@ -1283,7 +1245,7 @@ int netvsc_device_add(struct hv_device *device,
net_device = alloc_net_device();
if (!net_device)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
net_device->ring_size = ring_size;
@@ -1303,6 +1265,7 @@ int netvsc_device_add(struct hv_device *device,
struct netvsc_channel *nvchan = &net_device->chan_table[i];
nvchan->channel = device->channel;
+ nvchan->net_device = net_device;
u64_stats_init(&nvchan->tx_stats.syncp);
u64_stats_init(&nvchan->rx_stats.syncp);
}
@@ -1334,17 +1297,18 @@ int netvsc_device_add(struct hv_device *device,
rcu_assign_pointer(net_device_ctx->nvdev, net_device);
/* Connect with the NetVsp */
- ret = netvsc_connect_vsp(device, net_device);
+ ret = netvsc_connect_vsp(device, net_device, device_info);
if (ret != 0) {
netdev_err(ndev,
"unable to connect to NetVSP - %d\n", ret);
goto close;
}
- return ret;
+ return net_device;
close:
- netif_napi_del(&net_device->chan_table[0].napi);
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+ napi_disable(&net_device->chan_table[0].napi);
/* Now, we can close the channel safely */
vmbus_close(device->channel);
@@ -1352,6 +1316,5 @@ close:
cleanup:
free_netvsc_device(&net_device->rcu);
- return ret;
-
+ return ERR_PTR(ret);
}
OpenPOWER on IntegriCloud