summaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lnet/lnet
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-06-01 10:59:48 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-06-05 19:22:35 +0200
commitbe65f9ed267fd7d8b3146b7c4be9ecdd3e0aa3ed (patch)
treef9fddf1a58b26a1f2eaf2ed7fa350c1622abbdbb /drivers/staging/lustre/lnet/lnet
parent3b93c0f4b6accb8105152900d7e414593a8b0c79 (diff)
downloadop-kernel-dev-be65f9ed267fd7d8b3146b7c4be9ecdd3e0aa3ed.zip
op-kernel-dev-be65f9ed267fd7d8b3146b7c4be9ecdd3e0aa3ed.tar.gz
staging: lustre: delete the filesystem from the tree.
The Lustre filesystem has been in the kernel tree for over 5 years now. While it has been an endless source of enjoyment for new kernel developers learning how to do basic coding-style cleanups, as well as a semi-entertaining source of bewilderment from the VFS developers any time they have looked into the codebase to try to figure out how to port their latest API changes to this filesystem, it has not really moved forward into the "this is in shape to get out of staging" state despite many half-completed attempts. And getting code out of staging is the main goal of that portion of the kernel tree. Code should not stagnate and it feels like having this code in staging is only causing the development cycle of the filesystem to take longer than it should. There is a whole separate out-of-tree copy of this codebase where the developers work on it, and then random changes are thrown over the wall at staging at some later point in time. This dual-tree development model has never worked, and the state of this codebase is proof of that. So, let's just delete the whole mess. Now the Lustre developers can go off and work in their out-of-tree codebase and not have to worry about providing valid changelog entries and breaking their patches up into logical pieces. They can take the time they have spent doing those types of housekeeping chores and get the codebase into a much better shape, and it can be submitted for inclusion into the real part of the kernel tree when ready. Cc: Oleg Drokin <oleg.drokin@intel.com> Cc: Andreas Dilger <andreas.dilger@intel.com> Cc: James Simmons <jsimmons@infradead.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/staging/lustre/lnet/lnet')
-rw-r--r--drivers/staging/lustre/lnet/lnet/Makefile10
-rw-r--r--drivers/staging/lustre/lnet/lnet/acceptor.c501
-rw-r--r--drivers/staging/lustre/lnet/lnet/api-ni.c2307
-rw-r--r--drivers/staging/lustre/lnet/lnet/config.c1235
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-eq.c426
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-md.c463
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-me.c274
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-move.c2386
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-msg.c625
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-ptl.c987
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-socket.c585
-rw-r--r--drivers/staging/lustre/lnet/lnet/lo.c105
-rw-r--r--drivers/staging/lustre/lnet/lnet/module.c239
-rw-r--r--drivers/staging/lustre/lnet/lnet/net_fault.c1023
-rw-r--r--drivers/staging/lustre/lnet/lnet/nidstrings.c1261
-rw-r--r--drivers/staging/lustre/lnet/lnet/peer.c456
-rw-r--r--drivers/staging/lustre/lnet/lnet/router.c1799
-rw-r--r--drivers/staging/lustre/lnet/lnet/router_proc.c907
18 files changed, 0 insertions, 15589 deletions
diff --git a/drivers/staging/lustre/lnet/lnet/Makefile b/drivers/staging/lustre/lnet/lnet/Makefile
deleted file mode 100644
index 0a9d709..0000000
--- a/drivers/staging/lustre/lnet/lnet/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LNET) += lnet.o
-
-lnet-y := api-ni.o config.o nidstrings.o net_fault.o \
- lib-me.o lib-msg.o lib-eq.o lib-md.o lib-ptl.o \
- lib-socket.o lib-move.o module.o lo.o \
- router.o router_proc.o acceptor.o peer.o
diff --git a/drivers/staging/lustre/lnet/lnet/acceptor.c b/drivers/staging/lustre/lnet/lnet/acceptor.c
deleted file mode 100644
index 5648f17..0000000
--- a/drivers/staging/lustre/lnet/lnet/acceptor.c
+++ /dev/null
@@ -1,501 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/completion.h>
-#include <net/sock.h>
-#include <linux/lnet/lib-lnet.h>
-
-static int accept_port = 988;
-static int accept_backlog = 127;
-static int accept_timeout = 5;
-
-static struct {
- int pta_shutdown;
- struct socket *pta_sock;
- struct completion pta_signal;
-} lnet_acceptor_state = {
- .pta_shutdown = 1
-};
-
-int
-lnet_acceptor_port(void)
-{
- return accept_port;
-}
-EXPORT_SYMBOL(lnet_acceptor_port);
-
-static inline int
-lnet_accept_magic(__u32 magic, __u32 constant)
-{
- return (magic == constant ||
- magic == __swab32(constant));
-}
-
-static char *accept = "secure";
-
-module_param(accept, charp, 0444);
-MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)");
-module_param(accept_port, int, 0444);
-MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)");
-module_param(accept_backlog, int, 0444);
-MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog");
-module_param(accept_timeout, int, 0644);
-MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)");
-
-static char *accept_type;
-
-static int
-lnet_acceptor_get_tunables(void)
-{
- /*
- * Userland acceptor uses 'accept_type' instead of 'accept', due to
- * conflict with 'accept(2)', but kernel acceptor still uses 'accept'
- * for compatibility. Hence the trick.
- */
- accept_type = accept;
- return 0;
-}
-
-int
-lnet_acceptor_timeout(void)
-{
- return accept_timeout;
-}
-EXPORT_SYMBOL(lnet_acceptor_timeout);
-
-void
-lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
- __u32 peer_ip, int peer_port)
-{
- switch (rc) {
- /* "normal" errors */
- case -ECONNREFUSED:
- CNETERR("Connection to %s at host %pI4h on port %d was refused: check that Lustre is running on that node.\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -EHOSTUNREACH:
- case -ENETUNREACH:
- CNETERR("Connection to %s at host %pI4h was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n",
- libcfs_nid2str(peer_nid), &peer_ip);
- break;
- case -ETIMEDOUT:
- CNETERR("Connection to %s at host %pI4h on port %d took too long: that node may be hung or experiencing high load.\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -ECONNRESET:
- LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %pI4h on port %d was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port,
- libcfs_nid2str(peer_nid));
- break;
- case -EPROTO:
- LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at host %pI4h on port %d: is it running a compatible version of Lustre?\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- case -EADDRINUSE:
- LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to connect to %s at host %pI4h on port %d\n",
- libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- default:
- LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s at host %pI4h on port %d\n",
- rc, libcfs_nid2str(peer_nid),
- &peer_ip, peer_port);
- break;
- }
-}
-EXPORT_SYMBOL(lnet_connect_console_error);
-
-int
-lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
- __u32 local_ip, __u32 peer_ip, int peer_port)
-{
- struct lnet_acceptor_connreq cr;
- struct socket *sock;
- int rc;
- int port;
- int fatal;
-
- BUILD_BUG_ON(sizeof(cr) > 16); /* too big to be on the stack */
-
- for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
- port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
- --port) {
- /* Iterate through reserved ports. */
-
- rc = lnet_sock_connect(&sock, &fatal, local_ip, port, peer_ip,
- peer_port);
- if (rc) {
- if (fatal)
- goto failed;
- continue;
- }
-
- BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
-
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- cr.acr_nid = peer_nid;
-
- if (the_lnet.ln_testprotocompat) {
- /* single-shot proto check */
- lnet_net_lock(LNET_LOCK_EX);
- if (the_lnet.ln_testprotocompat & 4) {
- cr.acr_version++;
- the_lnet.ln_testprotocompat &= ~4;
- }
- if (the_lnet.ln_testprotocompat & 8) {
- cr.acr_magic = LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~8;
- }
- lnet_net_unlock(LNET_LOCK_EX);
- }
-
- rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
- if (rc)
- goto failed_sock;
-
- *sockp = sock;
- return 0;
- }
-
- rc = -EADDRINUSE;
- goto failed;
-
- failed_sock:
- sock_release(sock);
- failed:
- lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
- return rc;
-}
-EXPORT_SYMBOL(lnet_connect);
-
-static int
-lnet_accept(struct socket *sock, __u32 magic)
-{
- struct lnet_acceptor_connreq cr;
- __u32 peer_ip;
- int peer_port;
- int rc;
- int flip;
- struct lnet_ni *ni;
- char *str;
-
- LASSERT(sizeof(cr) <= 16); /* not too big for the stack */
-
- rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- LASSERT(!rc); /* we succeeded before */
-
- if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
- if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
- /*
- * future version compatibility!
- * When LNET unifies protocols over all LNDs, the first
- * thing sent will be a version query. I send back
- * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old"
- */
- memset(&cr, 0, sizeof(cr));
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
- rc = lnet_sock_write(sock, &cr, sizeof(cr),
- accept_timeout);
-
- if (rc)
- CERROR("Error sending magic+version in response to LNET magic from %pI4h: %d\n",
- &peer_ip, rc);
- return -EPROTO;
- }
-
- if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC))
- str = "'old' socknal/tcpnal";
- else
- str = "unrecognised";
-
- LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pI4h magic %08x: %s acceptor protocol\n",
- &peer_ip, magic, str);
- return -EPROTO;
- }
-
- flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
-
- rc = lnet_sock_read(sock, &cr.acr_version, sizeof(cr.acr_version),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request version from %pI4h\n",
- rc, &peer_ip);
- return -EIO;
- }
-
- if (flip)
- __swab32s(&cr.acr_version);
-
- if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
- /*
- * future version compatibility!
- * An acceptor-specific protocol rev will first send a version
- * query. I send back my current version to tell her I'm
- * "old".
- */
- int peer_version = cr.acr_version;
-
- memset(&cr, 0, sizeof(cr));
- cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
- cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
-
- rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout);
- if (rc)
- CERROR("Error sending magic+version in response to version %d from %pI4h: %d\n",
- peer_version, &peer_ip, rc);
- return -EPROTO;
- }
-
- rc = lnet_sock_read(sock, &cr.acr_nid,
- sizeof(cr) -
- offsetof(struct lnet_acceptor_connreq, acr_nid),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request from %pI4h\n",
- rc, &peer_ip);
- return -EIO;
- }
-
- if (flip)
- __swab64s(&cr.acr_nid);
-
- ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
- if (!ni || /* no matching net */
- ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
- if (ni)
- lnet_ni_decref(ni);
- LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h for %s: No matching NI\n",
- &peer_ip, libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- if (!ni->ni_lnd->lnd_accept) {
- /* This catches a request for the loopback LND */
- lnet_ni_decref(ni);
- LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h for %s: NI doesn not accept IP connections\n",
- &peer_ip, libcfs_nid2str(cr.acr_nid));
- return -EPERM;
- }
-
- CDEBUG(D_NET, "Accept %s from %pI4h\n",
- libcfs_nid2str(cr.acr_nid), &peer_ip);
-
- rc = ni->ni_lnd->lnd_accept(ni, sock);
-
- lnet_ni_decref(ni);
- return rc;
-}
-
-static int
-lnet_acceptor(void *arg)
-{
- struct socket *newsock;
- int rc;
- __u32 magic;
- __u32 peer_ip;
- int peer_port;
- int secure = (int)((long)arg);
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port,
- accept_backlog);
- if (rc) {
- if (rc == -EADDRINUSE)
- LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n",
- accept_port);
- else
- LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port %d: unexpected error %d\n",
- accept_port, rc);
-
- lnet_acceptor_state.pta_sock = NULL;
- } else {
- LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
- }
-
- /* set init status and unblock parent */
- lnet_acceptor_state.pta_shutdown = rc;
- complete(&lnet_acceptor_state.pta_signal);
-
- if (rc)
- return rc;
-
- while (!lnet_acceptor_state.pta_shutdown) {
- rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
- if (rc) {
- if (rc != -EAGAIN) {
- CWARN("Accept error %d: pausing...\n", rc);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- }
- continue;
- }
-
- /* maybe the LNet acceptor thread has been waken */
- if (lnet_acceptor_state.pta_shutdown) {
- sock_release(newsock);
- break;
- }
-
- rc = lnet_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
- if (rc) {
- CERROR("Can't determine new connection's address\n");
- goto failed;
- }
-
- if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
- CERROR("Refusing connection from %pI4h: insecure port %d\n",
- &peer_ip, peer_port);
- goto failed;
- }
-
- rc = lnet_sock_read(newsock, &magic, sizeof(magic),
- accept_timeout);
- if (rc) {
- CERROR("Error %d reading connection request from %pI4h\n",
- rc, &peer_ip);
- goto failed;
- }
-
- rc = lnet_accept(newsock, magic);
- if (rc)
- goto failed;
-
- continue;
-
-failed:
- sock_release(newsock);
- }
-
- sock_release(lnet_acceptor_state.pta_sock);
- lnet_acceptor_state.pta_sock = NULL;
-
- CDEBUG(D_NET, "Acceptor stopping\n");
-
- /* unblock lnet_acceptor_stop() */
- complete(&lnet_acceptor_state.pta_signal);
- return 0;
-}
-
-static inline int
-accept2secure(const char *acc, long *sec)
-{
- if (!strcmp(acc, "secure")) {
- *sec = 1;
- return 1;
- } else if (!strcmp(acc, "all")) {
- *sec = 0;
- return 1;
- } else if (!strcmp(acc, "none")) {
- return 0;
- }
-
- LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
- acc);
- return -EINVAL;
-}
-
-int
-lnet_acceptor_start(void)
-{
- struct task_struct *task;
- int rc;
- long rc2;
- long secure;
-
- /* if acceptor is already running return immediately */
- if (!lnet_acceptor_state.pta_shutdown)
- return 0;
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- rc = lnet_acceptor_get_tunables();
- if (rc)
- return rc;
-
- init_completion(&lnet_acceptor_state.pta_signal);
- rc = accept2secure(accept_type, &secure);
- if (rc <= 0)
- return rc;
-
- if (!lnet_count_acceptor_nis()) /* not required */
- return 0;
-
- task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
- "acceptor_%03ld", secure);
- if (IS_ERR(task)) {
- rc2 = PTR_ERR(task);
- CERROR("Can't start acceptor thread: %ld\n", rc2);
-
- return -ESRCH;
- }
-
- /* wait for acceptor to startup */
- wait_for_completion(&lnet_acceptor_state.pta_signal);
-
- if (!lnet_acceptor_state.pta_shutdown) {
- /* started OK */
- LASSERT(lnet_acceptor_state.pta_sock);
- return 0;
- }
-
- LASSERT(!lnet_acceptor_state.pta_sock);
-
- return -ENETDOWN;
-}
-
-void
-lnet_acceptor_stop(void)
-{
- struct sock *sk;
-
- if (lnet_acceptor_state.pta_shutdown) /* not running */
- return;
-
- lnet_acceptor_state.pta_shutdown = 1;
-
- sk = lnet_acceptor_state.pta_sock->sk;
-
- /* awake any sleepers using safe method */
- sk->sk_state_change(sk);
-
- /* block until acceptor signals exit */
- wait_for_completion(&lnet_acceptor_state.pta_signal);
-}
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
deleted file mode 100644
index f9ed697..0000000
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ /dev/null
@@ -1,2307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/log2.h>
-#include <linux/ktime.h>
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-#define D_LNI D_CONSOLE
-
-struct lnet the_lnet; /* THE state of the network */
-EXPORT_SYMBOL(the_lnet);
-
-static char *ip2nets = "";
-module_param(ip2nets, charp, 0444);
-MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
-
-static char *networks = "";
-module_param(networks, charp, 0444);
-MODULE_PARM_DESC(networks, "local networks");
-
-static char *routes = "";
-module_param(routes, charp, 0444);
-MODULE_PARM_DESC(routes, "routes to non-local networks");
-
-static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
-module_param(rnet_htable_size, int, 0444);
-MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
- struct lnet_process_id __user *ids, int n_ids);
-
-static char *
-lnet_get_routes(void)
-{
- return routes;
-}
-
-static char *
-lnet_get_networks(void)
-{
- char *nets;
- int rc;
-
- if (*networks && *ip2nets) {
- LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n");
- return NULL;
- }
-
- if (*ip2nets) {
- rc = lnet_parse_ip2nets(&nets, ip2nets);
- return !rc ? nets : NULL;
- }
-
- if (*networks)
- return networks;
-
- return "tcp";
-}
-
-static void
-lnet_init_locks(void)
-{
- spin_lock_init(&the_lnet.ln_eq_wait_lock);
- init_waitqueue_head(&the_lnet.ln_eq_waitq);
- init_waitqueue_head(&the_lnet.ln_rc_waitq);
- mutex_init(&the_lnet.ln_lnd_mutex);
- mutex_init(&the_lnet.ln_api_mutex);
-}
-
-static int
-lnet_create_remote_nets_table(void)
-{
- int i;
- struct list_head *hash;
-
- LASSERT(!the_lnet.ln_remote_nets_hash);
- LASSERT(the_lnet.ln_remote_nets_hbits > 0);
- hash = kvmalloc_array(LNET_REMOTE_NETS_HASH_SIZE, sizeof(*hash),
- GFP_KERNEL);
- if (!hash) {
- CERROR("Failed to create remote nets hash table\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
- INIT_LIST_HEAD(&hash[i]);
- the_lnet.ln_remote_nets_hash = hash;
- return 0;
-}
-
-static void
-lnet_destroy_remote_nets_table(void)
-{
- int i;
-
- if (!the_lnet.ln_remote_nets_hash)
- return;
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
- LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
-
- kvfree(the_lnet.ln_remote_nets_hash);
- the_lnet.ln_remote_nets_hash = NULL;
-}
-
-static void
-lnet_destroy_locks(void)
-{
- if (the_lnet.ln_res_lock) {
- cfs_percpt_lock_free(the_lnet.ln_res_lock);
- the_lnet.ln_res_lock = NULL;
- }
-
- if (the_lnet.ln_net_lock) {
- cfs_percpt_lock_free(the_lnet.ln_net_lock);
- the_lnet.ln_net_lock = NULL;
- }
-}
-
-static int
-lnet_create_locks(void)
-{
- lnet_init_locks();
-
- the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
- if (!the_lnet.ln_res_lock)
- goto failed;
-
- the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
- if (!the_lnet.ln_net_lock)
- goto failed;
-
- return 0;
-
- failed:
- lnet_destroy_locks();
- return -ENOMEM;
-}
-
-static void lnet_assert_wire_constants(void)
-{
- /*
- * Wire protocol assertions generated by 'wirecheck'
- * running on Linux robert.bartonsoftware.com 2.6.8-1.521
- * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
- * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
- */
-
- /* Constants... */
- BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
- BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
- BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
- BUILD_BUG_ON(LNET_MSG_ACK != 0);
- BUILD_BUG_ON(LNET_MSG_PUT != 1);
- BUILD_BUG_ON(LNET_MSG_GET != 2);
- BUILD_BUG_ON(LNET_MSG_REPLY != 3);
- BUILD_BUG_ON(LNET_MSG_HELLO != 4);
-
- /* Checks for struct ptl_handle_wire_t */
- BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_object_cookie) != 8);
- BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
-
- /* Checks for struct struct lnet_magicversion */
- BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
- BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_minor) != 6);
- BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
-
- /* Checks for struct struct lnet_hdr */
- BUILD_BUG_ON((int)sizeof(struct lnet_hdr) != 72);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_nid) != 0);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_nid) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_nid) != 8);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_nid) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_pid) != 16);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_pid) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_pid) != 20);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_pid) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, type) != 24);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->type) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, payload_length) != 28);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->payload_length) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg) != 40);
-
- /* Ack */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.mlength) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) != 4);
-
- /* Put */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.hdr_data) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ptl_index) != 64);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.offset) != 68);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) != 4);
-
- /* Get */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.return_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) != 16);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.match_bits) != 48);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.ptl_index) != 56);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.src_offset) != 60);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.sink_length) != 64);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) != 4);
-
- /* Reply */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) != 16);
-
- /* Hello */
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.incarnation) != 32);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) != 8);
- BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.type) != 40);
- BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) != 4);
-}
-
-static struct lnet_lnd *
-lnet_find_lnd_by_type(__u32 type)
-{
- struct lnet_lnd *lnd;
- struct list_head *tmp;
-
- /* holding lnd mutex */
- list_for_each(tmp, &the_lnet.ln_lnds) {
- lnd = list_entry(tmp, struct lnet_lnd, lnd_list);
-
- if (lnd->lnd_type == type)
- return lnd;
- }
-
- return NULL;
-}
-
-void
-lnet_register_lnd(struct lnet_lnd *lnd)
-{
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
- LASSERT(!lnet_find_lnd_by_type(lnd->lnd_type));
-
- list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
- lnd->lnd_refcount = 0;
-
- CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_register_lnd);
-
-void
-lnet_unregister_lnd(struct lnet_lnd *lnd)
-{
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
- LASSERT(!lnd->lnd_refcount);
-
- list_del(&lnd->lnd_list);
- CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-}
-EXPORT_SYMBOL(lnet_unregister_lnd);
-
-void
-lnet_counters_get(struct lnet_counters *counters)
-{
- struct lnet_counters *ctr;
- int i;
-
- memset(counters, 0, sizeof(*counters));
-
- lnet_net_lock(LNET_LOCK_EX);
-
- cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
- counters->msgs_max += ctr->msgs_max;
- counters->msgs_alloc += ctr->msgs_alloc;
- counters->errors += ctr->errors;
- counters->send_count += ctr->send_count;
- counters->recv_count += ctr->recv_count;
- counters->route_count += ctr->route_count;
- counters->drop_count += ctr->drop_count;
- counters->send_length += ctr->send_length;
- counters->recv_length += ctr->recv_length;
- counters->route_length += ctr->route_length;
- counters->drop_length += ctr->drop_length;
- }
- lnet_net_unlock(LNET_LOCK_EX);
-}
-EXPORT_SYMBOL(lnet_counters_get);
-
-void
-lnet_counters_reset(void)
-{
- struct lnet_counters *counters;
- int i;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
- memset(counters, 0, sizeof(struct lnet_counters));
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static char *
-lnet_res_type2str(int type)
-{
- switch (type) {
- default:
- LBUG();
- case LNET_COOKIE_TYPE_MD:
- return "MD";
- case LNET_COOKIE_TYPE_ME:
- return "ME";
- case LNET_COOKIE_TYPE_EQ:
- return "EQ";
- }
-}
-
-static void
-lnet_res_container_cleanup(struct lnet_res_container *rec)
-{
- int count = 0;
-
- if (!rec->rec_type) /* not set yet, it's uninitialized */
- return;
-
- while (!list_empty(&rec->rec_active)) {
- struct list_head *e = rec->rec_active.next;
-
- list_del_init(e);
- if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
- kfree(list_entry(e, struct lnet_eq, eq_list));
-
- } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
- kfree(list_entry(e, struct lnet_libmd, md_list));
-
- } else { /* NB: Active MEs should be attached on portals */
- LBUG();
- }
- count++;
- }
-
- if (count > 0) {
- /*
- * Found alive MD/ME/EQ, user really should unlink/free
- * all of them before finalize LNet, but if someone didn't,
- * we have to recycle garbage for him
- */
- CERROR("%d active elements on exit of %s container\n",
- count, lnet_res_type2str(rec->rec_type));
- }
-
- kfree(rec->rec_lh_hash);
- rec->rec_lh_hash = NULL;
-
- rec->rec_type = 0; /* mark it as finalized */
-}
-
-static int
-lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
-{
- int rc = 0;
- int i;
-
- LASSERT(!rec->rec_type);
-
- rec->rec_type = type;
- INIT_LIST_HEAD(&rec->rec_active);
- rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
-
- /* Arbitrary choice of hash table size */
- rec->rec_lh_hash = kvmalloc_cpt(LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]),
- GFP_KERNEL, cpt);
- if (!rec->rec_lh_hash) {
- rc = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < LNET_LH_HASH_SIZE; i++)
- INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
-
- return 0;
-
-out:
- CERROR("Failed to setup %s resource container\n",
- lnet_res_type2str(type));
- lnet_res_container_cleanup(rec);
- return rc;
-}
-
-static void
-lnet_res_containers_destroy(struct lnet_res_container **recs)
-{
- struct lnet_res_container *rec;
- int i;
-
- cfs_percpt_for_each(rec, i, recs)
- lnet_res_container_cleanup(rec);
-
- cfs_percpt_free(recs);
-}
-
-static struct lnet_res_container **
-lnet_res_containers_create(int type)
-{
- struct lnet_res_container **recs;
- struct lnet_res_container *rec;
- int rc;
- int i;
-
- recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
- if (!recs) {
- CERROR("Failed to allocate %s resource containers\n",
- lnet_res_type2str(type));
- return NULL;
- }
-
- cfs_percpt_for_each(rec, i, recs) {
- rc = lnet_res_container_setup(rec, i, type);
- if (rc) {
- lnet_res_containers_destroy(recs);
- return NULL;
- }
- }
-
- return recs;
-}
-
-struct lnet_libhandle *
-lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
-{
- /* ALWAYS called with lnet_res_lock held */
- struct list_head *head;
- struct lnet_libhandle *lh;
- unsigned int hash;
-
- if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
- return NULL;
-
- hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
- head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
-
- list_for_each_entry(lh, head, lh_hash_chain) {
- if (lh->lh_cookie == cookie)
- return lh;
- }
-
- return NULL;
-}
-
-void
-lnet_res_lh_initialize(struct lnet_res_container *rec,
- struct lnet_libhandle *lh)
-{
- /* ALWAYS called with lnet_res_lock held */
- unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
- unsigned int hash;
-
- lh->lh_cookie = rec->rec_lh_cookie;
- rec->rec_lh_cookie += 1 << ibits;
-
- hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
-
- list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
-}
-
-static int lnet_unprepare(void);
-
-static int
-lnet_prepare(lnet_pid_t requested_pid)
-{
- /* Prepare to bring up the network */
- struct lnet_res_container **recs;
- int rc = 0;
-
- if (requested_pid == LNET_PID_ANY) {
- /* Don't instantiate LNET just for me */
- return -ENETDOWN;
- }
-
- LASSERT(!the_lnet.ln_refcount);
-
- the_lnet.ln_routing = 0;
-
- LASSERT(!(requested_pid & LNET_PID_USERFLAG));
- the_lnet.ln_pid = requested_pid;
-
- INIT_LIST_HEAD(&the_lnet.ln_test_peers);
- INIT_LIST_HEAD(&the_lnet.ln_nis);
- INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
- INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
- INIT_LIST_HEAD(&the_lnet.ln_routers);
- INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
- INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
-
- rc = lnet_create_remote_nets_table();
- if (rc)
- goto failed;
- /*
- * NB the interface cookie in wire handles guards against delayed
- * replies and ACKs appearing valid after reboot.
- */
- the_lnet.ln_interface_cookie = ktime_get_ns();
-
- the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct lnet_counters));
- if (!the_lnet.ln_counters) {
- CERROR("Failed to allocate counters for LNet\n");
- rc = -ENOMEM;
- goto failed;
- }
-
- rc = lnet_peer_tables_create();
- if (rc)
- goto failed;
-
- rc = lnet_msg_containers_create();
- if (rc)
- goto failed;
-
- rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
- LNET_COOKIE_TYPE_EQ);
- if (rc)
- goto failed;
-
- recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
- if (!recs) {
- rc = -ENOMEM;
- goto failed;
- }
-
- the_lnet.ln_me_containers = recs;
-
- recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
- if (!recs) {
- rc = -ENOMEM;
- goto failed;
- }
-
- the_lnet.ln_md_containers = recs;
-
- rc = lnet_portals_create();
- if (rc) {
- CERROR("Failed to create portals for LNet: %d\n", rc);
- goto failed;
- }
-
- return 0;
-
- failed:
- lnet_unprepare();
- return rc;
-}
-
-static int
-lnet_unprepare(void)
-{
- /*
- * NB no LNET_LOCK since this is the last reference. All LND instances
- * have shut down already, so it is safe to unlink and free all
- * descriptors, even those that appear committed to a network op (eg MD
- * with non-zero pending count)
- */
- lnet_fail_nid(LNET_NID_ANY, 0);
-
- LASSERT(!the_lnet.ln_refcount);
- LASSERT(list_empty(&the_lnet.ln_test_peers));
- LASSERT(list_empty(&the_lnet.ln_nis));
- LASSERT(list_empty(&the_lnet.ln_nis_cpt));
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
- lnet_portals_destroy();
-
- if (the_lnet.ln_md_containers) {
- lnet_res_containers_destroy(the_lnet.ln_md_containers);
- the_lnet.ln_md_containers = NULL;
- }
-
- if (the_lnet.ln_me_containers) {
- lnet_res_containers_destroy(the_lnet.ln_me_containers);
- the_lnet.ln_me_containers = NULL;
- }
-
- lnet_res_container_cleanup(&the_lnet.ln_eq_container);
-
- lnet_msg_containers_destroy();
- lnet_peer_tables_destroy();
- lnet_rtrpools_free(0);
-
- if (the_lnet.ln_counters) {
- cfs_percpt_free(the_lnet.ln_counters);
- the_lnet.ln_counters = NULL;
- }
- lnet_destroy_remote_nets_table();
-
- return 0;
-}
-
-struct lnet_ni *
-lnet_net2ni_locked(__u32 net, int cpt)
-{
- struct list_head *tmp;
- struct lnet_ni *ni;
-
- LASSERT(cpt != LNET_LOCK_EX);
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net) {
- lnet_ni_addref_locked(ni, cpt);
- return ni;
- }
- }
-
- return NULL;
-}
-
-struct lnet_ni *
-lnet_net2ni(__u32 net)
-{
- struct lnet_ni *ni;
-
- lnet_net_lock(0);
- ni = lnet_net2ni_locked(net, 0);
- lnet_net_unlock(0);
-
- return ni;
-}
-EXPORT_SYMBOL(lnet_net2ni);
-
-static unsigned int
-lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
-{
- __u64 key = nid;
- unsigned int val;
-
- LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
-
- if (number == 1)
- return 0;
-
- val = hash_long(key, LNET_CPT_BITS);
- /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
- if (val < number)
- return val;
-
- return (unsigned int)(key + val + (val >> 1)) % number;
-}
-
-int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
-{
- struct lnet_ni *ni;
-
- /* must called with hold of lnet_net_lock */
- if (LNET_CPT_NUMBER == 1)
- return 0; /* the only one */
-
- /* take lnet_net_lock(any) would be OK */
- if (!list_empty(&the_lnet.ln_nis_cpt)) {
- list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
- if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
- continue;
-
- LASSERT(ni->ni_cpts);
- return ni->ni_cpts[lnet_nid_cpt_hash
- (nid, ni->ni_ncpts)];
- }
- }
-
- return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-}
-
-int
-lnet_cpt_of_nid(lnet_nid_t nid)
-{
- int cpt;
- int cpt2;
-
- if (LNET_CPT_NUMBER == 1)
- return 0; /* the only one */
-
- if (list_empty(&the_lnet.ln_nis_cpt))
- return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
- cpt = lnet_net_lock_current();
- cpt2 = lnet_cpt_of_nid_locked(nid);
- lnet_net_unlock(cpt);
-
- return cpt2;
-}
-EXPORT_SYMBOL(lnet_cpt_of_nid);
-
-int
-lnet_islocalnet(__u32 net)
-{
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- ni = lnet_net2ni_locked(net, cpt);
- if (ni)
- lnet_ni_decref_locked(ni, cpt);
-
- lnet_net_unlock(cpt);
-
- return !!ni;
-}
-
-struct lnet_ni *
-lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
-
- LASSERT(cpt != LNET_LOCK_EX);
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (ni->ni_nid == nid) {
- lnet_ni_addref_locked(ni, cpt);
- return ni;
- }
- }
-
- return NULL;
-}
-
-int
-lnet_islocalnid(lnet_nid_t nid)
-{
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
- ni = lnet_nid2ni_locked(nid, cpt);
- if (ni)
- lnet_ni_decref_locked(ni, cpt);
- lnet_net_unlock(cpt);
-
- return !!ni;
-}
-
-int
-lnet_count_acceptor_nis(void)
-{
- /* Return the # of NIs that need the acceptor. */
- int count = 0;
- struct list_head *tmp;
- struct lnet_ni *ni;
- int cpt;
-
- cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (ni->ni_lnd->lnd_accept)
- count++;
- }
-
- lnet_net_unlock(cpt);
-
- return count;
-}
-
-static struct lnet_ping_info *
-lnet_ping_info_create(int num_ni)
-{
- struct lnet_ping_info *ping_info;
- unsigned int infosz;
-
- infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]);
- ping_info = kvzalloc(infosz, GFP_KERNEL);
- if (!ping_info) {
- CERROR("Can't allocate ping info[%d]\n", num_ni);
- return NULL;
- }
-
- ping_info->pi_nnis = num_ni;
- ping_info->pi_pid = the_lnet.ln_pid;
- ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
- ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
-
- return ping_info;
-}
-
-static inline int
-lnet_get_ni_count(void)
-{
- struct lnet_ni *ni;
- int count = 0;
-
- lnet_net_lock(0);
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
- count++;
-
- lnet_net_unlock(0);
-
- return count;
-}
-
-static inline void
-lnet_ping_info_free(struct lnet_ping_info *pinfo)
-{
- kvfree(pinfo);
-}
-
-static void
-lnet_ping_info_destroy(void)
-{
- struct lnet_ni *ni;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- lnet_ni_lock(ni);
- ni->ni_status = NULL;
- lnet_ni_unlock(ni);
- }
-
- lnet_ping_info_free(the_lnet.ln_ping_info);
- the_lnet.ln_ping_info = NULL;
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static void
-lnet_ping_event_handler(struct lnet_event *event)
-{
- struct lnet_ping_info *pinfo = event->md.user_ptr;
-
- if (event->unlinked)
- pinfo->pi_features = LNET_PING_FEAT_INVAL;
-}
-
-static int
-lnet_ping_info_setup(struct lnet_ping_info **ppinfo,
- struct lnet_handle_md *md_handle,
- int ni_count, bool set_eq)
-{
- struct lnet_process_id id = {LNET_NID_ANY, LNET_PID_ANY};
- struct lnet_handle_me me_handle;
- struct lnet_md md = { NULL };
- int rc, rc2;
-
- if (set_eq) {
- rc = LNetEQAlloc(0, lnet_ping_event_handler,
- &the_lnet.ln_ping_target_eq);
- if (rc) {
- CERROR("Can't allocate ping EQ: %d\n", rc);
- return rc;
- }
- }
-
- *ppinfo = lnet_ping_info_create(ni_count);
- if (!*ppinfo) {
- rc = -ENOMEM;
- goto failed_0;
- }
-
- rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
- LNET_PROTO_PING_MATCHBITS, 0,
- LNET_UNLINK, LNET_INS_AFTER,
- &me_handle);
- if (rc) {
- CERROR("Can't create ping ME: %d\n", rc);
- goto failed_1;
- }
-
- /* initialize md content */
- md.start = *ppinfo;
- md.length = offsetof(struct lnet_ping_info,
- pi_ni[(*ppinfo)->pi_nnis]);
- md.threshold = LNET_MD_THRESH_INF;
- md.max_size = 0;
- md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
- LNET_MD_MANAGE_REMOTE;
- md.user_ptr = NULL;
- md.eq_handle = the_lnet.ln_ping_target_eq;
- md.user_ptr = *ppinfo;
-
- rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
- if (rc) {
- CERROR("Can't attach ping MD: %d\n", rc);
- goto failed_2;
- }
-
- return 0;
-
-failed_2:
- rc2 = LNetMEUnlink(me_handle);
- LASSERT(!rc2);
-failed_1:
- lnet_ping_info_free(*ppinfo);
- *ppinfo = NULL;
-failed_0:
- if (set_eq)
- LNetEQFree(the_lnet.ln_ping_target_eq);
- return rc;
-}
-
-static void
-lnet_ping_md_unlink(struct lnet_ping_info *pinfo,
- struct lnet_handle_md *md_handle)
-{
- LNetMDUnlink(*md_handle);
- LNetInvalidateMDHandle(md_handle);
-
- /* NB md could be busy; this just starts the unlink */
- while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
- CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
- set_current_state(TASK_NOLOAD);
- schedule_timeout(HZ);
- }
-}
-
-static void
-lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
-{
- struct lnet_ni_status *ns;
- struct lnet_ni *ni;
- int i = 0;
-
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- LASSERT(i < ping_info->pi_nnis);
-
- ns = &ping_info->pi_ni[i];
-
- ns->ns_nid = ni->ni_nid;
-
- lnet_ni_lock(ni);
- ns->ns_status = (ni->ni_status) ?
- ni->ni_status->ns_status : LNET_NI_STATUS_UP;
- ni->ni_status = ns;
- lnet_ni_unlock(ni);
-
- i++;
- }
-}
-
-static void
-lnet_ping_target_update(struct lnet_ping_info *pinfo,
- struct lnet_handle_md md_handle)
-{
- struct lnet_ping_info *old_pinfo = NULL;
- struct lnet_handle_md old_md;
-
- /* switch the NIs to point to the new ping info created */
- lnet_net_lock(LNET_LOCK_EX);
-
- if (!the_lnet.ln_routing)
- pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
- lnet_ping_info_install_locked(pinfo);
-
- if (the_lnet.ln_ping_info) {
- old_pinfo = the_lnet.ln_ping_info;
- old_md = the_lnet.ln_ping_target_md;
- }
- the_lnet.ln_ping_target_md = md_handle;
- the_lnet.ln_ping_info = pinfo;
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- if (old_pinfo) {
- /* unlink the old ping info */
- lnet_ping_md_unlink(old_pinfo, &old_md);
- lnet_ping_info_free(old_pinfo);
- }
-}
-
-static void
-lnet_ping_target_fini(void)
-{
- int rc;
-
- lnet_ping_md_unlink(the_lnet.ln_ping_info,
- &the_lnet.ln_ping_target_md);
-
- rc = LNetEQFree(the_lnet.ln_ping_target_eq);
- LASSERT(!rc);
-
- lnet_ping_info_destroy();
-}
-
-static int
-lnet_ni_tq_credits(struct lnet_ni *ni)
-{
- int credits;
-
- LASSERT(ni->ni_ncpts >= 1);
-
- if (ni->ni_ncpts == 1)
- return ni->ni_maxtxcredits;
-
- credits = ni->ni_maxtxcredits / ni->ni_ncpts;
- credits = max(credits, 8 * ni->ni_peertxcredits);
- credits = min(credits, ni->ni_maxtxcredits);
-
- return credits;
-}
-
-static void
-lnet_ni_unlink_locked(struct lnet_ni *ni)
-{
- if (!list_empty(&ni->ni_cptlist)) {
- list_del_init(&ni->ni_cptlist);
- lnet_ni_decref_locked(ni, 0);
- }
-
- /* move it to zombie list and nobody can find it anymore */
- LASSERT(!list_empty(&ni->ni_list));
- list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
- lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */
-}
-
-static void
-lnet_clear_zombies_nis_locked(void)
-{
- int i;
- int islo;
- struct lnet_ni *ni;
- struct lnet_ni *temp;
-
- /*
- * Now wait for the NI's I just nuked to show up on ln_zombie_nis
- * and shut them down in guaranteed thread context
- */
- i = 2;
- list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis_zombie, ni_list) {
- int *ref;
- int j;
-
- list_del_init(&ni->ni_list);
- cfs_percpt_for_each(ref, j, ni->ni_refs) {
- if (!*ref)
- continue;
- /* still busy, add it back to zombie list */
- list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
- break;
- }
-
- if (!list_empty(&ni->ni_list)) {
- lnet_net_unlock(LNET_LOCK_EX);
- ++i;
- if ((i & (-i)) == i) {
- CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n",
- libcfs_nid2str(ni->ni_nid));
- }
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- lnet_net_lock(LNET_LOCK_EX);
- continue;
- }
-
- ni->ni_lnd->lnd_refcount--;
- lnet_net_unlock(LNET_LOCK_EX);
-
- islo = ni->ni_lnd->lnd_type == LOLND;
-
- LASSERT(!in_interrupt());
- ni->ni_lnd->lnd_shutdown(ni);
-
- /*
- * can't deref lnd anymore now; it might have unregistered
- * itself...
- */
- if (!islo)
- CDEBUG(D_LNI, "Removed LNI %s\n",
- libcfs_nid2str(ni->ni_nid));
-
- lnet_ni_free(ni);
- i = 2;
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-}
-
-static void
-lnet_shutdown_lndnis(void)
-{
- struct lnet_ni *ni;
- struct lnet_ni *temp;
- int i;
-
- /* NB called holding the global mutex */
-
- /* All quiet on the API front */
- LASSERT(!the_lnet.ln_shutdown);
- LASSERT(!the_lnet.ln_refcount);
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_shutdown = 1; /* flag shutdown */
-
- /* Unlink NIs from the global table */
- list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis, ni_list) {
- lnet_ni_unlink_locked(ni);
- }
-
- /* Drop the cached loopback NI. */
- if (the_lnet.ln_loni) {
- lnet_ni_decref_locked(the_lnet.ln_loni, 0);
- the_lnet.ln_loni = NULL;
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- /*
- * Clear lazy portals and drop delayed messages which hold refs
- * on their lnet_msg::msg_rxpeer
- */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- LNetClearLazyPortal(i);
-
- /*
- * Clear the peer table and wait for all peers to go (they hold refs on
- * their NIs)
- */
- lnet_peer_tables_cleanup(NULL);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- lnet_clear_zombies_nis_locked();
- the_lnet.ln_shutdown = 0;
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/* shutdown down the NI and release refcount */
-static void
-lnet_shutdown_lndni(struct lnet_ni *ni)
-{
- int i;
-
- lnet_net_lock(LNET_LOCK_EX);
- lnet_ni_unlink_locked(ni);
- lnet_net_unlock(LNET_LOCK_EX);
-
- /* clear messages for this NI on the lazy portal */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- lnet_clear_lazy_portal(ni, i, "Shutting down NI");
-
- /* Do peer table cleanup for this ni */
- lnet_peer_tables_cleanup(ni);
-
- lnet_net_lock(LNET_LOCK_EX);
- lnet_clear_zombies_nis_locked();
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
-{
- struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
- int rc = -EINVAL;
- int lnd_type;
- struct lnet_lnd *lnd;
- struct lnet_tx_queue *tq;
- int i;
- u32 seed;
-
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-
- LASSERT(libcfs_isknown_lnd(lnd_type));
-
- if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
- lnd_type == IIBLND || lnd_type == VIBLND) {
- CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
- goto failed0;
- }
-
- /* Make sure this new NI is unique. */
- lnet_net_lock(LNET_LOCK_EX);
- rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
- lnet_net_unlock(LNET_LOCK_EX);
- if (!rc) {
- if (lnd_type == LOLND) {
- lnet_ni_free(ni);
- return 0;
- }
-
- CERROR("Net %s is not unique\n",
- libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
- rc = -EEXIST;
- goto failed0;
- }
-
- mutex_lock(&the_lnet.ln_lnd_mutex);
- lnd = lnet_find_lnd_by_type(lnd_type);
-
- if (!lnd) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = request_module("%s", libcfs_lnd2modname(lnd_type));
- mutex_lock(&the_lnet.ln_lnd_mutex);
-
- lnd = lnet_find_lnd_by_type(lnd_type);
- if (!lnd) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- CERROR("Can't load LND %s, module %s, rc=%d\n",
- libcfs_lnd2str(lnd_type),
- libcfs_lnd2modname(lnd_type), rc);
- rc = -EINVAL;
- goto failed0;
- }
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount++;
- lnet_net_unlock(LNET_LOCK_EX);
-
- ni->ni_lnd = lnd;
-
- if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
- lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
-
- if (lnd_tunables) {
- ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables),
- GFP_NOFS);
- if (!ni->ni_lnd_tunables) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = -ENOMEM;
- goto failed0;
- }
- memcpy(ni->ni_lnd_tunables, lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- }
-
- /*
- * If given some LND tunable parameters, parse those now to
- * override the values in the NI structure.
- */
- if (conf) {
- if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
- ni->ni_peerrtrcredits =
- conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
- if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
- ni->ni_peertimeout =
- conf->cfg_config_u.cfg_net.net_peer_timeout;
- if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
- ni->ni_peertxcredits =
- conf->cfg_config_u.cfg_net.net_peer_tx_credits;
- if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
- ni->ni_maxtxcredits =
- conf->cfg_config_u.cfg_net.net_max_tx_credits;
- }
-
- rc = lnd->lnd_startup(ni);
-
- mutex_unlock(&the_lnet.ln_lnd_mutex);
-
- if (rc) {
- LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
- rc, libcfs_lnd2str(lnd->lnd_type));
- lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount--;
- lnet_net_unlock(LNET_LOCK_EX);
- goto failed0;
- }
-
- LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
-
- lnet_net_lock(LNET_LOCK_EX);
- /* refcount for ln_nis */
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
- if (ni->ni_cpts) {
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- if (lnd->lnd_type == LOLND) {
- lnet_ni_addref(ni);
- LASSERT(!the_lnet.ln_loni);
- the_lnet.ln_loni = ni;
- return 0;
- }
-
- if (!ni->ni_peertxcredits || !ni->ni_maxtxcredits) {
- LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
- libcfs_lnd2str(lnd->lnd_type),
- !ni->ni_peertxcredits ?
- "" : "per-peer ");
- /*
- * shutdown the NI since if we get here then it must've already
- * been started
- */
- lnet_shutdown_lndni(ni);
- return -EINVAL;
- }
-
- cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
- tq->tq_credits_min =
- tq->tq_credits_max =
- tq->tq_credits = lnet_ni_tq_credits(ni);
- }
-
- /* Nodes with small feet have little entropy. The NID for this
- * node gives the most entropy in the low bits.
- */
- seed = LNET_NIDADDR(ni->ni_nid);
- add_device_randomness(&seed, sizeof(seed));
-
- CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
- libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
- lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
- ni->ni_peerrtrcredits, ni->ni_peertimeout);
-
- return 0;
-failed0:
- lnet_ni_free(ni);
- return rc;
-}
-
-static int
-lnet_startup_lndnis(struct list_head *nilist)
-{
- struct lnet_ni *ni;
- int rc;
- int ni_count = 0;
-
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, struct lnet_ni, ni_list);
- list_del(&ni->ni_list);
- rc = lnet_startup_lndni(ni, NULL);
-
- if (rc < 0)
- goto failed;
-
- ni_count++;
- }
-
- return ni_count;
-failed:
- lnet_shutdown_lndnis();
-
- return rc;
-}
-
-/**
- * Initialize LNet library.
- *
- * Automatically called at module loading time. Caller has to call
- * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
- * latter returned 0. It must be called exactly once.
- *
- * \retval 0 on success
- * \retval -ve on failures.
- */
-int lnet_lib_init(void)
-{
- int rc;
-
- lnet_assert_wire_constants();
-
- memset(&the_lnet, 0, sizeof(the_lnet));
-
- /* refer to global cfs_cpt_tab for now */
- the_lnet.ln_cpt_table = cfs_cpt_tab;
- the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab);
-
- LASSERT(the_lnet.ln_cpt_number > 0);
- if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
- /* we are under risk of consuming all lh_cookie */
- CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change setting of CPT-table and retry\n",
- the_lnet.ln_cpt_number, LNET_CPT_MAX);
- return -E2BIG;
- }
-
- while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
- the_lnet.ln_cpt_bits++;
-
- rc = lnet_create_locks();
- if (rc) {
- CERROR("Can't create LNet global locks: %d\n", rc);
- return rc;
- }
-
- the_lnet.ln_refcount = 0;
- LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh);
- INIT_LIST_HEAD(&the_lnet.ln_lnds);
- INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
- INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
-
- /*
- * The hash table size is the number of bits it takes to express the set
- * ln_num_routes, minus 1 (better to under estimate than over so we
- * don't waste memory).
- */
- if (rnet_htable_size <= 0)
- rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
- else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
- rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
- the_lnet.ln_remote_nets_hbits = max_t(int, 1,
- order_base_2(rnet_htable_size) - 1);
-
- /*
- * All LNDs apart from the LOLND are in separate modules. They
- * register themselves when their module loads, and unregister
- * themselves when their module is unloaded.
- */
- lnet_register_lnd(&the_lolnd);
- return 0;
-}
-
-/**
- * Finalize LNet library.
- *
- * \pre lnet_lib_init() called with success.
- * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
- */
-void lnet_lib_exit(void)
-{
- LASSERT(!the_lnet.ln_refcount);
-
- while (!list_empty(&the_lnet.ln_lnds))
- lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
- struct lnet_lnd, lnd_list));
- lnet_destroy_locks();
-}
-
-/**
- * Set LNet PID and start LNet interfaces, routing, and forwarding.
- *
- * Users must call this function at least once before any other functions.
- * For each successful call there must be a corresponding call to
- * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
- * ignored.
- *
- * The PID used by LNet may be different from the one requested.
- * See LNetGetId().
- *
- * \param requested_pid PID requested by the caller.
- *
- * \return >= 0 on success, and < 0 error code on failures.
- */
-int
-LNetNIInit(lnet_pid_t requested_pid)
-{
- int im_a_router = 0;
- int rc;
- int ni_count;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- struct list_head net_head;
-
- INIT_LIST_HEAD(&net_head);
-
- mutex_lock(&the_lnet.ln_api_mutex);
-
- CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
-
- if (the_lnet.ln_refcount > 0) {
- rc = the_lnet.ln_refcount++;
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
-
- rc = lnet_prepare(requested_pid);
- if (rc) {
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
-
- /* Add in the loopback network */
- if (!lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head)) {
- rc = -ENOMEM;
- goto err_empty_list;
- }
-
- /*
- * If LNet is being initialized via DLC it is possible
- * that the user requests not to load module parameters (ones which
- * are supported by DLC) on initialization. Therefore, make sure not
- * to load networks, routes and forwarding from module parameters
- * in this case. On cleanup in case of failure only clean up
- * routes if it has been loaded
- */
- if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_networks(&net_head, lnet_get_networks());
- if (rc < 0)
- goto err_empty_list;
- }
-
- ni_count = lnet_startup_lndnis(&net_head);
- if (ni_count < 0) {
- rc = ni_count;
- goto err_empty_list;
- }
-
- if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
- if (rc)
- goto err_shutdown_lndnis;
-
- rc = lnet_check_routes();
- if (rc)
- goto err_destroy_routes;
-
- rc = lnet_rtrpools_alloc(im_a_router);
- if (rc)
- goto err_destroy_routes;
- }
-
- rc = lnet_acceptor_start();
- if (rc)
- goto err_destroy_routes;
-
- the_lnet.ln_refcount = 1;
- /* Now I may use my own API functions... */
-
- rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
- if (rc)
- goto err_acceptor_stop;
-
- lnet_ping_target_update(pinfo, md_handle);
-
- rc = lnet_router_checker_start();
- if (rc)
- goto err_stop_ping;
-
- lnet_fault_init();
- lnet_router_debugfs_init();
-
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return 0;
-
-err_stop_ping:
- lnet_ping_target_fini();
-err_acceptor_stop:
- the_lnet.ln_refcount = 0;
- lnet_acceptor_stop();
-err_destroy_routes:
- if (!the_lnet.ln_nis_from_mod_params)
- lnet_destroy_routes();
-err_shutdown_lndnis:
- lnet_shutdown_lndnis();
-err_empty_list:
- lnet_unprepare();
- LASSERT(rc < 0);
- mutex_unlock(&the_lnet.ln_api_mutex);
- while (!list_empty(&net_head)) {
- struct lnet_ni *ni;
-
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
- }
- return rc;
-}
-EXPORT_SYMBOL(LNetNIInit);
-
-/**
- * Stop LNet interfaces, routing, and forwarding.
- *
- * Users must call this function once for each successful call to LNetNIInit().
- * Once the LNetNIFini() operation has been started, the results of pending
- * API operations are undefined.
- *
- * \return always 0 for current implementation.
- */
-int
-LNetNIFini(void)
-{
- mutex_lock(&the_lnet.ln_api_mutex);
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (the_lnet.ln_refcount != 1) {
- the_lnet.ln_refcount--;
- } else {
- LASSERT(!the_lnet.ln_niinit_self);
-
- lnet_fault_fini();
- lnet_router_debugfs_fini();
- lnet_router_checker_stop();
- lnet_ping_target_fini();
-
- /* Teardown fns that use my own API functions BEFORE here */
- the_lnet.ln_refcount = 0;
-
- lnet_acceptor_stop();
- lnet_destroy_routes();
- lnet_shutdown_lndnis();
- lnet_unprepare();
- }
-
- mutex_unlock(&the_lnet.ln_api_mutex);
- return 0;
-}
-EXPORT_SYMBOL(LNetNIFini);
-
-/**
- * Grabs the ni data from the ni structure and fills the out
- * parameters
- *
- * \param[in] ni network interface structure
- * \param[out] config NI configuration
- */
-static void
-lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
-{
- struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
- struct lnet_ioctl_net_config *net_config;
- size_t min_size, tunable_size = 0;
- int i;
-
- if (!ni || !config)
- return;
-
- net_config = (struct lnet_ioctl_net_config *)config->cfg_bulk;
- if (!net_config)
- return;
-
- BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
- ARRAY_SIZE(net_config->ni_interfaces));
-
- for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
- if (!ni->ni_interfaces[i])
- break;
-
- strncpy(net_config->ni_interfaces[i],
- ni->ni_interfaces[i],
- sizeof(net_config->ni_interfaces[i]));
- }
-
- config->cfg_nid = ni->ni_nid;
- config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
- config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
- config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
- config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
-
- net_config->ni_status = ni->ni_status->ns_status;
-
- if (ni->ni_cpts) {
- int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
-
- for (i = 0; i < num_cpts; i++)
- net_config->ni_cpts[i] = ni->ni_cpts[i];
-
- config->cfg_ncpts = num_cpts;
- }
-
- /*
- * See if user land tools sent in a newer and larger version
- * of struct lnet_tunables than what the kernel uses.
- */
- min_size = sizeof(*config) + sizeof(*net_config);
-
- if (config->cfg_hdr.ioc_len > min_size)
- tunable_size = config->cfg_hdr.ioc_len - min_size;
-
- /* Don't copy to much data to user space */
- min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
- lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
-
- if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
- memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
- config->cfg_config_u.cfg_net.net_interface_count = 1;
-
- /* Tell user land that kernel side has less data */
- if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
- min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
- config->cfg_hdr.ioc_len -= min_size;
- }
- }
-}
-
-static int
-lnet_get_net_config(struct lnet_ioctl_config_data *config)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
- int idx = config->cfg_count;
- int cpt, i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (i++ != idx)
- continue;
-
- ni = list_entry(tmp, struct lnet_ni, ni_list);
- lnet_ni_lock(ni);
- lnet_fill_ni_info(ni, config);
- lnet_ni_unlock(ni);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
-{
- char *nets = conf->cfg_config_u.cfg_net.net_intf;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- struct lnet_ni *ni;
- struct list_head net_head;
- struct lnet_remotenet *rnet;
- int rc;
-
- INIT_LIST_HEAD(&net_head);
-
- /* Create a ni structure for the network string */
- rc = lnet_parse_networks(&net_head, nets);
- if (rc <= 0)
- return !rc ? -EINVAL : rc;
-
- mutex_lock(&the_lnet.ln_api_mutex);
-
- if (rc > 1) {
- rc = -EINVAL; /* only add one interface per call */
- goto failed0;
- }
-
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-
- lnet_net_lock(LNET_LOCK_EX);
- rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
- lnet_net_unlock(LNET_LOCK_EX);
- /*
- * make sure that the net added doesn't invalidate the current
- * configuration LNet is keeping
- */
- if (rnet) {
- CERROR("Adding net %s will invalidate routing configuration\n",
- nets);
- rc = -EUSERS;
- goto failed0;
- }
-
- rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
- false);
- if (rc)
- goto failed0;
-
- list_del_init(&ni->ni_list);
-
- rc = lnet_startup_lndni(ni, conf);
- if (rc)
- goto failed1;
-
- if (ni->ni_lnd->lnd_accept) {
- rc = lnet_acceptor_start();
- if (rc < 0) {
- /* shutdown the ni that we just started */
- CERROR("Failed to start up acceptor thread\n");
- lnet_shutdown_lndni(ni);
- goto failed1;
- }
- }
-
- lnet_ping_target_update(pinfo, md_handle);
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return 0;
-
-failed1:
- lnet_ping_md_unlink(pinfo, &md_handle);
- lnet_ping_info_free(pinfo);
-failed0:
- mutex_unlock(&the_lnet.ln_api_mutex);
- while (!list_empty(&net_head)) {
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
- }
- return rc;
-}
-
-int
-lnet_dyn_del_ni(__u32 net)
-{
- struct lnet_ni *ni;
- struct lnet_ping_info *pinfo;
- struct lnet_handle_md md_handle;
- int rc;
-
- /* don't allow userspace to shutdown the LOLND */
- if (LNET_NETTYP(net) == LOLND)
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- /* create and link a new ping info, before removing the old one */
- rc = lnet_ping_info_setup(&pinfo, &md_handle,
- lnet_get_ni_count() - 1, false);
- if (rc)
- goto out;
-
- ni = lnet_net2ni(net);
- if (!ni) {
- rc = -EINVAL;
- goto failed;
- }
-
- /* decrement the reference counter taken by lnet_net2ni() */
- lnet_ni_decref_locked(ni, 0);
-
- lnet_shutdown_lndni(ni);
-
- if (!lnet_count_acceptor_nis())
- lnet_acceptor_stop();
-
- lnet_ping_target_update(pinfo, md_handle);
- goto out;
-failed:
- lnet_ping_md_unlink(pinfo, &md_handle);
- lnet_ping_info_free(pinfo);
-out:
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- return rc;
-}
-
-/**
- * LNet ioctl handler.
- *
- */
-int
-LNetCtl(unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- struct lnet_ioctl_config_data *config;
- struct lnet_process_id id = {0};
- struct lnet_ni *ni;
- int rc;
- unsigned long secs_passed;
-
- BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX <
- sizeof(struct lnet_ioctl_net_config) +
- sizeof(struct lnet_ioctl_config_data));
-
- switch (cmd) {
- case IOC_LIBCFS_GET_NI:
- rc = LNetGetId(data->ioc_count, &id);
- data->ioc_nid = id.nid;
- return rc;
-
- case IOC_LIBCFS_FAIL_NID:
- return lnet_fail_nid(data->ioc_nid, data->ioc_count);
-
- case IOC_LIBCFS_ADD_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_add_route(config->cfg_net,
- config->cfg_config_u.cfg_route.rtr_hop,
- config->cfg_nid,
- config->cfg_config_u.cfg_route.rtr_priority);
- if (!rc) {
- rc = lnet_check_routes();
- if (rc)
- lnet_del_route(config->cfg_net,
- config->cfg_nid);
- }
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_DEL_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_del_route(config->cfg_net, config->cfg_nid);
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_GET_ROUTE:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- return lnet_get_route(config->cfg_count,
- &config->cfg_net,
- &config->cfg_config_u.cfg_route.rtr_hop,
- &config->cfg_nid,
- &config->cfg_config_u.cfg_route.rtr_flags,
- &config->cfg_config_u.cfg_route.rtr_priority);
-
- case IOC_LIBCFS_GET_NET: {
- size_t total = sizeof(*config) +
- sizeof(struct lnet_ioctl_net_config);
- config = arg;
-
- if (config->cfg_hdr.ioc_len < total)
- return -EINVAL;
-
- return lnet_get_net_config(config);
- }
-
- case IOC_LIBCFS_GET_LNET_STATS: {
- struct lnet_ioctl_lnet_stats *lnet_stats = arg;
-
- if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
- return -EINVAL;
-
- lnet_counters_get(&lnet_stats->st_cntrs);
- return 0;
- }
-
- case IOC_LIBCFS_CONFIG_RTR:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- if (config->cfg_config_u.cfg_buffers.buf_enable) {
- rc = lnet_rtrpools_enable();
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
- }
- lnet_rtrpools_disable();
- mutex_unlock(&the_lnet.ln_api_mutex);
- return 0;
-
- case IOC_LIBCFS_ADD_BUF:
- config = arg;
-
- if (config->cfg_hdr.ioc_len < sizeof(*config))
- return -EINVAL;
-
- mutex_lock(&the_lnet.ln_api_mutex);
- rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.buf_tiny,
- config->cfg_config_u.cfg_buffers.buf_small,
- config->cfg_config_u.cfg_buffers.buf_large);
- mutex_unlock(&the_lnet.ln_api_mutex);
- return rc;
-
- case IOC_LIBCFS_GET_BUF: {
- struct lnet_ioctl_pool_cfg *pool_cfg;
- size_t total = sizeof(*config) + sizeof(*pool_cfg);
-
- config = arg;
-
- if (config->cfg_hdr.ioc_len < total)
- return -EINVAL;
-
- pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
- return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
- }
-
- case IOC_LIBCFS_GET_PEER_INFO: {
- struct lnet_ioctl_peer *peer_info = arg;
-
- if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
- return -EINVAL;
-
- return lnet_get_peer_info(peer_info->pr_count,
- &peer_info->pr_nid,
- peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
- &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
- }
-
- case IOC_LIBCFS_NOTIFY_ROUTER:
- secs_passed = (ktime_get_real_seconds() - data->ioc_u64[0]);
- secs_passed *= msecs_to_jiffies(MSEC_PER_SEC);
-
- return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
- jiffies - secs_passed);
-
- case IOC_LIBCFS_LNET_DIST:
- rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
- if (rc < 0 && rc != -EHOSTUNREACH)
- return rc;
-
- data->ioc_u32[0] = rc;
- return 0;
-
- case IOC_LIBCFS_TESTPROTOCOMPAT:
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_testprotocompat = data->ioc_flags;
- lnet_net_unlock(LNET_LOCK_EX);
- return 0;
-
- case IOC_LIBCFS_LNET_FAULT:
- return lnet_fault_ctl(data->ioc_flags, data);
-
- case IOC_LIBCFS_PING:
- id.nid = data->ioc_nid;
- id.pid = data->ioc_u32[0];
- rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
- data->ioc_pbuf1,
- data->ioc_plen1 / sizeof(struct lnet_process_id));
- if (rc < 0)
- return rc;
- data->ioc_count = rc;
- return 0;
-
- default:
- ni = lnet_net2ni(data->ioc_net);
- if (!ni)
- return -EINVAL;
-
- if (!ni->ni_lnd->lnd_ctl)
- rc = -EINVAL;
- else
- rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
-
- lnet_ni_decref(ni);
- return rc;
- }
- /* not reached */
-}
-EXPORT_SYMBOL(LNetCtl);
-
-void LNetDebugPeer(struct lnet_process_id id)
-{
- lnet_debug_peer(id.nid);
-}
-EXPORT_SYMBOL(LNetDebugPeer);
-
-/**
- * Retrieve the lnet_process_id ID of LNet interface at \a index. Note that
- * all interfaces share a same PID, as requested by LNetNIInit().
- *
- * \param index Index of the interface to look up.
- * \param id On successful return, this location will hold the
- * lnet_process_id ID of the interface.
- *
- * \retval 0 If an interface exists at \a index.
- * \retval -ENOENT If no interface has been found.
- */
-int
-LNetGetId(unsigned int index, struct lnet_process_id *id)
-{
- struct lnet_ni *ni;
- struct list_head *tmp;
- int cpt;
- int rc = -ENOENT;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_net_lock_current();
-
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (index--)
- continue;
-
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- id->nid = ni->ni_nid;
- id->pid = the_lnet.ln_pid;
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-EXPORT_SYMBOL(LNetGetId);
-
-static int lnet_ping(struct lnet_process_id id, int timeout_ms,
- struct lnet_process_id __user *ids, int n_ids)
-{
- struct lnet_handle_eq eqh;
- struct lnet_handle_md mdh;
- struct lnet_event event;
- struct lnet_md md = { NULL };
- int which;
- int unlinked = 0;
- int replied = 0;
- const int a_long_time = 60000; /* mS */
- int infosz;
- struct lnet_ping_info *info;
- struct lnet_process_id tmpid;
- int i;
- int nob;
- int rc;
- int rc2;
-
- infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]);
-
- if (n_ids <= 0 ||
- id.nid == LNET_NID_ANY ||
- timeout_ms > 500000 || /* arbitrary limit! */
- n_ids > 20) /* arbitrary limit! */
- return -EINVAL;
-
- if (id.pid == LNET_PID_ANY)
- id.pid = LNET_PID_LUSTRE;
-
- info = kzalloc(infosz, GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- /* NB 2 events max (including any unlink event) */
- rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
- if (rc) {
- CERROR("Can't allocate EQ: %d\n", rc);
- goto out_0;
- }
-
- /* initialize md content */
- md.start = info;
- md.length = infosz;
- md.threshold = 2; /*GET/REPLY*/
- md.max_size = 0;
- md.options = LNET_MD_TRUNCATE;
- md.user_ptr = NULL;
- md.eq_handle = eqh;
-
- rc = LNetMDBind(md, LNET_UNLINK, &mdh);
- if (rc) {
- CERROR("Can't bind MD: %d\n", rc);
- goto out_1;
- }
-
- rc = LNetGet(LNET_NID_ANY, mdh, id,
- LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
-
- if (rc) {
- /* Don't CERROR; this could be deliberate! */
-
- rc2 = LNetMDUnlink(mdh);
- LASSERT(!rc2);
-
- /* NB must wait for the UNLINK event below... */
- unlinked = 1;
- timeout_ms = a_long_time;
- }
-
- do {
- /* MUST block for unlink to complete */
-
- rc2 = LNetEQPoll(&eqh, 1, timeout_ms, !unlinked,
- &event, &which);
-
- CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
- (rc2 <= 0) ? -1 : event.type,
- (rc2 <= 0) ? -1 : event.status,
- (rc2 > 0 && event.unlinked) ? " unlinked" : "");
-
- LASSERT(rc2 != -EOVERFLOW); /* can't miss anything */
-
- if (rc2 <= 0 || event.status) {
- /* timeout or error */
- if (!replied && !rc)
- rc = (rc2 < 0) ? rc2 :
- !rc2 ? -ETIMEDOUT :
- event.status;
-
- if (!unlinked) {
- /* Ensure completion in finite time... */
- LNetMDUnlink(mdh);
- /* No assertion (racing with network) */
- unlinked = 1;
- timeout_ms = a_long_time;
- } else if (!rc2) {
- /* timed out waiting for unlink */
- CWARN("ping %s: late network completion\n",
- libcfs_id2str(id));
- }
- } else if (event.type == LNET_EVENT_REPLY) {
- replied = 1;
- rc = event.mlength;
- }
-
- } while (rc2 <= 0 || !event.unlinked);
-
- if (!replied) {
- if (rc >= 0)
- CWARN("%s: Unexpected rc >= 0 but no reply!\n",
- libcfs_id2str(id));
- rc = -EIO;
- goto out_1;
- }
-
- nob = rc;
- LASSERT(nob >= 0 && nob <= infosz);
-
- rc = -EPROTO; /* if I can't parse... */
-
- if (nob < 8) {
- /* can't check magic/version */
- CERROR("%s: ping info too short %d\n",
- libcfs_id2str(id), nob);
- goto out_1;
- }
-
- if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
- lnet_swap_pinginfo(info);
- } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
- CERROR("%s: Unexpected magic %08x\n",
- libcfs_id2str(id), info->pi_magic);
- goto out_1;
- }
-
- if (!(info->pi_features & LNET_PING_FEAT_NI_STATUS)) {
- CERROR("%s: ping w/o NI status: 0x%x\n",
- libcfs_id2str(id), info->pi_features);
- goto out_1;
- }
-
- if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) {
- CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
- nob, (int)offsetof(struct lnet_ping_info, pi_ni[0]));
- goto out_1;
- }
-
- if (info->pi_nnis < n_ids)
- n_ids = info->pi_nnis;
-
- if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) {
- CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
- nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids]));
- goto out_1;
- }
-
- rc = -EFAULT; /* If I SEGV... */
-
- memset(&tmpid, 0, sizeof(tmpid));
- for (i = 0; i < n_ids; i++) {
- tmpid.pid = info->pi_pid;
- tmpid.nid = info->pi_ni[i].ns_nid;
- if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
- goto out_1;
- }
- rc = info->pi_nnis;
-
- out_1:
- rc2 = LNetEQFree(eqh);
- if (rc2)
- CERROR("rc2 %d\n", rc2);
- LASSERT(!rc2);
-
- out_0:
- kfree(info);
- return rc;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
deleted file mode 100644
index 55ecc199..0000000
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ /dev/null
@@ -1,1235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <linux/ctype.h>
-#include <linux/lnet/lib-lnet.h>
-
-struct lnet_text_buf { /* tmp struct for parsing routes */
- struct list_head ltb_list; /* stash on lists */
- int ltb_size; /* allocated size */
- char ltb_text[0]; /* text buffer */
-};
-
-static int lnet_tbnob; /* track text buf allocation */
-#define LNET_MAX_TEXTBUF_NOB (64 << 10) /* bound allocation */
-#define LNET_SINGLE_TEXTBUF_NOB (4 << 10)
-
-static void
-lnet_syntax(char *name, char *str, int offset, int width)
-{
- static char dots[LNET_SINGLE_TEXTBUF_NOB];
- static char dashes[LNET_SINGLE_TEXTBUF_NOB];
-
- memset(dots, '.', sizeof(dots));
- dots[sizeof(dots) - 1] = 0;
- memset(dashes, '-', sizeof(dashes));
- dashes[sizeof(dashes) - 1] = 0;
-
- LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str);
- LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n",
- (int)strlen(name), dots, offset, dots,
- (width < 1) ? 0 : width - 1, dashes);
-}
-
-static int
-lnet_issep(char c)
-{
- switch (c) {
- case '\n':
- case '\r':
- case ';':
- return 1;
- default:
- return 0;
- }
-}
-
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
-{
- struct list_head *tmp;
- struct lnet_ni *ni;
-
- list_for_each(tmp, nilist) {
- ni = list_entry(tmp, struct lnet_ni, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net)
- return 0;
- }
-
- return 1;
-}
-
-void
-lnet_ni_free(struct lnet_ni *ni)
-{
- int i;
-
- if (ni->ni_refs)
- cfs_percpt_free(ni->ni_refs);
-
- if (ni->ni_tx_queues)
- cfs_percpt_free(ni->ni_tx_queues);
-
- if (ni->ni_cpts)
- cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
-
- kfree(ni->ni_lnd_tunables);
-
- for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++)
- kfree(ni->ni_interfaces[i]);
-
- /* release reference to net namespace */
- if (ni->ni_net_ns)
- put_net(ni->ni_net_ns);
-
- kfree(ni);
-}
-
-struct lnet_ni *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
-{
- struct lnet_tx_queue *tq;
- struct lnet_ni *ni;
- int rc;
- int i;
-
- if (!lnet_net_unique(net, nilist)) {
- LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
- libcfs_net2str(net));
- return NULL;
- }
-
- ni = kzalloc(sizeof(*ni), GFP_NOFS);
- if (!ni) {
- CERROR("Out of memory creating network %s\n",
- libcfs_net2str(net));
- return NULL;
- }
-
- spin_lock_init(&ni->ni_lock);
- INIT_LIST_HEAD(&ni->ni_cptlist);
- ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*ni->ni_refs[0]));
- if (!ni->ni_refs)
- goto failed;
-
- ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*ni->ni_tx_queues[0]));
- if (!ni->ni_tx_queues)
- goto failed;
-
- cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
- INIT_LIST_HEAD(&tq->tq_delayed);
-
- if (!el) {
- ni->ni_cpts = NULL;
- ni->ni_ncpts = LNET_CPT_NUMBER;
- } else {
- rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
- if (rc <= 0) {
- CERROR("Failed to set CPTs for NI %s: %d\n",
- libcfs_net2str(net), rc);
- goto failed;
- }
-
- LASSERT(rc <= LNET_CPT_NUMBER);
- if (rc == LNET_CPT_NUMBER) {
- cfs_expr_list_values_free(ni->ni_cpts, LNET_CPT_NUMBER);
- ni->ni_cpts = NULL;
- }
-
- ni->ni_ncpts = rc;
- }
-
- /* LND will fill in the address part of the NID */
- ni->ni_nid = LNET_MKNID(net, 0);
-
- /* Store net namespace in which current ni is being created */
- if (current->nsproxy->net_ns)
- ni->ni_net_ns = get_net(current->nsproxy->net_ns);
- else
- ni->ni_net_ns = NULL;
-
- ni->ni_last_alive = ktime_get_real_seconds();
- list_add_tail(&ni->ni_list, nilist);
- return ni;
- failed:
- lnet_ni_free(ni);
- return NULL;
-}
-
-int
-lnet_parse_networks(struct list_head *nilist, char *networks)
-{
- struct cfs_expr_list *el = NULL;
- char *tokens;
- char *str;
- char *tmp;
- struct lnet_ni *ni;
- __u32 net;
- int nnets = 0;
- struct list_head *temp_node;
-
- if (!networks) {
- CERROR("networks string is undefined\n");
- return -EINVAL;
- }
-
- if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) {
- /* _WAY_ conservative */
- LCONSOLE_ERROR_MSG(0x112,
- "Can't parse networks: string too long\n");
- return -EINVAL;
- }
-
- tokens = kstrdup(networks, GFP_KERNEL);
- if (!tokens) {
- CERROR("Can't allocate net tokens\n");
- return -ENOMEM;
- }
-
- tmp = tokens;
- str = tokens;
-
- while (str && *str) {
- char *comma = strchr(str, ',');
- char *bracket = strchr(str, '(');
- char *square = strchr(str, '[');
- char *iface;
- int niface;
- int rc;
-
- /*
- * NB we don't check interface conflicts here; it's the LNDs
- * responsibility (if it cares at all)
- */
- if (square && (!comma || square < comma)) {
- /*
- * i.e: o2ib0(ib0)[1,2], number between square
- * brackets are CPTs this NI needs to be bond
- */
- if (bracket && bracket > square) {
- tmp = square;
- goto failed_syntax;
- }
-
- tmp = strchr(square, ']');
- if (!tmp) {
- tmp = square;
- goto failed_syntax;
- }
-
- rc = cfs_expr_list_parse(square, tmp - square + 1,
- 0, LNET_CPT_NUMBER - 1, &el);
- if (rc) {
- tmp = square;
- goto failed_syntax;
- }
-
- while (square <= tmp)
- *square++ = ' ';
- }
-
- if (!bracket || (comma && comma < bracket)) {
- /* no interface list specified */
-
- if (comma)
- *comma++ = 0;
- net = libcfs_str2net(strim(str));
-
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- LCONSOLE_ERROR_MSG(0x113,
- "Unrecognised network type\n");
- tmp = str;
- goto failed_syntax;
- }
-
- if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
- !lnet_ni_alloc(net, el, nilist))
- goto failed;
-
- if (el) {
- cfs_expr_list_free(el);
- el = NULL;
- }
-
- str = comma;
- continue;
- }
-
- *bracket = 0;
- net = libcfs_str2net(strim(str));
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- tmp = str;
- goto failed_syntax;
- }
-
- ni = lnet_ni_alloc(net, el, nilist);
- if (!ni)
- goto failed;
-
- if (el) {
- cfs_expr_list_free(el);
- el = NULL;
- }
-
- niface = 0;
- iface = bracket + 1;
-
- bracket = strchr(iface, ')');
- if (!bracket) {
- tmp = iface;
- goto failed_syntax;
- }
-
- *bracket = 0;
- do {
- comma = strchr(iface, ',');
- if (comma)
- *comma++ = 0;
-
- iface = strim(iface);
- if (!*iface) {
- tmp = iface;
- goto failed_syntax;
- }
-
- if (niface == LNET_MAX_INTERFACES) {
- LCONSOLE_ERROR_MSG(0x115,
- "Too many interfaces for net %s\n",
- libcfs_net2str(net));
- goto failed;
- }
-
- /*
- * Allocate a separate piece of memory and copy
- * into it the string, so we don't have
- * a depencency on the tokens string. This way we
- * can free the tokens at the end of the function.
- * The newly allocated ni_interfaces[] can be
- * freed when freeing the NI
- */
- ni->ni_interfaces[niface] = kstrdup(iface, GFP_KERNEL);
- if (!ni->ni_interfaces[niface]) {
- CERROR("Can't allocate net interface name\n");
- goto failed;
- }
- niface++;
- iface = comma;
- } while (iface);
-
- str = bracket + 1;
- comma = strchr(bracket + 1, ',');
- if (comma) {
- *comma = 0;
- str = strim(str);
- if (*str) {
- tmp = str;
- goto failed_syntax;
- }
- str = comma + 1;
- continue;
- }
-
- str = strim(str);
- if (*str) {
- tmp = str;
- goto failed_syntax;
- }
- }
-
- list_for_each(temp_node, nilist)
- nnets++;
-
- kfree(tokens);
- return nnets;
-
- failed_syntax:
- lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
- failed:
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, struct lnet_ni, ni_list);
-
- list_del(&ni->ni_list);
- lnet_ni_free(ni);
- }
-
- if (el)
- cfs_expr_list_free(el);
-
- kfree(tokens);
-
- return -EINVAL;
-}
-
-static struct lnet_text_buf *
-lnet_new_text_buf(int str_len)
-{
- struct lnet_text_buf *ltb;
- int nob;
-
- /* NB allocate space for the terminating 0 */
- nob = offsetof(struct lnet_text_buf, ltb_text[str_len + 1]);
- if (nob > LNET_SINGLE_TEXTBUF_NOB) {
- /* _way_ conservative for "route net gateway..." */
- CERROR("text buffer too big\n");
- return NULL;
- }
-
- if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) {
- CERROR("Too many text buffers\n");
- return NULL;
- }
-
- ltb = kzalloc(nob, GFP_KERNEL);
- if (!ltb)
- return NULL;
-
- ltb->ltb_size = nob;
- ltb->ltb_text[0] = 0;
- lnet_tbnob += nob;
- return ltb;
-}
-
-static void
-lnet_free_text_buf(struct lnet_text_buf *ltb)
-{
- lnet_tbnob -= ltb->ltb_size;
- kfree(ltb);
-}
-
-static void
-lnet_free_text_bufs(struct list_head *tbs)
-{
- struct lnet_text_buf *ltb;
-
- while (!list_empty(tbs)) {
- ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- }
-}
-
-static int
-lnet_str2tbs_sep(struct list_head *tbs, char *str)
-{
- struct list_head pending;
- char *sep;
- int nob;
- int i;
- struct lnet_text_buf *ltb;
-
- INIT_LIST_HEAD(&pending);
-
- /* Split 'str' into separate commands */
- for (;;) {
- /* skip leading whitespace */
- while (isspace(*str))
- str++;
-
- /* scan for separator or comment */
- for (sep = str; *sep; sep++)
- if (lnet_issep(*sep) || *sep == '#')
- break;
-
- nob = (int)(sep - str);
- if (nob > 0) {
- ltb = lnet_new_text_buf(nob);
- if (!ltb) {
- lnet_free_text_bufs(&pending);
- return -ENOMEM;
- }
-
- for (i = 0; i < nob; i++)
- if (isspace(str[i]))
- ltb->ltb_text[i] = ' ';
- else
- ltb->ltb_text[i] = str[i];
-
- ltb->ltb_text[nob] = 0;
-
- list_add_tail(&ltb->ltb_list, &pending);
- }
-
- if (*sep == '#') {
- /* scan for separator */
- do {
- sep++;
- } while (*sep && !lnet_issep(*sep));
- }
-
- if (!*sep)
- break;
-
- str = sep + 1;
- }
-
- list_splice(&pending, tbs->prev);
- return 0;
-}
-
-static int
-lnet_expand1tb(struct list_head *list,
- char *str, char *sep1, char *sep2,
- char *item, int itemlen)
-{
- int len1 = (int)(sep1 - str);
- int len2 = strlen(sep2 + 1);
- struct lnet_text_buf *ltb;
-
- LASSERT(*sep1 == '[');
- LASSERT(*sep2 == ']');
-
- ltb = lnet_new_text_buf(len1 + itemlen + len2);
- if (!ltb)
- return -ENOMEM;
-
- memcpy(ltb->ltb_text, str, len1);
- memcpy(&ltb->ltb_text[len1], item, itemlen);
- memcpy(&ltb->ltb_text[len1 + itemlen], sep2 + 1, len2);
- ltb->ltb_text[len1 + itemlen + len2] = 0;
-
- list_add_tail(&ltb->ltb_list, list);
- return 0;
-}
-
-static int
-lnet_str2tbs_expand(struct list_head *tbs, char *str)
-{
- char num[16];
- struct list_head pending;
- char *sep;
- char *sep2;
- char *parsed;
- char *enditem;
- int lo;
- int hi;
- int stride;
- int i;
- int nob;
- int scanned;
-
- INIT_LIST_HEAD(&pending);
-
- sep = strchr(str, '[');
- if (!sep) /* nothing to expand */
- return 0;
-
- sep2 = strchr(sep, ']');
- if (!sep2)
- goto failed;
-
- for (parsed = sep; parsed < sep2; parsed = enditem) {
- enditem = ++parsed;
- while (enditem < sep2 && *enditem != ',')
- enditem++;
-
- if (enditem == parsed) /* no empty items */
- goto failed;
-
- if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi,
- &stride, &scanned) < 3) {
- if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) {
- /* simple string enumeration */
- if (lnet_expand1tb(&pending, str, sep, sep2,
- parsed,
- (int)(enditem - parsed))) {
- goto failed;
- }
- continue;
- }
-
- stride = 1;
- }
-
- /* range expansion */
-
- if (enditem != parsed + scanned) /* no trailing junk */
- goto failed;
-
- if (hi < 0 || lo < 0 || stride < 0 || hi < lo ||
- (hi - lo) % stride)
- goto failed;
-
- for (i = lo; i <= hi; i += stride) {
- snprintf(num, sizeof(num), "%d", i);
- nob = strlen(num);
- if (nob + 1 == sizeof(num))
- goto failed;
-
- if (lnet_expand1tb(&pending, str, sep, sep2,
- num, nob))
- goto failed;
- }
- }
-
- list_splice(&pending, tbs->prev);
- return 1;
-
- failed:
- lnet_free_text_bufs(&pending);
- return -EINVAL;
-}
-
-static int
-lnet_parse_hops(char *str, unsigned int *hops)
-{
- int len = strlen(str);
- int nob = len;
-
- return (sscanf(str, "%u%n", hops, &nob) >= 1 &&
- nob == len &&
- *hops > 0 && *hops < 256);
-}
-
-#define LNET_PRIORITY_SEPARATOR (':')
-
-static int
-lnet_parse_priority(char *str, unsigned int *priority, char **token)
-{
- int nob;
- char *sep;
- int len;
-
- sep = strchr(str, LNET_PRIORITY_SEPARATOR);
- if (!sep) {
- *priority = 0;
- return 0;
- }
- len = strlen(sep + 1);
-
- if ((sscanf((sep + 1), "%u%n", priority, &nob) < 1) || (len != nob)) {
- /*
- * Update the caller's token pointer so it treats the found
- * priority as the token to report in the error message.
- */
- *token += sep - str + 1;
- return -EINVAL;
- }
-
- CDEBUG(D_NET, "gateway %s, priority %d, nob %d\n", str, *priority, nob);
-
- /*
- * Change priority separator to \0 to be able to parse NID
- */
- *sep = '\0';
- return 0;
-}
-
-static int
-lnet_parse_route(char *str, int *im_a_router)
-{
- /* static scratch buffer OK (single threaded) */
- static char cmd[LNET_SINGLE_TEXTBUF_NOB];
-
- struct list_head nets;
- struct list_head gateways;
- struct list_head *tmp1;
- struct list_head *tmp2;
- __u32 net;
- lnet_nid_t nid;
- struct lnet_text_buf *ltb;
- int rc;
- char *sep;
- char *token = str;
- int ntokens = 0;
- int myrc = -1;
- __u32 hops;
- int got_hops = 0;
- unsigned int priority = 0;
-
- INIT_LIST_HEAD(&gateways);
- INIT_LIST_HEAD(&nets);
-
- /* save a copy of the string for error messages */
- strncpy(cmd, str, sizeof(cmd));
- cmd[sizeof(cmd) - 1] = '\0';
-
- sep = str;
- for (;;) {
- /* scan for token start */
- while (isspace(*sep))
- sep++;
- if (!*sep) {
- if (ntokens < (got_hops ? 3 : 2))
- goto token_error;
- break;
- }
-
- ntokens++;
- token = sep++;
-
- /* scan for token end */
- while (*sep && !isspace(*sep))
- sep++;
- if (*sep)
- *sep++ = 0;
-
- if (ntokens == 1) {
- tmp2 = &nets; /* expanding nets */
- } else if (ntokens == 2 &&
- lnet_parse_hops(token, &hops)) {
- got_hops = 1; /* got a hop count */
- continue;
- } else {
- tmp2 = &gateways; /* expanding gateways */
- }
-
- ltb = lnet_new_text_buf(strlen(token));
- if (!ltb)
- goto out;
-
- strcpy(ltb->ltb_text, token);
- tmp1 = &ltb->ltb_list;
- list_add_tail(tmp1, tmp2);
-
- while (tmp1 != tmp2) {
- ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
-
- rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text);
- if (rc < 0)
- goto token_error;
-
- tmp1 = tmp1->next;
-
- if (rc > 0) { /* expanded! */
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- continue;
- }
-
- if (ntokens == 1) {
- net = libcfs_str2net(ltb->ltb_text);
- if (net == LNET_NIDNET(LNET_NID_ANY) ||
- LNET_NETTYP(net) == LOLND)
- goto token_error;
- } else {
- rc = lnet_parse_priority(ltb->ltb_text,
- &priority, &token);
- if (rc < 0)
- goto token_error;
-
- nid = libcfs_str2nid(ltb->ltb_text);
- if (nid == LNET_NID_ANY ||
- LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
- goto token_error;
- }
- }
- }
-
- /**
- * if there are no hops set then we want to flag this value as
- * unset since hops is an optional parameter
- */
- if (!got_hops)
- hops = LNET_UNDEFINED_HOPS;
-
- LASSERT(!list_empty(&nets));
- LASSERT(!list_empty(&gateways));
-
- list_for_each(tmp1, &nets) {
- ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list);
- net = libcfs_str2net(ltb->ltb_text);
- LASSERT(net != LNET_NIDNET(LNET_NID_ANY));
-
- list_for_each(tmp2, &gateways) {
- ltb = list_entry(tmp2, struct lnet_text_buf, ltb_list);
- nid = libcfs_str2nid(ltb->ltb_text);
- LASSERT(nid != LNET_NID_ANY);
-
- if (lnet_islocalnid(nid)) {
- *im_a_router = 1;
- continue;
- }
-
- rc = lnet_add_route(net, hops, nid, priority);
- if (rc && rc != -EEXIST && rc != -EHOSTUNREACH) {
- CERROR("Can't create route to %s via %s\n",
- libcfs_net2str(net),
- libcfs_nid2str(nid));
- goto out;
- }
- }
- }
-
- myrc = 0;
- goto out;
-
- token_error:
- lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
- out:
- lnet_free_text_bufs(&nets);
- lnet_free_text_bufs(&gateways);
- return myrc;
-}
-
-static int
-lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
-{
- struct lnet_text_buf *ltb;
-
- while (!list_empty(tbs)) {
- ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list);
-
- if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) {
- lnet_free_text_bufs(tbs);
- return -EINVAL;
- }
-
- list_del(&ltb->ltb_list);
- lnet_free_text_buf(ltb);
- }
-
- return 0;
-}
-
-int
-lnet_parse_routes(char *routes, int *im_a_router)
-{
- struct list_head tbs;
- int rc = 0;
-
- *im_a_router = 0;
-
- INIT_LIST_HEAD(&tbs);
-
- if (lnet_str2tbs_sep(&tbs, routes) < 0) {
- CERROR("Error parsing routes\n");
- rc = -EINVAL;
- } else {
- rc = lnet_parse_route_tbs(&tbs, im_a_router);
- }
-
- LASSERT(!lnet_tbnob);
- return rc;
-}
-
-static int
-lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip)
-{
- LIST_HEAD(list);
- int rc;
- int i;
-
- rc = cfs_ip_addr_parse(token, len, &list);
- if (rc)
- return rc;
-
- for (rc = i = 0; !rc && i < nip; i++)
- rc = cfs_ip_addr_match(ipaddrs[i], &list);
-
- cfs_expr_list_free_list(&list);
-
- return rc;
-}
-
-static int
-lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip)
-{
- static char tokens[LNET_SINGLE_TEXTBUF_NOB];
-
- int matched = 0;
- int ntokens = 0;
- int len;
- char *net = NULL;
- char *sep;
- char *token;
- int rc;
-
- LASSERT(strlen(net_entry) < sizeof(tokens));
-
- /* work on a copy of the string */
- strcpy(tokens, net_entry);
- sep = tokens;
- for (;;) {
- /* scan for token start */
- while (isspace(*sep))
- sep++;
- if (!*sep)
- break;
-
- token = sep++;
-
- /* scan for token end */
- while (*sep && !isspace(*sep))
- sep++;
- if (*sep)
- *sep++ = 0;
-
- if (!ntokens++) {
- net = token;
- continue;
- }
-
- len = strlen(token);
-
- rc = lnet_match_network_token(token, len, ipaddrs, nip);
- if (rc < 0) {
- lnet_syntax("ip2nets", net_entry,
- (int)(token - tokens), len);
- return rc;
- }
-
- if (rc)
- matched |= 1;
- }
-
- if (!matched)
- return 0;
-
- strcpy(net_entry, net); /* replace with matched net */
- return 1;
-}
-
-static __u32
-lnet_netspec2net(char *netspec)
-{
- char *bracket = strchr(netspec, '(');
- __u32 net;
-
- if (bracket)
- *bracket = 0;
-
- net = libcfs_str2net(netspec);
-
- if (bracket)
- *bracket = '(';
-
- return net;
-}
-
-static int
-lnet_splitnets(char *source, struct list_head *nets)
-{
- int offset = 0;
- int offset2;
- int len;
- struct lnet_text_buf *tb;
- struct lnet_text_buf *tb2;
- struct list_head *t;
- char *sep;
- char *bracket;
- __u32 net;
-
- LASSERT(!list_empty(nets));
- LASSERT(nets->next == nets->prev); /* single entry */
-
- tb = list_entry(nets->next, struct lnet_text_buf, ltb_list);
-
- for (;;) {
- sep = strchr(tb->ltb_text, ',');
- bracket = strchr(tb->ltb_text, '(');
-
- if (sep && bracket && bracket < sep) {
- /* netspec lists interfaces... */
-
- offset2 = offset + (int)(bracket - tb->ltb_text);
- len = strlen(bracket);
-
- bracket = strchr(bracket + 1, ')');
-
- if (!bracket ||
- !(bracket[1] == ',' || !bracket[1])) {
- lnet_syntax("ip2nets", source, offset2, len);
- return -EINVAL;
- }
-
- sep = !bracket[1] ? NULL : bracket + 1;
- }
-
- if (sep)
- *sep++ = 0;
-
- net = lnet_netspec2net(tb->ltb_text);
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- lnet_syntax("ip2nets", source, offset,
- strlen(tb->ltb_text));
- return -EINVAL;
- }
-
- list_for_each(t, nets) {
- tb2 = list_entry(t, struct lnet_text_buf, ltb_list);
-
- if (tb2 == tb)
- continue;
-
- if (net == lnet_netspec2net(tb2->ltb_text)) {
- /* duplicate network */
- lnet_syntax("ip2nets", source, offset,
- strlen(tb->ltb_text));
- return -EINVAL;
- }
- }
-
- if (!sep)
- return 0;
-
- offset += (int)(sep - tb->ltb_text);
- len = strlen(sep);
- tb2 = lnet_new_text_buf(len);
- if (!tb2)
- return -ENOMEM;
-
- strncpy(tb2->ltb_text, sep, len);
- tb2->ltb_text[len] = '\0';
- list_add_tail(&tb2->ltb_list, nets);
-
- tb = tb2;
- }
-}
-
-static int
-lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
-{
- static char networks[LNET_SINGLE_TEXTBUF_NOB];
- static char source[LNET_SINGLE_TEXTBUF_NOB];
-
- struct list_head raw_entries;
- struct list_head matched_nets;
- struct list_head current_nets;
- struct list_head *t;
- struct list_head *t2;
- struct lnet_text_buf *tb;
- struct lnet_text_buf *temp;
- struct lnet_text_buf *tb2;
- __u32 net1;
- __u32 net2;
- int len;
- int count;
- int dup;
- int rc;
-
- INIT_LIST_HEAD(&raw_entries);
- if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
- CERROR("Error parsing ip2nets\n");
- LASSERT(!lnet_tbnob);
- return -EINVAL;
- }
-
- INIT_LIST_HEAD(&matched_nets);
- INIT_LIST_HEAD(&current_nets);
- networks[0] = 0;
- count = 0;
- len = 0;
- rc = 0;
-
- list_for_each_entry_safe(tb, temp, &raw_entries, ltb_list) {
- strncpy(source, tb->ltb_text, sizeof(source));
- source[sizeof(source) - 1] = '\0';
-
- /* replace ltb_text with the network(s) add on match */
- rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip);
- if (rc < 0)
- break;
-
- list_del(&tb->ltb_list);
-
- if (!rc) { /* no match */
- lnet_free_text_buf(tb);
- continue;
- }
-
- /* split into separate networks */
- INIT_LIST_HEAD(&current_nets);
- list_add(&tb->ltb_list, &current_nets);
- rc = lnet_splitnets(source, &current_nets);
- if (rc < 0)
- break;
-
- dup = 0;
- list_for_each(t, &current_nets) {
- tb = list_entry(t, struct lnet_text_buf, ltb_list);
- net1 = lnet_netspec2net(tb->ltb_text);
- LASSERT(net1 != LNET_NIDNET(LNET_NID_ANY));
-
- list_for_each(t2, &matched_nets) {
- tb2 = list_entry(t2, struct lnet_text_buf,
- ltb_list);
- net2 = lnet_netspec2net(tb2->ltb_text);
- LASSERT(net2 != LNET_NIDNET(LNET_NID_ANY));
-
- if (net1 == net2) {
- dup = 1;
- break;
- }
- }
-
- if (dup)
- break;
- }
-
- if (dup) {
- lnet_free_text_bufs(&current_nets);
- continue;
- }
-
- list_for_each_safe(t, t2, &current_nets) {
- tb = list_entry(t, struct lnet_text_buf, ltb_list);
-
- list_del(&tb->ltb_list);
- list_add_tail(&tb->ltb_list, &matched_nets);
-
- len += snprintf(networks + len, sizeof(networks) - len,
- "%s%s", !len ? "" : ",",
- tb->ltb_text);
-
- if (len >= sizeof(networks)) {
- CERROR("Too many matched networks\n");
- rc = -E2BIG;
- goto out;
- }
- }
-
- count++;
- }
-
- out:
- lnet_free_text_bufs(&raw_entries);
- lnet_free_text_bufs(&matched_nets);
- lnet_free_text_bufs(&current_nets);
- LASSERT(!lnet_tbnob);
-
- if (rc < 0)
- return rc;
-
- *networksp = networks;
- return count;
-}
-
-static int
-lnet_ipaddr_enumerate(__u32 **ipaddrsp)
-{
- int up;
- __u32 netmask;
- __u32 *ipaddrs;
- __u32 *ipaddrs2;
- int nip;
- char **ifnames;
- int nif = lnet_ipif_enumerate(&ifnames);
- int i;
- int rc;
-
- if (nif <= 0)
- return nif;
-
- ipaddrs = kcalloc(nif, sizeof(*ipaddrs), GFP_KERNEL);
- if (!ipaddrs) {
- CERROR("Can't allocate ipaddrs[%d]\n", nif);
- lnet_ipif_free_enumeration(ifnames, nif);
- return -ENOMEM;
- }
-
- for (i = nip = 0; i < nif; i++) {
- if (!strcmp(ifnames[i], "lo"))
- continue;
-
- rc = lnet_ipif_query(ifnames[i], &up, &ipaddrs[nip], &netmask);
- if (rc) {
- CWARN("Can't query interface %s: %d\n",
- ifnames[i], rc);
- continue;
- }
-
- if (!up) {
- CWARN("Ignoring interface %s: it's down\n",
- ifnames[i]);
- continue;
- }
-
- nip++;
- }
-
- lnet_ipif_free_enumeration(ifnames, nif);
-
- if (nip == nif) {
- *ipaddrsp = ipaddrs;
- } else {
- if (nip > 0) {
- ipaddrs2 = kcalloc(nip, sizeof(*ipaddrs2),
- GFP_KERNEL);
- if (!ipaddrs2) {
- CERROR("Can't allocate ipaddrs[%d]\n", nip);
- nip = -ENOMEM;
- } else {
- memcpy(ipaddrs2, ipaddrs,
- nip * sizeof(*ipaddrs));
- *ipaddrsp = ipaddrs2;
- rc = nip;
- }
- }
- kfree(ipaddrs);
- }
- return nip;
-}
-
-int
-lnet_parse_ip2nets(char **networksp, char *ip2nets)
-{
- __u32 *ipaddrs = NULL;
- int nip = lnet_ipaddr_enumerate(&ipaddrs);
- int rc;
-
- if (nip < 0) {
- LCONSOLE_ERROR_MSG(0x117,
- "Error %d enumerating local IP interfaces for ip2nets to match\n",
- nip);
- return nip;
- }
-
- if (!nip) {
- LCONSOLE_ERROR_MSG(0x118,
- "No local IP interfaces for ip2nets to match\n");
- return -ENOENT;
- }
-
- rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
- kfree(ipaddrs);
-
- if (rc < 0) {
- LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
- return rc;
- }
-
- if (!rc) {
- LCONSOLE_ERROR_MSG(0x11a,
- "ip2nets does not match any local IP interfaces\n");
- return -ENOENT;
- }
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c
deleted file mode 100644
index c78e703..0000000
--- a/drivers/staging/lustre/lnet/lnet/lib-eq.c
+++ /dev/null
@@ -1,426 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-eq.c
- *
- * Library level Event queue management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create an event queue that has room for \a count number of events.
- *
- * The event queue is circular and older events will be overwritten by new
- * ones if they are not removed in time by the user using the functions
- * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to
- * determine the appropriate size of the event queue to prevent this loss
- * of events. Note that when EQ handler is specified in \a callback, no
- * event loss can happen, since the handler is run for each event deposited
- * into the EQ.
- *
- * \param count The number of events to be stored in the event queue. It
- * will be rounded up to the next power of two.
- * \param callback A handler function that runs when an event is deposited
- * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to
- * indicate that no event handler is desired.
- * \param handle On successful return, this location will hold a handle for
- * the newly created EQ.
- *
- * \retval 0 On success.
- * \retval -EINVAL If an parameter is not valid.
- * \retval -ENOMEM If memory for the EQ can't be allocated.
- *
- * \see lnet_eq_handler_t for the discussion on EQ handler semantics.
- */
-int
-LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
- struct lnet_handle_eq *handle)
-{
- struct lnet_eq *eq;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- /*
- * We need count to be a power of 2 so that when eq_{enq,deq}_seq
- * overflow, they don't skip entries, so the queue has the same
- * apparent capacity at all times
- */
- if (count)
- count = roundup_pow_of_two(count);
-
- if (callback != LNET_EQ_HANDLER_NONE && count)
- CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? Please contact with developer to confirm\n", count);
-
- /*
- * count can be 0 if only need callback, we can eliminate
- * overhead of enqueue event
- */
- if (!count && callback == LNET_EQ_HANDLER_NONE)
- return -EINVAL;
-
- eq = kzalloc(sizeof(*eq), GFP_NOFS);
- if (!eq)
- return -ENOMEM;
-
- if (count) {
- eq->eq_events = kvmalloc_array(count, sizeof(struct lnet_event),
- GFP_KERNEL | __GFP_ZERO);
- if (!eq->eq_events)
- goto failed;
- /*
- * NB allocator has set all event sequence numbers to 0,
- * so all them should be earlier than eq_deq_seq
- */
- }
-
- eq->eq_deq_seq = 1;
- eq->eq_enq_seq = 1;
- eq->eq_size = count;
- eq->eq_callback = callback;
-
- eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*eq->eq_refs[0]));
- if (!eq->eq_refs)
- goto failed;
-
- /* MUST hold both exclusive lnet_res_lock */
- lnet_res_lock(LNET_LOCK_EX);
- /*
- * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
- * both EQ lookup and poll event with only lnet_eq_wait_lock
- */
- lnet_eq_wait_lock();
-
- lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh);
- list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active);
-
- lnet_eq_wait_unlock();
- lnet_res_unlock(LNET_LOCK_EX);
-
- lnet_eq2handle(handle, eq);
- return 0;
-
-failed:
- kvfree(eq->eq_events);
-
- if (eq->eq_refs)
- cfs_percpt_free(eq->eq_refs);
-
- kfree(eq);
- return -ENOMEM;
-}
-EXPORT_SYMBOL(LNetEQAlloc);
-
-/**
- * Release the resources associated with an event queue if it's idle;
- * otherwise do nothing and it's up to the user to try again.
- *
- * \param eqh A handle for the event queue to be released.
- *
- * \retval 0 If the EQ is not in use and freed.
- * \retval -ENOENT If \a eqh does not point to a valid EQ.
- * \retval -EBUSY If the EQ is still in use by some MDs.
- */
-int
-LNetEQFree(struct lnet_handle_eq eqh)
-{
- struct lnet_eq *eq;
- struct lnet_event *events = NULL;
- int **refs = NULL;
- int *ref;
- int rc = 0;
- int size = 0;
- int i;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- lnet_res_lock(LNET_LOCK_EX);
- /*
- * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
- * both EQ lookup and poll event with only lnet_eq_wait_lock
- */
- lnet_eq_wait_lock();
-
- eq = lnet_handle2eq(&eqh);
- if (!eq) {
- rc = -ENOENT;
- goto out;
- }
-
- cfs_percpt_for_each(ref, i, eq->eq_refs) {
- LASSERT(*ref >= 0);
- if (!*ref)
- continue;
-
- CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n",
- i, *ref);
- rc = -EBUSY;
- goto out;
- }
-
- /* stash for free after lock dropped */
- events = eq->eq_events;
- size = eq->eq_size;
- refs = eq->eq_refs;
-
- lnet_res_lh_invalidate(&eq->eq_lh);
- list_del(&eq->eq_list);
- kfree(eq);
- out:
- lnet_eq_wait_unlock();
- lnet_res_unlock(LNET_LOCK_EX);
-
- kvfree(events);
- if (refs)
- cfs_percpt_free(refs);
-
- return rc;
-}
-EXPORT_SYMBOL(LNetEQFree);
-
-void
-lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
- /* MUST called with resource lock hold but w/o lnet_eq_wait_lock */
- int index;
-
- if (!eq->eq_size) {
- LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE);
- eq->eq_callback(ev);
- return;
- }
-
- lnet_eq_wait_lock();
- ev->sequence = eq->eq_enq_seq++;
-
- LASSERT(is_power_of_2(eq->eq_size));
- index = ev->sequence & (eq->eq_size - 1);
-
- eq->eq_events[index] = *ev;
-
- if (eq->eq_callback != LNET_EQ_HANDLER_NONE)
- eq->eq_callback(ev);
-
- /* Wake anyone waiting in LNetEQPoll() */
- if (waitqueue_active(&the_lnet.ln_eq_waitq))
- wake_up_all(&the_lnet.ln_eq_waitq);
- lnet_eq_wait_unlock();
-}
-
-static int
-lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev)
-{
- int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
- struct lnet_event *new_event = &eq->eq_events[new_index];
- int rc;
-
- /* must called with lnet_eq_wait_lock hold */
- if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence))
- return 0;
-
- /* We've got a new event... */
- *ev = *new_event;
-
- CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
- new_event, eq->eq_deq_seq, eq->eq_size);
-
- /* ...but did it overwrite an event we've not seen yet? */
- if (eq->eq_deq_seq == new_event->sequence) {
- rc = 1;
- } else {
- /*
- * don't complain with CERROR: some EQs are sized small
- * anyway; if it's important, the caller should complain
- */
- CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
- eq->eq_deq_seq, new_event->sequence);
- rc = -EOVERFLOW;
- }
-
- eq->eq_deq_seq = new_event->sequence + 1;
- return rc;
-}
-
-/**
- * A nonblocking function that can be used to get the next event in an EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. The event is removed from the queue.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 0 No pending event in the EQ.
- * \retval 1 Indicates success.
- * \retval -ENOENT If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-/**
- * Block the calling process until there is an event in the EQ.
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully. This function returns the next event
- * in the EQ and removes it from the EQ.
- *
- * \param eventq A handle for the event queue.
- * \param event On successful return (1 or -EOVERFLOW), this location will
- * hold the next event in the EQ.
- *
- * \retval 1 Indicates success.
- * \retval -ENOENT If \a eventq does not point to a valid EQ.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ has been dropped due to limited space in the EQ.
- */
-
-static int
-lnet_eq_wait_locked(int *timeout_ms, long state)
-__must_hold(&the_lnet.ln_eq_wait_lock)
-{
- int tms = *timeout_ms;
- int wait;
- wait_queue_entry_t wl;
- unsigned long now;
-
- if (!tms)
- return -ENXIO; /* don't want to wait and no new event */
-
- init_waitqueue_entry(&wl, current);
- set_current_state(state);
- add_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
- lnet_eq_wait_unlock();
-
- if (tms < 0) {
- schedule();
- } else {
- now = jiffies;
- schedule_timeout(msecs_to_jiffies(tms));
- tms -= jiffies_to_msecs(jiffies - now);
- if (tms < 0) /* no more wait but may have new event */
- tms = 0;
- }
-
- wait = tms; /* might need to call here again */
- *timeout_ms = tms;
-
- lnet_eq_wait_lock();
- remove_wait_queue(&the_lnet.ln_eq_waitq, &wl);
-
- return wait;
-}
-
-/**
- * Block the calling process until there's an event from a set of EQs or
- * timeout happens.
- *
- * If an event handler is associated with the EQ, the handler will run before
- * this function returns successfully, in which case the corresponding event
- * is consumed.
- *
- * LNetEQPoll() provides a timeout to allow applications to poll, block for a
- * fixed period, or block indefinitely.
- *
- * \param eventqs,neq An array of EQ handles, and size of the array.
- * \param timeout_ms Time in milliseconds to wait for an event to occur on
- * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an
- * infinite timeout.
- * \param interruptible, if true, use TASK_INTERRUPTIBLE, else TASK_NOLOAD
- * \param event,which On successful return (1 or -EOVERFLOW), \a event will
- * hold the next event in the EQs, and \a which will contain the index of the
- * EQ from which the event was taken.
- *
- * \retval 0 No pending event in the EQs after timeout.
- * \retval 1 Indicates success.
- * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
- * at least one event between this event and the last event obtained from the
- * EQ indicated by \a which has been dropped due to limited space in the EQ.
- * \retval -ENOENT If there's an invalid handle in \a eventqs.
- */
-int
-LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms,
- int interruptible,
- struct lnet_event *event, int *which)
-{
- int wait = 1;
- int rc;
- int i;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (neq < 1)
- return -ENOENT;
-
- lnet_eq_wait_lock();
-
- for (;;) {
- for (i = 0; i < neq; i++) {
- struct lnet_eq *eq = lnet_handle2eq(&eventqs[i]);
-
- if (!eq) {
- lnet_eq_wait_unlock();
- return -ENOENT;
- }
-
- rc = lnet_eq_dequeue_event(eq, event);
- if (rc) {
- lnet_eq_wait_unlock();
- *which = i;
- return rc;
- }
- }
-
- if (!wait)
- break;
-
- /*
- * return value of lnet_eq_wait_locked:
- * -1 : did nothing and it's sure no new event
- * 1 : sleep inside and wait until new event
- * 0 : don't want to wait anymore, but might have new event
- * so need to call dequeue again
- */
- wait = lnet_eq_wait_locked(&timeout_ms,
- interruptible ? TASK_INTERRUPTIBLE
- : TASK_NOLOAD);
- if (wait < 0) /* no new event */
- break;
- }
-
- lnet_eq_wait_unlock();
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
deleted file mode 100644
index 8a22514..0000000
--- a/drivers/staging/lustre/lnet/lnet/lib-md.c
+++ /dev/null
@@ -1,463 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-md.c
- *
- * Memory Descriptor management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_unlink(struct lnet_libmd *md)
-{
- if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) {
- /* first unlink attempt... */
- struct lnet_me *me = md->md_me;
-
- md->md_flags |= LNET_MD_FLAG_ZOMBIE;
-
- /*
- * Disassociate from ME (if any),
- * and unlink it if it was created
- * with LNET_UNLINK
- */
- if (me) {
- /* detach MD from portal */
- lnet_ptl_detach_md(me, md);
- if (me->me_unlink == LNET_UNLINK)
- lnet_me_unlink(me);
- }
-
- /* ensure all future handle lookups fail */
- lnet_res_lh_invalidate(&md->md_lh);
- }
-
- if (md->md_refcount) {
- CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
- return;
- }
-
- CDEBUG(D_NET, "Unlinking md %p\n", md);
-
- if (md->md_eq) {
- int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
-
- LASSERT(*md->md_eq->eq_refs[cpt] > 0);
- (*md->md_eq->eq_refs[cpt])--;
- }
-
- LASSERT(!list_empty(&md->md_list));
- list_del_init(&md->md_list);
- kfree(md);
-}
-
-static int
-lnet_md_build(struct lnet_libmd *lmd, struct lnet_md *umd, int unlink)
-{
- int i;
- unsigned int niov;
- int total_length = 0;
-
- lmd->md_me = NULL;
- lmd->md_start = umd->start;
- lmd->md_offset = 0;
- lmd->md_max_size = umd->max_size;
- lmd->md_options = umd->options;
- lmd->md_user_ptr = umd->user_ptr;
- lmd->md_eq = NULL;
- lmd->md_threshold = umd->threshold;
- lmd->md_refcount = 0;
- lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
-
- if (umd->options & LNET_MD_IOVEC) {
- if (umd->options & LNET_MD_KIOV) /* Can't specify both */
- return -EINVAL;
-
- niov = umd->length;
- lmd->md_niov = umd->length;
- memcpy(lmd->md_iov.iov, umd->start,
- niov * sizeof(lmd->md_iov.iov[0]));
-
- for (i = 0; i < (int)niov; i++) {
- /* We take the base address on trust */
- /* invalid length */
- if (lmd->md_iov.iov[i].iov_len <= 0)
- return -EINVAL;
-
- total_length += lmd->md_iov.iov[i].iov_len;
- }
-
- lmd->md_length = total_length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) /* illegal max_size */
- return -EINVAL;
-
- } else if (umd->options & LNET_MD_KIOV) {
- niov = umd->length;
- lmd->md_niov = umd->length;
- memcpy(lmd->md_iov.kiov, umd->start,
- niov * sizeof(lmd->md_iov.kiov[0]));
-
- for (i = 0; i < (int)niov; i++) {
- /* We take the page pointer on trust */
- if (lmd->md_iov.kiov[i].bv_offset +
- lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
- return -EINVAL; /* invalid length */
-
- total_length += lmd->md_iov.kiov[i].bv_len;
- }
-
- lmd->md_length = total_length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) /* illegal max_size */
- return -EINVAL;
- } else { /* contiguous */
- lmd->md_length = umd->length;
- niov = 1;
- lmd->md_niov = 1;
- lmd->md_iov.iov[0].iov_base = umd->start;
- lmd->md_iov.iov[0].iov_len = umd->length;
-
- if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > (int)umd->length)) /* illegal max_size */
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* must be called with resource lock held */
-static int
-lnet_md_link(struct lnet_libmd *md, struct lnet_handle_eq eq_handle, int cpt)
-{
- struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];
-
- /*
- * NB we are passed an allocated, but inactive md.
- * if we return success, caller may lnet_md_unlink() it.
- * otherwise caller may only kfree() it.
- */
- /*
- * This implementation doesn't know how to create START events or
- * disable END events. Best to LASSERT our caller is compliant so
- * we find out quickly...
- */
- /*
- * TODO - reevaluate what should be here in light of
- * the removal of the start and end events
- * maybe there we shouldn't even allow LNET_EQ_NONE!)
- * LASSERT(!eq);
- */
- if (!LNetEQHandleIsInvalid(eq_handle)) {
- md->md_eq = lnet_handle2eq(&eq_handle);
-
- if (!md->md_eq)
- return -ENOENT;
-
- (*md->md_eq->eq_refs[cpt])++;
- }
-
- lnet_res_lh_initialize(container, &md->md_lh);
-
- LASSERT(list_empty(&md->md_list));
- list_add(&md->md_list, &container->rec_active);
-
- return 0;
-}
-
-/* must be called with lnet_res_lock held */
-void
-lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd)
-{
- /* NB this doesn't copy out all the iov entries so when a
- * discontiguous MD is copied out, the target gets to know the
- * original iov pointer (in start) and the number of entries it had
- * and that's all.
- */
- umd->start = lmd->md_start;
- umd->length = !(lmd->md_options &
- (LNET_MD_IOVEC | LNET_MD_KIOV)) ?
- lmd->md_length : lmd->md_niov;
- umd->threshold = lmd->md_threshold;
- umd->max_size = lmd->md_max_size;
- umd->options = lmd->md_options;
- umd->user_ptr = lmd->md_user_ptr;
- lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
-}
-
-static int
-lnet_md_validate(struct lnet_md *umd)
-{
- if (!umd->start && umd->length) {
- CERROR("MD start pointer can not be NULL with length %u\n",
- umd->length);
- return -EINVAL;
- }
-
- if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) &&
- umd->length > LNET_MAX_IOV) {
- CERROR("Invalid option: too many fragments %u, %d max\n",
- umd->length, LNET_MAX_IOV);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * Create a memory descriptor and attach it to a ME
- *
- * \param meh A handle for a ME to associate the new MD with.
- * \param umd Provides initial values for the user-visible parts of a MD.
- * Other than its use for initialization, there is no linkage between this
- * structure and the MD maintained by the LNet.
- * \param unlink A flag to indicate whether the MD is automatically unlinked
- * when it becomes inactive, either because the operation threshold drops to
- * zero or because the available memory becomes less than \a umd.max_size.
- * (Note that the check for unlinking a MD only occurs after the completion
- * of a successful operation on the MD.) The value LNET_UNLINK enables auto
- * unlinking; the value LNET_RETAIN disables it.
- * \param handle On successful returns, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink().
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
- * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
- * calling LNetInvalidateHandle() on it.
- * \retval -EBUSY If the ME pointed to by \a meh is already associated with
- * a MD.
- */
-int
-LNetMDAttach(struct lnet_handle_me meh, struct lnet_md umd,
- enum lnet_unlink unlink, struct lnet_handle_md *handle)
-{
- LIST_HEAD(matches);
- LIST_HEAD(drops);
- struct lnet_me *me;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (lnet_md_validate(&umd))
- return -EINVAL;
-
- if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
- CERROR("Invalid option: no MD_OP set\n");
- return -EINVAL;
- }
-
- md = lnet_md_alloc(&umd);
- if (!md)
- return -ENOMEM;
-
- rc = lnet_md_build(md, &umd, unlink);
- if (rc)
- goto out_free;
-
- cpt = lnet_cpt_of_cookie(meh.cookie);
-
- lnet_res_lock(cpt);
-
- me = lnet_handle2me(&meh);
- if (!me)
- rc = -ENOENT;
- else if (me->me_md)
- rc = -EBUSY;
- else
- rc = lnet_md_link(md, umd.eq_handle, cpt);
-
- if (rc)
- goto out_unlock;
-
- /*
- * attach this MD to portal of ME and check if it matches any
- * blocked msgs on this portal
- */
- lnet_ptl_attach_md(me, md, &matches, &drops);
-
- lnet_md2handle(handle, md);
-
- lnet_res_unlock(cpt);
-
- lnet_drop_delayed_msg_list(&drops, "Bad match");
- lnet_recv_delayed_msg_list(&matches);
-
- return 0;
-
-out_unlock:
- lnet_res_unlock(cpt);
-out_free:
- kfree(md);
- return rc;
-}
-EXPORT_SYMBOL(LNetMDAttach);
-
-/**
- * Create a "free floating" memory descriptor - a MD that is not associated
- * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations.
- *
- * \param umd,unlink See the discussion for LNetMDAttach().
- * \param handle On successful returns, a handle to the newly created MD is
- * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(),
- * and LNetGet() operations.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a umd is not valid.
- * \retval -ENOMEM If new MD cannot be allocated.
- * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that
- * it's OK to supply a NULL \a umd.eq_handle by calling
- * LNetInvalidateHandle() on it.
- */
-int
-LNetMDBind(struct lnet_md umd, enum lnet_unlink unlink,
- struct lnet_handle_md *handle)
-{
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (lnet_md_validate(&umd))
- return -EINVAL;
-
- if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) {
- CERROR("Invalid option: GET|PUT illegal on active MDs\n");
- return -EINVAL;
- }
-
- md = lnet_md_alloc(&umd);
- if (!md)
- return -ENOMEM;
-
- rc = lnet_md_build(md, &umd, unlink);
- if (rc)
- goto out_free;
-
- cpt = lnet_res_lock_current();
-
- rc = lnet_md_link(md, umd.eq_handle, cpt);
- if (rc)
- goto out_unlock;
-
- lnet_md2handle(handle, md);
-
- lnet_res_unlock(cpt);
- return 0;
-
-out_unlock:
- lnet_res_unlock(cpt);
-out_free:
- kfree(md);
-
- return rc;
-}
-EXPORT_SYMBOL(LNetMDBind);
-
-/**
- * Unlink the memory descriptor from any ME it may be linked to and release
- * the internal resources associated with it. As a result, active messages
- * associated with the MD may get aborted.
- *
- * This function does not free the memory region associated with the MD;
- * i.e., the memory the user allocated for this MD. If the ME associated with
- * this MD is not NULL and was created with auto unlink enabled, the ME is
- * unlinked as well (see LNetMEAttach()).
- *
- * Explicitly unlinking a MD via this function call has the same behavior as
- * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK
- * is generated in the latter case.
- *
- * An unlinked event can be reported in two ways:
- * - If there's no pending operations on the MD, it's unlinked immediately
- * and an LNET_EVENT_UNLINK event is logged before this function returns.
- * - Otherwise, the MD is only marked for deletion when this function
- * returns, and the unlinked event will be piggybacked on the event of
- * the completion of the last operation by setting the unlinked field of
- * the event. No dedicated LNET_EVENT_UNLINK event is generated.
- *
- * Note that in both cases the unlinked field of the event is always set; no
- * more event will happen on the MD after such an event is logged.
- *
- * \param mdh A handle for the MD to be unlinked.
- *
- * \retval 0 On success.
- * \retval -ENOENT If \a mdh does not point to a valid MD object.
- */
-int
-LNetMDUnlink(struct lnet_handle_md mdh)
-{
- struct lnet_event ev;
- struct lnet_libmd *md;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md) {
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- md->md_flags |= LNET_MD_FLAG_ABORTED;
- /*
- * If the MD is busy, lnet_md_unlink just marks it for deletion, and
- * when the LND is done, the completion event flags that the MD was
- * unlinked. Otherwise, we enqueue an event now...
- */
- if (md->md_eq && !md->md_refcount) {
- lnet_build_unlink_event(md, &ev);
- lnet_eq_enqueue_event(md->md_eq, &ev);
- }
-
- lnet_md_unlink(md);
-
- lnet_res_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMDUnlink);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c
deleted file mode 100644
index 672e37b..0000000
--- a/drivers/staging/lustre/lnet/lnet/lib-me.c
+++ /dev/null
@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-me.c
- *
- * Match Entry management routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/**
- * Create and attach a match entry to the match list of \a portal. The new
- * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach()
- * can be used to attach a MD to an empty ME.
- *
- * \param portal The portal table index where the ME should be attached.
- * \param match_id Specifies the match criteria for the process ID of
- * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be
- * used to wildcard either of the identifiers in the lnet_process_id
- * structure.
- * \param match_bits,ignore_bits Specify the match criteria to apply
- * to the match bits in the incoming request. The ignore bits are used
- * to mask out insignificant bits in the incoming match bits. The resulting
- * bits are then compared to the ME's match bits to determine if the
- * incoming request meets the match criteria.
- * \param unlink Indicates whether the ME should be unlinked when the memory
- * descriptor associated with it is unlinked (Note that the check for
- * unlinking a ME only occurs when the memory descriptor is unlinked.).
- * Valid values are LNET_RETAIN and LNET_UNLINK.
- * \param pos Indicates whether the new ME should be prepended or
- * appended to the match list. Allowed constants: LNET_INS_BEFORE,
- * LNET_INS_AFTER.
- * \param handle On successful returns, a handle to the newly created ME
- * object is saved here. This handle can be used later in LNetMEInsert(),
- * LNetMEUnlink(), or LNetMDAttach() functions.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is invalid.
- * \retval -ENOMEM If new ME object cannot be allocated.
- */
-int
-LNetMEAttach(unsigned int portal,
- struct lnet_process_id match_id,
- __u64 match_bits, __u64 ignore_bits,
- enum lnet_unlink unlink, enum lnet_ins_pos pos,
- struct lnet_handle_me *handle)
-{
- struct lnet_match_table *mtable;
- struct lnet_me *me;
- struct list_head *head;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if ((int)portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- mtable = lnet_mt_of_attach(portal, match_id,
- match_bits, ignore_bits, pos);
- if (!mtable) /* can't match portal type */
- return -EPERM;
-
- me = kzalloc(sizeof(*me), GFP_NOFS);
- if (!me)
- return -ENOMEM;
-
- lnet_res_lock(mtable->mt_cpt);
-
- me->me_portal = portal;
- me->me_match_id = match_id;
- me->me_match_bits = match_bits;
- me->me_ignore_bits = ignore_bits;
- me->me_unlink = unlink;
- me->me_md = NULL;
-
- lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt],
- &me->me_lh);
- if (ignore_bits)
- head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
- else
- head = lnet_mt_match_head(mtable, match_id, match_bits);
-
- me->me_pos = head - &mtable->mt_mhash[0];
- if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL)
- list_add_tail(&me->me_list, head);
- else
- list_add(&me->me_list, head);
-
- lnet_me2handle(handle, me);
-
- lnet_res_unlock(mtable->mt_cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMEAttach);
-
-/**
- * Create and a match entry and insert it before or after the ME pointed to by
- * \a current_meh. The new ME is empty, i.e. not associated with a memory
- * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME.
- *
- * This function is identical to LNetMEAttach() except for the position
- * where the new ME is inserted.
- *
- * \param current_meh A handle for a ME. The new ME will be inserted
- * immediately before or immediately after this ME.
- * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion
- * for LNetMEAttach().
- *
- * \retval 0 On success.
- * \retval -ENOMEM If new ME object cannot be allocated.
- * \retval -ENOENT If \a current_meh does not point to a valid match entry.
- */
-int
-LNetMEInsert(struct lnet_handle_me current_meh,
- struct lnet_process_id match_id,
- __u64 match_bits, __u64 ignore_bits,
- enum lnet_unlink unlink, enum lnet_ins_pos pos,
- struct lnet_handle_me *handle)
-{
- struct lnet_me *current_me;
- struct lnet_me *new_me;
- struct lnet_portal *ptl;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (pos == LNET_INS_LOCAL)
- return -EPERM;
-
- new_me = kzalloc(sizeof(*new_me), GFP_NOFS);
- if (!new_me)
- return -ENOMEM;
-
- cpt = lnet_cpt_of_cookie(current_meh.cookie);
-
- lnet_res_lock(cpt);
-
- current_me = lnet_handle2me(&current_meh);
- if (!current_me) {
- kfree(new_me);
-
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- LASSERT(current_me->me_portal < the_lnet.ln_nportals);
-
- ptl = the_lnet.ln_portals[current_me->me_portal];
- if (lnet_ptl_is_unique(ptl)) {
- /* nosense to insertion on unique portal */
- kfree(new_me);
- lnet_res_unlock(cpt);
- return -EPERM;
- }
-
- new_me->me_pos = current_me->me_pos;
- new_me->me_portal = current_me->me_portal;
- new_me->me_match_id = match_id;
- new_me->me_match_bits = match_bits;
- new_me->me_ignore_bits = ignore_bits;
- new_me->me_unlink = unlink;
- new_me->me_md = NULL;
-
- lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh);
-
- if (pos == LNET_INS_AFTER)
- list_add(&new_me->me_list, &current_me->me_list);
- else
- list_add_tail(&new_me->me_list, &current_me->me_list);
-
- lnet_me2handle(handle, new_me);
-
- lnet_res_unlock(cpt);
-
- return 0;
-}
-EXPORT_SYMBOL(LNetMEInsert);
-
-/**
- * Unlink a match entry from its match list.
- *
- * This operation also releases any resources associated with the ME. If a
- * memory descriptor is attached to the ME, then it will be unlinked as well
- * and an unlink event will be generated. It is an error to use the ME handle
- * after calling LNetMEUnlink().
- *
- * \param meh A handle for the ME to be unlinked.
- *
- * \retval 0 On success.
- * \retval -ENOENT If \a meh does not point to a valid ME.
- * \see LNetMDUnlink() for the discussion on delivering unlink event.
- */
-int
-LNetMEUnlink(struct lnet_handle_me meh)
-{
- struct lnet_me *me;
- struct lnet_libmd *md;
- struct lnet_event ev;
- int cpt;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_cpt_of_cookie(meh.cookie);
- lnet_res_lock(cpt);
-
- me = lnet_handle2me(&meh);
- if (!me) {
- lnet_res_unlock(cpt);
- return -ENOENT;
- }
-
- md = me->me_md;
- if (md) {
- md->md_flags |= LNET_MD_FLAG_ABORTED;
- if (md->md_eq && !md->md_refcount) {
- lnet_build_unlink_event(md, &ev);
- lnet_eq_enqueue_event(md->md_eq, &ev);
- }
- }
-
- lnet_me_unlink(me);
-
- lnet_res_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(LNetMEUnlink);
-
-/* call with lnet_res_lock please */
-void
-lnet_me_unlink(struct lnet_me *me)
-{
- list_del(&me->me_list);
-
- if (me->me_md) {
- struct lnet_libmd *md = me->me_md;
-
- /* detach MD from portal of this ME */
- lnet_ptl_detach_md(me, md);
- lnet_md_unlink(md);
- }
-
- lnet_res_lh_invalidate(&me->me_lh);
- kfree(me);
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
deleted file mode 100644
index f8eaf8f..0000000
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ /dev/null
@@ -1,2386 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-move.c
- *
- * Data movement routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-
-static int local_nid_dist_zero = 1;
-module_param(local_nid_dist_zero, int, 0444);
-MODULE_PARM_DESC(local_nid_dist_zero, "Reserved");
-
-int
-lnet_fail_nid(lnet_nid_t nid, unsigned int threshold)
-{
- struct lnet_test_peer *tp;
- struct lnet_test_peer *temp;
- struct list_head *el;
- struct list_head *next;
- struct list_head cull;
-
- /* NB: use lnet_net_lock(0) to serialize operations on test peers */
- if (threshold) {
- /* Adding a new entry */
- tp = kzalloc(sizeof(*tp), GFP_NOFS);
- if (!tp)
- return -ENOMEM;
-
- tp->tp_nid = nid;
- tp->tp_threshold = threshold;
-
- lnet_net_lock(0);
- list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers);
- lnet_net_unlock(0);
- return 0;
- }
-
- /* removing entries */
- INIT_LIST_HEAD(&cull);
-
- lnet_net_lock(0);
-
- list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
- tp = list_entry(el, struct lnet_test_peer, tp_list);
-
- if (!tp->tp_threshold || /* needs culling anyway */
- nid == LNET_NID_ANY || /* removing all entries */
- tp->tp_nid == nid) { /* matched this one */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- }
-
- lnet_net_unlock(0);
-
- list_for_each_entry_safe(tp, temp, &cull, tp_list) {
- list_del(&tp->tp_list);
- kfree(tp);
- }
- return 0;
-}
-
-static int
-fail_peer(lnet_nid_t nid, int outgoing)
-{
- struct lnet_test_peer *tp;
- struct lnet_test_peer *temp;
- struct list_head *el;
- struct list_head *next;
- struct list_head cull;
- int fail = 0;
-
- INIT_LIST_HEAD(&cull);
-
- /* NB: use lnet_net_lock(0) to serialize operations on test peers */
- lnet_net_lock(0);
-
- list_for_each_safe(el, next, &the_lnet.ln_test_peers) {
- tp = list_entry(el, struct lnet_test_peer, tp_list);
-
- if (!tp->tp_threshold) {
- /* zombie entry */
- if (outgoing) {
- /*
- * only cull zombies on outgoing tests,
- * since we may be at interrupt priority on
- * incoming messages.
- */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- continue;
- }
-
- if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */
- nid == tp->tp_nid) { /* fail this peer */
- fail = 1;
-
- if (tp->tp_threshold != LNET_MD_THRESH_INF) {
- tp->tp_threshold--;
- if (outgoing &&
- !tp->tp_threshold) {
- /* see above */
- list_del(&tp->tp_list);
- list_add(&tp->tp_list, &cull);
- }
- }
- break;
- }
- }
-
- lnet_net_unlock(0);
-
- list_for_each_entry_safe(tp, temp, &cull, tp_list) {
- list_del(&tp->tp_list);
-
- kfree(tp);
- }
-
- return fail;
-}
-
-unsigned int
-lnet_iov_nob(unsigned int niov, struct kvec *iov)
-{
- unsigned int nob = 0;
-
- LASSERT(!niov || iov);
- while (niov-- > 0)
- nob += (iov++)->iov_len;
-
- return nob;
-}
-EXPORT_SYMBOL(lnet_iov_nob);
-
-void
-lnet_copy_iov2iter(struct iov_iter *to,
- unsigned int nsiov, const struct kvec *siov,
- unsigned int soffset, unsigned int nob)
-{
- /* NB diov, siov are READ-ONLY */
- const char *s;
- size_t left;
-
- if (!nob)
- return;
-
- /* skip complete frags before 'soffset' */
- LASSERT(nsiov > 0);
- while (soffset >= siov->iov_len) {
- soffset -= siov->iov_len;
- siov++;
- nsiov--;
- LASSERT(nsiov > 0);
- }
-
- s = (char *)siov->iov_base + soffset;
- left = siov->iov_len - soffset;
- do {
- size_t n, copy = left;
-
- LASSERT(nsiov > 0);
-
- if (copy > nob)
- copy = nob;
- n = copy_to_iter(s, copy, to);
- if (n != copy)
- return;
- nob -= n;
-
- siov++;
- s = (char *)siov->iov_base;
- left = siov->iov_len;
- nsiov--;
- } while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_iov2iter);
-
-void
-lnet_copy_kiov2iter(struct iov_iter *to,
- unsigned int nsiov, const struct bio_vec *siov,
- unsigned int soffset, unsigned int nob)
-{
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(nsiov > 0);
- while (soffset >= siov->bv_len) {
- soffset -= siov->bv_len;
- siov++;
- nsiov--;
- LASSERT(nsiov > 0);
- }
-
- do {
- size_t copy = siov->bv_len - soffset, n;
-
- LASSERT(nsiov > 0);
-
- if (copy > nob)
- copy = nob;
- n = copy_page_to_iter(siov->bv_page,
- siov->bv_offset + soffset,
- copy, to);
- if (n != copy)
- return;
- nob -= n;
- siov++;
- nsiov--;
- soffset = 0;
- } while (nob > 0);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2iter);
-
-int
-lnet_extract_iov(int dst_niov, struct kvec *dst,
- int src_niov, const struct kvec *src,
- unsigned int offset, unsigned int len)
-{
- /*
- * Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src'
- */
- unsigned int frag_len;
- unsigned int niov;
-
- if (!len) /* no data => */
- return 0; /* no frags */
-
- LASSERT(src_niov > 0);
- while (offset >= src->iov_len) { /* skip initial frags */
- offset -= src->iov_len;
- src_niov--;
- src++;
- LASSERT(src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT(src_niov > 0);
- LASSERT((int)niov <= dst_niov);
-
- frag_len = src->iov_len - offset;
- dst->iov_base = ((char *)src->iov_base) + offset;
-
- if (len <= frag_len) {
- dst->iov_len = len;
- return niov;
- }
-
- dst->iov_len = frag_len;
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-EXPORT_SYMBOL(lnet_extract_iov);
-
-unsigned int
-lnet_kiov_nob(unsigned int niov, struct bio_vec *kiov)
-{
- unsigned int nob = 0;
-
- LASSERT(!niov || kiov);
- while (niov-- > 0)
- nob += (kiov++)->bv_len;
-
- return nob;
-}
-EXPORT_SYMBOL(lnet_kiov_nob);
-
-int
-lnet_extract_kiov(int dst_niov, struct bio_vec *dst,
- int src_niov, const struct bio_vec *src,
- unsigned int offset, unsigned int len)
-{
- /*
- * Initialise 'dst' to the subset of 'src' starting at 'offset',
- * for exactly 'len' bytes, and return the number of entries.
- * NB not destructive to 'src'
- */
- unsigned int frag_len;
- unsigned int niov;
-
- if (!len) /* no data => */
- return 0; /* no frags */
-
- LASSERT(src_niov > 0);
- while (offset >= src->bv_len) { /* skip initial frags */
- offset -= src->bv_len;
- src_niov--;
- src++;
- LASSERT(src_niov > 0);
- }
-
- niov = 1;
- for (;;) {
- LASSERT(src_niov > 0);
- LASSERT((int)niov <= dst_niov);
-
- frag_len = src->bv_len - offset;
- dst->bv_page = src->bv_page;
- dst->bv_offset = src->bv_offset + offset;
-
- if (len <= frag_len) {
- dst->bv_len = len;
- LASSERT(dst->bv_offset + dst->bv_len
- <= PAGE_SIZE);
- return niov;
- }
-
- dst->bv_len = frag_len;
- LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE);
-
- len -= frag_len;
- dst++;
- src++;
- niov++;
- src_niov--;
- offset = 0;
- }
-}
-EXPORT_SYMBOL(lnet_extract_kiov);
-
-void
-lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg,
- int delayed, unsigned int offset, unsigned int mlen,
- unsigned int rlen)
-{
- unsigned int niov = 0;
- struct kvec *iov = NULL;
- struct bio_vec *kiov = NULL;
- struct iov_iter to;
- int rc;
-
- LASSERT(!in_interrupt());
- LASSERT(!mlen || msg);
-
- if (msg) {
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_sending);
- LASSERT(rlen == msg->msg_len);
- LASSERT(mlen <= msg->msg_len);
- LASSERT(msg->msg_offset == offset);
- LASSERT(msg->msg_wanted == mlen);
-
- msg->msg_receiving = 0;
-
- if (mlen) {
- niov = msg->msg_niov;
- iov = msg->msg_iov;
- kiov = msg->msg_kiov;
-
- LASSERT(niov > 0);
- LASSERT(!iov != !kiov);
- }
- }
-
- if (iov) {
- iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset);
- iov_iter_advance(&to, offset);
- } else {
- iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset);
- iov_iter_advance(&to, offset);
- }
- rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen);
- if (rc < 0)
- lnet_finalize(ni, msg, rc);
-}
-
-static void
-lnet_setpayloadbuffer(struct lnet_msg *msg)
-{
- struct lnet_libmd *md = msg->msg_md;
-
- LASSERT(msg->msg_len > 0);
- LASSERT(!msg->msg_routing);
- LASSERT(md);
- LASSERT(!msg->msg_niov);
- LASSERT(!msg->msg_iov);
- LASSERT(!msg->msg_kiov);
-
- msg->msg_niov = md->md_niov;
- if (md->md_options & LNET_MD_KIOV)
- msg->msg_kiov = md->md_iov.kiov;
- else
- msg->msg_iov = md->md_iov.iov;
-}
-
-void
-lnet_prep_send(struct lnet_msg *msg, int type, struct lnet_process_id target,
- unsigned int offset, unsigned int len)
-{
- msg->msg_type = type;
- msg->msg_target = target;
- msg->msg_len = len;
- msg->msg_offset = offset;
-
- if (len)
- lnet_setpayloadbuffer(msg);
-
- memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr));
- msg->msg_hdr.type = cpu_to_le32(type);
- msg->msg_hdr.dest_nid = cpu_to_le64(target.nid);
- msg->msg_hdr.dest_pid = cpu_to_le32(target.pid);
- /* src_nid will be set later */
- msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid);
- msg->msg_hdr.payload_length = cpu_to_le32(len);
-}
-
-static void
-lnet_ni_send(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- void *priv = msg->msg_private;
- int rc;
-
- LASSERT(!in_interrupt());
- LASSERT(LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
- (msg->msg_txcredit && msg->msg_peertxcredit));
-
- rc = ni->ni_lnd->lnd_send(ni, priv, msg);
- if (rc < 0)
- lnet_finalize(ni, msg, rc);
-}
-
-static int
-lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc;
-
- LASSERT(!msg->msg_sending);
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_rx_ready_delay);
- LASSERT(ni->ni_lnd->lnd_eager_recv);
-
- msg->msg_rx_ready_delay = 1;
- rc = ni->ni_lnd->lnd_eager_recv(ni, msg->msg_private, msg,
- &msg->msg_private);
- if (rc) {
- CERROR("recv from %s / send to %s aborted: eager_recv failed %d\n",
- libcfs_nid2str(msg->msg_rxpeer->lp_nid),
- libcfs_id2str(msg->msg_target), rc);
- LASSERT(rc < 0); /* required by my callers */
- }
-
- return rc;
-}
-
-/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */
-static void
-lnet_ni_query_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
- unsigned long last_alive = 0;
-
- LASSERT(lnet_peer_aliveness_enabled(lp));
- LASSERT(ni->ni_lnd->lnd_query);
-
- lnet_net_unlock(lp->lp_cpt);
- ni->ni_lnd->lnd_query(ni, lp->lp_nid, &last_alive);
- lnet_net_lock(lp->lp_cpt);
-
- lp->lp_last_query = jiffies;
-
- if (last_alive) /* NI has updated timestamp */
- lp->lp_last_alive = last_alive;
-}
-
-/* NB: always called with lnet_net_lock held */
-static inline int
-lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now)
-{
- int alive;
- unsigned long deadline;
-
- LASSERT(lnet_peer_aliveness_enabled(lp));
-
- /* Trust lnet_notify() if it has more recent aliveness news, but
- * ignore the initial assumed death (see lnet_peers_start_down()).
- */
- if (!lp->lp_alive && lp->lp_alive_count > 0 &&
- time_after_eq(lp->lp_timestamp, lp->lp_last_alive))
- return 0;
-
- deadline = lp->lp_last_alive + lp->lp_ni->ni_peertimeout * HZ;
- alive = time_after(deadline, now);
-
- /* Update obsolete lp_alive except for routers assumed to be dead
- * initially, because router checker would update aliveness in this
- * case, and moreover lp_last_alive at peer creation is assumed.
- */
- if (alive && !lp->lp_alive &&
- !(lnet_isrouter(lp) && !lp->lp_alive_count))
- lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
-
- return alive;
-}
-
-/*
- * NB: returns 1 when alive, 0 when dead, negative when error;
- * may drop the lnet_net_lock
- */
-static int
-lnet_peer_alive_locked(struct lnet_peer *lp)
-{
- unsigned long now = jiffies;
-
- if (!lnet_peer_aliveness_enabled(lp))
- return -ENODEV;
-
- if (lnet_peer_is_alive(lp, now))
- return 1;
-
- /*
- * Peer appears dead, but we should avoid frequent NI queries (at
- * most once per lnet_queryinterval seconds).
- */
- if (lp->lp_last_query) {
- static const int lnet_queryinterval = 1;
-
- unsigned long next_query =
- lp->lp_last_query + lnet_queryinterval * HZ;
-
- if (time_before(now, next_query)) {
- if (lp->lp_alive)
- CWARN("Unexpected aliveness of peer %s: %d < %d (%d/%d)\n",
- libcfs_nid2str(lp->lp_nid),
- (int)now, (int)next_query,
- lnet_queryinterval,
- lp->lp_ni->ni_peertimeout);
- return 0;
- }
- }
-
- /* query NI for latest aliveness news */
- lnet_ni_query_locked(lp->lp_ni, lp);
-
- if (lnet_peer_is_alive(lp, now))
- return 1;
-
- lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
- return 0;
-}
-
-/**
- * \param msg The message to be sent.
- * \param do_send True if lnet_ni_send() should be called in this function.
- * lnet_send() is going to lnet_net_unlock immediately after this, so
- * it sets do_send FALSE and I don't do the unlock/send/lock bit.
- *
- * \retval LNET_CREDIT_OK If \a msg sent or OK to send.
- * \retval LNET_CREDIT_WAIT If \a msg blocked for credit.
- * \retval -EHOSTUNREACH If the next hop of the message appears dead.
- * \retval -ECANCELED If the MD of the message has been unlinked.
- */
-static int
-lnet_post_send_locked(struct lnet_msg *msg, int do_send)
-{
- struct lnet_peer *lp = msg->msg_txpeer;
- struct lnet_ni *ni = lp->lp_ni;
- int cpt = msg->msg_tx_cpt;
- struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
-
- /* non-lnet_send() callers have checked before */
- LASSERT(!do_send || msg->msg_tx_delayed);
- LASSERT(!msg->msg_receiving);
- LASSERT(msg->msg_tx_committed);
-
- /* NB 'lp' is always the next hop */
- if (!(msg->msg_target.pid & LNET_PID_USERFLAG) &&
- !lnet_peer_alive_locked(lp)) {
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
- lnet_net_unlock(cpt);
-
- CNETERR("Dropping message for %s: peer not alive\n",
- libcfs_id2str(msg->msg_target));
- if (do_send)
- lnet_finalize(ni, msg, -EHOSTUNREACH);
-
- lnet_net_lock(cpt);
- return -EHOSTUNREACH;
- }
-
- if (msg->msg_md &&
- (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED)) {
- lnet_net_unlock(cpt);
-
- CNETERR("Aborting message for %s: LNetM[DE]Unlink() already called on the MD/ME.\n",
- libcfs_id2str(msg->msg_target));
- if (do_send)
- lnet_finalize(ni, msg, -ECANCELED);
-
- lnet_net_lock(cpt);
- return -ECANCELED;
- }
-
- if (!msg->msg_peertxcredit) {
- LASSERT((lp->lp_txcredits < 0) ==
- !list_empty(&lp->lp_txq));
-
- msg->msg_peertxcredit = 1;
- lp->lp_txqnob += msg->msg_len + sizeof(struct lnet_hdr);
- lp->lp_txcredits--;
-
- if (lp->lp_txcredits < lp->lp_mintxcredits)
- lp->lp_mintxcredits = lp->lp_txcredits;
-
- if (lp->lp_txcredits < 0) {
- msg->msg_tx_delayed = 1;
- list_add_tail(&msg->msg_list, &lp->lp_txq);
- return LNET_CREDIT_WAIT;
- }
- }
-
- if (!msg->msg_txcredit) {
- LASSERT((tq->tq_credits < 0) ==
- !list_empty(&tq->tq_delayed));
-
- msg->msg_txcredit = 1;
- tq->tq_credits--;
-
- if (tq->tq_credits < tq->tq_credits_min)
- tq->tq_credits_min = tq->tq_credits;
-
- if (tq->tq_credits < 0) {
- msg->msg_tx_delayed = 1;
- list_add_tail(&msg->msg_list, &tq->tq_delayed);
- return LNET_CREDIT_WAIT;
- }
- }
-
- if (do_send) {
- lnet_net_unlock(cpt);
- lnet_ni_send(ni, msg);
- lnet_net_lock(cpt);
- }
- return LNET_CREDIT_OK;
-}
-
-static struct lnet_rtrbufpool *
-lnet_msg2bufpool(struct lnet_msg *msg)
-{
- struct lnet_rtrbufpool *rbp;
- int cpt;
-
- LASSERT(msg->msg_rx_committed);
-
- cpt = msg->msg_rx_cpt;
- rbp = &the_lnet.ln_rtrpools[cpt][0];
-
- LASSERT(msg->msg_len <= LNET_MTU);
- while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) {
- rbp++;
- LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]);
- }
-
- return rbp;
-}
-
-static int
-lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
-{
- /*
- * lnet_parse is going to lnet_net_unlock immediately after this, so it
- * sets do_recv FALSE and I don't do the unlock/send/lock bit.
- * I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if
- * received or OK to receive
- */
- struct lnet_peer *lp = msg->msg_rxpeer;
- struct lnet_rtrbufpool *rbp;
- struct lnet_rtrbuf *rb;
-
- LASSERT(!msg->msg_iov);
- LASSERT(!msg->msg_kiov);
- LASSERT(!msg->msg_niov);
- LASSERT(msg->msg_routing);
- LASSERT(msg->msg_receiving);
- LASSERT(!msg->msg_sending);
-
- /* non-lnet_parse callers only receive delayed messages */
- LASSERT(!do_recv || msg->msg_rx_delayed);
-
- if (!msg->msg_peerrtrcredit) {
- LASSERT((lp->lp_rtrcredits < 0) ==
- !list_empty(&lp->lp_rtrq));
-
- msg->msg_peerrtrcredit = 1;
- lp->lp_rtrcredits--;
- if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
- lp->lp_minrtrcredits = lp->lp_rtrcredits;
-
- if (lp->lp_rtrcredits < 0) {
- /* must have checked eager_recv before here */
- LASSERT(msg->msg_rx_ready_delay);
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &lp->lp_rtrq);
- return LNET_CREDIT_WAIT;
- }
- }
-
- rbp = lnet_msg2bufpool(msg);
-
- if (!msg->msg_rtrcredit) {
- msg->msg_rtrcredit = 1;
- rbp->rbp_credits--;
- if (rbp->rbp_credits < rbp->rbp_mincredits)
- rbp->rbp_mincredits = rbp->rbp_credits;
-
- if (rbp->rbp_credits < 0) {
- /* must have checked eager_recv before here */
- LASSERT(msg->msg_rx_ready_delay);
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
- return LNET_CREDIT_WAIT;
- }
- }
-
- LASSERT(!list_empty(&rbp->rbp_bufs));
- rb = list_entry(rbp->rbp_bufs.next, struct lnet_rtrbuf, rb_list);
- list_del(&rb->rb_list);
-
- msg->msg_niov = rbp->rbp_npages;
- msg->msg_kiov = &rb->rb_kiov[0];
-
- if (do_recv) {
- int cpt = msg->msg_rx_cpt;
-
- lnet_net_unlock(cpt);
- lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
- 0, msg->msg_len, msg->msg_len);
- lnet_net_lock(cpt);
- }
- return LNET_CREDIT_OK;
-}
-
-void
-lnet_return_tx_credits_locked(struct lnet_msg *msg)
-{
- struct lnet_peer *txpeer = msg->msg_txpeer;
- struct lnet_msg *msg2;
-
- if (msg->msg_txcredit) {
- struct lnet_ni *ni = txpeer->lp_ni;
- struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
-
- /* give back NI txcredits */
- msg->msg_txcredit = 0;
-
- LASSERT((tq->tq_credits < 0) ==
- !list_empty(&tq->tq_delayed));
-
- tq->tq_credits++;
- if (tq->tq_credits <= 0) {
- msg2 = list_entry(tq->tq_delayed.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- LASSERT(msg2->msg_txpeer->lp_ni == ni);
- LASSERT(msg2->msg_tx_delayed);
-
- (void)lnet_post_send_locked(msg2, 1);
- }
- }
-
- if (msg->msg_peertxcredit) {
- /* give back peer txcredits */
- msg->msg_peertxcredit = 0;
-
- LASSERT((txpeer->lp_txcredits < 0) ==
- !list_empty(&txpeer->lp_txq));
-
- txpeer->lp_txqnob -= msg->msg_len + sizeof(struct lnet_hdr);
- LASSERT(txpeer->lp_txqnob >= 0);
-
- txpeer->lp_txcredits++;
- if (txpeer->lp_txcredits <= 0) {
- msg2 = list_entry(txpeer->lp_txq.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- LASSERT(msg2->msg_txpeer == txpeer);
- LASSERT(msg2->msg_tx_delayed);
-
- (void)lnet_post_send_locked(msg2, 1);
- }
- }
-
- if (txpeer) {
- msg->msg_txpeer = NULL;
- lnet_peer_decref_locked(txpeer);
- }
-}
-
-void
-lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp)
-{
- struct lnet_msg *msg;
-
- if (list_empty(&rbp->rbp_msgs))
- return;
- msg = list_entry(rbp->rbp_msgs.next,
- struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- (void)lnet_post_routed_recv_locked(msg, 1);
-}
-
-void
-lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
-{
- struct list_head drop;
- struct lnet_msg *msg;
- struct lnet_msg *tmp;
-
- INIT_LIST_HEAD(&drop);
-
- list_splice_init(list, &drop);
-
- lnet_net_unlock(cpt);
-
- list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
- lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
- 0, 0, 0, msg->msg_hdr.payload_length);
- list_del_init(&msg->msg_list);
- lnet_finalize(NULL, msg, -ECANCELED);
- }
-
- lnet_net_lock(cpt);
-}
-
-void
-lnet_return_rx_credits_locked(struct lnet_msg *msg)
-{
- struct lnet_peer *rxpeer = msg->msg_rxpeer;
- struct lnet_msg *msg2;
-
- if (msg->msg_rtrcredit) {
- /* give back global router credits */
- struct lnet_rtrbuf *rb;
- struct lnet_rtrbufpool *rbp;
-
- /*
- * NB If a msg ever blocks for a buffer in rbp_msgs, it stays
- * there until it gets one allocated, or aborts the wait
- * itself
- */
- LASSERT(msg->msg_kiov);
-
- rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]);
- rbp = rb->rb_pool;
-
- msg->msg_kiov = NULL;
- msg->msg_rtrcredit = 0;
-
- LASSERT(rbp == lnet_msg2bufpool(msg));
-
- LASSERT((rbp->rbp_credits > 0) ==
- !list_empty(&rbp->rbp_bufs));
-
- /*
- * If routing is now turned off, we just drop this buffer and
- * don't bother trying to return credits.
- */
- if (!the_lnet.ln_routing) {
- lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
- goto routing_off;
- }
-
- /*
- * It is possible that a user has lowered the desired number of
- * buffers in this pool. Make sure we never put back
- * more buffers than the stated number.
- */
- if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) {
- /* Discard this buffer so we don't have too many. */
- lnet_destroy_rtrbuf(rb, rbp->rbp_npages);
- rbp->rbp_nbuffers--;
- } else {
- list_add(&rb->rb_list, &rbp->rbp_bufs);
- rbp->rbp_credits++;
- if (rbp->rbp_credits <= 0)
- lnet_schedule_blocked_locked(rbp);
- }
- }
-
-routing_off:
- if (msg->msg_peerrtrcredit) {
- /* give back peer router credits */
- msg->msg_peerrtrcredit = 0;
-
- LASSERT((rxpeer->lp_rtrcredits < 0) ==
- !list_empty(&rxpeer->lp_rtrq));
-
- rxpeer->lp_rtrcredits++;
- /*
- * drop all messages which are queued to be routed on that
- * peer.
- */
- if (!the_lnet.ln_routing) {
- lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq,
- msg->msg_rx_cpt);
- } else if (rxpeer->lp_rtrcredits <= 0) {
- msg2 = list_entry(rxpeer->lp_rtrq.next,
- struct lnet_msg, msg_list);
- list_del(&msg2->msg_list);
-
- (void)lnet_post_routed_recv_locked(msg2, 1);
- }
- }
- if (rxpeer) {
- msg->msg_rxpeer = NULL;
- lnet_peer_decref_locked(rxpeer);
- }
-}
-
-static int
-lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
-{
- struct lnet_peer *p1 = r1->lr_gateway;
- struct lnet_peer *p2 = r2->lr_gateway;
- int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops;
- int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r2->lr_hops;
-
- if (r1->lr_priority < r2->lr_priority)
- return 1;
-
- if (r1->lr_priority > r2->lr_priority)
- return -ERANGE;
-
- if (r1_hops < r2_hops)
- return 1;
-
- if (r1_hops > r2_hops)
- return -ERANGE;
-
- if (p1->lp_txqnob < p2->lp_txqnob)
- return 1;
-
- if (p1->lp_txqnob > p2->lp_txqnob)
- return -ERANGE;
-
- if (p1->lp_txcredits > p2->lp_txcredits)
- return 1;
-
- if (p1->lp_txcredits < p2->lp_txcredits)
- return -ERANGE;
-
- if (r1->lr_seq - r2->lr_seq <= 0)
- return 1;
-
- return -ERANGE;
-}
-
-static struct lnet_peer *
-lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target,
- lnet_nid_t rtr_nid)
-{
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct lnet_route *best_route;
- struct lnet_route *last_route;
- struct lnet_peer *lp_best;
- struct lnet_peer *lp;
- int rc;
-
- /*
- * If @rtr_nid is not LNET_NID_ANY, return the gateway with
- * rtr_nid nid, otherwise find the best gateway I can use
- */
- rnet = lnet_find_net_locked(LNET_NIDNET(target));
- if (!rnet)
- return NULL;
-
- lp_best = NULL;
- best_route = NULL;
- last_route = NULL;
- list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
- lp = route->lr_gateway;
-
- if (!lnet_is_route_alive(route))
- continue;
-
- if (ni && lp->lp_ni != ni)
- continue;
-
- if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
- return lp;
-
- if (!lp_best) {
- best_route = route;
- last_route = route;
- lp_best = lp;
- continue;
- }
-
- /* no protection on below fields, but it's harmless */
- if (last_route->lr_seq - route->lr_seq < 0)
- last_route = route;
-
- rc = lnet_compare_routes(route, best_route);
- if (rc < 0)
- continue;
-
- best_route = route;
- lp_best = lp;
- }
-
- /*
- * set sequence number on the best router to the latest sequence + 1
- * so we can round-robin all routers, it's race and inaccurate but
- * harmless and functional
- */
- if (best_route)
- best_route->lr_seq = last_route->lr_seq + 1;
- return lp_best;
-}
-
-int
-lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)
-{
- lnet_nid_t dst_nid = msg->msg_target.nid;
- struct lnet_ni *src_ni;
- struct lnet_ni *local_ni;
- struct lnet_peer *lp;
- int cpt;
- int cpt2;
- int rc;
-
- /*
- * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
- * but we might want to use pre-determined router for ACK/REPLY
- * in the future
- */
- /* NB: ni == interface pre-determined (ACK/REPLY) */
- LASSERT(!msg->msg_txpeer);
- LASSERT(!msg->msg_sending);
- LASSERT(!msg->msg_target_is_router);
- LASSERT(!msg->msg_receiving);
-
- msg->msg_sending = 1;
-
- LASSERT(!msg->msg_tx_committed);
- cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
- again:
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- lnet_net_unlock(cpt);
- return -ESHUTDOWN;
- }
-
- if (src_nid == LNET_NID_ANY) {
- src_ni = NULL;
- } else {
- src_ni = lnet_nid2ni_locked(src_nid, cpt);
- if (!src_ni) {
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("Can't send to %s: src %s is not a local nid\n",
- libcfs_nid2str(dst_nid),
- libcfs_nid2str(src_nid));
- return -EINVAL;
- }
- LASSERT(!msg->msg_routing);
- }
-
- /* Is this for someone on a local network? */
- local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt);
-
- if (local_ni) {
- if (!src_ni) {
- src_ni = local_ni;
- src_nid = src_ni->ni_nid;
- } else if (src_ni == local_ni) {
- lnet_ni_decref_locked(local_ni, cpt);
- } else {
- lnet_ni_decref_locked(local_ni, cpt);
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("No route to %s via from %s\n",
- libcfs_nid2str(dst_nid),
- libcfs_nid2str(src_nid));
- return -EINVAL;
- }
-
- LASSERT(src_nid != LNET_NID_ANY);
- lnet_msg_commit(msg, cpt);
-
- if (!msg->msg_routing)
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
-
- if (src_ni == the_lnet.ln_loni) {
- /* No send credit hassles with LOLND */
- lnet_net_unlock(cpt);
- lnet_ni_send(src_ni, msg);
-
- lnet_net_lock(cpt);
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
- return 0;
- }
-
- rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
- /* lp has ref on src_ni; lose mine */
- lnet_ni_decref_locked(src_ni, cpt);
- if (rc) {
- lnet_net_unlock(cpt);
- LCONSOLE_WARN("Error %d finding peer %s\n", rc,
- libcfs_nid2str(dst_nid));
- /* ENOMEM or shutting down */
- return rc;
- }
- LASSERT(lp->lp_ni == src_ni);
- } else {
- /* sending to a remote network */
- lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
- if (!lp) {
- if (src_ni)
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
-
- LCONSOLE_WARN("No route to %s via %s (all routers down)\n",
- libcfs_id2str(msg->msg_target),
- libcfs_nid2str(src_nid));
- return -EHOSTUNREACH;
- }
-
- /*
- * rtr_nid is LNET_NID_ANY or NID of pre-determined router,
- * it's possible that rtr_nid isn't LNET_NID_ANY and lp isn't
- * pre-determined router, this can happen if router table
- * was changed when we release the lock
- */
- if (rtr_nid != lp->lp_nid) {
- cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
- if (cpt2 != cpt) {
- if (src_ni)
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
-
- rtr_nid = lp->lp_nid;
- cpt = cpt2;
- goto again;
- }
- }
-
- CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
- libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid),
- lnet_msgtyp2str(msg->msg_type), msg->msg_len);
-
- if (!src_ni) {
- src_ni = lp->lp_ni;
- src_nid = src_ni->ni_nid;
- } else {
- LASSERT(src_ni == lp->lp_ni);
- lnet_ni_decref_locked(src_ni, cpt);
- }
-
- lnet_peer_addref_locked(lp);
-
- LASSERT(src_nid != LNET_NID_ANY);
- lnet_msg_commit(msg, cpt);
-
- if (!msg->msg_routing) {
- /* I'm the source and now I know which NI to send on */
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
- }
-
- msg->msg_target_is_router = 1;
- msg->msg_target.nid = lp->lp_nid;
- msg->msg_target.pid = LNET_PID_LUSTRE;
- }
-
- /* 'lp' is our best choice of peer */
-
- LASSERT(!msg->msg_peertxcredit);
- LASSERT(!msg->msg_txcredit);
- LASSERT(!msg->msg_txpeer);
-
- msg->msg_txpeer = lp; /* msg takes my ref on lp */
-
- rc = lnet_post_send_locked(msg, 0);
- lnet_net_unlock(cpt);
-
- if (rc < 0)
- return rc;
-
- if (rc == LNET_CREDIT_OK)
- lnet_ni_send(src_ni, msg);
-
- return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */
-}
-
-void
-lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob)
-{
- lnet_net_lock(cpt);
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += nob;
- lnet_net_unlock(cpt);
-
- lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
-}
-
-static void
-lnet_recv_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
-
- if (msg->msg_wanted)
- lnet_setpayloadbuffer(msg);
-
- lnet_build_msg_event(msg, LNET_EVENT_PUT);
-
- /*
- * Must I ACK? If so I'll grab the ack_wmd out of the header and put
- * it back into the ACK during lnet_finalize()
- */
- msg->msg_ack = !lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
- !(msg->msg_md->md_options & LNET_MD_ACK_DISABLE);
-
- lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed,
- msg->msg_offset, msg->msg_wanted, hdr->payload_length);
-}
-
-static int
-lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_match_info info;
- bool ready_delay;
- int rc;
-
- /* Convert put fields to host byte order */
- le64_to_cpus(&hdr->msg.put.match_bits);
- le32_to_cpus(&hdr->msg.put.ptl_index);
- le32_to_cpus(&hdr->msg.put.offset);
-
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_PUT;
- info.mi_portal = hdr->msg.put.ptl_index;
- info.mi_rlength = hdr->payload_length;
- info.mi_roffset = hdr->msg.put.offset;
- info.mi_mbits = hdr->msg.put.match_bits;
-
- msg->msg_rx_ready_delay = !ni->ni_lnd->lnd_eager_recv;
- ready_delay = msg->msg_rx_ready_delay;
-
- again:
- rc = lnet_ptl_match_md(&info, msg);
- switch (rc) {
- default:
- LBUG();
-
- case LNET_MATCHMD_OK:
- lnet_recv_put(ni, msg);
- return 0;
-
- case LNET_MATCHMD_NONE:
- /**
- * no eager_recv or has already called it, should
- * have been attached on delayed list
- */
- if (ready_delay)
- return 0;
-
- rc = lnet_ni_eager_recv(ni, msg);
- if (!rc) {
- ready_delay = true;
- goto again;
- }
- /* fall through */
-
- case LNET_MATCHMD_DROP:
- CNETERR("Dropping PUT from %s portal %d match %llu offset %d length %d: %d\n",
- libcfs_id2str(info.mi_id), info.mi_portal,
- info.mi_mbits, info.mi_roffset, info.mi_rlength, rc);
-
- return -ENOENT; /* -ve: OK but no match */
- }
-}
-
-static int
-lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get)
-{
- struct lnet_match_info info;
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_handle_wire reply_wmd;
- int rc;
-
- /* Convert get fields to host byte order */
- le64_to_cpus(&hdr->msg.get.match_bits);
- le32_to_cpus(&hdr->msg.get.ptl_index);
- le32_to_cpus(&hdr->msg.get.sink_length);
- le32_to_cpus(&hdr->msg.get.src_offset);
-
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_GET;
- info.mi_portal = hdr->msg.get.ptl_index;
- info.mi_rlength = hdr->msg.get.sink_length;
- info.mi_roffset = hdr->msg.get.src_offset;
- info.mi_mbits = hdr->msg.get.match_bits;
-
- rc = lnet_ptl_match_md(&info, msg);
- if (rc == LNET_MATCHMD_DROP) {
- CNETERR("Dropping GET from %s portal %d match %llu offset %d length %d\n",
- libcfs_id2str(info.mi_id), info.mi_portal,
- info.mi_mbits, info.mi_roffset, info.mi_rlength);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- LASSERT(rc == LNET_MATCHMD_OK);
-
- lnet_build_msg_event(msg, LNET_EVENT_GET);
-
- reply_wmd = hdr->msg.get.return_wmd;
-
- lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id,
- msg->msg_offset, msg->msg_wanted);
-
- msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
-
- if (rdma_get) {
- /* The LND completes the REPLY from her recv procedure */
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- msg->msg_offset, msg->msg_len, msg->msg_len);
- return 0;
- }
-
- lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0);
- msg->msg_receiving = 0;
-
- rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
- if (rc < 0) {
- /* didn't get as far as lnet_ni_send() */
- CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
- libcfs_nid2str(ni->ni_nid),
- libcfs_id2str(info.mi_id), rc);
-
- lnet_finalize(ni, msg, rc);
- }
-
- return 0;
-}
-
-static int
-lnet_parse_reply(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- void *private = msg->msg_private;
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_process_id src = {0};
- struct lnet_libmd *md;
- int rlength;
- int mlength;
- int cpt;
-
- cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie);
- lnet_res_lock(cpt);
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- /* NB handles only looked up by creator (no flips) */
- md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
- if (!md || !md->md_threshold || md->md_me) {
- CNETERR("%s: Dropping REPLY from %s for %s MD %#llx.%#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- !md ? "invalid" : "inactive",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie);
- if (md && md->md_me)
- CERROR("REPLY MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- LASSERT(!md->md_offset);
-
- rlength = hdr->payload_length;
- mlength = min_t(uint, rlength, md->md_length);
-
- if (mlength < rlength &&
- !(md->md_options & LNET_MD_TRUNCATE)) {
- CNETERR("%s: Dropping REPLY from %s length %d for MD %#llx would overflow (%d)\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- rlength, hdr->msg.reply.dst_wmd.wh_object_cookie,
- mlength);
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve: OK but no match */
- }
-
- CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md %#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie);
-
- lnet_msg_attach_md(msg, md, 0, mlength);
-
- if (mlength)
- lnet_setpayloadbuffer(msg);
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
- lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength);
- return 0;
-}
-
-static int
-lnet_parse_ack(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_process_id src = {0};
- struct lnet_libmd *md;
- int cpt;
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- /* Convert ack fields to host byte order */
- le64_to_cpus(&hdr->msg.ack.match_bits);
- le32_to_cpus(&hdr->msg.ack.mlength);
-
- cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie);
- lnet_res_lock(cpt);
-
- /* NB handles only looked up by creator (no flips) */
- md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
- if (!md || !md->md_threshold || md->md_me) {
- /* Don't moan; this is expected */
- CDEBUG(D_NET,
- "%s: Dropping ACK from %s to %s MD %#llx.%#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- !md ? "invalid" : "inactive",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie);
- if (md && md->md_me)
- CERROR("Source MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
- return -ENOENT; /* -ve! */
- }
-
- CDEBUG(D_NET, "%s: ACK from %s into md %#llx\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
- hdr->msg.ack.dst_wmd.wh_object_cookie);
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_ACK);
-
- lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len);
- return 0;
-}
-
-/**
- * \retval LNET_CREDIT_OK If \a msg is forwarded
- * \retval LNET_CREDIT_WAIT If \a msg is blocked because w/o buffer
- * \retval -ve error code
- */
-int
-lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc = 0;
-
- if (!the_lnet.ln_routing)
- return -ECANCELED;
-
- if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
- lnet_msg2bufpool(msg)->rbp_credits <= 0) {
- if (!ni->ni_lnd->lnd_eager_recv) {
- msg->msg_rx_ready_delay = 1;
- } else {
- lnet_net_unlock(msg->msg_rx_cpt);
- rc = lnet_ni_eager_recv(ni, msg);
- lnet_net_lock(msg->msg_rx_cpt);
- }
- }
-
- if (!rc)
- rc = lnet_post_routed_recv_locked(msg, 0);
- return rc;
-}
-
-int
-lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg)
-{
- int rc;
-
- switch (msg->msg_type) {
- case LNET_MSG_ACK:
- rc = lnet_parse_ack(ni, msg);
- break;
- case LNET_MSG_PUT:
- rc = lnet_parse_put(ni, msg);
- break;
- case LNET_MSG_GET:
- rc = lnet_parse_get(ni, msg, msg->msg_rdma_get);
- break;
- case LNET_MSG_REPLY:
- rc = lnet_parse_reply(ni, msg);
- break;
- default: /* prevent an unused label if !kernel */
- LASSERT(0);
- return -EPROTO;
- }
-
- LASSERT(!rc || rc == -ENOENT);
- return rc;
-}
-
-char *
-lnet_msgtyp2str(int type)
-{
- switch (type) {
- case LNET_MSG_ACK:
- return "ACK";
- case LNET_MSG_PUT:
- return "PUT";
- case LNET_MSG_GET:
- return "GET";
- case LNET_MSG_REPLY:
- return "REPLY";
- case LNET_MSG_HELLO:
- return "HELLO";
- default:
- return "<UNKNOWN>";
- }
-}
-
-void
-lnet_print_hdr(struct lnet_hdr *hdr)
-{
- struct lnet_process_id src = {0};
- struct lnet_process_id dst = {0};
- char *type_str = lnet_msgtyp2str(hdr->type);
-
- src.nid = hdr->src_nid;
- src.pid = hdr->src_pid;
-
- dst.nid = hdr->dest_nid;
- dst.pid = hdr->dest_pid;
-
- CWARN("P3 Header at %p of type %s\n", hdr, type_str);
- CWARN(" From %s\n", libcfs_id2str(src));
- CWARN(" To %s\n", libcfs_id2str(dst));
-
- switch (hdr->type) {
- default:
- break;
-
- case LNET_MSG_PUT:
- CWARN(" Ptl index %d, ack md %#llx.%#llx, match bits %llu\n",
- hdr->msg.put.ptl_index,
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- hdr->msg.put.match_bits);
- CWARN(" Length %d, offset %d, hdr data %#llx\n",
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.hdr_data);
- break;
-
- case LNET_MSG_GET:
- CWARN(" Ptl index %d, return md %#llx.%#llx, match bits %llu\n",
- hdr->msg.get.ptl_index,
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- CWARN(" Length %d, src offset %d\n",
- hdr->msg.get.sink_length,
- hdr->msg.get.src_offset);
- break;
-
- case LNET_MSG_ACK:
- CWARN(" dst md %#llx.%#llx, manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- hdr->msg.ack.mlength);
- break;
-
- case LNET_MSG_REPLY:
- CWARN(" dst md %#llx.%#llx, length %d\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie,
- hdr->payload_length);
- }
-}
-
-int
-lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,
- void *private, int rdma_req)
-{
- int rc = 0;
- int cpt;
- int for_me;
- struct lnet_msg *msg;
- lnet_pid_t dest_pid;
- lnet_nid_t dest_nid;
- lnet_nid_t src_nid;
- __u32 payload_length;
- __u32 type;
-
- LASSERT(!in_interrupt());
-
- type = le32_to_cpu(hdr->type);
- src_nid = le64_to_cpu(hdr->src_nid);
- dest_nid = le64_to_cpu(hdr->dest_nid);
- dest_pid = le32_to_cpu(hdr->dest_pid);
- payload_length = le32_to_cpu(hdr->payload_length);
-
- for_me = (ni->ni_nid == dest_nid);
- cpt = lnet_cpt_of_nid(from_nid);
-
- switch (type) {
- case LNET_MSG_ACK:
- case LNET_MSG_GET:
- if (payload_length > 0) {
- CERROR("%s, src %s: bad %s payload %d (0 expected)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), payload_length);
- return -EPROTO;
- }
- break;
-
- case LNET_MSG_PUT:
- case LNET_MSG_REPLY:
- if (payload_length >
- (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
- CERROR("%s, src %s: bad %s payload %d (%d max expected)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type),
- payload_length,
- for_me ? LNET_MAX_PAYLOAD : LNET_MTU);
- return -EPROTO;
- }
- break;
-
- default:
- CERROR("%s, src %s: Bad message type 0x%x\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid), type);
- return -EPROTO;
- }
-
- if (the_lnet.ln_routing &&
- ni->ni_last_alive != ktime_get_real_seconds()) {
- /* NB: so far here is the only place to set NI status to "up */
- lnet_ni_lock(ni);
- ni->ni_last_alive = ktime_get_real_seconds();
- if (ni->ni_status &&
- ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
- ni->ni_status->ns_status = LNET_NI_STATUS_UP;
- lnet_ni_unlock(ni);
- }
-
- /*
- * Regard a bad destination NID as a protocol error. Senders should
- * know what they're doing; if they don't they're misconfigured, buggy
- * or malicious so we chop them off at the knees :)
- */
- if (!for_me) {
- if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) {
- /* should have gone direct */
- CERROR("%s, src %s: Bad dest nid %s (should have been sent direct)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (lnet_islocalnid(dest_nid)) {
- /*
- * dest is another local NI; sender should have used
- * this node's NID on its own network
- */
- CERROR("%s, src %s: Bad dest nid %s (it's my nid but on a different network)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (rdma_req && type == LNET_MSG_GET) {
- CERROR("%s, src %s: Bad optimized GET for %s (final destination must be me)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- return -EPROTO;
- }
-
- if (!the_lnet.ln_routing) {
- CERROR("%s, src %s: Dropping message for %s (routing not enabled)\n",
- libcfs_nid2str(from_nid),
- libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid));
- goto drop;
- }
- }
-
- /*
- * Message looks OK; we're not going to return an error, so we MUST
- * call back lnd_recv() come what may...
- */
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(src_nid, 0)) { /* shall we now? */
- CERROR("%s, src %s: Dropping %s to simulate failure\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type));
- goto drop;
- }
-
- if (!list_empty(&the_lnet.ln_drop_rules) &&
- lnet_drop_rule_match(hdr)) {
- CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate silent message loss\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- libcfs_nid2str(dest_nid), lnet_msgtyp2str(type));
- goto drop;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("%s, src %s: Dropping %s (out of memory)\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type));
- goto drop;
- }
-
- /* msg zeroed by kzalloc()
- * i.e. flags all clear, pointers NULL etc
- */
- msg->msg_type = type;
- msg->msg_private = private;
- msg->msg_receiving = 1;
- msg->msg_rdma_get = rdma_req;
- msg->msg_wanted = payload_length;
- msg->msg_len = payload_length;
- msg->msg_offset = 0;
- msg->msg_hdr = *hdr;
- /* for building message event */
- msg->msg_from = from_nid;
- if (!for_me) {
- msg->msg_target.pid = dest_pid;
- msg->msg_target.nid = dest_nid;
- msg->msg_routing = 1;
-
- } else {
- /* convert common msg->hdr fields to host byteorder */
- msg->msg_hdr.type = type;
- msg->msg_hdr.src_nid = src_nid;
- le32_to_cpus(&msg->msg_hdr.src_pid);
- msg->msg_hdr.dest_nid = dest_nid;
- msg->msg_hdr.dest_pid = dest_pid;
- msg->msg_hdr.payload_length = payload_length;
- }
-
- lnet_net_lock(cpt);
- rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt);
- if (rc) {
- lnet_net_unlock(cpt);
- CERROR("%s, src %s: Dropping %s (error %d looking up sender)\n",
- libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
- lnet_msgtyp2str(type), rc);
- kfree(msg);
- if (rc == -ESHUTDOWN)
- /* We are shutting down. Don't do anything more */
- return 0;
- goto drop;
- }
-
- if (lnet_isrouter(msg->msg_rxpeer)) {
- lnet_peer_set_alive(msg->msg_rxpeer);
- if (avoid_asym_router_failure &&
- LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
- /* received a remote message from router, update
- * remote NI status on this router.
- * NB: multi-hop routed message will be ignored.
- */
- lnet_router_ni_update_locked(msg->msg_rxpeer,
- LNET_NIDNET(src_nid));
- }
- }
-
- lnet_msg_commit(msg, cpt);
-
- /* message delay simulation */
- if (unlikely(!list_empty(&the_lnet.ln_delay_rules) &&
- lnet_delay_rule_match_locked(hdr, msg))) {
- lnet_net_unlock(cpt);
- return 0;
- }
-
- if (!for_me) {
- rc = lnet_parse_forward_locked(ni, msg);
- lnet_net_unlock(cpt);
-
- if (rc < 0)
- goto free_drop;
-
- if (rc == LNET_CREDIT_OK) {
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- 0, payload_length, payload_length);
- }
- return 0;
- }
-
- lnet_net_unlock(cpt);
-
- rc = lnet_parse_local(ni, msg);
- if (rc)
- goto free_drop;
- return 0;
-
- free_drop:
- LASSERT(!msg->msg_md);
- lnet_finalize(ni, msg, rc);
-
- drop:
- lnet_drop_message(ni, cpt, private, payload_length);
- return 0;
-}
-EXPORT_SYMBOL(lnet_parse);
-
-void
-lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
-{
- while (!list_empty(head)) {
- struct lnet_process_id id = {0};
- struct lnet_msg *msg;
-
- msg = list_entry(head->next, struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- id.nid = msg->msg_hdr.src_nid;
- id.pid = msg->msg_hdr.src_pid;
-
- LASSERT(!msg->msg_md);
- LASSERT(msg->msg_rx_delayed);
- LASSERT(msg->msg_rxpeer);
- LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
- CWARN("Dropping delayed PUT from %s portal %d match %llu offset %d length %d: %s\n",
- libcfs_id2str(id),
- msg->msg_hdr.msg.put.ptl_index,
- msg->msg_hdr.msg.put.match_bits,
- msg->msg_hdr.msg.put.offset,
- msg->msg_hdr.payload_length, reason);
-
- /*
- * NB I can't drop msg's ref on msg_rxpeer until after I've
- * called lnet_drop_message(), so I just hang onto msg as well
- * until that's done
- */
- lnet_drop_message(msg->msg_rxpeer->lp_ni,
- msg->msg_rxpeer->lp_cpt,
- msg->msg_private, msg->msg_len);
- /*
- * NB: message will not generate event because w/o attached MD,
- * but we still should give error code so lnet_msg_decommit()
- * can skip counters operations and other checks.
- */
- lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
- }
-}
-
-void
-lnet_recv_delayed_msg_list(struct list_head *head)
-{
- while (!list_empty(head)) {
- struct lnet_msg *msg;
- struct lnet_process_id id;
-
- msg = list_entry(head->next, struct lnet_msg, msg_list);
- list_del(&msg->msg_list);
-
- /*
- * md won't disappear under me, since each msg
- * holds a ref on it
- */
- id.nid = msg->msg_hdr.src_nid;
- id.pid = msg->msg_hdr.src_pid;
-
- LASSERT(msg->msg_rx_delayed);
- LASSERT(msg->msg_md);
- LASSERT(msg->msg_rxpeer);
- LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
-
- CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
- libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index,
- msg->msg_hdr.msg.put.match_bits,
- msg->msg_hdr.msg.put.offset,
- msg->msg_hdr.payload_length);
-
- lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
- }
-}
-
-/**
- * Initiate an asynchronous PUT operation.
- *
- * There are several events associated with a PUT: completion of the send on
- * the initiator node (LNET_EVENT_SEND), and when the send completes
- * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating
- * that the operation was accepted by the target. The event LNET_EVENT_PUT is
- * used at the target node to indicate the completion of incoming data
- * delivery.
- *
- * The local events will be logged in the EQ associated with the MD pointed to
- * by \a mdh handle. Using a MD without an associated EQ results in these
- * events being discarded. In this case, the caller must have another
- * mechanism (e.g., a higher level protocol) for determining when it is safe
- * to modify the memory region associated with the MD.
- *
- * Note that LNet does not guarantee the order of LNET_EVENT_SEND and
- * LNET_EVENT_ACK, though intuitively ACK should happen after SEND.
- *
- * \param self Indicates the NID of a local interface through which to send
- * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself.
- * \param mdh A handle for the MD that describes the memory to be sent. The MD
- * must be "free floating" (See LNetMDBind()).
- * \param ack Controls whether an acknowledgment is requested.
- * Acknowledgments are only sent when they are requested by the initiating
- * process and the target MD enables them.
- * \param target A process identifier for the target process.
- * \param portal The index in the \a target's portal table.
- * \param match_bits The match bits to use for MD selection at the target
- * process.
- * \param offset The offset into the target MD (only used when the target
- * MD has the LNET_MD_MANAGE_REMOTE option set).
- * \param hdr_data 64 bits of user data that can be included in the message
- * header. This data is written to an event queue entry at the target if an
- * EQ is present on the matching MD.
- *
- * \retval 0 Success, and only in this case events will be generated
- * and logged to EQ (if it exists).
- * \retval -EIO Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- *
- * \see lnet_event::hdr_data and lnet_event_kind.
- */
-int
-LNetPut(lnet_nid_t self, struct lnet_handle_md mdh, enum lnet_ack_req ack,
- struct lnet_process_id target, unsigned int portal,
- __u64 match_bits, unsigned int offset,
- __u64 hdr_data)
-{
- struct lnet_msg *msg;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(target.nid, 1)) { /* shall we now? */
- CERROR("Dropping PUT to %s: simulated failure\n",
- libcfs_id2str(target));
- return -EIO;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("Dropping PUT to %s: ENOMEM on struct lnet_msg\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
- msg->msg_vmflush = !!(current->flags & PF_MEMALLOC);
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md || !md->md_threshold || md->md_me) {
- CERROR("Dropping PUT (%llu:%d:%s): MD (%d) invalid\n",
- match_bits, portal, libcfs_id2str(target),
- !md ? -1 : md->md_threshold);
- if (md && md->md_me)
- CERROR("Source MD also attached to portal %d\n",
- md->md_me->me_portal);
- lnet_res_unlock(cpt);
-
- kfree(msg);
- return -ENOENT;
- }
-
- CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length);
-
- msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits);
- msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal);
- msg->msg_hdr.msg.put.offset = cpu_to_le32(offset);
- msg->msg_hdr.msg.put.hdr_data = hdr_data;
-
- /* NB handles only looked up by creator (no flips) */
- if (ack == LNET_ACK_REQ) {
- msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
- the_lnet.ln_interface_cookie;
- msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
- md->md_lh.lh_cookie;
- } else {
- msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
- LNET_WIRE_HANDLE_COOKIE_NONE;
- msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
- LNET_WIRE_HANDLE_COOKIE_NONE;
- }
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
- rc = lnet_send(self, msg, LNET_NID_ANY);
- if (rc) {
- CNETERR("Error sending PUT to %s: %d\n",
- libcfs_id2str(target), rc);
- lnet_finalize(NULL, msg, rc);
- }
-
- /* completion will be signalled by an event */
- return 0;
-}
-EXPORT_SYMBOL(LNetPut);
-
-struct lnet_msg *
-lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg)
-{
- /*
- * The LND can DMA direct to the GET md (i.e. no REPLY msg). This
- * returns a msg for the LND to pass to lnet_finalize() when the sink
- * data has been received.
- *
- * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
- * lnet_finalize() is called on it, so the LND must call this first
- */
- struct lnet_msg *msg = kzalloc(sizeof(*msg), GFP_NOFS);
- struct lnet_libmd *getmd = getmsg->msg_md;
- struct lnet_process_id peer_id = getmsg->msg_target;
- int cpt;
-
- LASSERT(!getmsg->msg_target_is_router);
- LASSERT(!getmsg->msg_routing);
-
- if (!msg) {
- CERROR("%s: Dropping REPLY from %s: can't allocate msg\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id));
- goto drop;
- }
-
- cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie);
- lnet_res_lock(cpt);
-
- LASSERT(getmd->md_refcount > 0);
-
- if (!getmd->md_threshold) {
- CERROR("%s: Dropping REPLY from %s for inactive MD %p\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id),
- getmd);
- lnet_res_unlock(cpt);
- goto drop;
- }
-
- LASSERT(!getmd->md_offset);
-
- CDEBUG(D_NET, "%s: Reply from %s md %p\n",
- libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
-
- /* setup information for lnet_build_msg_event */
- msg->msg_from = peer_id.nid;
- msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
- msg->msg_hdr.src_nid = peer_id.nid;
- msg->msg_hdr.payload_length = getmd->md_length;
- msg->msg_receiving = 1; /* required by lnet_msg_attach_md */
-
- lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
- lnet_res_unlock(cpt);
-
- cpt = lnet_cpt_of_nid(peer_id.nid);
-
- lnet_net_lock(cpt);
- lnet_msg_commit(msg, cpt);
- lnet_net_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_REPLY);
-
- return msg;
-
- drop:
- cpt = lnet_cpt_of_nid(peer_id.nid);
-
- lnet_net_lock(cpt);
- the_lnet.ln_counters[cpt]->drop_count++;
- the_lnet.ln_counters[cpt]->drop_length += getmd->md_length;
- lnet_net_unlock(cpt);
-
- kfree(msg);
-
- return NULL;
-}
-EXPORT_SYMBOL(lnet_create_reply_msg);
-
-void
-lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *reply,
- unsigned int len)
-{
- /*
- * Set the REPLY length, now the RDMA that elides the REPLY message has
- * completed and I know it.
- */
- LASSERT(reply);
- LASSERT(reply->msg_type == LNET_MSG_GET);
- LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY);
-
- /*
- * NB I trusted my peer to RDMA. If she tells me she's written beyond
- * the end of my buffer, I might as well be dead.
- */
- LASSERT(len <= reply->msg_ev.mlength);
-
- reply->msg_ev.mlength = len;
-}
-EXPORT_SYMBOL(lnet_set_reply_msg_len);
-
-/**
- * Initiate an asynchronous GET operation.
- *
- * On the initiator node, an LNET_EVENT_SEND is logged when the GET request
- * is sent, and an LNET_EVENT_REPLY is logged when the data returned from
- * the target node in the REPLY has been written to local MD.
- *
- * On the target node, an LNET_EVENT_GET is logged when the GET request
- * arrives and is accepted into a MD.
- *
- * \param self,target,portal,match_bits,offset See the discussion in LNetPut().
- * \param mdh A handle for the MD that describes the memory into which the
- * requested data will be received. The MD must be "free floating"
- * (See LNetMDBind()).
- *
- * \retval 0 Success, and only in this case events will be generated
- * and logged to EQ (if it exists) of the MD.
- * \retval -EIO Simulated failure.
- * \retval -ENOMEM Memory allocation failure.
- * \retval -ENOENT Invalid MD object.
- */
-int
-LNetGet(lnet_nid_t self, struct lnet_handle_md mdh,
- struct lnet_process_id target, unsigned int portal,
- __u64 match_bits, unsigned int offset)
-{
- struct lnet_msg *msg;
- struct lnet_libmd *md;
- int cpt;
- int rc;
-
- LASSERT(the_lnet.ln_refcount > 0);
-
- if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */
- fail_peer(target.nid, 1)) { /* shall we now? */
- CERROR("Dropping GET to %s: simulated failure\n",
- libcfs_id2str(target));
- return -EIO;
- }
-
- msg = kzalloc(sizeof(*msg), GFP_NOFS);
- if (!msg) {
- CERROR("Dropping GET to %s: ENOMEM on struct lnet_msg\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
-
- cpt = lnet_cpt_of_cookie(mdh.cookie);
- lnet_res_lock(cpt);
-
- md = lnet_handle2md(&mdh);
- if (!md || !md->md_threshold || md->md_me) {
- CERROR("Dropping GET (%llu:%d:%s): MD (%d) invalid\n",
- match_bits, portal, libcfs_id2str(target),
- !md ? -1 : md->md_threshold);
- if (md && md->md_me)
- CERROR("REPLY MD also attached to portal %d\n",
- md->md_me->me_portal);
-
- lnet_res_unlock(cpt);
-
- kfree(msg);
- return -ENOENT;
- }
-
- CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target));
-
- lnet_msg_attach_md(msg, md, 0, 0);
-
- lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0);
-
- msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits);
- msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal);
- msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset);
- msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length);
-
- /* NB handles only looked up by creator (no flips) */
- msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie =
- the_lnet.ln_interface_cookie;
- msg->msg_hdr.msg.get.return_wmd.wh_object_cookie =
- md->md_lh.lh_cookie;
-
- lnet_res_unlock(cpt);
-
- lnet_build_msg_event(msg, LNET_EVENT_SEND);
-
- rc = lnet_send(self, msg, LNET_NID_ANY);
- if (rc < 0) {
- CNETERR("Error sending GET to %s: %d\n",
- libcfs_id2str(target), rc);
- lnet_finalize(NULL, msg, rc);
- }
-
- /* completion will be signalled by an event */
- return 0;
-}
-EXPORT_SYMBOL(LNetGet);
-
-/**
- * Calculate distance to node at \a dstnid.
- *
- * \param dstnid Target NID.
- * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid
- * is saved here.
- * \param orderp If not NULL, order of the route to reach \a dstnid is saved
- * here.
- *
- * \retval 0 If \a dstnid belongs to a local interface, and reserved option
- * local_nid_dist_zero is set, which is the default.
- * \retval positives Distance to target NID, i.e. number of hops plus one.
- * \retval -EHOSTUNREACH If \a dstnid is not reachable.
- */
-int
-LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
-{
- struct list_head *e;
- struct lnet_ni *ni;
- struct lnet_remotenet *rnet;
- __u32 dstnet = LNET_NIDNET(dstnid);
- int hops;
- int cpt;
- __u32 order = 2;
- struct list_head *rn_list;
-
- /*
- * if !local_nid_dist_zero, I don't return a distance of 0 ever
- * (when lustre sees a distance of 0, it substitutes 0@lo), so I
- * keep order 0 free for 0@lo and order 1 free for a local NID
- * match
- */
- LASSERT(the_lnet.ln_refcount > 0);
-
- cpt = lnet_net_lock_current();
-
- list_for_each(e, &the_lnet.ln_nis) {
- ni = list_entry(e, struct lnet_ni, ni_list);
-
- if (ni->ni_nid == dstnid) {
- if (srcnidp)
- *srcnidp = dstnid;
- if (orderp) {
- if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND)
- *orderp = 0;
- else
- *orderp = 1;
- }
- lnet_net_unlock(cpt);
-
- return local_nid_dist_zero ? 0 : 1;
- }
-
- if (LNET_NIDNET(ni->ni_nid) == dstnet) {
- /*
- * Check if ni was originally created in
- * current net namespace.
- * If not, assign order above 0xffff0000,
- * to make this ni not a priority.
- */
- if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns))
- order += 0xffff0000;
-
- if (srcnidp)
- *srcnidp = ni->ni_nid;
- if (orderp)
- *orderp = order;
- lnet_net_unlock(cpt);
- return 1;
- }
-
- order++;
- }
-
- rn_list = lnet_net2rnethash(dstnet);
- list_for_each(e, rn_list) {
- rnet = list_entry(e, struct lnet_remotenet, lrn_list);
-
- if (rnet->lrn_net == dstnet) {
- struct lnet_route *route;
- struct lnet_route *shortest = NULL;
- __u32 shortest_hops = LNET_UNDEFINED_HOPS;
- __u32 route_hops;
-
- LASSERT(!list_empty(&rnet->lrn_routes));
-
- list_for_each_entry(route, &rnet->lrn_routes,
- lr_list) {
- route_hops = route->lr_hops;
- if (route_hops == LNET_UNDEFINED_HOPS)
- route_hops = 1;
- if (!shortest ||
- route_hops < shortest_hops) {
- shortest = route;
- shortest_hops = route_hops;
- }
- }
-
- LASSERT(shortest);
- hops = shortest_hops;
- if (srcnidp)
- *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
- if (orderp)
- *orderp = order;
- lnet_net_unlock(cpt);
- return hops + 1;
- }
- order++;
- }
-
- lnet_net_unlock(cpt);
- return -EHOSTUNREACH;
-}
-EXPORT_SYMBOL(LNetDist);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c
deleted file mode 100644
index 0091273..0000000
--- a/drivers/staging/lustre/lnet/lnet/lib-msg.c
+++ /dev/null
@@ -1,625 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-msg.c
- *
- * Message decoding, parsing and finalizing routines
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-void
-lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev)
-{
- memset(ev, 0, sizeof(*ev));
-
- ev->status = 0;
- ev->unlinked = 1;
- ev->type = LNET_EVENT_UNLINK;
- lnet_md_deconstruct(md, &ev->md);
- lnet_md2handle(&ev->md_handle, md);
-}
-
-/*
- * Don't need any lock, must be called after lnet_commit_md
- */
-void
-lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type)
-{
- struct lnet_hdr *hdr = &msg->msg_hdr;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(!msg->msg_routing);
-
- ev->type = ev_type;
-
- if (ev_type == LNET_EVENT_SEND) {
- /* event for active message */
- ev->target.nid = le64_to_cpu(hdr->dest_nid);
- ev->target.pid = le32_to_cpu(hdr->dest_pid);
- ev->initiator.nid = LNET_NID_ANY;
- ev->initiator.pid = the_lnet.ln_pid;
- ev->sender = LNET_NID_ANY;
- } else {
- /* event for passive message */
- ev->target.pid = hdr->dest_pid;
- ev->target.nid = hdr->dest_nid;
- ev->initiator.pid = hdr->src_pid;
- ev->initiator.nid = hdr->src_nid;
- ev->rlength = hdr->payload_length;
- ev->sender = msg->msg_from;
- ev->mlength = msg->msg_wanted;
- ev->offset = msg->msg_offset;
- }
-
- switch (ev_type) {
- default:
- LBUG();
-
- case LNET_EVENT_PUT: /* passive PUT */
- ev->pt_index = hdr->msg.put.ptl_index;
- ev->match_bits = hdr->msg.put.match_bits;
- ev->hdr_data = hdr->msg.put.hdr_data;
- return;
-
- case LNET_EVENT_GET: /* passive GET */
- ev->pt_index = hdr->msg.get.ptl_index;
- ev->match_bits = hdr->msg.get.match_bits;
- ev->hdr_data = 0;
- return;
-
- case LNET_EVENT_ACK: /* ACK */
- ev->match_bits = hdr->msg.ack.match_bits;
- ev->mlength = hdr->msg.ack.mlength;
- return;
-
- case LNET_EVENT_REPLY: /* REPLY */
- return;
-
- case LNET_EVENT_SEND: /* active message */
- if (msg->msg_type == LNET_MSG_PUT) {
- ev->pt_index = le32_to_cpu(hdr->msg.put.ptl_index);
- ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
- ev->offset = le32_to_cpu(hdr->msg.put.offset);
- ev->mlength =
- ev->rlength = le32_to_cpu(hdr->payload_length);
- ev->hdr_data = le64_to_cpu(hdr->msg.put.hdr_data);
-
- } else {
- LASSERT(msg->msg_type == LNET_MSG_GET);
- ev->pt_index = le32_to_cpu(hdr->msg.get.ptl_index);
- ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
- ev->mlength =
- ev->rlength = le32_to_cpu(hdr->msg.get.sink_length);
- ev->offset = le32_to_cpu(hdr->msg.get.src_offset);
- ev->hdr_data = 0;
- }
- return;
- }
-}
-
-void
-lnet_msg_commit(struct lnet_msg *msg, int cpt)
-{
- struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
- struct lnet_counters *counters = the_lnet.ln_counters[cpt];
-
- /* routed message can be committed for both receiving and sending */
- LASSERT(!msg->msg_tx_committed);
-
- if (msg->msg_sending) {
- LASSERT(!msg->msg_receiving);
-
- msg->msg_tx_cpt = cpt;
- msg->msg_tx_committed = 1;
- if (msg->msg_rx_committed) { /* routed message REPLY */
- LASSERT(msg->msg_onactivelist);
- return;
- }
- } else {
- LASSERT(!msg->msg_sending);
- msg->msg_rx_cpt = cpt;
- msg->msg_rx_committed = 1;
- }
-
- LASSERT(!msg->msg_onactivelist);
- msg->msg_onactivelist = 1;
- list_add(&msg->msg_activelist, &container->msc_active);
-
- counters->msgs_alloc++;
- if (counters->msgs_alloc > counters->msgs_max)
- counters->msgs_max = counters->msgs_alloc;
-}
-
-static void
-lnet_msg_decommit_tx(struct lnet_msg *msg, int status)
-{
- struct lnet_counters *counters;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(msg->msg_tx_committed);
- if (status)
- goto out;
-
- counters = the_lnet.ln_counters[msg->msg_tx_cpt];
- switch (ev->type) {
- default: /* routed message */
- LASSERT(msg->msg_routing);
- LASSERT(msg->msg_rx_committed);
- LASSERT(!ev->type);
-
- counters->route_length += msg->msg_len;
- counters->route_count++;
- goto out;
-
- case LNET_EVENT_PUT:
- /* should have been decommitted */
- LASSERT(!msg->msg_rx_committed);
- /* overwritten while sending ACK */
- LASSERT(msg->msg_type == LNET_MSG_ACK);
- msg->msg_type = LNET_MSG_PUT; /* fix type */
- break;
-
- case LNET_EVENT_SEND:
- LASSERT(!msg->msg_rx_committed);
- if (msg->msg_type == LNET_MSG_PUT)
- counters->send_length += msg->msg_len;
- break;
-
- case LNET_EVENT_GET:
- LASSERT(msg->msg_rx_committed);
- /*
- * overwritten while sending reply, we should never be
- * here for optimized GET
- */
- LASSERT(msg->msg_type == LNET_MSG_REPLY);
- msg->msg_type = LNET_MSG_GET; /* fix type */
- break;
- }
-
- counters->send_count++;
- out:
- lnet_return_tx_credits_locked(msg);
- msg->msg_tx_committed = 0;
-}
-
-static void
-lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
-{
- struct lnet_counters *counters;
- struct lnet_event *ev = &msg->msg_ev;
-
- LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
- LASSERT(msg->msg_rx_committed);
-
- if (status)
- goto out;
-
- counters = the_lnet.ln_counters[msg->msg_rx_cpt];
- switch (ev->type) {
- default:
- LASSERT(!ev->type);
- LASSERT(msg->msg_routing);
- goto out;
-
- case LNET_EVENT_ACK:
- LASSERT(msg->msg_type == LNET_MSG_ACK);
- break;
-
- case LNET_EVENT_GET:
- /*
- * type is "REPLY" if it's an optimized GET on passive side,
- * because optimized GET will never be committed for sending,
- * so message type wouldn't be changed back to "GET" by
- * lnet_msg_decommit_tx(), see details in lnet_parse_get()
- */
- LASSERT(msg->msg_type == LNET_MSG_REPLY ||
- msg->msg_type == LNET_MSG_GET);
- counters->send_length += msg->msg_wanted;
- break;
-
- case LNET_EVENT_PUT:
- LASSERT(msg->msg_type == LNET_MSG_PUT);
- break;
-
- case LNET_EVENT_REPLY:
- /*
- * type is "GET" if it's an optimized GET on active side,
- * see details in lnet_create_reply_msg()
- */
- LASSERT(msg->msg_type == LNET_MSG_GET ||
- msg->msg_type == LNET_MSG_REPLY);
- break;
- }
-
- counters->recv_count++;
- if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
- counters->recv_length += msg->msg_wanted;
-
- out:
- lnet_return_rx_credits_locked(msg);
- msg->msg_rx_committed = 0;
-}
-
-void
-lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status)
-{
- int cpt2 = cpt;
-
- LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
- LASSERT(msg->msg_onactivelist);
-
- if (msg->msg_tx_committed) { /* always decommit for sending first */
- LASSERT(cpt == msg->msg_tx_cpt);
- lnet_msg_decommit_tx(msg, status);
- }
-
- if (msg->msg_rx_committed) {
- /* forwarding msg committed for both receiving and sending */
- if (cpt != msg->msg_rx_cpt) {
- lnet_net_unlock(cpt);
- cpt2 = msg->msg_rx_cpt;
- lnet_net_lock(cpt2);
- }
- lnet_msg_decommit_rx(msg, status);
- }
-
- list_del(&msg->msg_activelist);
- msg->msg_onactivelist = 0;
-
- the_lnet.ln_counters[cpt2]->msgs_alloc--;
-
- if (cpt2 != cpt) {
- lnet_net_unlock(cpt2);
- lnet_net_lock(cpt);
- }
-}
-
-void
-lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md,
- unsigned int offset, unsigned int mlen)
-{
- /* NB: @offset and @len are only useful for receiving */
- /*
- * Here, we attach the MD on lnet_msg and mark it busy and
- * decrementing its threshold. Come what may, the lnet_msg "owns"
- * the MD until a call to lnet_msg_detach_md or lnet_finalize()
- * signals completion.
- */
- LASSERT(!msg->msg_routing);
-
- msg->msg_md = md;
- if (msg->msg_receiving) { /* committed for receiving */
- msg->msg_offset = offset;
- msg->msg_wanted = mlen;
- }
-
- md->md_refcount++;
- if (md->md_threshold != LNET_MD_THRESH_INF) {
- LASSERT(md->md_threshold > 0);
- md->md_threshold--;
- }
-
- /* build umd in event */
- lnet_md2handle(&msg->msg_ev.md_handle, md);
- lnet_md_deconstruct(md, &msg->msg_ev.md);
-}
-
-void
-lnet_msg_detach_md(struct lnet_msg *msg, int status)
-{
- struct lnet_libmd *md = msg->msg_md;
- int unlink;
-
- /* Now it's safe to drop my caller's ref */
- md->md_refcount--;
- LASSERT(md->md_refcount >= 0);
-
- unlink = lnet_md_unlinkable(md);
- if (md->md_eq) {
- msg->msg_ev.status = status;
- msg->msg_ev.unlinked = unlink;
- lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
- }
-
- if (unlink)
- lnet_md_unlink(md);
-
- msg->msg_md = NULL;
-}
-
-static int
-lnet_complete_msg_locked(struct lnet_msg *msg, int cpt)
-{
- struct lnet_handle_wire ack_wmd;
- int rc;
- int status = msg->msg_ev.status;
-
- LASSERT(msg->msg_onactivelist);
-
- if (!status && msg->msg_ack) {
- /* Only send an ACK if the PUT completed successfully */
-
- lnet_msg_decommit(msg, cpt, 0);
-
- msg->msg_ack = 0;
- lnet_net_unlock(cpt);
-
- LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
- LASSERT(!msg->msg_routing);
-
- ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
-
- lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
-
- msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
- msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
- msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
-
- /*
- * NB: we probably want to use NID of msg::msg_from as 3rd
- * parameter (router NID) if it's routed message
- */
- rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY);
-
- lnet_net_lock(cpt);
- /*
- * NB: message is committed for sending, we should return
- * on success because LND will finalize this message later.
- *
- * Also, there is possibility that message is committed for
- * sending and also failed before delivering to LND,
- * i.e: ENOMEM, in that case we can't fall through either
- * because CPT for sending can be different with CPT for
- * receiving, so we should return back to lnet_finalize()
- * to make sure we are locking the correct partition.
- */
- return rc;
-
- } else if (!status && /* OK so far */
- (msg->msg_routing && !msg->msg_sending)) {
- /* not forwarded */
- LASSERT(!msg->msg_receiving); /* called back recv already */
- lnet_net_unlock(cpt);
-
- rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY);
-
- lnet_net_lock(cpt);
- /*
- * NB: message is committed for sending, we should return
- * on success because LND will finalize this message later.
- *
- * Also, there is possibility that message is committed for
- * sending and also failed before delivering to LND,
- * i.e: ENOMEM, in that case we can't fall through either:
- * - The rule is message must decommit for sending first if
- * the it's committed for both sending and receiving
- * - CPT for sending can be different with CPT for receiving,
- * so we should return back to lnet_finalize() to make
- * sure we are locking the correct partition.
- */
- return rc;
- }
-
- lnet_msg_decommit(msg, cpt, status);
- kfree(msg);
- return 0;
-}
-
-void
-lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int status)
-{
- struct lnet_msg_container *container;
- int my_slot;
- int cpt;
- int rc;
- int i;
-
- LASSERT(!in_interrupt());
-
- if (!msg)
- return;
-
- msg->msg_ev.status = status;
-
- if (msg->msg_md) {
- cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie);
-
- lnet_res_lock(cpt);
- lnet_msg_detach_md(msg, status);
- lnet_res_unlock(cpt);
- }
-
- again:
- rc = 0;
- if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
- /* not committed to network yet */
- LASSERT(!msg->msg_onactivelist);
- kfree(msg);
- return;
- }
-
- /*
- * NB: routed message can be committed for both receiving and sending,
- * we should finalize in LIFO order and keep counters correct.
- * (finalize sending first then finalize receiving)
- */
- cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt;
- lnet_net_lock(cpt);
-
- container = the_lnet.ln_msg_containers[cpt];
- list_add_tail(&msg->msg_list, &container->msc_finalizing);
-
- /*
- * Recursion breaker. Don't complete the message here if I am (or
- * enough other threads are) already completing messages
- */
- my_slot = -1;
- for (i = 0; i < container->msc_nfinalizers; i++) {
- if (container->msc_finalizers[i] == current)
- break;
-
- if (my_slot < 0 && !container->msc_finalizers[i])
- my_slot = i;
- }
-
- if (i < container->msc_nfinalizers || my_slot < 0) {
- lnet_net_unlock(cpt);
- return;
- }
-
- container->msc_finalizers[my_slot] = current;
-
- while (!list_empty(&container->msc_finalizing)) {
- msg = list_entry(container->msc_finalizing.next,
- struct lnet_msg, msg_list);
-
- list_del(&msg->msg_list);
-
- /*
- * NB drops and regains the lnet lock if it actually does
- * anything, so my finalizing friends can chomp along too
- */
- rc = lnet_complete_msg_locked(msg, cpt);
- if (rc)
- break;
- }
-
- if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) {
- lnet_net_unlock(cpt);
- lnet_delay_rule_check();
- lnet_net_lock(cpt);
- }
-
- container->msc_finalizers[my_slot] = NULL;
- lnet_net_unlock(cpt);
-
- if (rc)
- goto again;
-}
-EXPORT_SYMBOL(lnet_finalize);
-
-void
-lnet_msg_container_cleanup(struct lnet_msg_container *container)
-{
- int count = 0;
-
- if (!container->msc_init)
- return;
-
- while (!list_empty(&container->msc_active)) {
- struct lnet_msg *msg;
-
- msg = list_entry(container->msc_active.next,
- struct lnet_msg, msg_activelist);
- LASSERT(msg->msg_onactivelist);
- msg->msg_onactivelist = 0;
- list_del(&msg->msg_activelist);
- kfree(msg);
- count++;
- }
-
- if (count > 0)
- CERROR("%d active msg on exit\n", count);
-
- kvfree(container->msc_finalizers);
- container->msc_finalizers = NULL;
- container->msc_init = 0;
-}
-
-int
-lnet_msg_container_setup(struct lnet_msg_container *container, int cpt)
-{
- container->msc_init = 1;
-
- INIT_LIST_HEAD(&container->msc_active);
- INIT_LIST_HEAD(&container->msc_finalizing);
-
- /* number of CPUs */
- container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt);
-
- container->msc_finalizers = kvzalloc_cpt(container->msc_nfinalizers *
- sizeof(*container->msc_finalizers),
- GFP_KERNEL, cpt);
-
- if (!container->msc_finalizers) {
- CERROR("Failed to allocate message finalizers\n");
- lnet_msg_container_cleanup(container);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void
-lnet_msg_containers_destroy(void)
-{
- struct lnet_msg_container *container;
- int i;
-
- if (!the_lnet.ln_msg_containers)
- return;
-
- cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers)
- lnet_msg_container_cleanup(container);
-
- cfs_percpt_free(the_lnet.ln_msg_containers);
- the_lnet.ln_msg_containers = NULL;
-}
-
-int
-lnet_msg_containers_create(void)
-{
- struct lnet_msg_container *container;
- int rc;
- int i;
-
- the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(*container));
-
- if (!the_lnet.ln_msg_containers) {
- CERROR("Failed to allocate cpu-partition data for network\n");
- return -ENOMEM;
- }
-
- cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) {
- rc = lnet_msg_container_setup(container, i);
- if (rc) {
- lnet_msg_containers_destroy();
- return rc;
- }
- }
-
- return 0;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
deleted file mode 100644
index fc47379..0000000
--- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c
+++ /dev/null
@@ -1,987 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/lib-ptl.c
- *
- * portal & match routines
- *
- * Author: liang@whamcloud.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/* NB: add /proc interfaces in upcoming patches */
-int portal_rotor = LNET_PTL_ROTOR_HASH_RT;
-module_param(portal_rotor, int, 0644);
-MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions");
-
-static int
-lnet_ptl_match_type(unsigned int index, struct lnet_process_id match_id,
- __u64 mbits, __u64 ignore_bits)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[index];
- int unique;
-
- unique = !ignore_bits &&
- match_id.nid != LNET_NID_ANY &&
- match_id.pid != LNET_PID_ANY;
-
- LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
-
- /* prefer to check w/o any lock */
- if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
- goto match;
-
- /* unset, new portal */
- lnet_ptl_lock(ptl);
- /* check again with lock */
- if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
- lnet_ptl_unlock(ptl);
- goto match;
- }
-
- /* still not set */
- if (unique)
- lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
- else
- lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
-
- lnet_ptl_unlock(ptl);
-
- return 1;
-
- match:
- if ((lnet_ptl_is_unique(ptl) && !unique) ||
- (lnet_ptl_is_wildcard(ptl) && unique))
- return 0;
- return 1;
-}
-
-static void
-lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt)
-{
- struct lnet_match_table *mtable = ptl->ptl_mtables[cpt];
- int i;
-
- /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- mtable->mt_enabled = 1;
-
- ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt;
- for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) {
- LASSERT(ptl->ptl_mt_maps[i] != cpt);
- if (ptl->ptl_mt_maps[i] < cpt)
- break;
-
- /* swap to order */
- ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i];
- ptl->ptl_mt_maps[i] = cpt;
- }
-
- ptl->ptl_mt_nmaps++;
-}
-
-static void
-lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt)
-{
- struct lnet_match_table *mtable = ptl->ptl_mtables[cpt];
- int i;
-
- /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- if (LNET_CPT_NUMBER == 1)
- return; /* never disable the only match-table */
-
- mtable->mt_enabled = 0;
-
- LASSERT(ptl->ptl_mt_nmaps > 0 &&
- ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER);
-
- /* remove it from mt_maps */
- ptl->ptl_mt_nmaps--;
- for (i = 0; i < ptl->ptl_mt_nmaps; i++) {
- if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */
- ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1];
- }
-}
-
-static int
-lnet_try_match_md(struct lnet_libmd *md,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- /*
- * ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock;
- * lnet_match_blocked_msg() relies on this to avoid races
- */
- unsigned int offset;
- unsigned int mlength;
- struct lnet_me *me = md->md_me;
-
- /* MD exhausted */
- if (lnet_md_exhausted(md))
- return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED;
-
- /* mismatched MD op */
- if (!(md->md_options & info->mi_opc))
- return LNET_MATCHMD_NONE;
-
- /* mismatched ME nid/pid? */
- if (me->me_match_id.nid != LNET_NID_ANY &&
- me->me_match_id.nid != info->mi_id.nid)
- return LNET_MATCHMD_NONE;
-
- if (me->me_match_id.pid != LNET_PID_ANY &&
- me->me_match_id.pid != info->mi_id.pid)
- return LNET_MATCHMD_NONE;
-
- /* mismatched ME matchbits? */
- if ((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits)
- return LNET_MATCHMD_NONE;
-
- /* Hurrah! This _is_ a match; check it out... */
-
- if (!(md->md_options & LNET_MD_MANAGE_REMOTE))
- offset = md->md_offset;
- else
- offset = info->mi_roffset;
-
- if (md->md_options & LNET_MD_MAX_SIZE) {
- mlength = md->md_max_size;
- LASSERT(md->md_offset + mlength <= md->md_length);
- } else {
- mlength = md->md_length - offset;
- }
-
- if (info->mi_rlength <= mlength) { /* fits in allowed space */
- mlength = info->mi_rlength;
- } else if (!(md->md_options & LNET_MD_TRUNCATE)) {
- /* this packet _really_ is too big */
- CERROR("Matching packet from %s, match %llu length %d too big: %d left, %d allowed\n",
- libcfs_id2str(info->mi_id), info->mi_mbits,
- info->mi_rlength, md->md_length - offset, mlength);
-
- return LNET_MATCHMD_DROP;
- }
-
- /* Commit to this ME/MD */
- CDEBUG(D_NET, "Incoming %s index %x from %s of length %d/%d into md %#llx [%d] + %d\n",
- (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
- info->mi_portal, libcfs_id2str(info->mi_id), mlength,
- info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
-
- lnet_msg_attach_md(msg, md, offset, mlength);
- md->md_offset = offset + mlength;
-
- if (!lnet_md_exhausted(md))
- return LNET_MATCHMD_OK;
-
- /*
- * Auto-unlink NOW, so the ME gets unlinked if required.
- * We bumped md->md_refcount above so the MD just gets flagged
- * for unlink when it is finalized.
- */
- if (md->md_flags & LNET_MD_FLAG_AUTO_UNLINK)
- lnet_md_unlink(md);
-
- return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED;
-}
-
-static struct lnet_match_table *
-lnet_match2mt(struct lnet_portal *ptl, struct lnet_process_id id, __u64 mbits)
-{
- if (LNET_CPT_NUMBER == 1)
- return ptl->ptl_mtables[0]; /* the only one */
-
- /* if it's a unique portal, return match-table hashed by NID */
- return lnet_ptl_is_unique(ptl) ?
- ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
-}
-
-struct lnet_match_table *
-lnet_mt_of_attach(unsigned int index, struct lnet_process_id id,
- __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos)
-{
- struct lnet_portal *ptl;
- struct lnet_match_table *mtable;
-
- /* NB: called w/o lock */
- LASSERT(index < the_lnet.ln_nportals);
-
- if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
- return NULL;
-
- ptl = the_lnet.ln_portals[index];
-
- mtable = lnet_match2mt(ptl, id, mbits);
- if (mtable) /* unique portal or only one match-table */
- return mtable;
-
- /* it's a wildcard portal */
- switch (pos) {
- default:
- return NULL;
- case LNET_INS_BEFORE:
- case LNET_INS_AFTER:
- /*
- * posted by no affinity thread, always hash to specific
- * match-table to avoid buffer stealing which is heavy
- */
- return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER];
- case LNET_INS_LOCAL:
- /* posted by cpu-affinity thread */
- return ptl->ptl_mtables[lnet_cpt_current()];
- }
-}
-
-static struct lnet_match_table *
-lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct lnet_match_table *mtable;
- struct lnet_portal *ptl;
- unsigned int nmaps;
- unsigned int rotor;
- unsigned int cpt;
- bool routed;
-
- /* NB: called w/o lock */
- LASSERT(info->mi_portal < the_lnet.ln_nportals);
- ptl = the_lnet.ln_portals[info->mi_portal];
-
- LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl));
-
- mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits);
- if (mtable)
- return mtable;
-
- /* it's a wildcard portal */
- routed = LNET_NIDNET(msg->msg_hdr.src_nid) !=
- LNET_NIDNET(msg->msg_hdr.dest_nid);
-
- if (portal_rotor == LNET_PTL_ROTOR_OFF ||
- (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) {
- cpt = lnet_cpt_current();
- if (ptl->ptl_mtables[cpt]->mt_enabled)
- return ptl->ptl_mtables[cpt];
- }
-
- rotor = ptl->ptl_rotor++; /* get round-robin factor */
- if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
- cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
- else
- cpt = rotor % LNET_CPT_NUMBER;
-
- if (!ptl->ptl_mtables[cpt]->mt_enabled) {
- /* is there any active entry for this portal? */
- nmaps = ptl->ptl_mt_nmaps;
- /* map to an active mtable to avoid heavy "stealing" */
- if (nmaps) {
- /*
- * NB: there is possibility that ptl_mt_maps is being
- * changed because we are not under protection of
- * lnet_ptl_lock, but it shouldn't hurt anything
- */
- cpt = ptl->ptl_mt_maps[rotor % nmaps];
- }
- }
-
- return ptl->ptl_mtables[cpt];
-}
-
-static int
-lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos)
-{
- __u64 *bmap;
- int i;
-
- if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
- return 0;
-
- if (pos < 0) { /* check all bits */
- for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) {
- if (mtable->mt_exhausted[i] != (__u64)(-1))
- return 0;
- }
- return 1;
- }
-
- LASSERT(pos <= LNET_MT_HASH_IGNORE);
- /* mtable::mt_mhash[pos] is marked as exhausted or not */
- bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
- pos &= (1 << LNET_MT_BITS_U64) - 1;
-
- return (*bmap & BIT(pos));
-}
-
-static void
-lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
-{
- __u64 *bmap;
-
- LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]));
- LASSERT(pos <= LNET_MT_HASH_IGNORE);
-
- /* set mtable::mt_mhash[pos] as exhausted/non-exhausted */
- bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
- pos &= (1 << LNET_MT_BITS_U64) - 1;
-
- if (!exhausted)
- *bmap &= ~(1ULL << pos);
- else
- *bmap |= 1ULL << pos;
-}
-
-struct list_head *
-lnet_mt_match_head(struct lnet_match_table *mtable,
- struct lnet_process_id id, __u64 mbits)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
- unsigned long hash = mbits;
-
- if (!lnet_ptl_is_wildcard(ptl)) {
- hash += id.nid + id.pid;
-
- LASSERT(lnet_ptl_is_unique(ptl));
- hash = hash_long(hash, LNET_MT_HASH_BITS);
- }
- return &mtable->mt_mhash[hash & LNET_MT_HASH_MASK];
-}
-
-int
-lnet_mt_match_md(struct lnet_match_table *mtable,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct list_head *head;
- struct lnet_me *me;
- struct lnet_me *tmp;
- int exhausted = 0;
- int rc;
-
- /* any ME with ignore bits? */
- if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
- head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
- else
- head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
- again:
- /* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */
- if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
- exhausted = LNET_MATCHMD_EXHAUSTED;
-
- list_for_each_entry_safe(me, tmp, head, me_list) {
- /* ME attached but MD not attached yet */
- if (!me->me_md)
- continue;
-
- LASSERT(me == me->me_md->md_me);
-
- rc = lnet_try_match_md(me->me_md, info, msg);
- if (!(rc & LNET_MATCHMD_EXHAUSTED))
- exhausted = 0; /* mlist is not empty */
-
- if (rc & LNET_MATCHMD_FINISH) {
- /*
- * don't return EXHAUSTED bit because we don't know
- * whether the mlist is empty or not
- */
- return rc & ~LNET_MATCHMD_EXHAUSTED;
- }
- }
-
- if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */
- lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1);
- if (!lnet_mt_test_exhausted(mtable, -1))
- exhausted = 0;
- }
-
- if (!exhausted && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) {
- head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
- goto again; /* re-check MEs w/o ignore-bits */
- }
-
- if (info->mi_opc == LNET_MD_OP_GET ||
- !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
- return exhausted | LNET_MATCHMD_DROP;
-
- return exhausted | LNET_MATCHMD_NONE;
-}
-
-static int
-lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
-{
- int rc;
-
- /*
- * message arrived before any buffer posting on this portal,
- * simply delay or drop this message
- */
- if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)))
- return 0;
-
- lnet_ptl_lock(ptl);
- /* check it again with hold of lock */
- if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) {
- lnet_ptl_unlock(ptl);
- return 0;
- }
-
- if (lnet_ptl_is_lazy(ptl)) {
- if (msg->msg_rx_ready_delay) {
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_delayed);
- }
- rc = LNET_MATCHMD_NONE;
- } else {
- rc = LNET_MATCHMD_DROP;
- }
-
- lnet_ptl_unlock(ptl);
- return rc;
-}
-
-static int
-lnet_ptl_match_delay(struct lnet_portal *ptl,
- struct lnet_match_info *info, struct lnet_msg *msg)
-{
- int first = ptl->ptl_mt_maps[0]; /* read w/o lock */
- int rc = 0;
- int i;
-
- /**
- * Steal buffer from other CPTs, and delay msg if nothing to
- * steal. This function is more expensive than a regular
- * match, but we don't expect it can happen a lot. The return
- * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or
- * LNET_MATCHMD_NONE.
- */
- LASSERT(lnet_ptl_is_wildcard(ptl));
-
- for (i = 0; i < LNET_CPT_NUMBER; i++) {
- struct lnet_match_table *mtable;
- int cpt;
-
- cpt = (first + i) % LNET_CPT_NUMBER;
- mtable = ptl->ptl_mtables[cpt];
- if (i && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled)
- continue;
-
- lnet_res_lock(cpt);
- lnet_ptl_lock(ptl);
-
- if (!i) {
- /* The first try, add to stealing list. */
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_stealing);
- }
-
- if (!list_empty(&msg->msg_list)) {
- /* On stealing list. */
- rc = lnet_mt_match_md(mtable, info, msg);
-
- if ((rc & LNET_MATCHMD_EXHAUSTED) &&
- mtable->mt_enabled)
- lnet_ptl_disable_mt(ptl, cpt);
-
- if (rc & LNET_MATCHMD_FINISH) {
- /* Match found, remove from stealing list. */
- list_del_init(&msg->msg_list);
- } else if (i == LNET_CPT_NUMBER - 1 || /* (1) */
- !ptl->ptl_mt_nmaps || /* (2) */
- (ptl->ptl_mt_nmaps == 1 && /* (3) */
- ptl->ptl_mt_maps[0] == cpt)) {
- /**
- * No match found, and this is either
- * (1) the last cpt to check, or
- * (2) there is no active cpt, or
- * (3) this is the only active cpt.
- * There is nothing to steal: delay or
- * drop the message.
- */
- list_del_init(&msg->msg_list);
-
- if (lnet_ptl_is_lazy(ptl)) {
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list,
- &ptl->ptl_msg_delayed);
- rc = LNET_MATCHMD_NONE;
- } else {
- rc = LNET_MATCHMD_DROP;
- }
- } else {
- /* Do another iteration. */
- rc = 0;
- }
- } else {
- /**
- * No longer on stealing list: another thread
- * matched the message in lnet_ptl_attach_md().
- * We are now expected to handle the message.
- */
- rc = !msg->msg_md ?
- LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
- }
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(cpt);
-
- /**
- * Note that test (1) above ensures that we always
- * exit the loop through this break statement.
- *
- * LNET_MATCHMD_NONE means msg was added to the
- * delayed queue, and we may no longer reference it
- * after lnet_ptl_unlock() and lnet_res_unlock().
- */
- if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE))
- break;
- }
-
- return rc;
-}
-
-int
-lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
-{
- struct lnet_match_table *mtable;
- struct lnet_portal *ptl;
- int rc;
-
- CDEBUG(D_NET, "Request from %s of length %d into portal %d MB=%#llx\n",
- libcfs_id2str(info->mi_id), info->mi_rlength, info->mi_portal,
- info->mi_mbits);
-
- if (info->mi_portal >= the_lnet.ln_nportals) {
- CERROR("Invalid portal %d not in [0-%d]\n",
- info->mi_portal, the_lnet.ln_nportals);
- return LNET_MATCHMD_DROP;
- }
-
- ptl = the_lnet.ln_portals[info->mi_portal];
- rc = lnet_ptl_match_early(ptl, msg);
- if (rc) /* matched or delayed early message */
- return rc;
-
- mtable = lnet_mt_of_match(info, msg);
- lnet_res_lock(mtable->mt_cpt);
-
- if (the_lnet.ln_shutdown) {
- rc = LNET_MATCHMD_DROP;
- goto out1;
- }
-
- rc = lnet_mt_match_md(mtable, info, msg);
- if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) {
- lnet_ptl_lock(ptl);
- lnet_ptl_disable_mt(ptl, mtable->mt_cpt);
- lnet_ptl_unlock(ptl);
- }
-
- if (rc & LNET_MATCHMD_FINISH) /* matched or dropping */
- goto out1;
-
- if (!msg->msg_rx_ready_delay)
- goto out1;
-
- LASSERT(lnet_ptl_is_lazy(ptl));
- LASSERT(!msg->msg_rx_delayed);
-
- /* NB: we don't expect "delay" can happen a lot */
- if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) {
- lnet_ptl_lock(ptl);
-
- msg->msg_rx_delayed = 1;
- list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(mtable->mt_cpt);
- rc = LNET_MATCHMD_NONE;
- } else {
- lnet_res_unlock(mtable->mt_cpt);
- rc = lnet_ptl_match_delay(ptl, info, msg);
- }
-
- /* LNET_MATCHMD_NONE means msg was added to the delay queue */
- if (rc & LNET_MATCHMD_NONE) {
- CDEBUG(D_NET,
- "Delaying %s from %s ptl %d MB %#llx off %d len %d\n",
- info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
- libcfs_id2str(info->mi_id), info->mi_portal,
- info->mi_mbits, info->mi_roffset, info->mi_rlength);
- }
- goto out0;
- out1:
- lnet_res_unlock(mtable->mt_cpt);
- out0:
- /* EXHAUSTED bit is only meaningful for internal functions */
- return rc & ~LNET_MATCHMD_EXHAUSTED;
-}
-
-void
-lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md)
-{
- LASSERT(me->me_md == md && md->md_me == me);
-
- me->me_md = NULL;
- md->md_me = NULL;
-}
-
-/* called with lnet_res_lock held */
-void
-lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md,
- struct list_head *matches, struct list_head *drops)
-{
- struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
- struct lnet_match_table *mtable;
- struct list_head *head;
- struct lnet_msg *tmp;
- struct lnet_msg *msg;
- int exhausted = 0;
- int cpt;
-
- LASSERT(!md->md_refcount); /* a brand new MD */
-
- me->me_md = md;
- md->md_me = me;
-
- cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
- mtable = ptl->ptl_mtables[cpt];
-
- if (list_empty(&ptl->ptl_msg_stealing) &&
- list_empty(&ptl->ptl_msg_delayed) &&
- !lnet_mt_test_exhausted(mtable, me->me_pos))
- return;
-
- lnet_ptl_lock(ptl);
- head = &ptl->ptl_msg_stealing;
- again:
- list_for_each_entry_safe(msg, tmp, head, msg_list) {
- struct lnet_match_info info;
- struct lnet_hdr *hdr;
- int rc;
-
- LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
-
- hdr = &msg->msg_hdr;
- info.mi_id.nid = hdr->src_nid;
- info.mi_id.pid = hdr->src_pid;
- info.mi_opc = LNET_MD_OP_PUT;
- info.mi_portal = hdr->msg.put.ptl_index;
- info.mi_rlength = hdr->payload_length;
- info.mi_roffset = hdr->msg.put.offset;
- info.mi_mbits = hdr->msg.put.match_bits;
-
- rc = lnet_try_match_md(md, &info, msg);
-
- exhausted = (rc & LNET_MATCHMD_EXHAUSTED);
- if (rc & LNET_MATCHMD_NONE) {
- if (exhausted)
- break;
- continue;
- }
-
- /* Hurrah! This _is_ a match */
- LASSERT(rc & LNET_MATCHMD_FINISH);
- list_del_init(&msg->msg_list);
-
- if (head == &ptl->ptl_msg_stealing) {
- if (exhausted)
- break;
- /* stealing thread will handle the message */
- continue;
- }
-
- if (rc & LNET_MATCHMD_OK) {
- list_add_tail(&msg->msg_list, matches);
-
- CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
- libcfs_id2str(info.mi_id),
- info.mi_portal, info.mi_mbits,
- info.mi_roffset, info.mi_rlength);
- } else {
- list_add_tail(&msg->msg_list, drops);
- }
-
- if (exhausted)
- break;
- }
-
- if (!exhausted && head == &ptl->ptl_msg_stealing) {
- head = &ptl->ptl_msg_delayed;
- goto again;
- }
-
- if (lnet_ptl_is_wildcard(ptl) && !exhausted) {
- lnet_mt_set_exhausted(mtable, me->me_pos, 0);
- if (!mtable->mt_enabled)
- lnet_ptl_enable_mt(ptl, cpt);
- }
-
- lnet_ptl_unlock(ptl);
-}
-
-static void
-lnet_ptl_cleanup(struct lnet_portal *ptl)
-{
- struct lnet_match_table *mtable;
- int i;
-
- if (!ptl->ptl_mtables) /* uninitialized portal */
- return;
-
- LASSERT(list_empty(&ptl->ptl_msg_delayed));
- LASSERT(list_empty(&ptl->ptl_msg_stealing));
- cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
- struct list_head *mhash;
- struct lnet_me *me;
- int j;
-
- if (!mtable->mt_mhash) /* uninitialized match-table */
- continue;
-
- mhash = mtable->mt_mhash;
- /* cleanup ME */
- for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
- while (!list_empty(&mhash[j])) {
- me = list_entry(mhash[j].next,
- struct lnet_me, me_list);
- CERROR("Active ME %p on exit\n", me);
- list_del(&me->me_list);
- kfree(me);
- }
- }
- /* the extra entry is for MEs with ignore bits */
- kvfree(mhash);
- }
-
- cfs_percpt_free(ptl->ptl_mtables);
- ptl->ptl_mtables = NULL;
-}
-
-static int
-lnet_ptl_setup(struct lnet_portal *ptl, int index)
-{
- struct lnet_match_table *mtable;
- struct list_head *mhash;
- int i;
- int j;
-
- ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(),
- sizeof(struct lnet_match_table));
- if (!ptl->ptl_mtables) {
- CERROR("Failed to create match table for portal %d\n", index);
- return -ENOMEM;
- }
-
- ptl->ptl_index = index;
- INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
- INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
- spin_lock_init(&ptl->ptl_lock);
- cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
- /* the extra entry is for MEs with ignore bits */
- mhash = kvzalloc_cpt(sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1),
- GFP_KERNEL, i);
- if (!mhash) {
- CERROR("Failed to create match hash for portal %d\n",
- index);
- goto failed;
- }
-
- memset(&mtable->mt_exhausted[0], -1,
- sizeof(mtable->mt_exhausted[0]) *
- LNET_MT_EXHAUSTED_BMAP);
- mtable->mt_mhash = mhash;
- for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
- INIT_LIST_HEAD(&mhash[j]);
-
- mtable->mt_portal = index;
- mtable->mt_cpt = i;
- }
-
- return 0;
- failed:
- lnet_ptl_cleanup(ptl);
- return -ENOMEM;
-}
-
-void
-lnet_portals_destroy(void)
-{
- int i;
-
- if (!the_lnet.ln_portals)
- return;
-
- for (i = 0; i < the_lnet.ln_nportals; i++)
- lnet_ptl_cleanup(the_lnet.ln_portals[i]);
-
- cfs_array_free(the_lnet.ln_portals);
- the_lnet.ln_portals = NULL;
- the_lnet.ln_nportals = 0;
-}
-
-int
-lnet_portals_create(void)
-{
- int size;
- int i;
-
- size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]);
-
- the_lnet.ln_portals = cfs_array_alloc(MAX_PORTALS, size);
- if (!the_lnet.ln_portals) {
- CERROR("Failed to allocate portals table\n");
- return -ENOMEM;
- }
- the_lnet.ln_nportals = MAX_PORTALS;
-
- for (i = 0; i < the_lnet.ln_nportals; i++) {
- if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
- lnet_portals_destroy();
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-
-/**
- * Turn on the lazy portal attribute. Use with caution!
- *
- * This portal attribute only affects incoming PUT requests to the portal,
- * and is off by default. By default, if there's no matching MD for an
- * incoming PUT request, it is simply dropped. With the lazy attribute on,
- * such requests are queued indefinitely until either a matching MD is
- * posted to the portal or the lazy attribute is turned off.
- *
- * It would prevent dropped requests, however it should be regarded as the
- * last line of defense - i.e. users must keep a close watch on active
- * buffers on a lazy portal and once it becomes too low post more buffers as
- * soon as possible. This is because delayed requests usually have detrimental
- * effects on underlying network connections. A few delayed requests often
- * suffice to bring an underlying connection to a complete halt, due to flow
- * control mechanisms.
- *
- * There's also a DOS attack risk. If users don't post match-all MDs on a
- * lazy portal, a malicious peer can easily stop a service by sending some
- * PUT requests with match bits that won't match any MD. A routed server is
- * especially vulnerable since the connections to its neighbor routers are
- * shared among all clients.
- *
- * \param portal Index of the portal to enable the lazy attribute on.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetSetLazyPortal(int portal)
-{
- struct lnet_portal *ptl;
-
- if (portal < 0 || portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
- ptl = the_lnet.ln_portals[portal];
-
- lnet_res_lock(LNET_LOCK_EX);
- lnet_ptl_lock(ptl);
-
- lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
-
- return 0;
-}
-EXPORT_SYMBOL(LNetSetLazyPortal);
-
-int
-lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
-{
- struct lnet_portal *ptl;
- LIST_HEAD(zombies);
-
- if (portal < 0 || portal >= the_lnet.ln_nportals)
- return -EINVAL;
-
- ptl = the_lnet.ln_portals[portal];
-
- lnet_res_lock(LNET_LOCK_EX);
- lnet_ptl_lock(ptl);
-
- if (!lnet_ptl_is_lazy(ptl)) {
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
- return 0;
- }
-
- if (ni) {
- struct lnet_msg *msg, *tmp;
-
- /* grab all messages which are on the NI passed in */
- list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
- msg_list) {
- if (msg->msg_rxpeer->lp_ni == ni)
- list_move(&msg->msg_list, &zombies);
- }
- } else {
- if (the_lnet.ln_shutdown)
- CWARN("Active lazy portal %d on exit\n", portal);
- else
- CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
-
- /* grab all the blocked messages atomically */
- list_splice_init(&ptl->ptl_msg_delayed, &zombies);
-
- lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
- }
-
- lnet_ptl_unlock(ptl);
- lnet_res_unlock(LNET_LOCK_EX);
-
- lnet_drop_delayed_msg_list(&zombies, reason);
-
- return 0;
-}
-
-/**
- * Turn off the lazy portal attribute. Delayed requests on the portal,
- * if any, will be all dropped when this function returns.
- *
- * \param portal Index of the portal to disable the lazy attribute on.
- *
- * \retval 0 On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
-int
-LNetClearLazyPortal(int portal)
-{
- return lnet_clear_lazy_portal(NULL, portal,
- "Clearing lazy portal attr");
-}
-EXPORT_SYMBOL(LNetClearLazyPortal);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
deleted file mode 100644
index 9b61260..0000000
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ /dev/null
@@ -1,585 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- */
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/if.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/file.h>
-#include <linux/pagemap.h>
-/* For sys_open & sys_close */
-#include <linux/syscalls.h>
-#include <net/sock.h>
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
-{
- mm_segment_t oldfs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
- set_fs(oldfs);
-
- return err;
-}
-
-static int
-lnet_sock_ioctl(int cmd, unsigned long arg)
-{
- struct file *sock_filp;
- struct socket *sock;
- int rc;
-
- rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
- if (rc) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- sock_filp = sock_alloc_file(sock, 0, NULL);
- if (IS_ERR(sock_filp))
- return PTR_ERR(sock_filp);
-
- rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
-
- fput(sock_filp);
- return rc;
-}
-
-int
-lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
-{
- struct ifreq ifr;
- int nob;
- int rc;
- __be32 val;
-
- nob = strnlen(name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- return -EINVAL;
- }
-
- BUILD_BUG_ON(sizeof(ifr.ifr_name) < IFNAMSIZ);
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get flags for interface %s\n", name);
- return rc;
- }
-
- if (!(ifr.ifr_flags & IFF_UP)) {
- CDEBUG(D_NET, "Interface %s down\n", name);
- *up = 0;
- *ip = *mask = 0;
- return 0;
- }
- *up = 1;
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- ifr.ifr_addr.sa_family = AF_INET;
- rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get IP address for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- if (strlen(name) > sizeof(ifr.ifr_name) - 1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- ifr.ifr_addr.sa_family = AF_INET;
- rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
- if (rc) {
- CERROR("Can't get netmask for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
- *mask = ntohl(val);
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_ipif_query);
-
-int
-lnet_ipif_enumerate(char ***namesp)
-{
- /* Allocate and fill in 'names', returning # interfaces/error */
- char **names;
- int toobig;
- int nalloc;
- int nfound;
- struct ifreq *ifr;
- struct ifconf ifc;
- int rc;
- int nob;
- int i;
-
- nalloc = 16; /* first guess at max interfaces */
- toobig = 0;
- for (;;) {
- if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
- toobig = 1;
- nalloc = PAGE_SIZE / sizeof(*ifr);
- CWARN("Too many interfaces: only enumerating first %d\n",
- nalloc);
- }
-
- ifr = kzalloc(nalloc * sizeof(*ifr), GFP_KERNEL);
- if (!ifr) {
- CERROR("ENOMEM enumerating up to %d interfaces\n",
- nalloc);
- rc = -ENOMEM;
- goto out0;
- }
-
- ifc.ifc_buf = (char *)ifr;
- ifc.ifc_len = nalloc * sizeof(*ifr);
-
- rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
- if (rc < 0) {
- CERROR("Error %d enumerating interfaces\n", rc);
- goto out1;
- }
-
- LASSERT(!rc);
-
- nfound = ifc.ifc_len / sizeof(*ifr);
- LASSERT(nfound <= nalloc);
-
- if (nfound < nalloc || toobig)
- break;
-
- kfree(ifr);
- nalloc *= 2;
- }
-
- if (!nfound)
- goto out1;
-
- names = kzalloc(nfound * sizeof(*names), GFP_KERNEL);
- if (!names) {
- rc = -ENOMEM;
- goto out1;
- }
-
- for (i = 0; i < nfound; i++) {
- nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- /* no space for terminating NULL */
- CERROR("interface name %.*s too long (%d max)\n",
- nob, ifr[i].ifr_name, IFNAMSIZ);
- rc = -ENAMETOOLONG;
- goto out2;
- }
-
- names[i] = kmalloc(IFNAMSIZ, GFP_KERNEL);
- if (!names[i]) {
- rc = -ENOMEM;
- goto out2;
- }
-
- memcpy(names[i], ifr[i].ifr_name, nob);
- names[i][nob] = 0;
- }
-
- *namesp = names;
- rc = nfound;
-
-out2:
- if (rc < 0)
- lnet_ipif_free_enumeration(names, nfound);
-out1:
- kfree(ifr);
-out0:
- return rc;
-}
-EXPORT_SYMBOL(lnet_ipif_enumerate);
-
-void
-lnet_ipif_free_enumeration(char **names, int n)
-{
- int i;
-
- LASSERT(n > 0);
-
- for (i = 0; i < n && names[i]; i++)
- kfree(names[i]);
-
- kfree(names);
-}
-EXPORT_SYMBOL(lnet_ipif_free_enumeration);
-
-int
-lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
- unsigned long then;
- struct timeval tv;
- struct kvec iov = { .iov_base = buffer, .iov_len = nob };
- struct msghdr msg = {NULL,};
-
- LASSERT(nob > 0);
- /*
- * Caller may pass a zero timeout if she thinks the socket buffer is
- * empty enough to take the whole message immediately
- */
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
- for (;;) {
- msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
- if (timeout) {
- /* Set send timeout to remaining time */
- jiffies_to_timeval(jiffies_left, &tv);
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
- (char *)&tv, sizeof(tv));
- if (rc) {
- CERROR("Can't set socket send timeout %ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
- }
-
- then = jiffies;
- rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
- jiffies_left -= jiffies - then;
-
- if (rc < 0)
- return rc;
-
- if (!rc) {
- CERROR("Unexpected zero rc\n");
- return -ECONNABORTED;
- }
-
- if (!msg_data_left(&msg))
- break;
-
- if (jiffies_left <= 0)
- return -EAGAIN;
- }
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_write);
-
-int
-lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
- unsigned long then;
- struct timeval tv;
- struct kvec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_flags = 0
- };
-
- LASSERT(nob > 0);
- LASSERT(jiffies_left > 0);
-
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, nob);
-
- for (;;) {
- /* Set receive timeout to remaining time */
- jiffies_to_timeval(jiffies_left, &tv);
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
- (char *)&tv, sizeof(tv));
- if (rc) {
- CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
-
- then = jiffies;
- rc = sock_recvmsg(sock, &msg, 0);
- jiffies_left -= jiffies - then;
-
- if (rc < 0)
- return rc;
-
- if (!rc)
- return -ECONNRESET;
-
- if (!msg_data_left(&msg))
- return 0;
-
- if (jiffies_left <= 0)
- return -ETIMEDOUT;
- }
-}
-EXPORT_SYMBOL(lnet_sock_read);
-
-static int
-lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
- int local_port)
-{
- struct sockaddr_in locaddr;
- struct socket *sock;
- int rc;
- int option;
-
- /* All errors are fatal except bind failure if the port is in use */
- *fatal = 1;
-
- rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
- *sockp = sock;
- if (rc) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- option = 1;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
- goto failed;
- }
-
- if (local_ip || local_port) {
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(local_port);
- if (!local_ip)
- locaddr.sin_addr.s_addr = htonl(INADDR_ANY);
- else
- locaddr.sin_addr.s_addr = htonl(local_ip);
-
- rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
- sizeof(locaddr));
- if (rc == -EADDRINUSE) {
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *fatal = 0;
- goto failed;
- }
- if (rc) {
- CERROR("Error trying to bind to port %d: %d\n",
- local_port, rc);
- goto failed;
- }
- }
- return 0;
-
-failed:
- sock_release(sock);
- return rc;
-}
-
-int
-lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
-{
- int option;
- int rc;
-
- if (txbufsize) {
- option = txbufsize;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set send buffer %d: %d\n",
- option, rc);
- return rc;
- }
- }
-
- if (rxbufsize) {
- option = rxbufsize;
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof(option));
- if (rc) {
- CERROR("Can't set receive buffer %d: %d\n",
- option, rc);
- return rc;
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_setbuf);
-
-int
-lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
-{
- struct sockaddr_in sin;
- int rc;
-
- if (remote)
- rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
- else
- rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
- if (rc < 0) {
- CERROR("Error %d getting sock %s IP/port\n",
- rc, remote ? "peer" : "local");
- return rc;
- }
-
- if (ip)
- *ip = ntohl(sin.sin_addr.s_addr);
-
- if (port)
- *port = ntohs(sin.sin_port);
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getaddr);
-
-int
-lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
-{
- if (txbufsize)
- *txbufsize = sock->sk->sk_sndbuf;
-
- if (rxbufsize)
- *rxbufsize = sock->sk->sk_rcvbuf;
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_sock_getbuf);
-
-int
-lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
- int backlog)
-{
- int fatal;
- int rc;
-
- rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
- if (rc) {
- if (!fatal)
- CERROR("Can't create socket: port %d already in use\n",
- local_port);
- return rc;
- }
-
- rc = kernel_listen(*sockp, backlog);
- if (!rc)
- return 0;
-
- CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
- sock_release(*sockp);
- return rc;
-}
-
-int
-lnet_sock_accept(struct socket **newsockp, struct socket *sock)
-{
- wait_queue_entry_t wait;
- struct socket *newsock;
- int rc;
-
- /*
- * XXX this should add a ref to sock->ops->owner, if
- * TCP could be a module
- */
- rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
- if (rc) {
- CERROR("Can't allocate socket\n");
- return rc;
- }
-
- newsock->ops = sock->ops;
-
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
- if (rc == -EAGAIN) {
- /* Nothing ready, so wait for activity */
- init_waitqueue_entry(&wait, current);
- add_wait_queue(sk_sleep(sock->sk), &wait);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- remove_wait_queue(sk_sleep(sock->sk), &wait);
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
- }
-
- if (rc)
- goto failed;
-
- *newsockp = newsock;
- return 0;
-
-failed:
- sock_release(newsock);
- return rc;
-}
-
-int
-lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
- int local_port, __u32 peer_ip, int peer_port)
-{
- struct sockaddr_in srvaddr;
- int rc;
-
- rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
- if (rc)
- return rc;
-
- memset(&srvaddr, 0, sizeof(srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons(peer_port);
- srvaddr.sin_addr.s_addr = htonl(peer_ip);
-
- rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
- sizeof(srvaddr), 0);
- if (!rc)
- return 0;
-
- /*
- * EADDRNOTAVAIL probably means we're already connected to the same
- * peer/port on the same local port on a differently typed
- * connection. Let our caller retry with a different local
- * port...
- */
- *fatal = !(rc == -EADDRNOTAVAIL);
-
- CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
- "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
- &local_ip, local_port, &peer_ip, peer_port);
-
- sock_release(*sockp);
- return rc;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c
deleted file mode 100644
index 7456b98..0000000
--- a/drivers/staging/lustre/lnet/lnet/lo.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-static int
-lolnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
-{
- LASSERT(!lntmsg->msg_routing);
- LASSERT(!lntmsg->msg_target_is_router);
-
- return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
-}
-
-static int
-lolnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, struct iov_iter *to, unsigned int rlen)
-{
- struct lnet_msg *sendmsg = private;
-
- if (lntmsg) { /* not discarding */
- if (sendmsg->msg_iov)
- lnet_copy_iov2iter(to,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset,
- iov_iter_count(to));
- else
- lnet_copy_kiov2iter(to,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset,
- iov_iter_count(to));
-
- lnet_finalize(ni, lntmsg, 0);
- }
-
- lnet_finalize(ni, sendmsg, 0);
- return 0;
-}
-
-static int lolnd_instanced;
-
-static void
-lolnd_shutdown(struct lnet_ni *ni)
-{
- CDEBUG(D_NET, "shutdown\n");
- LASSERT(lolnd_instanced);
-
- lolnd_instanced = 0;
-}
-
-static int
-lolnd_startup(struct lnet_ni *ni)
-{
- LASSERT(ni->ni_lnd == &the_lolnd);
- LASSERT(!lolnd_instanced);
- lolnd_instanced = 1;
-
- return 0;
-}
-
-struct lnet_lnd the_lolnd = {
- /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
- /* .lnd_refcount = */ 0,
- /* .lnd_type = */ LOLND,
- /* .lnd_startup = */ lolnd_startup,
- /* .lnd_shutdown = */ lolnd_shutdown,
- /* .lnt_ctl = */ NULL,
- /* .lnd_send = */ lolnd_send,
- /* .lnd_recv = */ lolnd_recv,
- /* .lnd_eager_recv = */ NULL,
- /* .lnd_notify = */ NULL,
- /* .lnd_accept = */ NULL
-};
diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c
deleted file mode 100644
index 9d06664..0000000
--- a/drivers/staging/lustre/lnet/lnet/module.c
+++ /dev/null
@@ -1,239 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-static int config_on_load;
-module_param(config_on_load, int, 0444);
-MODULE_PARM_DESC(config_on_load, "configure network at module load");
-
-static struct mutex lnet_config_mutex;
-
-static int
-lnet_configure(void *arg)
-{
- /* 'arg' only there so I can be passed to cfs_create_thread() */
- int rc = 0;
-
- mutex_lock(&lnet_config_mutex);
-
- if (!the_lnet.ln_niinit_self) {
- rc = try_module_get(THIS_MODULE);
-
- if (rc != 1)
- goto out;
-
- rc = LNetNIInit(LNET_PID_LUSTRE);
- if (rc >= 0) {
- the_lnet.ln_niinit_self = 1;
- rc = 0;
- } else {
- module_put(THIS_MODULE);
- }
- }
-
-out:
- mutex_unlock(&lnet_config_mutex);
- return rc;
-}
-
-static int
-lnet_unconfigure(void)
-{
- int refcount;
-
- mutex_lock(&lnet_config_mutex);
-
- if (the_lnet.ln_niinit_self) {
- the_lnet.ln_niinit_self = 0;
- LNetNIFini();
- module_put(THIS_MODULE);
- }
-
- mutex_lock(&the_lnet.ln_api_mutex);
- refcount = the_lnet.ln_refcount;
- mutex_unlock(&the_lnet.ln_api_mutex);
-
- mutex_unlock(&lnet_config_mutex);
- return !refcount ? 0 : -EBUSY;
-}
-
-static int
-lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr)
-{
- struct lnet_ioctl_config_data *conf =
- (struct lnet_ioctl_config_data *)hdr;
- int rc;
-
- if (conf->cfg_hdr.ioc_len < sizeof(*conf))
- return -EINVAL;
-
- mutex_lock(&lnet_config_mutex);
- if (!the_lnet.ln_niinit_self) {
- rc = -EINVAL;
- goto out_unlock;
- }
- rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
-out_unlock:
- mutex_unlock(&lnet_config_mutex);
-
- return rc;
-}
-
-static int
-lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr)
-{
- struct lnet_ioctl_config_data *conf =
- (struct lnet_ioctl_config_data *)hdr;
- int rc;
-
- if (conf->cfg_hdr.ioc_len < sizeof(*conf))
- return -EINVAL;
-
- mutex_lock(&lnet_config_mutex);
- if (!the_lnet.ln_niinit_self) {
- rc = -EINVAL;
- goto out_unlock;
- }
- rc = lnet_dyn_del_ni(conf->cfg_net);
-out_unlock:
- mutex_unlock(&lnet_config_mutex);
-
- return rc;
-}
-
-static int
-lnet_ioctl(struct notifier_block *nb,
- unsigned long cmd, void *vdata)
-{
- int rc;
- struct libcfs_ioctl_hdr *hdr = vdata;
-
- switch (cmd) {
- case IOC_LIBCFS_CONFIGURE: {
- struct libcfs_ioctl_data *data =
- (struct libcfs_ioctl_data *)hdr;
-
- if (data->ioc_hdr.ioc_len < sizeof(*data)) {
- rc = -EINVAL;
- } else {
- the_lnet.ln_nis_from_mod_params = data->ioc_flags;
- rc = lnet_configure(NULL);
- }
- break;
- }
-
- case IOC_LIBCFS_UNCONFIGURE:
- rc = lnet_unconfigure();
- break;
-
- case IOC_LIBCFS_ADD_NET:
- rc = lnet_dyn_configure(hdr);
- break;
-
- case IOC_LIBCFS_DEL_NET:
- rc = lnet_dyn_unconfigure(hdr);
- break;
-
- default:
- /*
- * Passing LNET_PID_ANY only gives me a ref if the net is up
- * already; I'll need it to ensure the net can't go down while
- * I'm called into it
- */
- rc = LNetNIInit(LNET_PID_ANY);
- if (rc >= 0) {
- rc = LNetCtl(cmd, hdr);
- LNetNIFini();
- }
- break;
- }
- return notifier_from_ioctl_errno(rc);
-}
-
-static struct notifier_block lnet_ioctl_handler = {
- .notifier_call = lnet_ioctl,
-};
-
-static int __init lnet_init(void)
-{
- int rc;
-
- mutex_init(&lnet_config_mutex);
-
- rc = libcfs_setup();
- if (rc)
- return rc;
-
- rc = lnet_lib_init();
- if (rc) {
- CERROR("lnet_lib_init: error %d\n", rc);
- return rc;
- }
-
- rc = blocking_notifier_chain_register(&libcfs_ioctl_list,
- &lnet_ioctl_handler);
- LASSERT(!rc);
-
- if (config_on_load) {
- /*
- * Have to schedule a separate thread to avoid deadlocking
- * in modload
- */
- (void)kthread_run(lnet_configure, NULL, "lnet_initd");
- }
-
- return 0;
-}
-
-static void __exit lnet_exit(void)
-{
- int rc;
-
- rc = blocking_notifier_chain_unregister(&libcfs_ioctl_list,
- &lnet_ioctl_handler);
- LASSERT(!rc);
-
- lnet_lib_exit();
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Networking layer");
-MODULE_VERSION(LNET_VERSION);
-MODULE_LICENSE("GPL");
-
-module_init(lnet_init);
-module_exit(lnet_exit);
diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c
deleted file mode 100644
index 0066394..0000000
--- a/drivers/staging/lustre/lnet/lnet/net_fault.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Seagate, Inc.
- *
- * lnet/lnet/net_fault.c
- *
- * Lustre network fault simulation
- *
- * Author: liang.zhen@intel.com
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnetctl.h>
-
-#define LNET_MSG_MASK (LNET_PUT_BIT | LNET_ACK_BIT | \
- LNET_GET_BIT | LNET_REPLY_BIT)
-
-struct lnet_drop_rule {
- /** link chain on the_lnet.ln_drop_rules */
- struct list_head dr_link;
- /** attributes of this rule */
- struct lnet_fault_attr dr_attr;
- /** lock to protect \a dr_drop_at and \a dr_stat */
- spinlock_t dr_lock;
- /**
- * the message sequence to drop, which means message is dropped when
- * dr_stat.drs_count == dr_drop_at
- */
- unsigned long dr_drop_at;
- /**
- * seconds to drop the next message, it's exclusive with dr_drop_at
- */
- unsigned long dr_drop_time;
- /** baseline to caculate dr_drop_time */
- unsigned long dr_time_base;
- /** statistic of dropped messages */
- struct lnet_fault_stat dr_stat;
-};
-
-static bool
-lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid)
-{
- if (nid == msg_nid || nid == LNET_NID_ANY)
- return true;
-
- if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid))
- return false;
-
- /* 255.255.255.255@net is wildcard for all addresses in a network */
- return LNET_NIDADDR(nid) == LNET_NIDADDR(LNET_NID_ANY);
-}
-
-static bool
-lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
- if (!lnet_fault_nid_match(attr->fa_src, src) ||
- !lnet_fault_nid_match(attr->fa_dst, dst))
- return false;
-
- if (!(attr->fa_msg_mask & (1 << type)))
- return false;
-
- /**
- * NB: ACK and REPLY have no portal, but they should have been
- * rejected by message mask
- */
- if (attr->fa_ptl_mask && /* has portal filter */
- !(attr->fa_ptl_mask & (1ULL << portal)))
- return false;
-
- return true;
-}
-
-static int
-lnet_fault_attr_validate(struct lnet_fault_attr *attr)
-{
- if (!attr->fa_msg_mask)
- attr->fa_msg_mask = LNET_MSG_MASK; /* all message types */
-
- if (!attr->fa_ptl_mask) /* no portal filter */
- return 0;
-
- /* NB: only PUT and GET can be filtered if portal filter has been set */
- attr->fa_msg_mask &= LNET_GET_BIT | LNET_PUT_BIT;
- if (!attr->fa_msg_mask) {
- CDEBUG(D_NET, "can't find valid message type bits %x\n",
- attr->fa_msg_mask);
- return -EINVAL;
- }
- return 0;
-}
-
-static void
-lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type)
-{
- /* NB: fs_counter is NOT updated by this function */
- switch (type) {
- case LNET_MSG_PUT:
- stat->fs_put++;
- return;
- case LNET_MSG_ACK:
- stat->fs_ack++;
- return;
- case LNET_MSG_GET:
- stat->fs_get++;
- return;
- case LNET_MSG_REPLY:
- stat->fs_reply++;
- return;
- }
-}
-
-/**
- * LNet message drop simulation
- */
-
-/**
- * Add a new drop rule to LNet
- * There is no check for duplicated drop rule, all rules will be checked for
- * incoming message.
- */
-static int
-lnet_drop_rule_add(struct lnet_fault_attr *attr)
-{
- struct lnet_drop_rule *rule;
-
- if (attr->u.drop.da_rate & attr->u.drop.da_interval) {
- CDEBUG(D_NET, "please provide either drop rate or drop interval, but not both at the same time %d/%d\n",
- attr->u.drop.da_rate, attr->u.drop.da_interval);
- return -EINVAL;
- }
-
- if (lnet_fault_attr_validate(attr))
- return -EINVAL;
-
- rule = kzalloc(sizeof(*rule), GFP_NOFS);
- if (!rule)
- return -ENOMEM;
-
- spin_lock_init(&rule->dr_lock);
-
- rule->dr_attr = *attr;
- if (attr->u.drop.da_interval) {
- rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ;
- rule->dr_drop_time = jiffies +
- prandom_u32_max(attr->u.drop.da_interval) * HZ;
- } else {
- rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- list_add(&rule->dr_link, &the_lnet.ln_drop_rules);
- lnet_net_unlock(LNET_LOCK_EX);
-
- CDEBUG(D_NET, "Added drop rule: src %s, dst %s, rate %d, interval %d\n",
- libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_src),
- attr->u.drop.da_rate, attr->u.drop.da_interval);
- return 0;
-}
-
-/**
- * Remove matched drop rules from lnet, all rules that can match \a src and
- * \a dst will be removed.
- * If \a src is zero, then all rules have \a dst as destination will be remove
- * If \a dst is zero, then all rules have \a src as source will be removed
- * If both of them are zero, all rules will be removed
- */
-static int
-lnet_drop_rule_del(lnet_nid_t src, lnet_nid_t dst)
-{
- struct lnet_drop_rule *rule;
- struct lnet_drop_rule *tmp;
- struct list_head zombies;
- int n = 0;
-
- INIT_LIST_HEAD(&zombies);
-
- lnet_net_lock(LNET_LOCK_EX);
- list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) {
- if (rule->dr_attr.fa_src != src && src)
- continue;
-
- if (rule->dr_attr.fa_dst != dst && dst)
- continue;
-
- list_move(&rule->dr_link, &zombies);
- }
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &zombies, dr_link) {
- CDEBUG(D_NET, "Remove drop rule: src %s->dst: %s (1/%d, %d)\n",
- libcfs_nid2str(rule->dr_attr.fa_src),
- libcfs_nid2str(rule->dr_attr.fa_dst),
- rule->dr_attr.u.drop.da_rate,
- rule->dr_attr.u.drop.da_interval);
-
- list_del(&rule->dr_link);
- kfree(rule);
- n++;
- }
-
- return n;
-}
-
-/**
- * List drop rule at position of \a pos
- */
-static int
-lnet_drop_rule_list(int pos, struct lnet_fault_attr *attr,
- struct lnet_fault_stat *stat)
-{
- struct lnet_drop_rule *rule;
- int cpt;
- int i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- if (i++ < pos)
- continue;
-
- spin_lock(&rule->dr_lock);
- *attr = rule->dr_attr;
- *stat = rule->dr_stat;
- spin_unlock(&rule->dr_lock);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-/**
- * reset counters for all drop rules
- */
-static void
-lnet_drop_rule_reset(void)
-{
- struct lnet_drop_rule *rule;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- struct lnet_fault_attr *attr = &rule->dr_attr;
-
- spin_lock(&rule->dr_lock);
-
- memset(&rule->dr_stat, 0, sizeof(rule->dr_stat));
- if (attr->u.drop.da_rate) {
- rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
- } else {
- rule->dr_drop_time = jiffies +
- prandom_u32_max(attr->u.drop.da_interval) * HZ;
- rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ;
- }
- spin_unlock(&rule->dr_lock);
- }
-
- lnet_net_unlock(cpt);
-}
-
-/**
- * check source/destination NID, portal, message type and drop rate,
- * decide whether should drop this message or not
- */
-static bool
-drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
-{
- struct lnet_fault_attr *attr = &rule->dr_attr;
- bool drop;
-
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
- return false;
-
- /* match this rule, check drop rate now */
- spin_lock(&rule->dr_lock);
- if (rule->dr_drop_time) { /* time based drop */
- unsigned long now = jiffies;
-
- rule->dr_stat.fs_count++;
- drop = time_after_eq(now, rule->dr_drop_time);
- if (drop) {
- if (time_after(now, rule->dr_time_base))
- rule->dr_time_base = now;
-
- rule->dr_drop_time = rule->dr_time_base +
- prandom_u32_max(attr->u.drop.da_interval) * HZ;
- rule->dr_time_base += attr->u.drop.da_interval * HZ;
-
- CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst),
- rule->dr_drop_time);
- }
-
- } else { /* rate based drop */
- drop = rule->dr_stat.fs_count++ == rule->dr_drop_at;
-
- if (!do_div(rule->dr_stat.fs_count, attr->u.drop.da_rate)) {
- rule->dr_drop_at = rule->dr_stat.fs_count +
- prandom_u32_max(attr->u.drop.da_rate);
- CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst), rule->dr_drop_at);
- }
- }
-
- if (drop) { /* drop this message, update counters */
- lnet_fault_stat_inc(&rule->dr_stat, type);
- rule->dr_stat.u.drop.ds_dropped++;
- }
-
- spin_unlock(&rule->dr_lock);
- return drop;
-}
-
-/**
- * Check if message from \a src to \a dst can match any existed drop rule
- */
-bool
-lnet_drop_rule_match(struct lnet_hdr *hdr)
-{
- struct lnet_drop_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
- unsigned int ptl = -1;
- bool drop = false;
- int cpt;
-
- /**
- * NB: if Portal is specified, then only PUT and GET will be
- * filtered by drop rule
- */
- if (typ == LNET_MSG_PUT)
- ptl = le32_to_cpu(hdr->msg.put.ptl_index);
- else if (typ == LNET_MSG_GET)
- ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- drop = drop_rule_match(rule, src, dst, typ, ptl);
- if (drop)
- break;
- }
-
- lnet_net_unlock(cpt);
- return drop;
-}
-
-/**
- * LNet Delay Simulation
- */
-/** timestamp (second) to send delayed message */
-#define msg_delay_send msg_ev.hdr_data
-
-struct lnet_delay_rule {
- /** link chain on the_lnet.ln_delay_rules */
- struct list_head dl_link;
- /** link chain on delay_dd.dd_sched_rules */
- struct list_head dl_sched_link;
- /** attributes of this rule */
- struct lnet_fault_attr dl_attr;
- /** lock to protect \a below members */
- spinlock_t dl_lock;
- /** refcount of delay rule */
- atomic_t dl_refcount;
- /**
- * the message sequence to delay, which means message is delayed when
- * dl_stat.fs_count == dl_delay_at
- */
- unsigned long dl_delay_at;
- /**
- * seconds to delay the next message, it's exclusive with dl_delay_at
- */
- unsigned long dl_delay_time;
- /** baseline to caculate dl_delay_time */
- unsigned long dl_time_base;
- /** jiffies to send the next delayed message */
- unsigned long dl_msg_send;
- /** delayed message list */
- struct list_head dl_msg_list;
- /** statistic of delayed messages */
- struct lnet_fault_stat dl_stat;
- /** timer to wakeup delay_daemon */
- struct timer_list dl_timer;
-};
-
-struct delay_daemon_data {
- /** serialise rule add/remove */
- struct mutex dd_mutex;
- /** protect rules on \a dd_sched_rules */
- spinlock_t dd_lock;
- /** scheduled delay rules (by timer) */
- struct list_head dd_sched_rules;
- /** daemon thread sleeps at here */
- wait_queue_head_t dd_waitq;
- /** controller (lctl command) wait at here */
- wait_queue_head_t dd_ctl_waitq;
- /** daemon is running */
- unsigned int dd_running;
- /** daemon stopped */
- unsigned int dd_stopped;
-};
-
-static struct delay_daemon_data delay_dd;
-
-static unsigned long
-round_timeout(unsigned long timeout)
-{
- return (unsigned int)rounddown(timeout, HZ) + HZ;
-}
-
-static void
-delay_rule_decref(struct lnet_delay_rule *rule)
-{
- if (atomic_dec_and_test(&rule->dl_refcount)) {
- LASSERT(list_empty(&rule->dl_sched_link));
- LASSERT(list_empty(&rule->dl_msg_list));
- LASSERT(list_empty(&rule->dl_link));
-
- kfree(rule);
- }
-}
-
-/**
- * check source/destination NID, portal, message type and delay rate,
- * decide whether should delay this message or not
- */
-static bool
-delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal,
- struct lnet_msg *msg)
-{
- struct lnet_fault_attr *attr = &rule->dl_attr;
- bool delay;
-
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
- return false;
-
- /* match this rule, check delay rate now */
- spin_lock(&rule->dl_lock);
- if (rule->dl_delay_time) { /* time based delay */
- unsigned long now = jiffies;
-
- rule->dl_stat.fs_count++;
- delay = time_after_eq(now, rule->dl_delay_time);
- if (delay) {
- if (time_after(now, rule->dl_time_base))
- rule->dl_time_base = now;
-
- rule->dl_delay_time = rule->dl_time_base +
- prandom_u32_max(attr->u.delay.la_interval) * HZ;
- rule->dl_time_base += attr->u.delay.la_interval * HZ;
-
- CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst),
- rule->dl_delay_time);
- }
-
- } else { /* rate based delay */
- delay = rule->dl_stat.fs_count++ == rule->dl_delay_at;
- /* generate the next random rate sequence */
- if (!do_div(rule->dl_stat.fs_count, attr->u.delay.la_rate)) {
- rule->dl_delay_at = rule->dl_stat.fs_count +
- prandom_u32_max(attr->u.delay.la_rate);
- CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst), rule->dl_delay_at);
- }
- }
-
- if (!delay) {
- spin_unlock(&rule->dl_lock);
- return false;
- }
-
- /* delay this message, update counters */
- lnet_fault_stat_inc(&rule->dl_stat, type);
- rule->dl_stat.u.delay.ls_delayed++;
-
- list_add_tail(&msg->msg_list, &rule->dl_msg_list);
- msg->msg_delay_send = round_timeout(
- jiffies + attr->u.delay.la_latency * HZ);
- if (rule->dl_msg_send == -1) {
- rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
- }
-
- spin_unlock(&rule->dl_lock);
- return true;
-}
-
-/**
- * check if \a msg can match any Delay Rule, receiving of this message
- * will be delayed if there is a match.
- */
-bool
-lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg)
-{
- struct lnet_delay_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
- unsigned int ptl = -1;
-
- /* NB: called with hold of lnet_net_lock */
-
- /**
- * NB: if Portal is specified, then only PUT and GET will be
- * filtered by delay rule
- */
- if (typ == LNET_MSG_PUT)
- ptl = le32_to_cpu(hdr->msg.put.ptl_index);
- else if (typ == LNET_MSG_GET)
- ptl = le32_to_cpu(hdr->msg.get.ptl_index);
-
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- if (delay_rule_match(rule, src, dst, typ, ptl, msg))
- return true;
- }
-
- return false;
-}
-
-/** check out delayed messages for send */
-static void
-delayed_msg_check(struct lnet_delay_rule *rule, bool all,
- struct list_head *msg_list)
-{
- struct lnet_msg *msg;
- struct lnet_msg *tmp;
- unsigned long now = jiffies;
-
- if (!all && rule->dl_msg_send > now)
- return;
-
- spin_lock(&rule->dl_lock);
- list_for_each_entry_safe(msg, tmp, &rule->dl_msg_list, msg_list) {
- if (!all && msg->msg_delay_send > now)
- break;
-
- msg->msg_delay_send = 0;
- list_move_tail(&msg->msg_list, msg_list);
- }
-
- if (list_empty(&rule->dl_msg_list)) {
- del_timer(&rule->dl_timer);
- rule->dl_msg_send = -1;
-
- } else if (!list_empty(msg_list)) {
- /*
- * dequeued some timedout messages, update timer for the
- * next delayed message on rule
- */
- msg = list_entry(rule->dl_msg_list.next,
- struct lnet_msg, msg_list);
- rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
- }
- spin_unlock(&rule->dl_lock);
-}
-
-static void
-delayed_msg_process(struct list_head *msg_list, bool drop)
-{
- struct lnet_msg *msg;
-
- while (!list_empty(msg_list)) {
- struct lnet_ni *ni;
- int cpt;
- int rc;
-
- msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
- LASSERT(msg->msg_rxpeer);
-
- ni = msg->msg_rxpeer->lp_ni;
- cpt = msg->msg_rx_cpt;
-
- list_del_init(&msg->msg_list);
- if (drop) {
- rc = -ECANCELED;
-
- } else if (!msg->msg_routing) {
- rc = lnet_parse_local(ni, msg);
- if (!rc)
- continue;
-
- } else {
- lnet_net_lock(cpt);
- rc = lnet_parse_forward_locked(ni, msg);
- lnet_net_unlock(cpt);
-
- switch (rc) {
- case LNET_CREDIT_OK:
- lnet_ni_recv(ni, msg->msg_private, msg, 0,
- 0, msg->msg_len, msg->msg_len);
- /* fall through */
- case LNET_CREDIT_WAIT:
- continue;
- default: /* failures */
- break;
- }
- }
-
- lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len);
- lnet_finalize(ni, msg, rc);
- }
-}
-
-/**
- * Process delayed messages for scheduled rules
- * This function can either be called by delay_rule_daemon, or by lnet_finalise
- */
-void
-lnet_delay_rule_check(void)
-{
- struct lnet_delay_rule *rule;
- struct list_head msgs;
-
- INIT_LIST_HEAD(&msgs);
- while (1) {
- if (list_empty(&delay_dd.dd_sched_rules))
- break;
-
- spin_lock_bh(&delay_dd.dd_lock);
- if (list_empty(&delay_dd.dd_sched_rules)) {
- spin_unlock_bh(&delay_dd.dd_lock);
- break;
- }
-
- rule = list_entry(delay_dd.dd_sched_rules.next,
- struct lnet_delay_rule, dl_sched_link);
- list_del_init(&rule->dl_sched_link);
- spin_unlock_bh(&delay_dd.dd_lock);
-
- delayed_msg_check(rule, false, &msgs);
- delay_rule_decref(rule); /* -1 for delay_dd.dd_sched_rules */
- }
-
- if (!list_empty(&msgs))
- delayed_msg_process(&msgs, false);
-}
-
-/** daemon thread to handle delayed messages */
-static int
-lnet_delay_rule_daemon(void *arg)
-{
- delay_dd.dd_running = 1;
- wake_up(&delay_dd.dd_ctl_waitq);
-
- while (delay_dd.dd_running) {
- wait_event_interruptible(delay_dd.dd_waitq,
- !delay_dd.dd_running ||
- !list_empty(&delay_dd.dd_sched_rules));
- lnet_delay_rule_check();
- }
-
- /* in case more rules have been enqueued after my last check */
- lnet_delay_rule_check();
- delay_dd.dd_stopped = 1;
- wake_up(&delay_dd.dd_ctl_waitq);
-
- return 0;
-}
-
-static void
-delay_timer_cb(struct timer_list *t)
-{
- struct lnet_delay_rule *rule = from_timer(rule, t, dl_timer);
-
- spin_lock_bh(&delay_dd.dd_lock);
- if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) {
- atomic_inc(&rule->dl_refcount);
- list_add_tail(&rule->dl_sched_link, &delay_dd.dd_sched_rules);
- wake_up(&delay_dd.dd_waitq);
- }
- spin_unlock_bh(&delay_dd.dd_lock);
-}
-
-/**
- * Add a new delay rule to LNet
- * There is no check for duplicated delay rule, all rules will be checked for
- * incoming message.
- */
-int
-lnet_delay_rule_add(struct lnet_fault_attr *attr)
-{
- struct lnet_delay_rule *rule;
- int rc = 0;
-
- if (attr->u.delay.la_rate & attr->u.delay.la_interval) {
- CDEBUG(D_NET, "please provide either delay rate or delay interval, but not both at the same time %d/%d\n",
- attr->u.delay.la_rate, attr->u.delay.la_interval);
- return -EINVAL;
- }
-
- if (!attr->u.delay.la_latency) {
- CDEBUG(D_NET, "delay latency cannot be zero\n");
- return -EINVAL;
- }
-
- if (lnet_fault_attr_validate(attr))
- return -EINVAL;
-
- rule = kzalloc(sizeof(*rule), GFP_NOFS);
- if (!rule)
- return -ENOMEM;
-
- mutex_lock(&delay_dd.dd_mutex);
- if (!delay_dd.dd_running) {
- struct task_struct *task;
-
- /**
- * NB: although LND threads will process delayed message
- * in lnet_finalize, but there is no guarantee that LND
- * threads will be waken up if no other message needs to
- * be handled.
- * Only one daemon thread, performance is not the concern
- * of this simualation module.
- */
- task = kthread_run(lnet_delay_rule_daemon, NULL, "lnet_dd");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- goto failed;
- }
- wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running);
- }
-
- timer_setup(&rule->dl_timer, delay_timer_cb, 0);
-
- spin_lock_init(&rule->dl_lock);
- INIT_LIST_HEAD(&rule->dl_msg_list);
- INIT_LIST_HEAD(&rule->dl_sched_link);
-
- rule->dl_attr = *attr;
- if (attr->u.delay.la_interval) {
- rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ;
- rule->dl_delay_time = jiffies +
- prandom_u32_max(attr->u.delay.la_interval) * HZ;
- } else {
- rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
- }
-
- rule->dl_msg_send = -1;
-
- lnet_net_lock(LNET_LOCK_EX);
- atomic_set(&rule->dl_refcount, 1);
- list_add(&rule->dl_link, &the_lnet.ln_delay_rules);
- lnet_net_unlock(LNET_LOCK_EX);
-
- CDEBUG(D_NET, "Added delay rule: src %s, dst %s, rate %d\n",
- libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_src),
- attr->u.delay.la_rate);
-
- mutex_unlock(&delay_dd.dd_mutex);
- return 0;
-failed:
- mutex_unlock(&delay_dd.dd_mutex);
- kfree(rule);
- return rc;
-}
-
-/**
- * Remove matched Delay Rules from lnet, if \a shutdown is true or both \a src
- * and \a dst are zero, all rules will be removed, otherwise only matched rules
- * will be removed.
- * If \a src is zero, then all rules have \a dst as destination will be remove
- * If \a dst is zero, then all rules have \a src as source will be removed
- *
- * When a delay rule is removed, all delayed messages of this rule will be
- * processed immediately.
- */
-int
-lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown)
-{
- struct lnet_delay_rule *rule;
- struct lnet_delay_rule *tmp;
- struct list_head rule_list;
- struct list_head msg_list;
- int n = 0;
- bool cleanup;
-
- INIT_LIST_HEAD(&rule_list);
- INIT_LIST_HEAD(&msg_list);
-
- if (shutdown) {
- src = 0;
- dst = 0;
- }
-
- mutex_lock(&delay_dd.dd_mutex);
- lnet_net_lock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) {
- if (rule->dl_attr.fa_src != src && src)
- continue;
-
- if (rule->dl_attr.fa_dst != dst && dst)
- continue;
-
- CDEBUG(D_NET, "Remove delay rule: src %s->dst: %s (1/%d, %d)\n",
- libcfs_nid2str(rule->dl_attr.fa_src),
- libcfs_nid2str(rule->dl_attr.fa_dst),
- rule->dl_attr.u.delay.la_rate,
- rule->dl_attr.u.delay.la_interval);
- /* refcount is taken over by rule_list */
- list_move(&rule->dl_link, &rule_list);
- }
-
- /* check if we need to shutdown delay_daemon */
- cleanup = list_empty(&the_lnet.ln_delay_rules) &&
- !list_empty(&rule_list);
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry_safe(rule, tmp, &rule_list, dl_link) {
- list_del_init(&rule->dl_link);
-
- del_timer_sync(&rule->dl_timer);
- delayed_msg_check(rule, true, &msg_list);
- delay_rule_decref(rule); /* -1 for the_lnet.ln_delay_rules */
- n++;
- }
-
- if (cleanup) { /* no more delay rule, shutdown delay_daemon */
- LASSERT(delay_dd.dd_running);
- delay_dd.dd_running = 0;
- wake_up(&delay_dd.dd_waitq);
-
- while (!delay_dd.dd_stopped)
- wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_stopped);
- }
- mutex_unlock(&delay_dd.dd_mutex);
-
- if (!list_empty(&msg_list))
- delayed_msg_process(&msg_list, shutdown);
-
- return n;
-}
-
-/**
- * List Delay Rule at position of \a pos
- */
-int
-lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
- struct lnet_fault_stat *stat)
-{
- struct lnet_delay_rule *rule;
- int cpt;
- int i = 0;
- int rc = -ENOENT;
-
- cpt = lnet_net_lock_current();
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- if (i++ < pos)
- continue;
-
- spin_lock(&rule->dl_lock);
- *attr = rule->dl_attr;
- *stat = rule->dl_stat;
- spin_unlock(&rule->dl_lock);
- rc = 0;
- break;
- }
-
- lnet_net_unlock(cpt);
- return rc;
-}
-
-/**
- * reset counters for all Delay Rules
- */
-void
-lnet_delay_rule_reset(void)
-{
- struct lnet_delay_rule *rule;
- int cpt;
-
- cpt = lnet_net_lock_current();
-
- list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- struct lnet_fault_attr *attr = &rule->dl_attr;
-
- spin_lock(&rule->dl_lock);
-
- memset(&rule->dl_stat, 0, sizeof(rule->dl_stat));
- if (attr->u.delay.la_rate) {
- rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
- } else {
- rule->dl_delay_time =
- jiffies + prandom_u32_max(
- attr->u.delay.la_interval) * HZ;
- rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ;
- }
- spin_unlock(&rule->dl_lock);
- }
-
- lnet_net_unlock(cpt);
-}
-
-int
-lnet_fault_ctl(int opc, struct libcfs_ioctl_data *data)
-{
- struct lnet_fault_attr *attr;
- struct lnet_fault_stat *stat;
-
- attr = (struct lnet_fault_attr *)data->ioc_inlbuf1;
-
- switch (opc) {
- default:
- return -EINVAL;
-
- case LNET_CTL_DROP_ADD:
- if (!attr)
- return -EINVAL;
-
- return lnet_drop_rule_add(attr);
-
- case LNET_CTL_DROP_DEL:
- if (!attr)
- return -EINVAL;
-
- data->ioc_count = lnet_drop_rule_del(attr->fa_src,
- attr->fa_dst);
- return 0;
-
- case LNET_CTL_DROP_RESET:
- lnet_drop_rule_reset();
- return 0;
-
- case LNET_CTL_DROP_LIST:
- stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
- if (!attr || !stat)
- return -EINVAL;
-
- return lnet_drop_rule_list(data->ioc_count, attr, stat);
-
- case LNET_CTL_DELAY_ADD:
- if (!attr)
- return -EINVAL;
-
- return lnet_delay_rule_add(attr);
-
- case LNET_CTL_DELAY_DEL:
- if (!attr)
- return -EINVAL;
-
- data->ioc_count = lnet_delay_rule_del(attr->fa_src,
- attr->fa_dst, false);
- return 0;
-
- case LNET_CTL_DELAY_RESET:
- lnet_delay_rule_reset();
- return 0;
-
- case LNET_CTL_DELAY_LIST:
- stat = (struct lnet_fault_stat *)data->ioc_inlbuf2;
- if (!attr || !stat)
- return -EINVAL;
-
- return lnet_delay_rule_list(data->ioc_count, attr, stat);
- }
-}
-
-int
-lnet_fault_init(void)
-{
- BUILD_BUG_ON(LNET_PUT_BIT != 1 << LNET_MSG_PUT);
- BUILD_BUG_ON(LNET_ACK_BIT != 1 << LNET_MSG_ACK);
- BUILD_BUG_ON(LNET_GET_BIT != 1 << LNET_MSG_GET);
- BUILD_BUG_ON(LNET_REPLY_BIT != 1 << LNET_MSG_REPLY);
-
- mutex_init(&delay_dd.dd_mutex);
- spin_lock_init(&delay_dd.dd_lock);
- init_waitqueue_head(&delay_dd.dd_waitq);
- init_waitqueue_head(&delay_dd.dd_ctl_waitq);
- INIT_LIST_HEAD(&delay_dd.dd_sched_rules);
-
- return 0;
-}
-
-void
-lnet_fault_fini(void)
-{
- lnet_drop_rule_del(0, 0);
- lnet_delay_rule_del(0, 0, true);
-
- LASSERT(list_empty(&the_lnet.ln_drop_rules));
- LASSERT(list_empty(&the_lnet.ln_delay_rules));
- LASSERT(list_empty(&delay_dd.dd_sched_rules));
-}
diff --git a/drivers/staging/lustre/lnet/lnet/nidstrings.c b/drivers/staging/lustre/lnet/lnet/nidstrings.c
deleted file mode 100644
index 0f6c3fa..0000000
--- a/drivers/staging/lustre/lnet/lnet/nidstrings.c
+++ /dev/null
@@ -1,1261 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/nidstrings.c
- *
- * Author: Phil Schwan <phil@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/libcfs/libcfs.h>
-#include <linux/libcfs/libcfs_string.h>
-#include <uapi/linux/lnet/nidstr.h>
-
-/* max value for numeric network address */
-#define MAX_NUMERIC_VALUE 0xffffffff
-
-#define IPSTRING_LENGTH 16
-
-/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids
- * consistent in all conversion functions. Some code fragments are copied
- * around for the sake of clarity...
- */
-
-/* CAVEAT EMPTOR! Racey temporary buffer allocation!
- * Choose the number of nidstrings to support the MAXIMUM expected number of
- * concurrent users. If there are more, the returned string will be volatile.
- * NB this number must allow for a process to be descheduled for a timeslice
- * between getting its string and using it.
- */
-
-static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE];
-static int libcfs_nidstring_idx;
-
-static DEFINE_SPINLOCK(libcfs_nidstring_lock);
-
-static struct netstrfns *libcfs_namenum2netstrfns(const char *name);
-
-char *
-libcfs_next_nidstring(void)
-{
- char *str;
- unsigned long flags;
-
- spin_lock_irqsave(&libcfs_nidstring_lock, flags);
-
- str = libcfs_nidstrings[libcfs_nidstring_idx++];
- if (libcfs_nidstring_idx == ARRAY_SIZE(libcfs_nidstrings))
- libcfs_nidstring_idx = 0;
-
- spin_unlock_irqrestore(&libcfs_nidstring_lock, flags);
- return str;
-}
-EXPORT_SYMBOL(libcfs_next_nidstring);
-
-/**
- * Nid range list syntax.
- * \verbatim
- *
- * <nidlist> :== <nidrange> [ ' ' <nidrange> ]
- * <nidrange> :== <addrrange> '@' <net>
- * <addrrange> :== '*' |
- * <ipaddr_range> |
- * <cfs_expr_list>
- * <ipaddr_range> :== <cfs_expr_list>.<cfs_expr_list>.<cfs_expr_list>.
- * <cfs_expr_list>
- * <cfs_expr_list> :== <number> |
- * <expr_list>
- * <expr_list> :== '[' <range_expr> [ ',' <range_expr>] ']'
- * <range_expr> :== <number> |
- * <number> '-' <number> |
- * <number> '-' <number> '/' <number>
- * <net> :== <netname> | <netname><number>
- * <netname> :== "lo" | "tcp" | "o2ib" | "cib" | "openib" | "iib" |
- * "vib" | "ra" | "elan" | "mx" | "ptl"
- * \endverbatim
- */
-
-/**
- * Structure to represent \<nidrange\> token of the syntax.
- *
- * One of this is created for each \<net\> parsed.
- */
-struct nidrange {
- /**
- * Link to list of this structures which is built on nid range
- * list parsing.
- */
- struct list_head nr_link;
- /**
- * List head for addrrange::ar_link.
- */
- struct list_head nr_addrranges;
- /**
- * Flag indicating that *@<net> is found.
- */
- int nr_all;
- /**
- * Pointer to corresponding element of libcfs_netstrfns.
- */
- struct netstrfns *nr_netstrfns;
- /**
- * Number of network. E.g. 5 if \<net\> is "elan5".
- */
- int nr_netnum;
-};
-
-/**
- * Structure to represent \<addrrange\> token of the syntax.
- */
-struct addrrange {
- /**
- * Link to nidrange::nr_addrranges.
- */
- struct list_head ar_link;
- /**
- * List head for cfs_expr_list::el_list.
- */
- struct list_head ar_numaddr_ranges;
-};
-
-/**
- * Parses \<addrrange\> token on the syntax.
- *
- * Allocates struct addrrange and links to \a nidrange via
- * (nidrange::nr_addrranges)
- *
- * \retval 0 if \a src parses to '*' | \<ipaddr_range\> | \<cfs_expr_list\>
- * \retval -errno otherwise
- */
-static int
-parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange)
-{
- struct addrrange *addrrange;
-
- if (src->ls_len == 1 && src->ls_str[0] == '*') {
- nidrange->nr_all = 1;
- return 0;
- }
-
- addrrange = kzalloc(sizeof(struct addrrange), GFP_NOFS);
- if (!addrrange)
- return -ENOMEM;
- list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges);
- INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges);
-
- return nidrange->nr_netstrfns->nf_parse_addrlist(src->ls_str,
- src->ls_len,
- &addrrange->ar_numaddr_ranges);
-}
-
-/**
- * Finds or creates struct nidrange.
- *
- * Checks if \a src is a valid network name, looks for corresponding
- * nidrange on the ist of nidranges (\a nidlist), creates new struct
- * nidrange if it is not found.
- *
- * \retval pointer to struct nidrange matching network specified via \a src
- * \retval NULL if \a src does not match any network
- */
-static struct nidrange *
-add_nidrange(const struct cfs_lstr *src,
- struct list_head *nidlist)
-{
- struct netstrfns *nf;
- struct nidrange *nr;
- int endlen;
- unsigned int netnum;
-
- if (src->ls_len >= LNET_NIDSTR_SIZE)
- return NULL;
-
- nf = libcfs_namenum2netstrfns(src->ls_str);
- if (!nf)
- return NULL;
- endlen = src->ls_len - strlen(nf->nf_name);
- if (!endlen)
- /* network name only, e.g. "elan" or "tcp" */
- netnum = 0;
- else {
- /*
- * e.g. "elan25" or "tcp23", refuse to parse if
- * network name is not appended with decimal or
- * hexadecimal number
- */
- if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name),
- endlen, &netnum, 0, MAX_NUMERIC_VALUE))
- return NULL;
- }
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (nr->nr_netstrfns != nf)
- continue;
- if (nr->nr_netnum != netnum)
- continue;
- return nr;
- }
-
- nr = kzalloc(sizeof(struct nidrange), GFP_NOFS);
- if (!nr)
- return NULL;
- list_add_tail(&nr->nr_link, nidlist);
- INIT_LIST_HEAD(&nr->nr_addrranges);
- nr->nr_netstrfns = nf;
- nr->nr_all = 0;
- nr->nr_netnum = netnum;
-
- return nr;
-}
-
-/**
- * Parses \<nidrange\> token of the syntax.
- *
- * \retval 1 if \a src parses to \<addrrange\> '@' \<net\>
- * \retval 0 otherwise
- */
-static int
-parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist)
-{
- struct cfs_lstr addrrange;
- struct cfs_lstr net;
- struct nidrange *nr;
-
- if (!cfs_gettok(src, '@', &addrrange))
- goto failed;
-
- if (!cfs_gettok(src, '@', &net) || src->ls_str)
- goto failed;
-
- nr = add_nidrange(&net, nidlist);
- if (!nr)
- goto failed;
-
- if (parse_addrange(&addrrange, nr))
- goto failed;
-
- return 1;
-failed:
- return 0;
-}
-
-/**
- * Frees addrrange structures of \a list.
- *
- * For each struct addrrange structure found on \a list it frees
- * cfs_expr_list list attached to it and frees the addrrange itself.
- *
- * \retval none
- */
-static void
-free_addrranges(struct list_head *list)
-{
- while (!list_empty(list)) {
- struct addrrange *ar;
-
- ar = list_entry(list->next, struct addrrange, ar_link);
-
- cfs_expr_list_free_list(&ar->ar_numaddr_ranges);
- list_del(&ar->ar_link);
- kfree(ar);
- }
-}
-
-/**
- * Frees nidrange strutures of \a list.
- *
- * For each struct nidrange structure found on \a list it frees
- * addrrange list attached to it and frees the nidrange itself.
- *
- * \retval none
- */
-void
-cfs_free_nidlist(struct list_head *list)
-{
- struct list_head *pos, *next;
- struct nidrange *nr;
-
- list_for_each_safe(pos, next, list) {
- nr = list_entry(pos, struct nidrange, nr_link);
- free_addrranges(&nr->nr_addrranges);
- list_del(pos);
- kfree(nr);
- }
-}
-EXPORT_SYMBOL(cfs_free_nidlist);
-
-/**
- * Parses nid range list.
- *
- * Parses with rigorous syntax and overflow checking \a str into
- * \<nidrange\> [ ' ' \<nidrange\> ], compiles \a str into set of
- * structures and links that structure to \a nidlist. The resulting
- * list can be used to match a NID againts set of NIDS defined by \a
- * str.
- * \see cfs_match_nid
- *
- * \retval 1 on success
- * \retval 0 otherwise
- */
-int
-cfs_parse_nidlist(char *str, int len, struct list_head *nidlist)
-{
- struct cfs_lstr src;
- struct cfs_lstr res;
- int rc;
-
- src.ls_str = str;
- src.ls_len = len;
- INIT_LIST_HEAD(nidlist);
- while (src.ls_str) {
- rc = cfs_gettok(&src, ' ', &res);
- if (!rc) {
- cfs_free_nidlist(nidlist);
- return 0;
- }
- rc = parse_nidrange(&res, nidlist);
- if (!rc) {
- cfs_free_nidlist(nidlist);
- return 0;
- }
- }
- return 1;
-}
-EXPORT_SYMBOL(cfs_parse_nidlist);
-
-/**
- * Matches a nid (\a nid) against the compiled list of nidranges (\a nidlist).
- *
- * \see cfs_parse_nidlist()
- *
- * \retval 1 on match
- * \retval 0 otherwises
- */
-int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (nr->nr_netstrfns->nf_type != LNET_NETTYP(LNET_NIDNET(nid)))
- continue;
- if (nr->nr_netnum != LNET_NETNUM(LNET_NIDNET(nid)))
- continue;
- if (nr->nr_all)
- return 1;
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link)
- if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid),
- &ar->ar_numaddr_ranges))
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(cfs_match_nid);
-
-/**
- * Print the network part of the nidrange \a nr into the specified \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_network(char *buffer, int count, struct nidrange *nr)
-{
- struct netstrfns *nf = nr->nr_netstrfns;
-
- if (!nr->nr_netnum)
- return scnprintf(buffer, count, "@%s", nf->nf_name);
- else
- return scnprintf(buffer, count, "@%s%u",
- nf->nf_name, nr->nr_netnum);
-}
-
-/**
- * Print a list of addrrange (\a addrranges) into the specified \a buffer.
- * At max \a count characters can be printed into \a buffer.
- *
- * \retval number of characters written
- */
-static int
-cfs_print_addrranges(char *buffer, int count, struct list_head *addrranges,
- struct nidrange *nr)
-{
- int i = 0;
- struct addrrange *ar;
- struct netstrfns *nf = nr->nr_netstrfns;
-
- list_for_each_entry(ar, addrranges, ar_link) {
- if (i)
- i += scnprintf(buffer + i, count - i, " ");
- i += nf->nf_print_addrlist(buffer + i, count - i,
- &ar->ar_numaddr_ranges);
- i += cfs_print_network(buffer + i, count - i, nr);
- }
- return i;
-}
-
-/**
- * Print a list of nidranges (\a nidlist) into the specified \a buffer.
- * At max \a count characters can be printed into \a buffer.
- * Nidranges are separated by a space character.
- *
- * \retval number of characters written
- */
-int cfs_print_nidlist(char *buffer, int count, struct list_head *nidlist)
-{
- int i = 0;
- struct nidrange *nr;
-
- if (count <= 0)
- return 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- if (i)
- i += scnprintf(buffer + i, count - i, " ");
-
- if (nr->nr_all) {
- LASSERT(list_empty(&nr->nr_addrranges));
- i += scnprintf(buffer + i, count - i, "*");
- i += cfs_print_network(buffer + i, count - i, nr);
- } else {
- i += cfs_print_addrranges(buffer + i, count - i,
- &nr->nr_addrranges, nr);
- }
- }
- return i;
-}
-EXPORT_SYMBOL(cfs_print_nidlist);
-
-/**
- * Determines minimum and maximum addresses for a single
- * numeric address range
- *
- * \param ar
- * \param min_nid
- * \param max_nid
- */
-static void cfs_ip_ar_min_max(struct addrrange *ar, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- __u32 tmp_ip_addr = 0;
- unsigned int min_ip[4] = {0};
- unsigned int max_ip[4] = {0};
- int re_count = 0;
-
- list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
- list_for_each_entry(re, &el->el_exprs, re_link) {
- min_ip[re_count] = re->re_lo;
- max_ip[re_count] = re->re_hi;
- re_count++;
- }
- }
-
- tmp_ip_addr = ((min_ip[0] << 24) | (min_ip[1] << 16) |
- (min_ip[2] << 8) | min_ip[3]);
-
- if (min_nid)
- *min_nid = tmp_ip_addr;
-
- tmp_ip_addr = ((max_ip[0] << 24) | (max_ip[1] << 16) |
- (max_ip[2] << 8) | max_ip[3]);
-
- if (max_nid)
- *max_nid = tmp_ip_addr;
-}
-
-/**
- * Determines minimum and maximum addresses for a single
- * numeric address range
- *
- * \param ar
- * \param min_nid
- * \param max_nid
- */
-static void cfs_num_ar_min_max(struct addrrange *ar, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- unsigned int min_addr = 0;
- unsigned int max_addr = 0;
-
- list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) {
- list_for_each_entry(re, &el->el_exprs, re_link) {
- if (re->re_lo < min_addr || !min_addr)
- min_addr = re->re_lo;
- if (re->re_hi > max_addr)
- max_addr = re->re_hi;
- }
- }
-
- if (min_nid)
- *min_nid = min_addr;
- if (max_nid)
- *max_nid = max_addr;
-}
-
-/**
- * Determines whether an expression list in an nidrange contains exactly
- * one contiguous address range. Calls the correct netstrfns for the LND
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-bool cfs_nidrange_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct netstrfns *nf = NULL;
- char *lndname = NULL;
- int netnum = -1;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- nf = nr->nr_netstrfns;
- if (!lndname)
- lndname = nf->nf_name;
- if (netnum == -1)
- netnum = nr->nr_netnum;
-
- if (strcmp(lndname, nf->nf_name) ||
- netnum != nr->nr_netnum)
- return false;
- }
-
- if (!nf)
- return false;
-
- if (!nf->nf_is_contiguous(nidlist))
- return false;
-
- return true;
-}
-EXPORT_SYMBOL(cfs_nidrange_is_contiguous);
-
-/**
- * Determines whether an expression list in an num nidrange contains exactly
- * one contiguous address range.
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-static bool cfs_num_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- int last_hi = 0;
- __u32 last_end_nid = 0;
- __u32 current_start_nid = 0;
- __u32 current_end_nid = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_num_ar_min_max(ar, &current_start_nid,
- &current_end_nid);
- if (last_end_nid &&
- (current_start_nid - last_end_nid != 1))
- return false;
- last_end_nid = current_end_nid;
- list_for_each_entry(el, &ar->ar_numaddr_ranges,
- el_link) {
- list_for_each_entry(re, &el->el_exprs,
- re_link) {
- if (re->re_stride > 1)
- return false;
- else if (last_hi &&
- re->re_hi - last_hi != 1)
- return false;
- last_hi = re->re_hi;
- }
- }
- }
- }
-
- return true;
-}
-
-/**
- * Determines whether an expression list in an ip nidrange contains exactly
- * one contiguous address range.
- *
- * \param *nidlist
- *
- * \retval true if contiguous
- * \retval false if not contiguous
- */
-static bool cfs_ip_is_contiguous(struct list_head *nidlist)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- struct cfs_expr_list *el;
- struct cfs_range_expr *re;
- int expr_count;
- int last_hi = 255;
- int last_diff = 0;
- __u32 last_end_nid = 0;
- __u32 current_start_nid = 0;
- __u32 current_end_nid = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- last_hi = 255;
- last_diff = 0;
- cfs_ip_ar_min_max(ar, &current_start_nid,
- &current_end_nid);
- if (last_end_nid &&
- (current_start_nid - last_end_nid != 1))
- return false;
- last_end_nid = current_end_nid;
- list_for_each_entry(el, &ar->ar_numaddr_ranges,
- el_link) {
- expr_count = 0;
- list_for_each_entry(re, &el->el_exprs,
- re_link) {
- expr_count++;
- if (re->re_stride > 1 ||
- (last_diff > 0 && last_hi != 255) ||
- (last_diff > 0 && last_hi == 255 &&
- re->re_lo > 0))
- return false;
- last_hi = re->re_hi;
- last_diff = re->re_hi - re->re_lo;
- }
- }
- }
- }
-
- return true;
-}
-
-/**
- * Takes a linked list of nidrange expressions, determines the minimum
- * and maximum nid and creates appropriate nid structures
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- */
-void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
- char *max_nid, size_t nidstr_length)
-{
- struct nidrange *nr;
- struct netstrfns *nf = NULL;
- int netnum = -1;
- __u32 min_addr;
- __u32 max_addr;
- char *lndname = NULL;
- char min_addr_str[IPSTRING_LENGTH];
- char max_addr_str[IPSTRING_LENGTH];
-
- list_for_each_entry(nr, nidlist, nr_link) {
- nf = nr->nr_netstrfns;
- lndname = nf->nf_name;
- if (netnum == -1)
- netnum = nr->nr_netnum;
-
- nf->nf_min_max(nidlist, &min_addr, &max_addr);
- }
- nf->nf_addr2str(min_addr, min_addr_str, sizeof(min_addr_str));
- nf->nf_addr2str(max_addr, max_addr_str, sizeof(max_addr_str));
-
- snprintf(min_nid, nidstr_length, "%s@%s%d", min_addr_str, lndname,
- netnum);
- snprintf(max_nid, nidstr_length, "%s@%s%d", max_addr_str, lndname,
- netnum);
-}
-EXPORT_SYMBOL(cfs_nidrange_find_min_max);
-
-/**
- * Determines the min and max NID values for num LNDs
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- */
-static void cfs_num_min_max(struct list_head *nidlist, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- unsigned int tmp_min_addr = 0;
- unsigned int tmp_max_addr = 0;
- unsigned int min_addr = 0;
- unsigned int max_addr = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_num_ar_min_max(ar, &tmp_min_addr,
- &tmp_max_addr);
- if (tmp_min_addr < min_addr || !min_addr)
- min_addr = tmp_min_addr;
- if (tmp_max_addr > max_addr)
- max_addr = tmp_min_addr;
- }
- }
- *max_nid = max_addr;
- *min_nid = min_addr;
-}
-
-/**
- * Takes an nidlist and determines the minimum and maximum
- * ip addresses.
- *
- * \param *nidlist
- * \param *min_nid
- * \param *max_nid
- */
-static void cfs_ip_min_max(struct list_head *nidlist, __u32 *min_nid,
- __u32 *max_nid)
-{
- struct nidrange *nr;
- struct addrrange *ar;
- __u32 tmp_min_ip_addr = 0;
- __u32 tmp_max_ip_addr = 0;
- __u32 min_ip_addr = 0;
- __u32 max_ip_addr = 0;
-
- list_for_each_entry(nr, nidlist, nr_link) {
- list_for_each_entry(ar, &nr->nr_addrranges, ar_link) {
- cfs_ip_ar_min_max(ar, &tmp_min_ip_addr,
- &tmp_max_ip_addr);
- if (tmp_min_ip_addr < min_ip_addr || !min_ip_addr)
- min_ip_addr = tmp_min_ip_addr;
- if (tmp_max_ip_addr > max_ip_addr)
- max_ip_addr = tmp_max_ip_addr;
- }
- }
-
- if (min_nid)
- *min_nid = min_ip_addr;
- if (max_nid)
- *max_nid = max_ip_addr;
-}
-
-static int
-libcfs_lo_str2addr(const char *str, int nob, __u32 *addr)
-{
- *addr = 0;
- return 1;
-}
-
-static void
-libcfs_ip_addr2str(__u32 addr, char *str, size_t size)
-{
- snprintf(str, size, "%u.%u.%u.%u",
- (addr >> 24) & 0xff, (addr >> 16) & 0xff,
- (addr >> 8) & 0xff, addr & 0xff);
-}
-
-/*
- * CAVEAT EMPTOR XscanfX
- * I use "%n" at the end of a sscanf format to detect trailing junk. However
- * sscanf may return immediately if it sees the terminating '0' in a string, so
- * I initialise the %n variable to the expected length. If sscanf sets it;
- * fine, if it doesn't, then the scan ended at the end of the string, which is
- * fine too :)
- */
-static int
-libcfs_ip_str2addr(const char *str, int nob, __u32 *addr)
-{
- unsigned int a;
- unsigned int b;
- unsigned int c;
- unsigned int d;
- int n = nob; /* XscanfX */
-
- /* numeric IP? */
- if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
- n == nob &&
- !(a & ~0xff) && !(b & ~0xff) &&
- !(c & ~0xff) && !(d & ~0xff)) {
- *addr = ((a << 24) | (b << 16) | (c << 8) | d);
- return 1;
- }
-
- return 0;
-}
-
-/* Used by lnet/config.c so it can't be static */
-int
-cfs_ip_addr_parse(char *str, int len, struct list_head *list)
-{
- struct cfs_expr_list *el;
- struct cfs_lstr src;
- int rc;
- int i;
-
- src.ls_str = str;
- src.ls_len = len;
- i = 0;
-
- while (src.ls_str) {
- struct cfs_lstr res;
-
- if (!cfs_gettok(&src, '.', &res)) {
- rc = -EINVAL;
- goto out;
- }
-
- rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el);
- if (rc)
- goto out;
-
- list_add_tail(&el->el_link, list);
- i++;
- }
-
- if (i == 4)
- return 0;
-
- rc = -EINVAL;
-out:
- cfs_expr_list_free_list(list);
-
- return rc;
-}
-
-static int
-libcfs_ip_addr_range_print(char *buffer, int count, struct list_head *list)
-{
- int i = 0, j = 0;
- struct cfs_expr_list *el;
-
- list_for_each_entry(el, list, el_link) {
- LASSERT(j++ < 4);
- if (i)
- i += scnprintf(buffer + i, count - i, ".");
- i += cfs_expr_list_print(buffer + i, count - i, el);
- }
- return i;
-}
-
-/**
- * Matches address (\a addr) against address set encoded in \a list.
- *
- * \retval 1 if \a addr matches
- * \retval 0 otherwise
- */
-int
-cfs_ip_addr_match(__u32 addr, struct list_head *list)
-{
- struct cfs_expr_list *el;
- int i = 0;
-
- list_for_each_entry_reverse(el, list, el_link) {
- if (!cfs_expr_list_match(addr & 0xff, el))
- return 0;
- addr >>= 8;
- i++;
- }
-
- return i == 4;
-}
-
-static void
-libcfs_decnum_addr2str(__u32 addr, char *str, size_t size)
-{
- snprintf(str, size, "%u", addr);
-}
-
-static int
-libcfs_num_str2addr(const char *str, int nob, __u32 *addr)
-{
- int n;
-
- n = nob;
- if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- n = nob;
- if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- n = nob;
- if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
- return 1;
-
- return 0;
-}
-
-/**
- * Nf_parse_addrlist method for networks using numeric addresses.
- *
- * Examples of such networks are gm and elan.
- *
- * \retval 0 if \a str parsed to numeric address
- * \retval errno otherwise
- */
-static int
-libcfs_num_parse(char *str, int len, struct list_head *list)
-{
- struct cfs_expr_list *el;
- int rc;
-
- rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el);
- if (!rc)
- list_add_tail(&el->el_link, list);
-
- return rc;
-}
-
-static int
-libcfs_num_addr_range_print(char *buffer, int count, struct list_head *list)
-{
- int i = 0, j = 0;
- struct cfs_expr_list *el;
-
- list_for_each_entry(el, list, el_link) {
- LASSERT(j++ < 1);
- i += cfs_expr_list_print(buffer + i, count - i, el);
- }
- return i;
-}
-
-/*
- * Nf_match_addr method for networks using numeric addresses
- *
- * \retval 1 on match
- * \retval 0 otherwise
- */
-static int
-libcfs_num_match(__u32 addr, struct list_head *numaddr)
-{
- struct cfs_expr_list *el;
-
- LASSERT(!list_empty(numaddr));
- el = list_entry(numaddr->next, struct cfs_expr_list, el_link);
-
- return cfs_expr_list_match(addr, el);
-}
-
-static struct netstrfns libcfs_netstrfns[] = {
- { .nf_type = LOLND,
- .nf_name = "lo",
- .nf_modname = "klolnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_lo_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match,
- .nf_is_contiguous = cfs_num_is_contiguous,
- .nf_min_max = cfs_num_min_max },
- { .nf_type = SOCKLND,
- .nf_name = "tcp",
- .nf_modname = "ksocklnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
- { .nf_type = O2IBLND,
- .nf_name = "o2ib",
- .nf_modname = "ko2iblnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
- { .nf_type = GNILND,
- .nf_name = "gni",
- .nf_modname = "kgnilnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_num_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match,
- .nf_is_contiguous = cfs_num_is_contiguous,
- .nf_min_max = cfs_num_min_max },
- { .nf_type = GNIIPLND,
- .nf_name = "gip",
- .nf_modname = "kgnilnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match,
- .nf_is_contiguous = cfs_ip_is_contiguous,
- .nf_min_max = cfs_ip_min_max },
-};
-
-static const size_t libcfs_nnetstrfns = ARRAY_SIZE(libcfs_netstrfns);
-
-static struct netstrfns *
-libcfs_lnd2netstrfns(__u32 lnd)
-{
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++)
- if (lnd == libcfs_netstrfns[i].nf_type)
- return &libcfs_netstrfns[i];
-
- return NULL;
-}
-
-static struct netstrfns *
-libcfs_namenum2netstrfns(const char *name)
-{
- struct netstrfns *nf;
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++) {
- nf = &libcfs_netstrfns[i];
- if (!strncmp(name, nf->nf_name, strlen(nf->nf_name)))
- return nf;
- }
- return NULL;
-}
-
-static struct netstrfns *
-libcfs_name2netstrfns(const char *name)
-{
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++)
- if (!strcmp(libcfs_netstrfns[i].nf_name, name))
- return &libcfs_netstrfns[i];
-
- return NULL;
-}
-
-int
-libcfs_isknown_lnd(__u32 lnd)
-{
- return !!libcfs_lnd2netstrfns(lnd);
-}
-EXPORT_SYMBOL(libcfs_isknown_lnd);
-
-char *
-libcfs_lnd2modname(__u32 lnd)
-{
- struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
-
- return nf ? nf->nf_modname : NULL;
-}
-EXPORT_SYMBOL(libcfs_lnd2modname);
-
-int
-libcfs_str2lnd(const char *str)
-{
- struct netstrfns *nf = libcfs_name2netstrfns(str);
-
- if (nf)
- return nf->nf_type;
-
- return -ENXIO;
-}
-EXPORT_SYMBOL(libcfs_str2lnd);
-
-char *
-libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size)
-{
- struct netstrfns *nf;
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf)
- snprintf(buf, buf_size, "?%u?", lnd);
- else
- snprintf(buf, buf_size, "%s", nf->nf_name);
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_lnd2str_r);
-
-char *
-libcfs_net2str_r(__u32 net, char *buf, size_t buf_size)
-{
- __u32 nnum = LNET_NETNUM(net);
- __u32 lnd = LNET_NETTYP(net);
- struct netstrfns *nf;
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf)
- snprintf(buf, buf_size, "<%u:%u>", lnd, nnum);
- else if (!nnum)
- snprintf(buf, buf_size, "%s", nf->nf_name);
- else
- snprintf(buf, buf_size, "%s%u", nf->nf_name, nnum);
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_net2str_r);
-
-char *
-libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size)
-{
- __u32 addr = LNET_NIDADDR(nid);
- __u32 net = LNET_NIDNET(nid);
- __u32 nnum = LNET_NETNUM(net);
- __u32 lnd = LNET_NETTYP(net);
- struct netstrfns *nf;
-
- if (nid == LNET_NID_ANY) {
- strncpy(buf, "<?>", buf_size);
- buf[buf_size - 1] = '\0';
- return buf;
- }
-
- nf = libcfs_lnd2netstrfns(lnd);
- if (!nf) {
- snprintf(buf, buf_size, "%x@<%u:%u>", addr, lnd, nnum);
- } else {
- size_t addr_len;
-
- nf->nf_addr2str(addr, buf, buf_size);
- addr_len = strlen(buf);
- if (!nnum)
- snprintf(buf + addr_len, buf_size - addr_len, "@%s",
- nf->nf_name);
- else
- snprintf(buf + addr_len, buf_size - addr_len, "@%s%u",
- nf->nf_name, nnum);
- }
-
- return buf;
-}
-EXPORT_SYMBOL(libcfs_nid2str_r);
-
-static struct netstrfns *
-libcfs_str2net_internal(const char *str, __u32 *net)
-{
- struct netstrfns *nf = NULL;
- int nob;
- unsigned int netnum;
- int i;
-
- for (i = 0; i < libcfs_nnetstrfns; i++) {
- nf = &libcfs_netstrfns[i];
- if (!strncmp(str, nf->nf_name, strlen(nf->nf_name)))
- break;
- }
-
- if (i == libcfs_nnetstrfns)
- return NULL;
-
- nob = strlen(nf->nf_name);
-
- if (strlen(str) == (unsigned int)nob) {
- netnum = 0;
- } else {
- if (nf->nf_type == LOLND) /* net number not allowed */
- return NULL;
-
- str += nob;
- i = strlen(str);
- if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
- i != (int)strlen(str))
- return NULL;
- }
-
- *net = LNET_MKNET(nf->nf_type, netnum);
- return nf;
-}
-
-__u32
-libcfs_str2net(const char *str)
-{
- __u32 net;
-
- if (libcfs_str2net_internal(str, &net))
- return net;
-
- return LNET_NIDNET(LNET_NID_ANY);
-}
-EXPORT_SYMBOL(libcfs_str2net);
-
-lnet_nid_t
-libcfs_str2nid(const char *str)
-{
- const char *sep = strchr(str, '@');
- struct netstrfns *nf;
- __u32 net;
- __u32 addr;
-
- if (sep) {
- nf = libcfs_str2net_internal(sep + 1, &net);
- if (!nf)
- return LNET_NID_ANY;
- } else {
- sep = str + strlen(str);
- net = LNET_MKNET(SOCKLND, 0);
- nf = libcfs_lnd2netstrfns(SOCKLND);
- LASSERT(nf);
- }
-
- if (!nf->nf_str2addr(str, (int)(sep - str), &addr))
- return LNET_NID_ANY;
-
- return LNET_MKNID(net, addr);
-}
-EXPORT_SYMBOL(libcfs_str2nid);
-
-char *
-libcfs_id2str(struct lnet_process_id id)
-{
- char *str = libcfs_next_nidstring();
-
- if (id.pid == LNET_PID_ANY) {
- snprintf(str, LNET_NIDSTR_SIZE,
- "LNET_PID_ANY-%s", libcfs_nid2str(id.nid));
- return str;
- }
-
- snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s",
- id.pid & LNET_PID_USERFLAG ? "U" : "",
- id.pid & ~LNET_PID_USERFLAG, libcfs_nid2str(id.nid));
- return str;
-}
-EXPORT_SYMBOL(libcfs_id2str);
-
-int
-libcfs_str2anynid(lnet_nid_t *nidp, const char *str)
-{
- if (!strcmp(str, "*")) {
- *nidp = LNET_NID_ANY;
- return 1;
- }
-
- *nidp = libcfs_str2nid(str);
- return *nidp != LNET_NID_ANY;
-}
-EXPORT_SYMBOL(libcfs_str2anynid);
diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
deleted file mode 100644
index 5829414..0000000
--- a/drivers/staging/lustre/lnet/lnet/peer.c
+++ /dev/null
@@ -1,456 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/lnet/peer.c
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-#include <uapi/linux/lnet/lnet-dlc.h>
-
-/*
- * Allocate the per-CPT peer tables in the_lnet.ln_peer_tables and give
- * each one an empty deathrow list and an empty peer hash.
- *
- * Returns 0 on success or -ENOMEM.  When a hash allocation fails, whatever
- * was already set up is released through lnet_peer_tables_destroy().
- */
-int
-lnet_peer_tables_create(void)
-{
-	struct lnet_peer_table *tbl;
-	struct list_head *buckets;
-	int cpt;
-	int slot;
-
-	the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
-						   sizeof(*tbl));
-	if (!the_lnet.ln_peer_tables) {
-		CERROR("Failed to allocate cpu-partition peer tables\n");
-		return -ENOMEM;
-	}
-
-	cfs_percpt_for_each(tbl, cpt, the_lnet.ln_peer_tables) {
-		INIT_LIST_HEAD(&tbl->pt_deathrow);
-
-		buckets = kvmalloc_cpt(LNET_PEER_HASH_SIZE * sizeof(*buckets),
-				       GFP_KERNEL, cpt);
-		if (!buckets)
-			goto no_hash;
-
-		for (slot = 0; slot < LNET_PEER_HASH_SIZE; slot++)
-			INIT_LIST_HEAD(&buckets[slot]);
-
-		/* a non-NULL pt_hash is what marks this table as initialised */
-		tbl->pt_hash = buckets;
-	}
-
-	return 0;
-
-no_hash:
-	CERROR("Failed to create peer hash table\n");
-	lnet_peer_tables_destroy();
-	return -ENOMEM;
-}
-
-/*
- * Free every initialised peer hash and then the per-CPT peer tables.
- *
- * Does nothing when the tables were never allocated.  The walk stops at
- * the first table without a hash, which is how a partially completed
- * lnet_peer_tables_create() is unwound.  All lists must already be empty.
- */
-void
-lnet_peer_tables_destroy(void)
-{
-	struct lnet_peer_table *tbl;
-	struct list_head *buckets;
-	int cpt;
-	int slot;
-
-	if (!the_lnet.ln_peer_tables)
-		return;
-
-	cfs_percpt_for_each(tbl, cpt, the_lnet.ln_peer_tables) {
-		buckets = tbl->pt_hash;
-		if (!buckets)
-			break;	/* this table was never initialised */
-
-		LASSERT(list_empty(&tbl->pt_deathrow));
-
-		tbl->pt_hash = NULL;
-		for (slot = 0; slot < LNET_PEER_HASH_SIZE; slot++)
-			LASSERT(list_empty(&buckets[slot]));
-
-		kvfree(buckets);
-	}
-
-	cfs_percpt_free(the_lnet.ln_peer_tables);
-	the_lnet.ln_peer_tables = NULL;
-}
-
-/*
- * Unhash the peers in @ptable that belong to @ni (all peers when @ni is
- * NULL), count each as a zombie and drop the hash table's reference.
- */
-static void
-lnet_peer_table_cleanup_locked(struct lnet_ni *ni,
-			       struct lnet_peer_table *ptable)
-{
-	struct lnet_peer *peer;
-	struct lnet_peer *next;
-	int bucket;
-
-	for (bucket = 0; bucket < LNET_PEER_HASH_SIZE; bucket++) {
-		list_for_each_entry_safe(peer, next, &ptable->pt_hash[bucket],
-					 lp_hashlist) {
-			if (!ni || ni == peer->lp_ni) {
-				list_del_init(&peer->lp_hashlist);
-				/* the hash table's reference goes away */
-				ptable->pt_zombies++;
-				lnet_peer_decref_locked(peer);
-			}
-		}
-	}
-}
-
-/*
- * Block until @ptable has no zombie peers left (pt_zombies drops to zero).
- *
- * The loop releases lnet_net_lock(cpt_locked) before each sleep and takes
- * it again afterwards, so the caller enters and leaves with it held.
- */
-static void
-lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable,
- int cpt_locked)
-{
- int i;
-
- /* i only rate-limits the warning: it is printed when i is a power of 2 */
- for (i = 3; ptable->pt_zombies; i++) {
- lnet_net_unlock(cpt_locked);
-
- if (is_power_of_2(i)) {
- CDEBUG(D_WARNING,
- "Waiting for %d zombies on peer table\n",
- ptable->pt_zombies);
- }
- /* sleep HZ >> 1 jiffies (half a second) before checking again */
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ >> 1);
- lnet_net_lock(cpt_locked);
- }
-}
-
-/*
- * Delete all routes that go through gateways which are peers of @ni in
- * @ptable.
- *
- * Entered and left with lnet_net_lock(cpt_locked) held; the lock is
- * released around every lnet_del_route() call.
- */
-static void
-lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni,
- struct lnet_peer_table *ptable,
- int cpt_locked)
-{
- struct lnet_peer *lp;
- struct lnet_peer *tmp;
- lnet_nid_t lp_nid;
- int i;
-
- for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
- list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
- lp_hashlist) {
- if (ni != lp->lp_ni)
- continue;
-
- /* skip peers that no route uses as a gateway */
- if (!lp->lp_rtr_refcount)
- continue;
-
- /* copy the NID so lp is not touched while the lock is dropped */
- lp_nid = lp->lp_nid;
-
- /*
- * NOTE(review): the hash chain can presumably change while the
- * lock is dropped, yet the walk resumes from the saved 'tmp' --
- * confirm this is safe for all callers.
- */
- lnet_net_unlock(cpt_locked);
- lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid);
- lnet_net_lock(cpt_locked);
- }
- }
-}
-
-/*
- * Remove and free the peers of @ni, or every peer when @ni is NULL (only
- * allowed during shutdown, see the LASSERT below).
- *
- * Works in passes over all per-CPT tables: delete routes via affected
- * gateways, unhash the peers, wait for the zombies to reach deathrow, and
- * finally free everything collected from deathrow outside the lock.
- */
-void
-lnet_peer_tables_cleanup(struct lnet_ni *ni)
-{
- struct lnet_peer_table *ptable;
- struct list_head deathrow;
- struct lnet_peer *lp;
- struct lnet_peer *temp;
- int i;
-
- INIT_LIST_HEAD(&deathrow);
-
- LASSERT(the_lnet.ln_shutdown || ni);
- /*
- * If just deleting the peers for a NI, get rid of any routes these
- * peers are gateways for.
- */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_del_rtrs_locked(ni, ptable, i);
- lnet_net_unlock(i);
- }
-
- /*
- * Start the process of moving the applicable peers to
- * deathrow.
- */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_cleanup_locked(ni, ptable);
- lnet_net_unlock(i);
- }
-
- /* Cleanup all entries on deathrow. */
- cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
- lnet_net_lock(i);
- lnet_peer_table_deathrow_wait_locked(ptable, i);
- /* move this table's dead peers onto the local list */
- list_splice_init(&ptable->pt_deathrow, &deathrow);
- lnet_net_unlock(i);
- }
-
- /* free the collected peers; no lock is held here */
- list_for_each_entry_safe(lp, temp, &deathrow, lp_hashlist) {
- list_del(&lp->lp_hashlist);
- kfree(lp);
- }
-}
-
-/*
- * Retire a peer whose reference counts are zero: drop its reference on
- * the NI, update the owning table's counters and park the structure on the
- * table's deathrow list (it is not freed here).
- *
- * The peer must already be unhashed and have an empty tx queue.
- */
-void
-lnet_destroy_peer_locked(struct lnet_peer *lp)
-{
- struct lnet_peer_table *ptable;
-
- LASSERT(!lp->lp_refcount);
- LASSERT(!lp->lp_rtr_refcount);
- LASSERT(list_empty(&lp->lp_txq));
- LASSERT(list_empty(&lp->lp_hashlist));
- LASSERT(!lp->lp_txqnob);
-
- ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
- LASSERT(ptable->pt_number > 0);
- ptable->pt_number--;
-
- lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
- lp->lp_ni = NULL;
-
- /* lp_hashlist is reused as the deathrow linkage */
- list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
- LASSERT(ptable->pt_zombies > 0);
- ptable->pt_zombies--;
-}
-
-/*
- * Look @nid up in @ptable.
- *
- * Returns the matching peer with an extra reference taken for the caller,
- * or NULL when the NID is not hashed.  Must not be called once LNet is
- * shutting down.
- */
-struct lnet_peer *
-lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
-{
-	struct list_head *bucket;
-	struct lnet_peer *peer;
-
-	LASSERT(!the_lnet.ln_shutdown);
-
-	bucket = &ptable->pt_hash[lnet_nid2peerhash(nid)];
-	list_for_each_entry(peer, bucket, lp_hashlist) {
-		if (peer->lp_nid != nid)
-			continue;
-
-		lnet_peer_addref_locked(peer);
-		return peer;
-	}
-
-	return NULL;
-}
-
-/*
- * Find the peer for @nid, creating and hashing it if it does not exist.
- *
- * On success returns 0 and stores a referenced peer in *lpp.  Errors:
- * -ESHUTDOWN while LNet is shutting down, -ENOMEM if allocation fails,
- * -EHOSTUNREACH if no local NI serves the NID's network; *lpp is NULL then.
- *
- * Called with lnet_net_lock(@cpt) held.  The lock is dropped while a new
- * peer is allocated and initialised, then re-taken before returning.
- */
-int
-lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt)
-{
- struct lnet_peer_table *ptable;
- struct lnet_peer *lp = NULL;
- struct lnet_peer *lp2;
- int cpt2;
- int rc = 0;
-
- *lpp = NULL;
- if (the_lnet.ln_shutdown) /* it's shutting down */
- return -ESHUTDOWN;
-
- /* cpt can be LNET_LOCK_EX if it's called from router functions */
- cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
-
- ptable = the_lnet.ln_peer_tables[cpt2];
- lp = lnet_find_peer_locked(ptable, nid);
- if (lp) {
- *lpp = lp;
- return 0;
- }
-
- /* recycle a retired peer structure from deathrow when one exists */
- if (!list_empty(&ptable->pt_deathrow)) {
- lp = list_entry(ptable->pt_deathrow.next,
- struct lnet_peer, lp_hashlist);
- list_del(&lp->lp_hashlist);
- }
-
- /*
- * take extra refcount in case another thread has shutdown LNet
- * and destroyed locks and peer-table before I finish the allocation
- */
- ptable->pt_number++;
- lnet_net_unlock(cpt);
-
- if (lp)
- memset(lp, 0, sizeof(*lp));
- else
- lp = kzalloc_cpt(sizeof(*lp), GFP_NOFS, cpt2);
-
- if (!lp) {
- rc = -ENOMEM;
- lnet_net_lock(cpt);
- goto out;
- }
-
- INIT_LIST_HEAD(&lp->lp_txq);
- INIT_LIST_HEAD(&lp->lp_rtrq);
- INIT_LIST_HEAD(&lp->lp_routes);
-
- lp->lp_notify = 0;
- lp->lp_notifylnd = 0;
- lp->lp_notifying = 0;
- lp->lp_alive_count = 0;
- lp->lp_timestamp = 0;
- lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
- lp->lp_last_alive = jiffies; /* assumes alive */
- lp->lp_last_query = 0; /* haven't asked NI yet */
- lp->lp_ping_timestamp = 0;
- lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
- lp->lp_nid = nid;
- lp->lp_cpt = cpt2;
- lp->lp_refcount = 2; /* 1 for caller; 1 for hash */
- lp->lp_rtr_refcount = 0;
-
- lnet_net_lock(cpt);
-
- /* the lock was dropped: re-validate everything checked before */
- if (the_lnet.ln_shutdown) {
- rc = -ESHUTDOWN;
- goto out;
- }
-
- /* another thread may have hashed the same NID meanwhile; rc stays 0 */
- lp2 = lnet_find_peer_locked(ptable, nid);
- if (lp2) {
- *lpp = lp2;
- goto out;
- }
-
- lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
- if (!lp->lp_ni) {
- rc = -EHOSTUNREACH;
- goto out;
- }
-
- lp->lp_txcredits = lp->lp_ni->ni_peertxcredits;
- lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
- lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
- lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
-
- list_add_tail(&lp->lp_hashlist,
- &ptable->pt_hash[lnet_nid2peerhash(nid)]);
- ptable->pt_version++;
- *lpp = lp;
-
- return 0;
-out:
- /* the unused structure goes (back) onto deathrow for later reuse */
- if (lp)
- list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
- /* undo the pt_number++ taken before the lock was dropped */
- ptable->pt_number--;
- return rc;
-}
-
-/*
- * Emit one D_WARNING debug line describing the peer for @nid: refcount,
- * aliveness and credit counters.  Logs "No peer" when the peer cannot be
- * found or created.
- */
-void
-lnet_debug_peer(lnet_nid_t nid)
-{
-	const char *state = "NA";
-	struct lnet_peer *peer;
-	int cpt = lnet_cpt_of_nid(nid);
-
-	lnet_net_lock(cpt);
-
-	if (lnet_nid2peer_locked(&peer, nid, cpt)) {
-		lnet_net_unlock(cpt);
-		CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
-		return;
-	}
-
-	/* aliveness is only meaningful for routers or when tracking is on */
-	if (lnet_isrouter(peer) || lnet_peer_aliveness_enabled(peer)) {
-		if (peer->lp_alive)
-			state = "up";
-		else
-			state = "down";
-	}
-
-	CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
-	       libcfs_nid2str(peer->lp_nid), peer->lp_refcount,
-	       state, peer->lp_ni->ni_peertxcredits,
-	       peer->lp_rtrcredits, peer->lp_minrtrcredits,
-	       peer->lp_txcredits, peer->lp_mintxcredits, peer->lp_txqnob);
-
-	/* release the reference lnet_nid2peer_locked() handed out */
-	lnet_peer_decref_locked(peer);
-
-	lnet_net_unlock(cpt);
-}
-
-/*
- * Report the statistics of the @peer_index'th peer (counting through the
- * hash buckets in order) of the peer table belonging to CPT *@cpt_iter.
- *
- * On success returns 0 and fills @nid, @aliveness ("NA", "up" or "down"),
- * @refcount and the credit / queue counters.  Returns -ENOENT when
- * *@cpt_iter is out of range, the table is missing, or the table holds
- * fewer than @peer_index + 1 peers.
- *
- * Unless the early range/table checks fail, *@cpt_iter is overwritten with
- * the number of CPTs before returning.
- */
-int
-lnet_get_peer_info(__u32 peer_index, __u64 *nid,
-		   char aliveness[LNET_MAX_STR_LEN],
-		   __u32 *cpt_iter, __u32 *refcount,
-		   __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
-		   __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits,
-		   __u32 *peer_tx_qnob)
-{
-	struct lnet_peer_table *peer_table;
-	struct lnet_peer *lp;
-	bool found = false;
-	int lncpt, j;
-
-	/* get the number of CPTs */
-	lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
-
-	/*
-	 * if the cpt number to be examined is >= the number of cpts in
-	 * the system then indicate that there are no more cpts to examine
-	 */
-	if (*cpt_iter >= lncpt)
-		return -ENOENT;
-
-	/* get the current table */
-	peer_table = the_lnet.ln_peer_tables[*cpt_iter];
-	/* if the ptable is NULL then there are no more cpts to examine */
-	if (!peer_table)
-		return -ENOENT;
-
-	lnet_net_lock(*cpt_iter);
-
-	for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) {
-		struct list_head *peers = &peer_table->pt_hash[j];
-
-		list_for_each_entry(lp, peers, lp_hashlist) {
-			if (peer_index-- > 0)
-				continue;
-
-			snprintf(aliveness, LNET_MAX_STR_LEN, "NA");
-			/* always pass a literal format, never the data itself */
-			if (lnet_isrouter(lp) ||
-			    lnet_peer_aliveness_enabled(lp))
-				snprintf(aliveness, LNET_MAX_STR_LEN, "%s",
-					 lp->lp_alive ? "up" : "down");
-
-			*nid = lp->lp_nid;
-			*refcount = lp->lp_refcount;
-			*ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits;
-			*peer_tx_credits = lp->lp_txcredits;
-			*peer_rtr_credits = lp->lp_rtrcredits;
-			/* minimum *router* credits, not the tx minimum */
-			*peer_min_rtr_credits = lp->lp_minrtrcredits;
-			*peer_tx_qnob = lp->lp_txqnob;
-
-			found = true;
-			/*
-			 * stop here: peer_index has wrapped round and must not
-			 * be decremented any further
-			 */
-			break;
-		}
-	}
-	lnet_net_unlock(*cpt_iter);
-
-	*cpt_iter = lncpt;
-
-	return found ? 0 : -ENOENT;
-}
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
deleted file mode 100644
index 6267d5e..0000000
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ /dev/null
@@ -1,1799 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- *
- * This file is part of Portals
- * http://sourceforge.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/completion.h>
-#include <linux/lnet/lib-lnet.h>
-
-/*
- * Router buffer pool sizing.  For each pool the *_MIN value is the minimum
- * per CPT and the plain value is four times that minimum; *_PAGES is the
- * number of pages per buffer (large buffers cover LNET_MTU, rounded up).
- */
-#define LNET_NRB_TINY_MIN 512 /* min value for each CPT */
-#define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4)
-#define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */
-#define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4)
-#define LNET_NRB_SMALL_PAGES 1
-#define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */
-#define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4)
-#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_SIZE - 1) >> \
- PAGE_SHIFT)
-
-/* Module parameters below use mode 0444 (world-readable, not writable). */
-static char *forwarding = "";
-module_param(forwarding, charp, 0444);
-MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
-
-/* Buffer counts per pool; these have no initialiser, so they start at 0 */
-static int tiny_router_buffers;
-module_param(tiny_router_buffers, int, 0444);
-MODULE_PARM_DESC(tiny_router_buffers, "# of 0 payload messages to buffer in the router");
-static int small_router_buffers;
-module_param(small_router_buffers, int, 0444);
-MODULE_PARM_DESC(small_router_buffers, "# of small (1 page) messages to buffer in the router");
-static int large_router_buffers;
-module_param(large_router_buffers, int, 0444);
-MODULE_PARM_DESC(large_router_buffers, "# of large messages to buffer in the router");
-/* values <= 0 are ignored by lnet_peer_buffer_credits() */
-static int peer_buffer_credits;
-module_param(peer_buffer_credits, int, 0444);
-MODULE_PARM_DESC(peer_buffer_credits, "# router buffer credits per peer");
-
-static int auto_down = 1;
-module_param(auto_down, int, 0444);
-MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
-
-/*
- * Number of router buffer credits a peer on @ni is given.
- *
- * Precedence: a positive per-NI setting, then a positive
- * peer_buffer_credits module parameter, then the NI's peer tx credits.
- */
-int
-lnet_peer_buffer_credits(struct lnet_ni *ni)
-{
-	/*
-	 * With neither override set, approximate: give the peer as many
-	 * router buffers as it may have sends outstanding.
-	 */
-	if (!(ni->ni_peerrtrcredits > 0) && !(peer_buffer_credits > 0))
-		return ni->ni_peertxcredits;
-
-	/* the NI option takes priority over the LNet-wide default */
-	return ni->ni_peerrtrcredits > 0 ? ni->ni_peerrtrcredits :
-					   peer_buffer_credits;
-}
-
-/* forward ref's */
-static int lnet_router_checker(void *);
-
-/* returned as-is by lnet_peers_start_down() */
-static int check_routers_before_use;
-module_param(check_routers_before_use, int, 0444);
-MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
-
-/* not static, so visible outside this file; mode 0644 */
-int avoid_asym_router_failure = 1;
-module_param(avoid_asym_router_failure, int, 0644);
-MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
-
-/* the three values below are in seconds and feed the NI timeout */
-static int dead_router_check_interval = 60;
-module_param(dead_router_check_interval, int, 0644);
-MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)");
-
-static int live_router_check_interval = 60;
-module_param(live_router_check_interval, int, 0644);
-MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)");
-
-static int router_ping_timeout = 50;
-module_param(router_ping_timeout, int, 0644);
-MODULE_PARM_DESC(router_ping_timeout, "Seconds to wait for the reply to a router health query");
-
-/*
- * Report whether newly created peers start out marked down; this is the
- * value of the check_routers_before_use module parameter.
- */
-int
-lnet_peers_start_down(void)
-{
-	int start_down = check_routers_before_use;
-
-	return start_down;
-}
-
-/*
- * Record an aliveness report for @lp observed at time @when (jiffies).
- *
- * Reports older than the last recorded one are dropped.  A report that
- * repeats the current state only refreshes the timestamp.  A real change
- * updates lp_alive and flags a pending notification, remembering in
- * lp_notifylnd whether the LND has to be told as well.
- */
-void
-lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive,
-		   unsigned long when)
-{
-	int is_alive = !!alive;
-
-	/* out of date information */
-	if (time_before(when, lp->lp_timestamp)) {
-		CDEBUG(D_NET, "Out of date\n");
-		return;
-	}
-
-	lp->lp_timestamp = when;	/* update timestamp */
-	lp->lp_ping_deadline = 0;	/* disable ping timeout */
-
-	/* we already had news and it says the same: new date for old news */
-	if (lp->lp_alive_count && !!lp->lp_alive == is_alive) {
-		CDEBUG(D_NET, "Old news\n");
-		return;
-	}
-
-	/* flag that a notification is outstanding */
-	lp->lp_alive_count++;
-	lp->lp_alive = is_alive;	/* 1 bit! */
-	lp->lp_notify = 1;
-	lp->lp_notifylnd |= notifylnd;
-	if (lp->lp_alive)
-		lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
-
-	CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
-}
-
-/*
- * Deliver pending aliveness notifications for @lp to the LND of @ni.
- *
- * Entered and left with lnet_net_lock(lp->lp_cpt) held; the lock is
- * released around each lnd_notify() callback.  Does nothing when @ni is
- * NULL or another thread is already notifying for this peer.
- */
-static void
-lnet_ni_notify_locked(struct lnet_ni *ni, struct lnet_peer *lp)
-{
- int alive;
- int notifylnd;
-
- /*
- * Notify only in 1 thread at any time to ensure ordered notification.
- * NB individual events can be missed; the only guarantee is that you
- * always get the most recent news
- */
- if (lp->lp_notifying || !ni)
- return;
-
- lp->lp_notifying = 1;
-
- while (lp->lp_notify) {
- /* snapshot and clear the pending state before the lock is dropped */
- alive = lp->lp_alive;
- notifylnd = lp->lp_notifylnd;
-
- lp->lp_notifylnd = 0;
- lp->lp_notify = 0;
-
- if (notifylnd && ni->ni_lnd->lnd_notify) {
- lnet_net_unlock(lp->lp_cpt);
-
- /*
- * A new notification could happen now; I'll handle it
- * when control returns to me
- */
- ni->ni_lnd->lnd_notify(ni, lp->lp_nid, alive);
-
- lnet_net_lock(lp->lp_cpt);
- }
- }
-
- lp->lp_notifying = 0;
-}
-
-/*
- * Take a router reference on @lp.  On the first reference the peer is
- * inserted into the_lnet.ln_routers, which is kept ordered by NID, and the
- * list gets its own peer reference.
- */
-static void
-lnet_rtr_addref_locked(struct lnet_peer *lp)
-{
- LASSERT(lp->lp_refcount > 0);
- LASSERT(lp->lp_rtr_refcount >= 0);
-
- /* lnet_net_lock must be exclusively locked */
- lp->lp_rtr_refcount++;
- if (lp->lp_rtr_refcount == 1) {
- struct list_head *pos;
-
- /* a simple insertion sort */
- /* walk backwards to the last router with a smaller NID */
- list_for_each_prev(pos, &the_lnet.ln_routers) {
- struct lnet_peer *rtr;
-
- rtr = list_entry(pos, struct lnet_peer, lp_rtr_list);
- if (rtr->lp_nid < lp->lp_nid)
- break;
- }
-
- /* pos is that router, or the list head if none is smaller */
- list_add(&lp->lp_rtr_list, pos);
- /* addref for the_lnet.ln_routers */
- lnet_peer_addref_locked(lp);
- the_lnet.ln_routers_version++;
- }
-}
-
-/*
- * Drop a router reference on @lp.  When the last one goes, the peer's
- * router-checker data (if any) is queued on the_lnet.ln_rcd_deathrow, the
- * peer leaves the_lnet.ln_routers and the list's peer reference is
- * released.
- */
-static void
-lnet_rtr_decref_locked(struct lnet_peer *lp)
-{
- LASSERT(lp->lp_refcount > 0);
- LASSERT(lp->lp_rtr_refcount > 0);
-
- /* lnet_net_lock must be exclusively locked */
- lp->lp_rtr_refcount--;
- if (!lp->lp_rtr_refcount) {
- LASSERT(list_empty(&lp->lp_routes));
-
- /* detach the rcd here; it is only queued, not freed, at this point */
- if (lp->lp_rcd) {
- list_add(&lp->lp_rcd->rcd_list,
- &the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
- }
-
- list_del(&lp->lp_rtr_list);
- /* decref for the_lnet.ln_routers */
- lnet_peer_decref_locked(lp);
- the_lnet.ln_routers_version++;
- }
-}
-
-/*
- * Look up the remote-network descriptor for @net in the remote nets hash.
- * Returns NULL when no route to that network is known.  Must not be called
- * during shutdown.
- */
-struct lnet_remotenet *
-lnet_find_net_locked(__u32 net)
-{
-	struct lnet_remotenet *candidate;
-	struct list_head *chain;
-
-	LASSERT(!the_lnet.ln_shutdown);
-
-	chain = lnet_net2rnethash(net);
-	list_for_each_entry(candidate, chain, lrn_list) {
-		if (candidate->lrn_net != net)
-			continue;
-
-		return candidate;
-	}
-
-	return NULL;
-}
-
-/*
- * One-shot helper: feed the address and LND type of every non-loopback
- * local NI into the kernel's device randomness pool.  Later calls return
- * immediately.
- */
-static void lnet_shuffle_seed(void)
-{
-	static int seeded;
-	struct lnet_ni *ni;
-
-	if (seeded)
-		return;
-
-	/*
-	 * Nodes with small feet have little entropy
-	 * the NID for this node gives the most entropy in the low bits
-	 */
-	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-		__u32 type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
-		__u32 mix;
-
-		if (type == LOLND)
-			continue;
-
-		mix = (LNET_NIDADDR(ni->ni_nid) | type);
-		add_device_randomness(&mix, sizeof(mix));
-	}
-
-	seeded = 1;
-}
-
-/*
- * Insert @route at a random position in @rnet's route list and link it
- * into its gateway's route list, taking a router reference on the gateway.
- *
- * NB expects LNET_LOCK held
- */
-static void
-lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route)
-{
-	unsigned int nroutes = 0;
-	unsigned int skip;
-	struct list_head *pos;
-
-	lnet_shuffle_seed();
-
-	list_for_each(pos, &rnet->lrn_routes)
-		nroutes++;
-
-	/* nroutes + 1 possible slots for the new entry */
-	skip = prandom_u32_max(nroutes + 1);
-	list_for_each(pos, &rnet->lrn_routes) {
-		if (skip == 0)
-			break;
-		skip--;
-	}
-
-	/* pos is the chosen neighbour, or the list head if we ran off the end */
-	list_add(&route->lr_list, pos);
-	list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);
-
-	the_lnet.ln_remote_nets_version++;
-	lnet_rtr_addref_locked(route->lr_gateway);
-}
-
-/*
- * Add a route to remote network @net through @gateway.
- *
- * @hops must be LNET_UNDEFINED_HOPS or within 1..255.  Returns 0 on
- * success, -EINVAL for invalid arguments, -EEXIST when @net is a local
- * network or the same route already exists, -ENOMEM on allocation
- * failure, or the error from lnet_nid2peer_locked() (e.g. -EHOSTUNREACH
- * when the gateway is not on a local network).
- */
-int
-lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
- unsigned int priority)
-{
- struct list_head *e;
- struct lnet_remotenet *rnet;
- struct lnet_remotenet *rnet2;
- struct lnet_route *route;
- struct lnet_ni *ni;
- int add_route;
- int rc;
-
- CDEBUG(D_NET, "Add route: net %s hops %d priority %u gw %s\n",
- libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway));
-
- /* reject wildcard or loopback endpoints and gateways on the target net */
- if (gateway == LNET_NID_ANY ||
- LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
- net == LNET_NIDNET(LNET_NID_ANY) ||
- LNET_NETTYP(net) == LOLND ||
- LNET_NIDNET(gateway) == net ||
- (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255)))
- return -EINVAL;
-
- if (lnet_islocalnet(net)) /* it's a local network */
- return -EEXIST;
-
- /* Assume net, route, all new */
- route = kzalloc(sizeof(*route), GFP_NOFS);
- rnet = kzalloc(sizeof(*rnet), GFP_NOFS);
- if (!route || !rnet) {
- CERROR("Out of memory creating route %s %d %s\n",
- libcfs_net2str(net), hops, libcfs_nid2str(gateway));
- kfree(route);
- kfree(rnet);
- return -ENOMEM;
- }
-
- INIT_LIST_HEAD(&rnet->lrn_routes);
- rnet->lrn_net = net;
- route->lr_hops = hops;
- route->lr_net = net;
- route->lr_priority = priority;
-
- lnet_net_lock(LNET_LOCK_EX);
-
- /* on success this holds a reference on the gateway peer */
- rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
- if (rc) {
- lnet_net_unlock(LNET_LOCK_EX);
-
- kfree(route);
- kfree(rnet);
-
- if (rc == -EHOSTUNREACH) /* gateway is not on a local net */
- return rc; /* ignore the route entry */
- CERROR("Error %d creating route %s %d %s\n", rc,
- libcfs_net2str(net), hops,
- libcfs_nid2str(gateway));
- return rc;
- }
-
- LASSERT(!the_lnet.ln_shutdown);
-
- /* rnet2 is the descriptor in use: the existing one or our new one */
- rnet2 = lnet_find_net_locked(net);
- if (!rnet2) {
- /* new network */
- list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
- rnet2 = rnet;
- }
-
- /* Search for a duplicate route (it's a NOOP if it is) */
- add_route = 1;
- list_for_each(e, &rnet2->lrn_routes) {
- struct lnet_route *route2;
-
- route2 = list_entry(e, struct lnet_route, lr_list);
- if (route2->lr_gateway == route->lr_gateway) {
- add_route = 0;
- break;
- }
-
- /* our lookups must be true */
- LASSERT(route2->lr_gateway->lp_nid != gateway);
- }
-
- if (add_route) {
- lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
- lnet_add_route_to_rnet(rnet2, route);
-
- ni = route->lr_gateway->lp_ni;
- /* the LND callback below is made with the lock released */
- lnet_net_unlock(LNET_LOCK_EX);
-
- /* XXX Assume alive */
- if (ni->ni_lnd->lnd_notify)
- ni->ni_lnd->lnd_notify(ni, gateway, 1);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- /* -1 for notify or !add_route */
- lnet_peer_decref_locked(route->lr_gateway);
- lnet_net_unlock(LNET_LOCK_EX);
- rc = 0;
-
- if (!add_route) {
- rc = -EEXIST;
- kfree(route);
- }
-
- /* our preallocated descriptor was not needed: the net already existed */
- if (rnet != rnet2)
- kfree(rnet);
-
- /* indicate to startup the router checker if configured */
- wake_up(&the_lnet.ln_rc_waitq);
-
- return rc;
-}
-
-/*
- * Verify that, for every remote network, all routes use gateways reached
- * through the same local NI.
- *
- * Returns 0 when the routing table is consistent, or -EINVAL (after
- * logging the offending pair of gateways) at the first violation.
- */
-int
-lnet_check_routes(void)
-{
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct lnet_route *route2;
- struct list_head *e1;
- struct list_head *e2;
- int cpt;
- struct list_head *rn_list;
- int i;
-
- cpt = lnet_net_lock_current();
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
- rn_list = &the_lnet.ln_remote_nets_hash[i];
- list_for_each(e1, rn_list) {
- rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
- /* route2 is this net's first route; the others are compared to it */
- route2 = NULL;
- list_for_each(e2, &rnet->lrn_routes) {
- lnet_nid_t nid1;
- lnet_nid_t nid2;
- int net;
-
- route = list_entry(e2, struct lnet_route, lr_list);
-
- if (!route2) {
- route2 = route;
- continue;
- }
-
- if (route->lr_gateway->lp_ni ==
- route2->lr_gateway->lp_ni)
- continue;
-
- /* copy what the message needs before releasing the lock */
- nid1 = route->lr_gateway->lp_nid;
- nid2 = route2->lr_gateway->lp_nid;
- net = rnet->lrn_net;
-
- lnet_net_unlock(cpt);
-
- CERROR("Routes to %s via %s and %s not supported\n",
- libcfs_net2str(net),
- libcfs_nid2str(nid1),
- libcfs_nid2str(nid2));
- return -EINVAL;
- }
- }
- }
-
- lnet_net_unlock(cpt);
- return 0;
-}
-
-/*
- * Delete routes matching @net and @gw_nid.  LNET_NIDNET(LNET_NID_ANY) as
- * @net matches every remote network and LNET_NID_ANY as @gw_nid matches
- * every gateway.
- *
- * Returns 0 if at least one route was deleted, -ENOENT otherwise.
- */
-int
-lnet_del_route(__u32 net, lnet_nid_t gw_nid)
-{
- struct lnet_peer *gateway;
- struct lnet_remotenet *rnet;
- struct lnet_route *route;
- struct list_head *e1;
- struct list_head *e2;
- int rc = -ENOENT;
- struct list_head *rn_list;
- int idx = 0;
-
- CDEBUG(D_NET, "Del route: net %s : gw %s\n",
- libcfs_net2str(net), libcfs_nid2str(gw_nid));
-
- /*
- * NB Caller may specify either all routes via the given gateway
- * or a specific route entry (actual NIDs)
- */
- lnet_net_lock(LNET_LOCK_EX);
- /* a wildcard net scans every hash chain, starting with chain 0 */
- if (net == LNET_NIDNET(LNET_NID_ANY))
- rn_list = &the_lnet.ln_remote_nets_hash[0];
- else
- rn_list = lnet_net2rnethash(net);
-
- again:
- list_for_each(e1, rn_list) {
- rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
- if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
- net == rnet->lrn_net))
- continue;
-
- list_for_each(e2, &rnet->lrn_routes) {
- route = list_entry(e2, struct lnet_route, lr_list);
-
- gateway = route->lr_gateway;
- if (!(gw_nid == LNET_NID_ANY ||
- gw_nid == gateway->lp_nid))
- continue;
-
- list_del(&route->lr_list);
- list_del(&route->lr_gwlist);
- the_lnet.ln_remote_nets_version++;
-
- /* free rnet too if this was its last route; else kfree(NULL) below */
- if (list_empty(&rnet->lrn_routes))
- list_del(&rnet->lrn_list);
- else
- rnet = NULL;
-
- lnet_rtr_decref_locked(gateway);
- lnet_peer_decref_locked(gateway);
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- kfree(route);
- kfree(rnet);
-
- rc = 0;
- lnet_net_lock(LNET_LOCK_EX);
- /* the lists were modified and the lock dropped: rescan this chain */
- goto again;
- }
- }
-
- /* wildcard net: continue with the next hash chain */
- if (net == LNET_NIDNET(LNET_NID_ANY) &&
- ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
- rn_list = &the_lnet.ln_remote_nets_hash[idx];
- goto again;
- }
- lnet_net_unlock(LNET_LOCK_EX);
-
- return rc;
-}
-
-/*
- * Delete every route: wildcard network and wildcard gateway.  The result
- * of lnet_del_route() is deliberately ignored.
- */
-void
-lnet_destroy_routes(void)
-{
-	__u32 any_net = LNET_NIDNET(LNET_NID_ANY);
-
-	lnet_del_route(any_net, LNET_NID_ANY);
-}
-
-/*
- * Copy the router buffer pool configuration of the @idx'th CPT into
- * @pool_cfg, together with the current routing state.
- *
- * Returns 0 when CPT @idx exists, -ENOENT when it does not or when no
- * router pools are allocated.  pl_routing is filled in whenever router
- * pools exist, even if @idx is out of range.
- *
- * The previous code used one counter both as the pool index and as the
- * CPT match counter and incremented it before indexing, so at most one
- * pool was copied and into the wrong (possibly out-of-range) slot.
- *
- * NOTE(review): assumes pool_cfg->pl_pools and each per-CPT rbp array hold
- * LNET_NRBPOOLS entries -- confirm against the structure definitions.
- */
-int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg)
-{
-	struct lnet_rtrbufpool *rbp;
-	int rc = -ENOENT;
-	int cpt;
-	int i;
-
-	if (!the_lnet.ln_rtrpools)
-		return rc;
-
-	lnet_net_lock(LNET_LOCK_EX);
-
-	cfs_percpt_for_each(rbp, cpt, the_lnet.ln_rtrpools) {
-		if (cpt != idx)
-			continue;
-
-		/* report every pool of the requested CPT */
-		for (i = 0; i < LNET_NRBPOOLS; i++) {
-			pool_cfg->pl_pools[i].pl_npages = rbp[i].rbp_npages;
-			pool_cfg->pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers;
-			pool_cfg->pl_pools[i].pl_credits = rbp[i].rbp_credits;
-			pool_cfg->pl_pools[i].pl_mincredits =
-				rbp[i].rbp_mincredits;
-		}
-		rc = 0;
-		break;
-	}
-
-	pool_cfg->pl_routing = the_lnet.ln_routing;
-
-	lnet_net_unlock(LNET_LOCK_EX);
-
-	return rc;
-}
-
-/*
- * Return the attributes of the @idx'th route, counting through the remote
- * nets hash in chain order and through each network's routes in list
- * order.
- *
- * Returns 0 and fills the output parameters when the route exists,
- * -ENOENT otherwise.
- */
-int
-lnet_get_route(int idx, __u32 *net, __u32 *hops,
-	       lnet_nid_t *gateway, __u32 *alive, __u32 *priority)
-{
-	struct lnet_remotenet *rnet;
-	struct lnet_route *route;
-	int cpt;
-	int chain;
-
-	cpt = lnet_net_lock_current();
-
-	for (chain = 0; chain < LNET_REMOTE_NETS_HASH_SIZE; chain++) {
-		list_for_each_entry(rnet,
-				    &the_lnet.ln_remote_nets_hash[chain],
-				    lrn_list) {
-			list_for_each_entry(route, &rnet->lrn_routes,
-					    lr_list) {
-				/* not there yet: keep counting down */
-				if (idx--)
-					continue;
-
-				*net = rnet->lrn_net;
-				*hops = route->lr_hops;
-				*priority = route->lr_priority;
-				*gateway = route->lr_gateway->lp_nid;
-				*alive = lnet_is_route_alive(route);
-				lnet_net_unlock(cpt);
-				return 0;
-			}
-		}
-	}
-
-	lnet_net_unlock(cpt);
-	return -ENOENT;
-}
-
-/*
- * Byte-swap a ping info block in place: the header words first, then the
- * NID and status of each NI entry.  The number of entries processed is the
- * already swapped pi_nnis, capped at LNET_MAX_RTR_NIS.
- */
-void
-lnet_swap_pinginfo(struct lnet_ping_info *info)
-{
-	struct lnet_ni_status *entry;
-	int n;
-
-	__swab32s(&info->pi_magic);
-	__swab32s(&info->pi_features);
-	__swab32s(&info->pi_pid);
-	__swab32s(&info->pi_nnis);
-
-	n = 0;
-	while (n < info->pi_nnis && n < LNET_MAX_RTR_NIS) {
-		entry = &info->pi_ni[n];
-		__swab64s(&entry->ns_nid);
-		__swab32s(&entry->ns_status);
-		n++;
-	}
-}
-
-/**
- * parse router-checker pinginfo, record number of down NIs for remote
- * networks on that router.
- *
- * Does nothing for a gateway currently marked down.  The ping buffer is
- * byte-swapped in place if it arrived in the opposite endianness.  Any
- * malformed content marks the gateway's ping features as invalid and
- * stops parsing.  For each route via the gateway, lr_downis becomes 0 when
- * an up NI on the route's network is reported, otherwise the count of down
- * NIs (at least 1 for a single-hop route).
- */
-static void
-lnet_parse_rc_info(struct lnet_rc_data *rcd)
-{
- struct lnet_ping_info *info = rcd->rcd_pinginfo;
- struct lnet_peer *gw = rcd->rcd_gateway;
- struct lnet_route *rte;
-
- if (!gw->lp_alive)
- return;
-
- if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
- lnet_swap_pinginfo(info);
-
- /* NB always racing with network! */
- if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
- CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
- libcfs_nid2str(gw->lp_nid), info->pi_magic);
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- gw->lp_ping_feats = info->pi_features;
- if (!(gw->lp_ping_feats & LNET_PING_FEAT_MASK)) {
- CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
- libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
- return; /* nothing I can understand */
- }
-
- if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS))
- return; /* can't carry NI status info */
-
- list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
- int down = 0;
- int up = 0;
- int i;
-
- /* router says routing is disabled: treat the route as down */
- if (gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) {
- rte->lr_downis = 1;
- continue;
- }
-
- for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
- struct lnet_ni_status *stat = &info->pi_ni[i];
- lnet_nid_t nid = stat->ns_nid;
-
- if (nid == LNET_NID_ANY) {
- CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
- libcfs_nid2str(gw->lp_nid));
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- /* the loopback NI says nothing about reachability */
- if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
- continue;
-
- if (stat->ns_status == LNET_NI_STATUS_DOWN) {
- down++;
- continue;
- }
-
- if (stat->ns_status == LNET_NI_STATUS_UP) {
- if (LNET_NIDNET(nid) == rte->lr_net) {
- up = 1;
- break;
- }
- continue;
- }
-
- CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
- libcfs_nid2str(gw->lp_nid), stat->ns_status);
- gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
- return;
- }
-
- if (up) { /* ignore downed NIs if NI for dest network is up */
- rte->lr_downis = 0;
- continue;
- }
- /**
- * if @down is zero and this route is single-hop, it means
- * we can't find NI for target network
- */
- if (!down && rte->lr_hops == 1)
- down = 1;
-
- rte->lr_downis = down;
- }
-}
-
-/*
- * Event handler for router-checker pings.  event->md.user_ptr carries the
- * lnet_rc_data of the pinged gateway.
- *
- * An unlink event only invalidates the MD handle.  A successful SEND just
- * clears lp_ping_notsent.  A failed SEND or any REPLY updates the
- * gateway's aliveness, and a successful REPLY additionally parses the
- * returned NI status when avoid_asym_router_failure is set.
- */
-static void
-lnet_router_checker_event(struct lnet_event *event)
-{
- struct lnet_rc_data *rcd = event->md.user_ptr;
- struct lnet_peer *lp;
-
- LASSERT(rcd);
-
- if (event->unlinked) {
- LNetInvalidateMDHandle(&rcd->rcd_mdh);
- return;
- }
-
- LASSERT(event->type == LNET_EVENT_SEND ||
- event->type == LNET_EVENT_REPLY);
-
- lp = rcd->rcd_gateway;
- LASSERT(lp);
-
- /*
- * NB: it's called with holding lnet_res_lock, we have a few
- * places need to hold both locks at the same time, please take
- * care of lock ordering
- */
- lnet_net_lock(lp->lp_cpt);
- if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
- /* ignore if no longer a router or rcd is replaced */
- goto out;
- }
-
- if (event->type == LNET_EVENT_SEND) {
- lp->lp_ping_notsent = 0;
- /* successful send: nothing more to do until the reply arrives */
- if (!event->status)
- goto out;
- }
-
- /* LNET_EVENT_REPLY */
- /*
- * A successful REPLY means the router is up. If _any_ comms
- * to the router fail I assume it's down (this will happen if
- * we ping alive routers to try to detect router death before
- * apps get burned).
- */
- lnet_notify_locked(lp, 1, !event->status, jiffies);
-
- /*
- * The router checker will wake up very shortly and do the
- * actual notification.
- * XXX If 'lp' stops being a router before then, it will still
- * have the notification pending!!!
- */
- if (avoid_asym_router_failure && !event->status)
- lnet_parse_rc_info(rcd);
-
- out:
- lnet_net_unlock(lp->lp_cpt);
-}
-
-/*
- * Sleep in one-second steps until every router on the_lnet.ln_routers has
- * had at least one aliveness report (a non-zero lp_alive_count).  The
- * router checker must be running.
- */
-static void
-lnet_wait_known_routerstate(void)
-{
-	struct lnet_peer *rtr;
-	int unknown;
-	int cpt;
-
-	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
-
-	for (;;) {
-		cpt = lnet_net_lock_current();
-
-		unknown = 0;
-		list_for_each_entry(rtr, &the_lnet.ln_routers, lp_rtr_list) {
-			if (rtr->lp_alive_count)
-				continue;
-
-			/* one router with no news yet is enough to keep waiting */
-			unknown = 1;
-			break;
-		}
-
-		lnet_net_unlock(cpt);
-
-		if (!unknown)
-			return;
-
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
-	}
-}
-
-/*
- * Clear the down-NI count of the first route through gateway @gw that
- * leads to @net.  Only applies when the gateway's ping features include NI
- * status reporting.
- */
-void
-lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net)
-{
-	struct lnet_route *route;
-
-	if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS))
-		return;
-
-	list_for_each_entry(route, &gw->lp_routes, lr_gwlist) {
-		if (route->lr_net != net)
-			continue;
-
-		route->lr_downis = 0;
-		break;
-	}
-}
-
-/*
- * Mark local NIs as down when they have not been alive for longer than
- * router_ping_timeout plus the larger of the two router check intervals.
- * The loopback NI is skipped.  Only valid while routing is enabled.
- */
-static void
-lnet_update_ni_status_locked(void)
-{
- struct lnet_ni *ni;
- time64_t now;
- int timeout;
-
- LASSERT(the_lnet.ln_routing);
-
- timeout = router_ping_timeout +
- max(live_router_check_interval, dead_router_check_interval);
-
- now = ktime_get_real_seconds();
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- if (ni->ni_lnd->lnd_type == LOLND)
- continue;
-
- /* cheap unlocked check first; most NIs are expected to pass it */
- if (now < ni->ni_last_alive + timeout)
- continue;
-
- lnet_ni_lock(ni);
- /* re-check with lock */
- if (now < ni->ni_last_alive + timeout) {
- lnet_ni_unlock(ni);
- continue;
- }
-
- LASSERT(ni->ni_status);
-
- if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
- CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
- libcfs_nid2str(ni->ni_nid), timeout);
- /*
- * NB: so far, this is the only place to set
- * NI status to "down"
- */
- ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
- }
- lnet_ni_unlock(ni);
- }
-}
-
-static void
-lnet_destroy_rc_data(struct lnet_rc_data *rcd)
-{
- LASSERT(list_empty(&rcd->rcd_list));
- /* detached from network */
- LASSERT(LNetMDHandleIsInvalid(rcd->rcd_mdh));
-
- if (rcd->rcd_gateway) {
- int cpt = rcd->rcd_gateway->lp_cpt;
-
- lnet_net_lock(cpt);
- lnet_peer_decref_locked(rcd->rcd_gateway);
- lnet_net_unlock(cpt);
- }
-
- kfree(rcd->rcd_pinginfo);
-
- kfree(rcd);
-}
-
-static struct lnet_rc_data *
-lnet_create_rc_data_locked(struct lnet_peer *gateway)
-{
- struct lnet_rc_data *rcd = NULL;
- struct lnet_ping_info *pi;
- struct lnet_md md;
- int rc;
- int i;
-
- lnet_net_unlock(gateway->lp_cpt);
-
- rcd = kzalloc(sizeof(*rcd), GFP_NOFS);
- if (!rcd)
- goto out;
-
- LNetInvalidateMDHandle(&rcd->rcd_mdh);
- INIT_LIST_HEAD(&rcd->rcd_list);
-
- pi = kzalloc(LNET_PINGINFO_SIZE, GFP_NOFS);
- if (!pi)
- goto out;
-
- for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
- pi->pi_ni[i].ns_nid = LNET_NID_ANY;
- pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
- }
- rcd->rcd_pinginfo = pi;
-
- md.start = pi;
- md.user_ptr = rcd;
- md.length = LNET_PINGINFO_SIZE;
- md.threshold = LNET_MD_THRESH_INF;
- md.options = LNET_MD_TRUNCATE;
- md.eq_handle = the_lnet.ln_rc_eqh;
-
- LASSERT(!LNetEQHandleIsInvalid(the_lnet.ln_rc_eqh));
- rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh);
- if (rc < 0) {
- CERROR("Can't bind MD: %d\n", rc);
- goto out;
- }
- LASSERT(!rc);
-
- lnet_net_lock(gateway->lp_cpt);
- /* router table changed or someone has created rcd for this gateway */
- if (!lnet_isrouter(gateway) || gateway->lp_rcd) {
- lnet_net_unlock(gateway->lp_cpt);
- goto out;
- }
-
- lnet_peer_addref_locked(gateway);
- rcd->rcd_gateway = gateway;
- gateway->lp_rcd = rcd;
- gateway->lp_ping_notsent = 0;
-
- return rcd;
-
- out:
- if (rcd) {
- if (!LNetMDHandleIsInvalid(rcd->rcd_mdh)) {
- rc = LNetMDUnlink(rcd->rcd_mdh);
- LASSERT(!rc);
- }
- lnet_destroy_rc_data(rcd);
- }
-
- lnet_net_lock(gateway->lp_cpt);
- return gateway->lp_rcd;
-}
-
-static int
-lnet_router_check_interval(struct lnet_peer *rtr)
-{
- int secs;
-
- secs = rtr->lp_alive ? live_router_check_interval :
- dead_router_check_interval;
- if (secs < 0)
- secs = 0;
-
- return secs;
-}
-
-static void
-lnet_ping_router_locked(struct lnet_peer *rtr)
-{
- struct lnet_rc_data *rcd = NULL;
- unsigned long now = jiffies;
- int secs;
-
- lnet_peer_addref_locked(rtr);
-
- if (rtr->lp_ping_deadline && /* ping timed out? */
- time_after(now, rtr->lp_ping_deadline))
- lnet_notify_locked(rtr, 1, 0, now);
-
- /* Run any outstanding notifications */
- lnet_ni_notify_locked(rtr->lp_ni, rtr);
-
- if (!lnet_isrouter(rtr) ||
- the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
- /* router table changed or router checker is shutting down */
- lnet_peer_decref_locked(rtr);
- return;
- }
-
- rcd = rtr->lp_rcd ?
- rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
-
- if (!rcd)
- return;
-
- secs = lnet_router_check_interval(rtr);
-
- CDEBUG(D_NET,
- "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n",
- libcfs_nid2str(rtr->lp_nid), secs,
- rtr->lp_ping_deadline, rtr->lp_ping_notsent,
- rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
-
- if (secs && !rtr->lp_ping_notsent &&
- time_after(now, rtr->lp_ping_timestamp + secs * HZ)) {
- int rc;
- struct lnet_process_id id;
- struct lnet_handle_md mdh;
-
- id.nid = rtr->lp_nid;
- id.pid = LNET_PID_LUSTRE;
- CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
-
- rtr->lp_ping_notsent = 1;
- rtr->lp_ping_timestamp = now;
-
- mdh = rcd->rcd_mdh;
-
- if (!rtr->lp_ping_deadline) {
- rtr->lp_ping_deadline =
- jiffies + router_ping_timeout * HZ;
- }
-
- lnet_net_unlock(rtr->lp_cpt);
-
- rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
-
- lnet_net_lock(rtr->lp_cpt);
- if (rc)
- rtr->lp_ping_notsent = 0; /* no event pending */
- }
-
- lnet_peer_decref_locked(rtr);
-}
-
-int
-lnet_router_checker_start(void)
-{
- struct task_struct *task;
- int rc;
- int eqsz = 0;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- if (check_routers_before_use &&
- dead_router_check_interval <= 0) {
- LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n");
- return -EINVAL;
- }
-
- init_completion(&the_lnet.ln_rc_signal);
-
- rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh);
- if (rc) {
- CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
- return -ENOMEM;
- }
-
- the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
- task = kthread_run(lnet_router_checker, NULL, "router_checker");
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("Can't start router checker thread: %d\n", rc);
- /* block until event callback signals exit */
- wait_for_completion(&the_lnet.ln_rc_signal);
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT(!rc);
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
- return -ENOMEM;
- }
-
- if (check_routers_before_use) {
- /*
- * Note that a helpful side-effect of pinging all known routers
- * at startup is that it makes them drop stale connections they
- * may have to a previous instance of me.
- */
- lnet_wait_known_routerstate();
- }
-
- return 0;
-}
-
-void
-lnet_router_checker_stop(void)
-{
- int rc;
-
- if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
- return;
-
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
- the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;
- /* wakeup the RC thread if it's sleeping */
- wake_up(&the_lnet.ln_rc_waitq);
-
- /* block until event callback signals exit */
- wait_for_completion(&the_lnet.ln_rc_signal);
- LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
-
- rc = LNetEQFree(the_lnet.ln_rc_eqh);
- LASSERT(!rc);
-}
-
-static void
-lnet_prune_rc_data(int wait_unlink)
-{
- struct lnet_rc_data *rcd;
- struct lnet_rc_data *tmp;
- struct lnet_peer *lp;
- struct list_head head;
- int i = 2;
-
- if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
- list_empty(&the_lnet.ln_rcd_deathrow) &&
- list_empty(&the_lnet.ln_rcd_zombie)))
- return;
-
- INIT_LIST_HEAD(&head);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
- /* router checker is stopping, prune all */
- list_for_each_entry(lp, &the_lnet.ln_routers,
- lp_rtr_list) {
- if (!lp->lp_rcd)
- continue;
-
- LASSERT(list_empty(&lp->lp_rcd->rcd_list));
- list_add(&lp->lp_rcd->rcd_list,
- &the_lnet.ln_rcd_deathrow);
- lp->lp_rcd = NULL;
- }
- }
-
- /* unlink all RCDs on deathrow list */
- list_splice_init(&the_lnet.ln_rcd_deathrow, &head);
-
- if (!list_empty(&head)) {
- lnet_net_unlock(LNET_LOCK_EX);
-
- list_for_each_entry(rcd, &head, rcd_list)
- LNetMDUnlink(rcd->rcd_mdh);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- list_splice_init(&head, &the_lnet.ln_rcd_zombie);
-
- /* release all zombie RCDs */
- while (!list_empty(&the_lnet.ln_rcd_zombie)) {
- list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
- rcd_list) {
- if (LNetMDHandleIsInvalid(rcd->rcd_mdh))
- list_move(&rcd->rcd_list, &head);
- }
-
- wait_unlink = wait_unlink &&
- !list_empty(&the_lnet.ln_rcd_zombie);
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- while (!list_empty(&head)) {
- rcd = list_entry(head.next,
- struct lnet_rc_data, rcd_list);
- list_del_init(&rcd->rcd_list);
- lnet_destroy_rc_data(rcd);
- }
-
- if (!wait_unlink)
- return;
-
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
- "Waiting for rc buffers to unlink\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ / 4);
-
- lnet_net_lock(LNET_LOCK_EX);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
-/*
- * This function is called to check if the RC should block indefinitely.
- * It's called from lnet_router_checker() as well as being passed to
- * wait_event_interruptible() to avoid the lost wake_up problem.
- *
- * When it's called from wait_event_interruptible() it is necessary to
- * also not sleep if the rc state is not running to avoid a deadlock
- * when the system is shutting down
- */
-static inline bool
-lnet_router_checker_active(void)
-{
- if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING)
- return true;
-
- /*
- * Router Checker thread needs to run when routing is enabled in
- * order to call lnet_update_ni_status_locked()
- */
- if (the_lnet.ln_routing)
- return true;
-
- return !list_empty(&the_lnet.ln_routers) &&
- (live_router_check_interval > 0 ||
- dead_router_check_interval > 0);
-}
-
-static int
-lnet_router_checker(void *arg)
-{
- struct lnet_peer *rtr;
- struct list_head *entry;
-
- while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
- __u64 version;
- int cpt;
- int cpt2;
-
- cpt = lnet_net_lock_current();
-rescan:
- version = the_lnet.ln_routers_version;
-
- list_for_each(entry, &the_lnet.ln_routers) {
- rtr = list_entry(entry, struct lnet_peer, lp_rtr_list);
-
- cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
- if (cpt != cpt2) {
- lnet_net_unlock(cpt);
- cpt = cpt2;
- lnet_net_lock(cpt);
- /* the routers list has changed */
- if (version != the_lnet.ln_routers_version)
- goto rescan;
- }
-
- lnet_ping_router_locked(rtr);
-
- /* NB dropped lock */
- if (version != the_lnet.ln_routers_version) {
- /* the routers list has changed */
- goto rescan;
- }
- }
-
- if (the_lnet.ln_routing)
- lnet_update_ni_status_locked();
-
- lnet_net_unlock(cpt);
-
- lnet_prune_rc_data(0); /* don't wait for UNLINK */
-
- /*
- * Call schedule_timeout() here always adds 1 to load average
- * because kernel counts # active tasks as nr_running
- * + nr_uninterruptible.
- */
- /*
- * if there are any routes then wakeup every second. If
- * there are no routes then sleep indefinitely until woken
- * up by a user adding a route
- */
- if (!lnet_router_checker_active())
- wait_event_interruptible(the_lnet.ln_rc_waitq,
- lnet_router_checker_active());
- else
- wait_event_interruptible_timeout(the_lnet.ln_rc_waitq,
- false,
- HZ);
- }
-
- lnet_prune_rc_data(1); /* wait for UNLINK */
-
- the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
- complete(&the_lnet.ln_rc_signal);
- /* The unlink event callback will signal final completion */
- return 0;
-}
-
-void
-lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages)
-{
- while (--npages >= 0)
- __free_page(rb->rb_kiov[npages].bv_page);
-
- kfree(rb);
-}
-
-static struct lnet_rtrbuf *
-lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt)
-{
- int npages = rbp->rbp_npages;
- int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]);
- struct page *page;
- struct lnet_rtrbuf *rb;
- int i;
-
- rb = kzalloc_cpt(sz, GFP_NOFS, cpt);
- if (!rb)
- return NULL;
-
- rb->rb_pool = rbp;
-
- for (i = 0; i < npages; i++) {
- page = alloc_pages_node(
- cfs_cpt_spread_node(lnet_cpt_table(), cpt),
- GFP_KERNEL | __GFP_ZERO, 0);
- if (!page) {
- while (--i >= 0)
- __free_page(rb->rb_kiov[i].bv_page);
-
- kfree(rb);
- return NULL;
- }
-
- rb->rb_kiov[i].bv_len = PAGE_SIZE;
- rb->rb_kiov[i].bv_offset = 0;
- rb->rb_kiov[i].bv_page = page;
- }
-
- return rb;
-}
-
-static void
-lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
-{
- int npages = rbp->rbp_npages;
- struct list_head tmp;
- struct lnet_rtrbuf *rb;
- struct lnet_rtrbuf *temp;
-
- if (!rbp->rbp_nbuffers) /* not initialized or already freed */
- return;
-
- INIT_LIST_HEAD(&tmp);
-
- lnet_net_lock(cpt);
- lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt);
- list_splice_init(&rbp->rbp_bufs, &tmp);
- rbp->rbp_req_nbuffers = 0;
- rbp->rbp_nbuffers = 0;
- rbp->rbp_credits = 0;
- rbp->rbp_mincredits = 0;
- lnet_net_unlock(cpt);
-
- /* Free buffers on the free list. */
- list_for_each_entry_safe(rb, temp, &tmp, rb_list) {
- list_del(&rb->rb_list);
- lnet_destroy_rtrbuf(rb, npages);
- }
-}
-
-static int
-lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
-{
- struct list_head rb_list;
- struct lnet_rtrbuf *rb;
- int num_rb;
- int num_buffers = 0;
- int old_req_nbufs;
- int npages = rbp->rbp_npages;
-
- lnet_net_lock(cpt);
- /*
- * If we are called for less buffers than already in the pool, we
- * just lower the req_nbuffers number and excess buffers will be
- * thrown away as they are returned to the free list. Credits
- * then get adjusted as well.
- * If we already have enough buffers allocated to serve the
- * increase requested, then we can treat that the same way as we
- * do the decrease.
- */
- num_rb = nbufs - rbp->rbp_nbuffers;
- if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) {
- rbp->rbp_req_nbuffers = nbufs;
- lnet_net_unlock(cpt);
- return 0;
- }
- /*
- * store the older value of rbp_req_nbuffers and then set it to
- * the new request to prevent lnet_return_rx_credits_locked() from
- * freeing buffers that we need to keep around
- */
- old_req_nbufs = rbp->rbp_req_nbuffers;
- rbp->rbp_req_nbuffers = nbufs;
- lnet_net_unlock(cpt);
-
- INIT_LIST_HEAD(&rb_list);
-
- /*
- * allocate the buffers on a local list first. If all buffers are
- * allocated successfully then join this list to the rbp buffer
- * list. If not then free all allocated buffers.
- */
- while (num_rb-- > 0) {
- rb = lnet_new_rtrbuf(rbp, cpt);
- if (!rb) {
- CERROR("Failed to allocate %d route bufs of %d pages\n",
- nbufs, npages);
-
- lnet_net_lock(cpt);
- rbp->rbp_req_nbuffers = old_req_nbufs;
- lnet_net_unlock(cpt);
-
- goto failed;
- }
-
- list_add(&rb->rb_list, &rb_list);
- num_buffers++;
- }
-
- lnet_net_lock(cpt);
-
- list_splice_tail(&rb_list, &rbp->rbp_bufs);
- rbp->rbp_nbuffers += num_buffers;
- rbp->rbp_credits += num_buffers;
- rbp->rbp_mincredits = rbp->rbp_credits;
- /*
- * We need to schedule blocked msg using the newly
- * added buffers.
- */
- while (!list_empty(&rbp->rbp_bufs) &&
- !list_empty(&rbp->rbp_msgs))
- lnet_schedule_blocked_locked(rbp);
-
- lnet_net_unlock(cpt);
-
- return 0;
-
-failed:
- while (!list_empty(&rb_list)) {
- rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list);
- list_del(&rb->rb_list);
- lnet_destroy_rtrbuf(rb, npages);
- }
-
- return -ENOMEM;
-}
-
-static void
-lnet_rtrpool_init(struct lnet_rtrbufpool *rbp, int npages)
-{
- INIT_LIST_HEAD(&rbp->rbp_msgs);
- INIT_LIST_HEAD(&rbp->rbp_bufs);
-
- rbp->rbp_npages = npages;
- rbp->rbp_credits = 0;
- rbp->rbp_mincredits = 0;
-}
-
-void
-lnet_rtrpools_free(int keep_pools)
-{
- struct lnet_rtrbufpool *rtrp;
- int i;
-
- if (!the_lnet.ln_rtrpools) /* uninitialized or freed */
- return;
-
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- lnet_rtrpool_free_bufs(&rtrp[LNET_TINY_BUF_IDX], i);
- lnet_rtrpool_free_bufs(&rtrp[LNET_SMALL_BUF_IDX], i);
- lnet_rtrpool_free_bufs(&rtrp[LNET_LARGE_BUF_IDX], i);
- }
-
- if (!keep_pools) {
- cfs_percpt_free(the_lnet.ln_rtrpools);
- the_lnet.ln_rtrpools = NULL;
- }
-}
-
-static int
-lnet_nrb_tiny_calculate(void)
-{
- int nrbs = LNET_NRB_TINY;
-
- if (tiny_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "tiny_router_buffers=%d invalid when routing enabled\n",
- tiny_router_buffers);
- return -EINVAL;
- }
-
- if (tiny_router_buffers > 0)
- nrbs = tiny_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_TINY_MIN);
-}
-
-static int
-lnet_nrb_small_calculate(void)
-{
- int nrbs = LNET_NRB_SMALL;
-
- if (small_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "small_router_buffers=%d invalid when routing enabled\n",
- small_router_buffers);
- return -EINVAL;
- }
-
- if (small_router_buffers > 0)
- nrbs = small_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_SMALL_MIN);
-}
-
-static int
-lnet_nrb_large_calculate(void)
-{
- int nrbs = LNET_NRB_LARGE;
-
- if (large_router_buffers < 0) {
- LCONSOLE_ERROR_MSG(0x10c,
- "large_router_buffers=%d invalid when routing enabled\n",
- large_router_buffers);
- return -EINVAL;
- }
-
- if (large_router_buffers > 0)
- nrbs = large_router_buffers;
-
- nrbs /= LNET_CPT_NUMBER;
- return max(nrbs, LNET_NRB_LARGE_MIN);
-}
-
-int
-lnet_rtrpools_alloc(int im_a_router)
-{
- struct lnet_rtrbufpool *rtrp;
- int nrb_tiny;
- int nrb_small;
- int nrb_large;
- int rc;
- int i;
-
- if (!strcmp(forwarding, "")) {
- /* not set either way */
- if (!im_a_router)
- return 0;
- } else if (!strcmp(forwarding, "disabled")) {
- /* explicitly disabled */
- return 0;
- } else if (!strcmp(forwarding, "enabled")) {
- /* explicitly enabled */
- } else {
- LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
- return -EINVAL;
- }
-
- nrb_tiny = lnet_nrb_tiny_calculate();
- if (nrb_tiny < 0)
- return -EINVAL;
-
- nrb_small = lnet_nrb_small_calculate();
- if (nrb_small < 0)
- return -EINVAL;
-
- nrb_large = lnet_nrb_large_calculate();
- if (nrb_large < 0)
- return -EINVAL;
-
- the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(),
- LNET_NRBPOOLS *
- sizeof(struct lnet_rtrbufpool));
- if (!the_lnet.ln_rtrpools) {
- LCONSOLE_ERROR_MSG(0x10c,
- "Failed to initialize router buffe pool\n");
- return -ENOMEM;
- }
-
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
- nrb_tiny, i);
- if (rc)
- goto failed;
-
- lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX],
- LNET_NRB_SMALL_PAGES);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
- nrb_small, i);
- if (rc)
- goto failed;
-
- lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX],
- LNET_NRB_LARGE_PAGES);
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
- nrb_large, i);
- if (rc)
- goto failed;
- }
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 1;
- lnet_net_unlock(LNET_LOCK_EX);
-
- return 0;
-
- failed:
- lnet_rtrpools_free(0);
- return rc;
-}
-
-static int
-lnet_rtrpools_adjust_helper(int tiny, int small, int large)
-{
- int nrb = 0;
- int rc = 0;
- int i;
- struct lnet_rtrbufpool *rtrp;
-
- /*
- * If the provided values for each buffer pool are different than the
- * configured values, we need to take action.
- */
- if (tiny >= 0) {
- tiny_router_buffers = tiny;
- nrb = lnet_nrb_tiny_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
- if (small >= 0) {
- small_router_buffers = small;
- nrb = lnet_nrb_small_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
- if (large >= 0) {
- large_router_buffers = large;
- nrb = lnet_nrb_large_calculate();
- cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
- rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
- nrb, i);
- if (rc)
- return rc;
- }
- }
-
- return 0;
-}
-
-int
-lnet_rtrpools_adjust(int tiny, int small, int large)
-{
- /*
- * this function doesn't revert the changes if adding new buffers
- * failed. It's up to the user space caller to revert the
- * changes.
- */
- if (!the_lnet.ln_routing)
- return 0;
-
- return lnet_rtrpools_adjust_helper(tiny, small, large);
-}
-
-int
-lnet_rtrpools_enable(void)
-{
- int rc = 0;
-
- if (the_lnet.ln_routing)
- return 0;
-
- if (!the_lnet.ln_rtrpools)
- /*
- * If routing is turned off, and we have never
- * initialized the pools before, just call the
- * standard buffer pool allocation routine as
- * if we are just configuring this for the first
- * time.
- */
- rc = lnet_rtrpools_alloc(1);
- else
- rc = lnet_rtrpools_adjust_helper(0, 0, 0);
- if (rc)
- return rc;
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 1;
-
- the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
- lnet_net_unlock(LNET_LOCK_EX);
-
- return rc;
-}
-
-void
-lnet_rtrpools_disable(void)
-{
- if (!the_lnet.ln_routing)
- return;
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_routing = 0;
- the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
-
- tiny_router_buffers = 0;
- small_router_buffers = 0;
- large_router_buffers = 0;
- lnet_net_unlock(LNET_LOCK_EX);
- lnet_rtrpools_free(1);
-}
-
-int
-lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, int alive, unsigned long when)
-{
- struct lnet_peer *lp = NULL;
- unsigned long now = jiffies;
- int cpt = lnet_cpt_of_nid(nid);
-
- LASSERT(!in_interrupt());
-
- CDEBUG(D_NET, "%s notifying %s: %s\n",
- !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
- libcfs_nid2str(nid),
- alive ? "up" : "down");
-
- if (ni &&
- LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
- CWARN("Ignoring notification of %s %s by %s (different net)\n",
- libcfs_nid2str(nid), alive ? "birth" : "death",
- libcfs_nid2str(ni->ni_nid));
- return -EINVAL;
- }
-
- /* can't do predictions... */
- if (time_after(when, now)) {
- CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n",
- !ni ? "userspace" : libcfs_nid2str(ni->ni_nid),
- libcfs_nid2str(nid), alive ? "up" : "down",
- (when - now) / HZ);
- return -EINVAL;
- }
-
- if (ni && !alive && /* LND telling me she's down */
- !auto_down) { /* auto-down disabled */
- CDEBUG(D_NET, "Auto-down disabled\n");
- return 0;
- }
-
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- lnet_net_unlock(cpt);
- return -ESHUTDOWN;
- }
-
- lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
- if (!lp) {
- /* nid not found */
- lnet_net_unlock(cpt);
- CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
- return 0;
- }
-
- /*
- * We can't fully trust LND on reporting exact peer last_alive
- * if he notifies us about dead peer. For example ksocklnd can
- * call us with when == _time_when_the_node_was_booted_ if
- * no connections were successfully established
- */
- if (ni && !alive && when < lp->lp_last_alive)
- when = lp->lp_last_alive;
-
- lnet_notify_locked(lp, !ni, alive, when);
-
- if (ni)
- lnet_ni_notify_locked(ni, lp);
-
- lnet_peer_decref_locked(lp);
-
- lnet_net_unlock(cpt);
- return 0;
-}
-EXPORT_SYMBOL(lnet_notify);
diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c
deleted file mode 100644
index ae4b7f5..0000000
--- a/drivers/staging/lustre/lnet/lnet/router_proc.c
+++ /dev/null
@@ -1,907 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- *
- * This file is part of Portals
- * http://sourceforge.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/lnet/lib-lnet.h>
-
-/*
- * This is really lnet_proc.c. You might need to update sanity test 215
- * if any file format is changed.
- */
-
-#define LNET_LOFFT_BITS (sizeof(loff_t) * 8)
-/*
- * NB: max allowed LNET_CPT_BITS is 8 on 64-bit system and 2 on 32-bit system
- */
-#define LNET_PROC_CPT_BITS (LNET_CPT_BITS + 1)
-/* change version, 16 bits or 8 bits */
-#define LNET_PROC_VER_BITS max_t(size_t, min_t(size_t, LNET_LOFFT_BITS, 64) / 4, 8)
-
-#define LNET_PROC_HASH_BITS LNET_PEER_HASH_BITS
-/*
- * bits for peer hash offset
- * NB: we don't use the highest bit of *ppos because it's signed
- */
-#define LNET_PROC_HOFF_BITS (LNET_LOFFT_BITS - \
- LNET_PROC_CPT_BITS - \
- LNET_PROC_VER_BITS - \
- LNET_PROC_HASH_BITS - 1)
-/* bits for hash index + position */
-#define LNET_PROC_HPOS_BITS (LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS)
-/* bits for peer hash table + hash version */
-#define LNET_PROC_VPOS_BITS (LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS)
-
-#define LNET_PROC_CPT_MASK ((1ULL << LNET_PROC_CPT_BITS) - 1)
-#define LNET_PROC_VER_MASK ((1ULL << LNET_PROC_VER_BITS) - 1)
-#define LNET_PROC_HASH_MASK ((1ULL << LNET_PROC_HASH_BITS) - 1)
-#define LNET_PROC_HOFF_MASK ((1ULL << LNET_PROC_HOFF_BITS) - 1)
-
-#define LNET_PROC_CPT_GET(pos) \
- (int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK)
-
-#define LNET_PROC_VER_GET(pos) \
- (int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK)
-
-#define LNET_PROC_HASH_GET(pos) \
- (int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK)
-
-#define LNET_PROC_HOFF_GET(pos) \
- (int)((pos) & LNET_PROC_HOFF_MASK)
-
-#define LNET_PROC_POS_MAKE(cpt, ver, hash, off) \
- (((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) | \
- ((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) | \
- ((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \
- ((off) & LNET_PROC_HOFF_MASK))
-
-#define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK))
-
-static int __proc_lnet_stats(void *data, int write,
- loff_t pos, void __user *buffer, int nob)
-{
- int rc;
- struct lnet_counters *ctrs;
- int len;
- char *tmpstr;
- const int tmpsiz = 256; /* 7 %u and 4 %llu */
-
- if (write) {
- lnet_counters_reset();
- return 0;
- }
-
- /* read */
-
- ctrs = kzalloc(sizeof(*ctrs), GFP_NOFS);
- if (!ctrs)
- return -ENOMEM;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr) {
- kfree(ctrs);
- return -ENOMEM;
- }
-
- lnet_counters_get(ctrs);
-
- len = snprintf(tmpstr, tmpsiz,
- "%u %u %u %u %u %u %u %llu %llu %llu %llu",
- ctrs->msgs_alloc, ctrs->msgs_max,
- ctrs->errors,
- ctrs->send_count, ctrs->recv_count,
- ctrs->route_count, ctrs->drop_count,
- ctrs->send_length, ctrs->recv_length,
- ctrs->route_length, ctrs->drop_length);
-
- if (pos >= min_t(int, len, strlen(tmpstr)))
- rc = 0;
- else
- rc = cfs_trace_copyout_string(buffer, nob,
- tmpstr + pos, "\n");
-
- kfree(tmpstr);
- kfree(ctrs);
- return rc;
-}
-
-static int proc_lnet_stats(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return lprocfs_call_handler(table->data, write, ppos, buffer, lenp,
- __proc_lnet_stats);
-}
-
-static int proc_lnet_routes(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- const int tmpsiz = 256;
- char *tmpstr;
- char *s;
- int rc = 0;
- int len;
- int ver;
- int off;
-
- BUILD_BUG_ON(sizeof(loff_t) < 4);
-
- off = LNET_PROC_HOFF_GET(*ppos);
- ver = LNET_PROC_VER_GET(*ppos);
-
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n",
- the_lnet.ln_routing ? "enabled" : "disabled");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %8s %7s %s\n",
- "net", "hops", "priority", "state", "router");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- lnet_net_lock(0);
- ver = (unsigned int)the_lnet.ln_remote_nets_version;
- lnet_net_unlock(0);
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- } else {
- struct list_head *n;
- struct list_head *r;
- struct lnet_route *route = NULL;
- struct lnet_remotenet *rnet = NULL;
- int skip = off - 1;
- struct list_head *rn_list;
- int i;
-
- lnet_net_lock(0);
-
- if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
- lnet_net_unlock(0);
- kfree(tmpstr);
- return -ESTALE;
- }
-
- for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && !route; i++) {
- rn_list = &the_lnet.ln_remote_nets_hash[i];
-
- n = rn_list->next;
-
- while (n != rn_list && !route) {
- rnet = list_entry(n, struct lnet_remotenet,
- lrn_list);
-
- r = rnet->lrn_routes.next;
-
- while (r != &rnet->lrn_routes) {
- struct lnet_route *re;
-
- re = list_entry(r, struct lnet_route,
- lr_list);
- if (!skip) {
- route = re;
- break;
- }
-
- skip--;
- r = r->next;
- }
-
- n = n->next;
- }
- }
-
- if (route) {
- __u32 net = rnet->lrn_net;
- __u32 hops = route->lr_hops;
- unsigned int priority = route->lr_priority;
- lnet_nid_t nid = route->lr_gateway->lp_nid;
- int alive = lnet_is_route_alive(route);
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-8s %4u %8u %7s %s\n",
- libcfs_net2str(net), hops,
- priority,
- alive ? "up" : "down",
- libcfs_nid2str(nid));
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
-
- lnet_net_unlock(0);
- }
-
- len = s - tmpstr; /* how many bytes was written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len)) {
- rc = -EFAULT;
- } else {
- off += 1;
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- }
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-static int proc_lnet_routers(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int rc = 0;
- char *tmpstr;
- char *s;
- const int tmpsiz = 256;
- int len;
- int ver;
- int off;
-
- off = LNET_PROC_HOFF_GET(*ppos);
- ver = LNET_PROC_VER_GET(*ppos);
-
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n",
- "ref", "rtr_ref", "alive_cnt", "state",
- "last_ping", "ping_sent", "deadline",
- "down_ni", "router");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- lnet_net_lock(0);
- ver = (unsigned int)the_lnet.ln_routers_version;
- lnet_net_unlock(0);
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- } else {
- struct list_head *r;
- struct lnet_peer *peer = NULL;
- int skip = off - 1;
-
- lnet_net_lock(0);
-
- if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) {
- lnet_net_unlock(0);
-
- kfree(tmpstr);
- return -ESTALE;
- }
-
- r = the_lnet.ln_routers.next;
-
- while (r != &the_lnet.ln_routers) {
- struct lnet_peer *lp;
-
- lp = list_entry(r, struct lnet_peer, lp_rtr_list);
- if (!skip) {
- peer = lp;
- break;
- }
-
- skip--;
- r = r->next;
- }
-
- if (peer) {
- lnet_nid_t nid = peer->lp_nid;
- unsigned long now = jiffies;
- unsigned long deadline = peer->lp_ping_deadline;
- int nrefs = peer->lp_refcount;
- int nrtrrefs = peer->lp_rtr_refcount;
- int alive_cnt = peer->lp_alive_count;
- int alive = peer->lp_alive;
- int pingsent = !peer->lp_ping_notsent;
- int last_ping = (now - peer->lp_ping_timestamp) / HZ;
- int down_ni = 0;
- struct lnet_route *rtr;
-
- if ((peer->lp_ping_feats &
- LNET_PING_FEAT_NI_STATUS)) {
- list_for_each_entry(rtr, &peer->lp_routes,
- lr_gwlist) {
- /*
- * downis on any route should be the
- * number of downis on the gateway
- */
- if (rtr->lr_downis) {
- down_ni = rtr->lr_downis;
- break;
- }
- }
- }
-
- if (!deadline)
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n",
- nrefs, nrtrrefs, alive_cnt,
- alive ? "up" : "down", last_ping,
- pingsent, "NA", down_ni,
- libcfs_nid2str(nid));
- else
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n",
- nrefs, nrtrrefs, alive_cnt,
- alive ? "up" : "down", last_ping,
- pingsent,
- (deadline - now) / HZ,
- down_ni, libcfs_nid2str(nid));
- LASSERT(tmpstr + tmpsiz - s > 0);
- }
-
- lnet_net_unlock(0);
- }
-
- len = s - tmpstr; /* how many bytes was written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len)) {
- rc = -EFAULT;
- } else {
- off += 1;
- *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
- }
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-/*
- * Read handler for the "peers" entry of lnet_table: emits at most one
- * line of text per call.
- *
- * When *ppos is zero the column header is produced; otherwise *ppos is
- * decoded into a (cpt, version, hash bucket, offset-in-bucket) cursor and
- * the peer at that cursor, if any, is formatted. After a successful copy
- * the advanced cursor is re-encoded into *ppos with LNET_PROC_POS_MAKE().
- *
- * Writes are not supported (LASSERT(!write)).
- *
- * Returns 0 with *lenp set to the number of bytes produced (0 once the
- * cursor has run past the last CPT), or a negative errno:
- *   -ENOMEM  the scratch buffer could not be allocated
- *   -ESTALE  the peer table version no longer matches the cursor's version
- *   -EINVAL  the formatted line is longer than *lenp
- *   -EFAULT  copy_to_user() failed
- */
-static int proc_lnet_peers(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- const int tmpsiz = 256;
- struct lnet_peer_table *ptable;
- char *tmpstr;
- char *s;
- /* decode the resume cursor stored in *ppos by the previous call */
- int cpt = LNET_PROC_CPT_GET(*ppos);
- int ver = LNET_PROC_VER_GET(*ppos);
- int hash = LNET_PROC_HASH_GET(*ppos);
- int hoff = LNET_PROC_HOFF_GET(*ppos);
- int rc = 0;
- int len;
-
- /* the cursor's hash field must be wide enough for a peer hash index */
- BUILD_BUG_ON(LNET_PROC_HASH_BITS < LNET_PEER_HASH_BITS);
- LASSERT(!write);
-
- if (!*lenp)
- return 0;
-
- /* cursor is past the last CPT: report zero bytes */
- if (cpt >= LNET_CPT_NUMBER) {
- *lenp = 0;
- return 0;
- }
-
- tmpstr = kmalloc(tmpsiz, GFP_KERNEL);
- if (!tmpstr)
- return -ENOMEM;
-
- s = tmpstr; /* points to current position in tmpstr[] */
-
- if (!*ppos) {
- /* first call: emit the column header only */
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n",
- "nid", "refs", "state", "last", "max",
- "rtr", "min", "tx", "min", "queue");
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- /* hoff is 1-based: the lookup below skips hoff - 1 entries */
- hoff++;
- } else {
- struct lnet_peer *peer;
- struct list_head *p;
- int skip;
- again:
- p = NULL;
- peer = NULL;
- /* number of list entries to step over before taking one */
- skip = hoff - 1;
-
- lnet_net_lock(cpt);
- ptable = the_lnet.ln_peer_tables[cpt];
- /* positioned on the first entry of a bucket: resample the version */
- if (hoff == 1)
- ver = LNET_PROC_VERSION(ptable->pt_version);
-
- /* table version differs from the one recorded in the cursor */
- if (ver != LNET_PROC_VERSION(ptable->pt_version)) {
- lnet_net_unlock(cpt);
- kfree(tmpstr);
- return -ESTALE;
- }
-
- while (hash < LNET_PEER_HASH_SIZE) {
- /* p is NULL on entry to every iteration of this loop */
- if (!p)
- p = ptable->pt_hash[hash].next;
-
- while (p != &ptable->pt_hash[hash]) {
- struct lnet_peer *lp;
-
- lp = list_entry(p, struct lnet_peer,
- lp_hashlist);
- if (!skip) {
- peer = lp;
-
- /*
- * minor optimization: start from idx+1
- * on next iteration if we've just
- * drained lp_hashlist
- */
- if (lp->lp_hashlist.next ==
- &ptable->pt_hash[hash]) {
- hoff = 1;
- hash++;
- } else {
- hoff++;
- }
-
- break;
- }
-
- skip--;
- p = lp->lp_hashlist.next;
- }
-
- if (peer)
- break;
-
- /*
- * bucket exhausted without a hit: move to the next bucket;
- * note that skip is not reset here
- */
- p = NULL;
- hoff = 1;
- hash++;
- }
-
- if (peer) {
- /* copy the fields to print while the net lock is still held */
- lnet_nid_t nid = peer->lp_nid;
- int nrefs = peer->lp_refcount;
- int lastalive = -1;
- char *aliveness = "NA";
- int maxcr = peer->lp_ni->ni_peertxcredits;
- int txcr = peer->lp_txcredits;
- int mintxcr = peer->lp_mintxcredits;
- int rtrcr = peer->lp_rtrcredits;
- int minrtrcr = peer->lp_minrtrcredits;
- int txqnob = peer->lp_txqnob;
-
- if (lnet_isrouter(peer) ||
- lnet_peer_aliveness_enabled(peer))
- aliveness = peer->lp_alive ? "up" : "down";
-
- if (lnet_peer_aliveness_enabled(peer)) {
- unsigned long now = jiffies;
- long delta;
-
- /* jiffies since lp_last_alive, converted to seconds */
- delta = now - peer->lp_last_alive;
- lastalive = (delta) / HZ;
-
- /* No need to mess up peers contents with
- * arbitrarily long integers - it suffices to
- * know that lastalive is more than 10000s old
- */
- if (lastalive >= 10000)
- lastalive = 9999;
- }
-
- /* formatting below uses only the local copies */
- lnet_net_unlock(cpt);
-
- s += snprintf(s, tmpstr + tmpsiz - s,
- "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n",
- libcfs_nid2str(nid), nrefs, aliveness,
- lastalive, maxcr, rtrcr, minrtrcr, txcr,
- mintxcr, txqnob);
- LASSERT(tmpstr + tmpsiz - s > 0);
-
- } else { /* peer is NULL */
- lnet_net_unlock(cpt);
- }
-
- /*
- * All buckets of this CPT have been visited: point the cursor at
- * the start of the next CPT. If nothing was printed and another
- * CPT remains, search it right away instead of returning empty.
- */
- if (hash == LNET_PEER_HASH_SIZE) {
- cpt++;
- hash = 0;
- hoff = 1;
- if (!peer && cpt < LNET_CPT_NUMBER)
- goto again;
- }
- }
-
- len = s - tmpstr; /* how many bytes were written */
-
- if (len > *lenp) { /* linux-supplied buffer is too small */
- rc = -EINVAL;
- } else if (len > 0) { /* wrote something */
- if (copy_to_user(buffer, tmpstr, len))
- rc = -EFAULT;
- else
- /* save the advanced cursor only after a successful copy */
- *ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff);
- }
-
- kfree(tmpstr);
-
- if (!rc)
- *lenp = len;
-
- return rc;
-}
-
-/*
- * Formatter behind proc_lnet_buffers(): renders the whole router buffer
- * pool table into a scratch buffer and copies out the part starting at
- * @pos.
- *
- * The header line is always produced. Pool rows are produced only when
- * the_lnet.ln_rtrpools is set; for each pool index the per-CPT entries
- * are printed while holding lnet_net_lock(LNET_LOCK_EX).
- *
- * Read-only (LASSERT(!write)). Returns -ENOMEM if the scratch buffer
- * cannot be allocated, 0 when @pos is at or beyond the end of the text,
- * otherwise whatever cfs_trace_copyout_string() returns.
- */
-static int __proc_lnet_buffers(void *data, int write,
-			       loff_t pos, void __user *buffer, int nob)
-{
-	struct lnet_rtrbufpool *pools;
-	char *text;
-	char *cur;
-	char *end;
-	int size;
-	int pool;
-	int cpt;
-	int used;
-	int rc = 0;
-
-	LASSERT(!write);
-
-	/* 64 bytes budgeted per line: (LNET_NRBPOOLS + 1) lines per CPT */
-	size = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER;
-	text = kvmalloc(size, GFP_KERNEL);
-	if (!text)
-		return -ENOMEM;
-
-	cur = text;
-	end = text + size;
-
-	cur += snprintf(cur, end - cur,
-			"%5s %5s %7s %7s\n",
-			"pages", "count", "credits", "min");
-	LASSERT(end - cur > 0);
-
-	/* no router pools means this node is not routing: header only */
-	if (the_lnet.ln_rtrpools) {
-		for (pool = 0; pool < LNET_NRBPOOLS; pool++) {
-			lnet_net_lock(LNET_LOCK_EX);
-			cfs_percpt_for_each(pools, cpt, the_lnet.ln_rtrpools) {
-				cur += snprintf(cur, end - cur,
-						"%5d %5d %7d %7d\n",
-						pools[pool].rbp_npages,
-						pools[pool].rbp_nbuffers,
-						pools[pool].rbp_credits,
-						pools[pool].rbp_mincredits);
-				LASSERT(end - cur > 0);
-			}
-			lnet_net_unlock(LNET_LOCK_EX);
-		}
-	}
-
-	used = cur - text;
-
-	/* copy out only if @pos still lies inside the generated text */
-	if (pos < min_t(int, used, strlen(text)))
-		rc = cfs_trace_copyout_string(buffer, nob,
-					      text + pos, NULL);
-
-	kvfree(text);
-	return rc;
-}
-
-/*
- * proc_handler for the "buffers" entry of lnet_table; forwards the
- * request to lprocfs_call_handler() with __proc_lnet_buffers() as the
- * worker.
- */
-static int proc_lnet_buffers(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	void *data = table->data;
-
-	return lprocfs_call_handler(data, write, ppos, buffer, lenp,
-				    __proc_lnet_buffers);
-}
-
-/*
- * Read handler for the "nis" entry of lnet_table.
- *
- * With *ppos == 0 the column header is emitted. Otherwise the network
- * interface at index *ppos - 1 in the_lnet.ln_nis is looked up and one
- * line per TX queue (per CPT the NI is bound to) is emitted. After a
- * successful copy *ppos is advanced by one.
- *
- * Read-only (LASSERT(!write)). Returns 0 with *lenp set to the number of
- * bytes produced, or -ENOMEM / -EINVAL (output larger than *lenp) /
- * -EFAULT (copy_to_user() failed).
- */
-static int proc_lnet_nis(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int size = 128 * LNET_CPT_NUMBER;
-	char *text;
-	char *cur;
-	int nbytes;
-	int rc = 0;
-
-	LASSERT(!write);
-
-	if (!*lenp)
-		return 0;
-
-	text = kvmalloc(size, GFP_KERNEL);
-	if (!text)
-		return -ENOMEM;
-
-	cur = text; /* next free byte in text[] */
-
-	if (*ppos) {
-		struct lnet_ni *found = NULL;
-		struct list_head *pos;
-		int remaining = *ppos - 1;
-
-		lnet_net_lock(0);
-
-		/* walk to the NI whose list index equals *ppos - 1 */
-		for (pos = the_lnet.ln_nis.next;
-		     pos != &the_lnet.ln_nis;
-		     pos = pos->next) {
-			if (!remaining) {
-				found = list_entry(pos, struct lnet_ni,
-						   ni_list);
-				break;
-			}
-
-			remaining--;
-		}
-
-		if (found) {
-			struct lnet_tx_queue *tq;
-			const char *state;
-			time64_t now = ktime_get_real_seconds();
-			int last_alive = -1;
-			int cpt;
-			int k;
-
-			if (the_lnet.ln_routing)
-				last_alive = now - found->ni_last_alive;
-
-			/* the loopback NI is reported as always alive */
-			if (found->ni_lnd->lnd_type == LOLND)
-				last_alive = 0;
-
-			lnet_ni_lock(found);
-			LASSERT(found->ni_status);
-			if (found->ni_status->ns_status == LNET_NI_STATUS_UP)
-				state = "up";
-			else
-				state = "down";
-			lnet_ni_unlock(found);
-
-			/* one output line per TX queue, i.e. per partition */
-			cfs_percpt_for_each(tq, cpt, found->ni_tx_queues) {
-				/* is this CPT listed in the NI's ni_cpts[]? */
-				for (k = 0; found->ni_cpts &&
-				     k < found->ni_ncpts; k++) {
-					if (cpt == found->ni_cpts[k])
-						break;
-				}
-
-				if (k == found->ni_ncpts)
-					continue;
-
-				/* lock 0 is already held by this function */
-				if (cpt)
-					lnet_net_lock(cpt);
-
-				cur += snprintf(cur, text + size - cur,
-						"%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
-						libcfs_nid2str(found->ni_nid),
-						state, last_alive,
-						*found->ni_refs[cpt],
-						found->ni_peertxcredits,
-						found->ni_peerrtrcredits,
-						tq->tq_credits_max,
-						tq->tq_credits,
-						tq->tq_credits_min);
-				if (cpt)
-					lnet_net_unlock(cpt);
-			}
-			LASSERT(text + size - cur > 0);
-		}
-
-		lnet_net_unlock(0);
-	} else {
-		cur += snprintf(cur, text + size - cur,
-				"%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
-				"nid", "status", "alive", "refs", "peer",
-				"rtr", "max", "tx", "min");
-		LASSERT(text + size - cur > 0);
-	}
-
-	nbytes = cur - text; /* bytes generated by this call */
-
-	if (nbytes > *lenp) {
-		/* caller's buffer cannot hold the output */
-		rc = -EINVAL;
-	} else if (nbytes > 0) {
-		if (copy_to_user(buffer, text, nbytes))
-			rc = -EFAULT;
-		else
-			*ppos += 1;
-	}
-
-	kvfree(text);
-
-	if (!rc)
-		*lenp = nbytes;
-
-	return rc;
-}
-
-/* One selectable portal rotor mode: numeric value, name and description. */
-struct lnet_portal_rotors {
- /* value compared against / stored into portal_rotor */
- int pr_value;
- /* name matched against user input on write and printed on read */
- const char *pr_name;
- /* human-readable description printed on read */
- const char *pr_desc;
-};
-
-/*
- * Table of the supported portal rotor modes. It is only ever read, so it
- * is const-qualified. The final entry (pr_value == -1, pr_name == NULL)
- * is the sentinel that terminates lookups by value and by name.
- */
-static const struct lnet_portal_rotors portal_rotors[] = {
-	{
-		.pr_value = LNET_PTL_ROTOR_OFF,
-		.pr_name  = "OFF",
-		.pr_desc  = "Turn off message rotor for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_ON,
-		.pr_name  = "ON",
-		.pr_desc  = "round-robin dispatch all PUT messages for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_RR_RT,
-		.pr_name  = "RR_RT",
-		.pr_desc  = "round-robin dispatch routed PUT message for wildcard portals"
-	},
-	{
-		.pr_value = LNET_PTL_ROTOR_HASH_RT,
-		.pr_name  = "HASH_RT",
-		.pr_desc  = "dispatch routed PUT message by hashing source NID for wildcard portals"
-	},
-	{
-		/* sentinel */
-		.pr_value = -1,
-		.pr_name  = NULL,
-		.pr_desc  = NULL
-	},
-};
-
-/*
- * Worker behind proc_lnet_portal_rotor().
- *
- * Read (@write == 0): looks up the portal_rotors[] entry whose value
- * equals portal_rotor, formats its name and description, and copies out
- * the text starting at @pos (0 is returned once @pos is past the text).
- *
- * Write: copies the user string in, trims it, and selects the first
- * table entry whose name is a case-insensitive prefix of the input;
- * portal_rotor is set to that entry's value. -EINVAL if nothing matches.
- *
- * Table lookups run under lnet_res_lock(0). Returns -ENOMEM if the
- * 128-byte scratch buffer cannot be allocated.
- */
-static int __proc_lnet_portal_rotor(void *data, int write,
-				    loff_t pos, void __user *buffer, int nob)
-{
-	const int buf_len = 128;
-	const struct lnet_portal_rotors *rotor;
-	char *buf;
-	int rc;
-
-	buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	if (write) {
-		char *name;
-
-		rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob);
-		if (rc < 0)
-			goto out;
-
-		name = strim(buf);
-
-		/* stays -EINVAL unless a table entry matches */
-		rc = -EINVAL;
-		lnet_res_lock(0);
-		for (rotor = portal_rotors; rotor->pr_name; rotor++) {
-			if (strncasecmp(rotor->pr_name, name,
-					strlen(rotor->pr_name)))
-				continue;
-
-			portal_rotor = rotor->pr_value;
-			rc = 0;
-			break;
-		}
-		lnet_res_unlock(0);
-		goto out;
-	}
-
-	lnet_res_lock(0);
-
-	/* stop on the current mode or on the negative sentinel value */
-	rotor = portal_rotors;
-	while (rotor->pr_value >= 0 && rotor->pr_value != portal_rotor)
-		rotor++;
-
-	LASSERT(rotor->pr_value == portal_rotor);
-	lnet_res_unlock(0);
-
-	rc = snprintf(buf, buf_len,
-		      "{\n\tportals: all\n"
-		      "\trotor: %s\n\tdescription: %s\n}",
-		      rotor->pr_name,
-		      rotor->pr_desc);
-
-	if (pos < min_t(int, rc, buf_len))
-		rc = cfs_trace_copyout_string(buffer, nob,
-					      buf + pos, "\n");
-	else
-		rc = 0;
-out:
-	kfree(buf);
-	return rc;
-}
-
-/*
- * proc_handler for the "portal_rotor" entry of lnet_table; forwards the
- * request to lprocfs_call_handler() with __proc_lnet_portal_rotor() as
- * the worker.
- */
-static int proc_lnet_portal_rotor(struct ctl_table *table, int write,
-				  void __user *buffer, size_t *lenp,
-				  loff_t *ppos)
-{
-	void *data = table->data;
-
-	return lprocfs_call_handler(data, write, ppos, buffer, lenp,
-				    __proc_lnet_portal_rotor);
-}
-
-/*
- * Entries exported by LNet: name, access mode and handler for each.
- * The empty element at the end terminates the table.
- */
-static struct ctl_table lnet_table[] = {
-	/*
-	 * NB: .strategy entries are deliberately omitted, because sysctl(8)
-	 * prefers to go through /proc for portability.
-	 */
-	{
-		.procname     = "stats",
-		.mode         = 0644,
-		.proc_handler = proc_lnet_stats,
-	},
-	{
-		.procname     = "routes",
-		.mode         = 0444,
-		.proc_handler = proc_lnet_routes,
-	},
-	{
-		.procname     = "routers",
-		.mode         = 0444,
-		.proc_handler = proc_lnet_routers,
-	},
-	{
-		.procname     = "peers",
-		.mode         = 0444,
-		.proc_handler = proc_lnet_peers,
-	},
-	{
-		.procname     = "buffers",
-		.mode         = 0444,
-		.proc_handler = proc_lnet_buffers,
-	},
-	{
-		.procname     = "nis",
-		.mode         = 0444,
-		.proc_handler = proc_lnet_nis,
-	},
-	{
-		.procname     = "portal_rotor",
-		.mode         = 0644,
-		.proc_handler = proc_lnet_portal_rotor,
-	},
-	{
-		/* terminator */
-	}
-};
-
-/* Hand lnet_table to lustre_insert_debugfs(). */
-void lnet_router_debugfs_init(void)
-{
-	lustre_insert_debugfs(lnet_table);
-}
-
-/* Counterpart of lnet_router_debugfs_init(); intentionally does nothing. */
-void lnet_router_debugfs_fini(void)
-{
-	/* nothing to undo here */
-}
OpenPOWER on IntegriCloud