From 20acbd9a7aeee0b0af7107f3de791a52c949f3ac Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:06:08 +0000
Subject: rxrpc: Lock around calling a kernel service Rx notification

Place a spinlock around the invocation of call->notify_rx() for a kernel
service call and lock again when ending the call and replace the
notification pointer with a pointer to a dummy function.

This is required because it's possible for rxrpc_notify_socket() to be
called after the call has been ended by the kernel service if called from
the asynchronous work function rxrpc_process_call().

However, rxrpc_notify_socket() currently only holds the RCU read lock when
invoking ->notify_rx(), which means that the afs_call struct would need to
be disposed of by call_rcu() rather than by kfree().

But we shouldn't see any notifications from a call after calling
rxrpc_kernel_end_call(), so a lock is required in rxrpc code.

Without this, we may see the call wait queue as having a corrupt spinlock:

    BUG: spinlock bad magic on CPU#0, kworker/0:2/1612
    general protection fault: 0000 [#1] SMP
    ...
    Workqueue: krxrpcd rxrpc_process_call
    task: ffff88040b83c400 task.stack: ffff88040adfc000
    RIP: 0010:spin_bug+0x161/0x18f
    RSP: 0018:ffff88040adffcc0 EFLAGS: 00010002
    RAX: 0000000000000032 RBX: 6b6b6b6b6b6b6b6b RCX: ffffffff81ab16cf
    RDX: ffff88041fa14c01 RSI: ffff88041fa0ccb8 RDI: ffff88041fa0ccb8
    RBP: ffff88040adffcd8 R08: 00000000ffffffff R09: 00000000ffffffff
    R10: ffff88040adffc60 R11: 000000000000022c R12: ffff88040aca2208
    R13: ffffffff81a58114 R14: 0000000000000000 R15: 0000000000000000
    ....
    Call Trace:
     do_raw_spin_lock+0x1d/0x89
     _raw_spin_lock_irqsave+0x3d/0x49
     ? __wake_up_common_lock+0x4c/0xa7
     __wake_up_common_lock+0x4c/0xa7
     ? __lock_is_held+0x47/0x7a
     __wake_up+0xe/0x10
     afs_wake_up_call_waiter+0x11b/0x122 [kafs]
     rxrpc_notify_socket+0x12b/0x258
     rxrpc_process_call+0x18e/0x7d0
     process_one_work+0x298/0x4de
     ? rescuer_thread+0x280/0x280
     worker_thread+0x1d1/0x2ae
     ? rescuer_thread+0x280/0x280
     kthread+0x12c/0x134
     ? kthread_create_on_node+0x3a/0x3a
     ret_from_fork+0x27/0x40

In this case, note the corrupt data in EBX.  The address of the offending
afs_call is in R12, plus the offset to the spinlock.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/af_rxrpc.c    | 16 ++++++++++++++++
 net/rxrpc/ar-internal.h |  1 +
 net/rxrpc/call_object.c |  1 +
 net/rxrpc/recvmsg.c     |  2 ++
 4 files changed, 20 insertions(+)

(limited to 'net/rxrpc')

diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 344b2dc..9b5c46b 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -322,6 +322,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 }
 EXPORT_SYMBOL(rxrpc_kernel_begin_call);
 
+/*
+ * Dummy function used to stop the notifier talking to recvmsg().
+ */
+static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+				  unsigned long call_user_ID)
+{
+}
+
 /**
  * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
  * @sock: The socket the call is on
@@ -336,6 +344,14 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
 
 	mutex_lock(&call->user_mutex);
 	rxrpc_release_call(rxrpc_sk(sock->sk), call);
+
+	/* Make sure we're not going to call back into a kernel service */
+	if (call->notify_rx) {
+		spin_lock_bh(&call->notify_lock);
+		call->notify_rx = rxrpc_dummy_notify_rx;
+		spin_unlock_bh(&call->notify_lock);
+	}
+
 	mutex_unlock(&call->user_mutex);
 	rxrpc_put_call(call, rxrpc_call_put_kernel);
 }
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ea5600b..b215199 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -525,6 +525,7 @@ struct rxrpc_call {
 	unsigned long		flags;
 	unsigned long		events;
 	spinlock_t		lock;
+	spinlock_t		notify_lock;	/* Kernel notification lock */
 	rwlock_t		state_lock;	/* lock for state transition */
 	u32			abort_code;	/* Local/remote abort code */
 	int			error;		/* Local error incurred */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index fcdd655..4c7fbc6 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -124,6 +124,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 	INIT_LIST_HEAD(&call->sock_link);
 	init_waitqueue_head(&call->waitq);
 	spin_lock_init(&call->lock);
+	spin_lock_init(&call->notify_lock);
 	rwlock_init(&call->state_lock);
 	atomic_set(&call->usage, 1);
 	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index e4937b3..8510a98 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
 	sk = &rx->sk;
 	if (rx && sk->sk_state < RXRPC_CLOSE) {
 		if (call->notify_rx) {
+			spin_lock_bh(&call->notify_lock);
 			call->notify_rx(sk, call, call->user_call_ID);
+			spin_unlock_bh(&call->notify_lock);
 		} else {
 			write_lock_bh(&rx->recvmsg_lock);
 			if (list_empty(&call->recvmsg_link)) {
-- 
cgit v1.1


From 1457cc4cfb93511de347d1d0a1c9da3e826b66fe Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:06:55 +0000
Subject: rxrpc: Fix a null ptr deref in rxrpc_fill_out_ack()

rxrpc_fill_out_ack() needs to be passed the connection pointer from its
caller rather than using call->conn as the call may be disconnected in
parallel with it, clearing call->conn, leading to:

	BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
	IP: rxrpc_send_ack_packet+0x231/0x6a4

Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/output.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net/rxrpc')

diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 71e6f71..8ee8b2d 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,7 +35,8 @@ struct rxrpc_abort_buffer {
 /*
  * Fill out an ACK packet.
  */
-static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
+static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
+				 struct rxrpc_call *call,
 				 struct rxrpc_ack_buffer *pkt,
 				 rxrpc_seq_t *_hard_ack,
 				 rxrpc_seq_t *_top,
@@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
 		} while (before_eq(seq, top));
 	}
 
-	mtu = call->conn->params.peer->if_mtu;
-	mtu -= call->conn->params.peer->hdrsize;
+	mtu = conn->params.peer->if_mtu;
+	mtu -= conn->params.peer->hdrsize;
 	jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
 	pkt->ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
 	pkt->ackinfo.maxMTU	= htonl(mtu);
@@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
 		}
 		call->ackr_reason = 0;
 	}
-	n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason);
+	n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
 
 	spin_unlock_bh(&call->lock);
 
-- 
cgit v1.1


From dcbefc30fbc2c1926bcecdd62579e3e107653d82 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 2 Nov 2017 15:06:26 +0000
Subject: rxrpc: Fix call expiry handling

Fix call expiry handling in the following ways

 (1) If all the request data from a client call is acked, don't send a
     follow up IDLE ACK with firstPacket == 1 and previousPacket == 0 as
     this appears to fool some servers into thinking everything has been
     accepted.

 (2) Never send an abort back to the server once it has ACK'd all the
     request packets; rather just try to reuse the channel for the next
     call.  The first request DATA packet of the next call on the same
     channel will implicitly ACK the entire reply of the dead call - even
     if we haven't transmitted it yet.

 (3) Don't send RX_CALL_TIMEOUT in an ABORT packet, librx uses abort codes
     to pass local errors to the caller in addition to remote errors, and
     this is meant to be local only.

The following also need to be addressed in future patches:

 (4) Service calls should send PING ACKs as 'keep alives' if the server is
     still processing the call.

 (5) VERSION REPLY packets should be sent to the peers of service
     connections to act as keep-alives.  This is used to keep firewall
     routes in place.  The AFS CM should enable this.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/call_event.c |  2 +-
 net/rxrpc/input.c      |  2 --
 net/rxrpc/output.c     | 10 ++++++++++
 3 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'net/rxrpc')

diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 7a77844..3574508 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
 
 	now = ktime_get_real();
 	if (ktime_before(call->expire_at, now)) {
-		rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+		rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
 		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
 		goto recheck_state;
 	}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1e37eb1..1b59207 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
 
 	write_unlock(&call->state_lock);
 	if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
-		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
-				  rxrpc_propose_ack_client_tx_end);
 		trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
 	} else {
 		trace_rxrpc_transmit(call, rxrpc_transmit_end);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 8ee8b2d..f47659c 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -222,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
 	rxrpc_serial_t serial;
 	int ret;
 
+	/* Don't bother sending aborts for a client call once the server has
+	 * hard-ACK'd all of its request data.  After that point, we're not
+	 * going to stop the operation proceeding, and whilst we might limit
+	 * the reply, it's not worth it if we can send a new call on the same
+	 * channel instead, thereby closing off this call.
+	 */
+	if (rxrpc_is_client_call(call) &&
+	    test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+		return 0;
+
 	spin_lock_bh(&call->lock);
 	if (call->conn)
 		conn = rxrpc_get_connection_maybe(call->conn);
-- 
cgit v1.1