From 20acbd9a7aeee0b0af7107f3de791a52c949f3ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:06:08 +0000 Subject: rxrpc: Lock around calling a kernel service Rx notification Place a spinlock around the invocation of call->notify_rx() for a kernel service call and lock again when ending the call and replace the notification pointer with a pointer to a dummy function. This is required because it's possible for rxrpc_notify_socket() to be called after the call has been ended by the kernel service if called from the asynchronous work function rxrpc_process_call(). However, rxrpc_notify_socket() currently only holds the RCU read lock when invoking ->notify_rx(), which means that the afs_call struct would need to be disposed of by call_rcu() rather than by kfree(). But we shouldn't see any notifications from a call after calling rxrpc_kernel_end_call(), so a lock is required in rxrpc code. Without this, we may see the call wait queue as having a corrupt spinlock: BUG: spinlock bad magic on CPU#0, kworker/0:2/1612 general protection fault: 0000 [#1] SMP ... Workqueue: krxrpcd rxrpc_process_call task: ffff88040b83c400 task.stack: ffff88040adfc000 RIP: 0010:spin_bug+0x161/0x18f RSP: 0018:ffff88040adffcc0 EFLAGS: 00010002 RAX: 0000000000000032 RBX: 6b6b6b6b6b6b6b6b RCX: ffffffff81ab16cf RDX: ffff88041fa14c01 RSI: ffff88041fa0ccb8 RDI: ffff88041fa0ccb8 RBP: ffff88040adffcd8 R08: 00000000ffffffff R09: 00000000ffffffff R10: ffff88040adffc60 R11: 000000000000022c R12: ffff88040aca2208 R13: ffffffff81a58114 R14: 0000000000000000 R15: 0000000000000000 .... Call Trace: do_raw_spin_lock+0x1d/0x89 _raw_spin_lock_irqsave+0x3d/0x49 ? __wake_up_common_lock+0x4c/0xa7 __wake_up_common_lock+0x4c/0xa7 ? __lock_is_held+0x47/0x7a __wake_up+0xe/0x10 afs_wake_up_call_waiter+0x11b/0x122 [kafs] rxrpc_notify_socket+0x12b/0x258 rxrpc_process_call+0x18e/0x7d0 process_one_work+0x298/0x4de ? rescuer_thread+0x280/0x280 worker_thread+0x1d1/0x2ae ? rescuer_thread+0x280/0x280 kthread+0x12c/0x134 ? kthread_create_on_node+0x3a/0x3a ret_from_fork+0x27/0x40 In this case, note the corrupt data in EBX. The address of the offending afs_call is in R12, plus the offset to the spinlock. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 16 ++++++++++++++++ net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_object.c | 1 + net/rxrpc/recvmsg.c | 2 ++ 4 files changed, 20 insertions(+) (limited to 'net/rxrpc') diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 344b2dc..9b5c46b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -322,6 +322,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, } EXPORT_SYMBOL(rxrpc_kernel_begin_call); +/* + * Dummy function used to stop the notifier talking to recvmsg(). + */ +static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ +} + /** * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using * @sock: The socket the call is on @@ -336,6 +344,14 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); + + /* Make sure we're not going to call back into a kernel service */ + if (call->notify_rx) { + spin_lock_bh(&call->notify_lock); + call->notify_rx = rxrpc_dummy_notify_rx; + spin_unlock_bh(&call->notify_lock); + } + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_kernel); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ea5600b..b215199 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -525,6 +525,7 @@ struct rxrpc_call { unsigned long flags; unsigned long events; spinlock_t lock; + spinlock_t notify_lock; /* Kernel notification lock */ rwlock_t state_lock; /* lock for state transition */ u32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index fcdd655..4c7fbc6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -124,6 +124,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) INIT_LIST_HEAD(&call->sock_link); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); + spin_lock_init(&call->notify_lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); call->debug_id = atomic_inc_return(&rxrpc_debug_id); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index e4937b3..8510a98 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call) sk = &rx->sk; if (rx && sk->sk_state < RXRPC_CLOSE) { if (call->notify_rx) { + spin_lock_bh(&call->notify_lock); call->notify_rx(sk, call, call->user_call_ID); + spin_unlock_bh(&call->notify_lock); } else { write_lock_bh(&rx->recvmsg_lock); if (list_empty(&call->recvmsg_link)) { -- cgit v1.1 From 1457cc4cfb93511de347d1d0a1c9da3e826b66fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:06:55 +0000 Subject: rxrpc: Fix a null ptr deref in rxrpc_fill_out_ack() rxrpc_fill_out_ack() needs to be passed the connection pointer from its caller rather than using call->conn as the call may be disconnected in parallel with it, clearing call->conn, leading to: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: rxrpc_send_ack_packet+0x231/0x6a4 Signed-off-by: David Howells --- net/rxrpc/output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 71e6f71..8ee8b2d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -35,7 +35,8 @@ struct rxrpc_abort_buffer { /* * Fill out an ACK packet. */ -static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, +static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, + struct rxrpc_call *call, struct rxrpc_ack_buffer *pkt, rxrpc_seq_t *_hard_ack, rxrpc_seq_t *_top, @@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, } while (before_eq(seq, top)); } - mtu = call->conn->params.peer->if_mtu; - mtu -= call->conn->params.peer->hdrsize; + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); pkt->ackinfo.maxMTU = htonl(mtu); @@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) } call->ackr_reason = 0; } - n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason); + n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); spin_unlock_bh(&call->lock); -- cgit v1.1 From dcbefc30fbc2c1926bcecdd62579e3e107653d82 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:06:26 +0000 Subject: rxrpc: Fix call expiry handling Fix call expiry handling in the following ways (1) If all the request data from a client call is acked, don't send a follow up IDLE ACK with firstPacket == 1 and previousPacket == 0 as this appears to fool some servers into thinking everything has been accepted. (2) Never send an abort back to the server once it has ACK'd all the request packets; rather just try to reuse the channel for the next call. The first request DATA packet of the next call on the same channel will implicitly ACK the entire reply of the dead call - even if we haven't transmitted it yet. (3) Don't send RX_CALL_TIMEOUT in an ABORT packet, librx uses abort codes to pass local errors to the caller in addition to remote errors, and this is meant to be local only. The following also need to be addressed in future patches: (4) Service calls should send PING ACKs as 'keep alives' if the server is still processing the call. (5) VERSION REPLY packets should be sent to the peers of service connections to act as keep-alives. This is used to keep firewall routes in place. The AFS CM should enable this. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/input.c | 2 -- net/rxrpc/output.c | 10 ++++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7a77844..3574508 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -386,7 +386,7 @@ recheck_state: now = ktime_get_real(); if (ktime_before(call->expire_at, now)) { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); + rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1e37eb1..1b59207 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, write_unlock(&call->state_lock); if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true, - rxrpc_propose_ack_client_tx_end); trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); } else { trace_rxrpc_transmit(call, rxrpc_transmit_end); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8ee8b2d..f47659c 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -222,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) rxrpc_serial_t serial; int ret; + /* Don't bother sending aborts for a client call once the server has + * hard-ACK'd all of its request data. After that point, we're not + * going to stop the operation proceeding, and whilst we might limit + * the reply, it's not worth it if we can send a new call on the same + * channel instead, thereby closing off this call. + */ + if (rxrpc_is_client_call(call) && + test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + return 0; + spin_lock_bh(&call->lock); if (call->conn) conn = rxrpc_get_connection_maybe(call->conn); -- cgit v1.1