From b63452c11e22382e592d3f7f9ac4966197d3eab6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:48 +0100 Subject: rxrpc: Accesses of rxrpc_local::service need to be RCU managed struct rxrpc_local->service is marked __rcu - this means that accesses of it need to be managed using RCU wrappers. There are two such places in rxrpc_release_sock() where the value is checked and cleared. Fix this by using the appropriate wrappers. Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 44c9c2b..2d59c9b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -678,9 +678,9 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); - if (rx->local && rx->local->service == rx) { + if (rx->local && rcu_access_pointer(rx->local->service) == rx) { write_lock(&rx->local->services_lock); - rx->local->service = NULL; + rcu_assign_pointer(rx->local->service, NULL); write_unlock(&rx->local->services_lock); } -- cgit v1.1 From 19c0dbd5406ddc669ef1516c02c6b0f5a4465628 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:48 +0100 Subject: rxrpc: Fix duplicate const Remove a duplicate const keyword. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/misc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d38dffd..4954e6e2 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -777,7 +777,7 @@ extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10]; extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9]; extern const char *const rxrpc_pkts[]; -extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; +extern const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; #include diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 9d1c721..804a88e 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -96,7 +96,7 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_PING] = 9, }; -const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { +const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL", "-?-" }; -- cgit v1.1 From 7212a57e8eaa2572481398532d7be0c2685362b9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: rxrpc: Fix oops on incoming call to serviceless endpoint If an call comes in to a local endpoint that isn't listening for any incoming calls at the moment, an oops will happen. We need to check that the local endpoint's service pointer isn't NULL before we dereference it. Signed-off-by: David Howells --- net/rxrpc/call_accept.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3cac231..22cd8a1 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -337,7 +337,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, /* Get the socket providing the service */ rx = rcu_dereference(local->service); - if (service_id == rx->srx.srx_service) + if (rx && service_id == rx->srx.srx_service) goto found_service; trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, -- cgit v1.1 From a9f312d98affab387557e2795d4e11ad82a4e4e8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: rxrpc: Only ping for lost reply in client call When a reply is deemed lost, we send a ping to find out the other end received all the request data packets we sent. This should be limited to client calls and we shouldn't do this on service calls. Signed-off-by: David Howells --- net/rxrpc/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3ad9f75..103d2b0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -847,7 +847,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && - summary.nr_acks == call->tx_top - hard_ack) + summary.nr_acks == call->tx_top - hard_ack && + rxrpc_is_client_call(call)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); -- cgit v1.1 From 26cb02aa6d3efeb543805ed9ad599dae24f7c6d4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: rxrpc: Fix warning by splitting rxrpc_send_call_packet() Split rxrpc_send_data_packet() to separate ACK generation (which is more complicated) from ABORT generation. This simplifies the code a bit and fixes the following warning: In file included from ../net/rxrpc/output.c:20:0: net/rxrpc/output.c: In function 'rxrpc_send_call_packet': net/rxrpc/ar-internal.h:1187:27: error: 'top' may be used uninitialized in this function [-Werror=maybe-uninitialized] net/rxrpc/output.c:103:24: note: 'top' was declared here net/rxrpc/output.c:225:25: error: 'hard_ack' may be used uninitialized in this function [-Werror=maybe-uninitialized] Reported-by: Arnd Bergmann Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_event.c | 6 +- net/rxrpc/call_object.c | 2 +- net/rxrpc/output.c | 156 +++++++++++++++++++++++++++--------------------- net/rxrpc/recvmsg.c | 4 +- net/rxrpc/rxkad.c | 6 +- net/rxrpc/sendmsg.c | 7 +-- 8 files changed, 102 insertions(+), 84 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4954e6e2..ef849a1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1068,7 +1068,8 @@ extern const s8 rxrpc_ack_priority[]; /* * output.c */ -int rxrpc_send_call_packet(struct rxrpc_call *, u8); +int rxrpc_send_ack_packet(struct rxrpc_call *); +int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 22cd8a1..832d854 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -565,7 +565,7 @@ out_discard: write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); if (abort) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 4f00476..e3130998 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -253,7 +253,7 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); goto out; } @@ -328,7 +328,7 @@ void rxrpc_process_call(struct work_struct *work) recheck_state: if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); goto recheck_state; } @@ -347,7 +347,7 @@ recheck_state: if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { call->ack_at = call->expire_at; if (call->ackr_reason) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); goto recheck_state; } } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 364b42d..0709401 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -498,7 +498,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_got); rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0d47db8..2dae877 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -19,24 +19,24 @@ #include #include "ar-internal.h" -struct rxrpc_pkt_buffer { +struct rxrpc_ack_buffer { struct rxrpc_wire_header whdr; - union { - struct { - struct rxrpc_ackpacket ack; - u8 acks[255]; - u8 pad[3]; - }; - __be32 abort_code; - }; + struct rxrpc_ackpacket ack; + u8 acks[255]; + u8 pad[3]; struct rxrpc_ackinfo ackinfo; }; +struct rxrpc_abort_buffer { + struct rxrpc_wire_header whdr; + __be32 abort_code; +}; + /* * Fill out an ACK packet. */ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, - struct rxrpc_pkt_buffer *pkt, + struct rxrpc_ack_buffer *pkt, rxrpc_seq_t *_hard_ack, rxrpc_seq_t *_top) { @@ -91,22 +91,19 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, } /* - * Send an ACK or ABORT call packet. + * Send an ACK call packet. */ -int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) +int rxrpc_send_ack_packet(struct rxrpc_call *call) { struct rxrpc_connection *conn = NULL; - struct rxrpc_pkt_buffer *pkt; + struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; size_t len, n; bool ping = false; - int ioc, ret; - u32 abort_code; - - _enter("%u,%s", call->debug_id, rxrpc_pkts[type]); + int ret; spin_lock_bh(&call->lock); if (call->conn) @@ -131,65 +128,37 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.cid = htonl(call->cid); pkt->whdr.callNumber = htonl(call->call_id); pkt->whdr.seq = 0; - pkt->whdr.type = type; - pkt->whdr.flags = conn->out_clientflag; + pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; + pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; pkt->whdr.userStatus = 0; pkt->whdr.securityIndex = call->security_ix; pkt->whdr._rsvd = 0; pkt->whdr.serviceId = htons(call->service_id); - iov[0].iov_base = pkt; - iov[0].iov_len = sizeof(pkt->whdr); - len = sizeof(pkt->whdr); - - switch (type) { - case RXRPC_PACKET_TYPE_ACK: - spin_lock_bh(&call->lock); - if (!call->ackr_reason) { - spin_unlock_bh(&call->lock); - ret = 0; - goto out; - } - ping = (call->ackr_reason == RXRPC_ACK_PING); - n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); - call->ackr_reason = 0; - + spin_lock_bh(&call->lock); + if (!call->ackr_reason) { spin_unlock_bh(&call->lock); - - - pkt->whdr.flags |= RXRPC_SLOW_START_OK; - - iov[0].iov_len += sizeof(pkt->ack) + n; - iov[1].iov_base = &pkt->ackinfo; - iov[1].iov_len = sizeof(pkt->ackinfo); - len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo); - ioc = 2; - break; - - case RXRPC_PACKET_TYPE_ABORT: - abort_code = call->abort_code; - pkt->abort_code = htonl(abort_code); - iov[0].iov_len += sizeof(pkt->abort_code); - len += sizeof(pkt->abort_code); - ioc = 1; - break; - - default: - BUG(); - ret = -ENOANO; + ret = 0; goto out; } + ping = (call->ackr_reason == RXRPC_ACK_PING); + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); + call->ackr_reason = 0; + + spin_unlock_bh(&call->lock); + + iov[0].iov_base = pkt; + iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; + iov[1].iov_base = &pkt->ackinfo; + iov[1].iov_len = sizeof(pkt->ackinfo); + len = iov[0].iov_len + iov[1].iov_len; serial = atomic_inc_return(&conn->serial); pkt->whdr.serial = htonl(serial); - switch (type) { - case RXRPC_PACKET_TYPE_ACK: - trace_rxrpc_tx_ack(call, serial, - ntohl(pkt->ack.firstPacket), - ntohl(pkt->ack.serial), - pkt->ack.reason, pkt->ack.nAcks); - break; - } + trace_rxrpc_tx_ack(call, serial, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); if (ping) { call->ackr_ping = serial; @@ -205,13 +174,12 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) set_bit(RXRPC_CALL_PINGING, &call->flags); trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); } - ret = kernel_sendmsg(conn->params.local->socket, - &msg, iov, ioc, len); + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ping) call->ackr_ping_time = ktime_get_real(); - if (type == RXRPC_PACKET_TYPE_ACK && - call->state < RXRPC_CALL_COMPLETE) { + if (call->state < RXRPC_CALL_COMPLETE) { if (ret < 0) { clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, @@ -236,6 +204,56 @@ out: } /* + * Send an ABORT call packet. + */ +int rxrpc_send_abort_packet(struct rxrpc_call *call) +{ + struct rxrpc_connection *conn = NULL; + struct rxrpc_abort_buffer pkt; + struct msghdr msg; + struct kvec iov[1]; + rxrpc_serial_t serial; + int ret; + + spin_lock_bh(&call->lock); + if (call->conn) + conn = rxrpc_get_connection_maybe(call->conn); + spin_unlock_bh(&call->lock); + if (!conn) + return -ECONNRESET; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(call->cid); + pkt.whdr.callNumber = htonl(call->call_id); + pkt.whdr.seq = 0; + pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = call->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(call->service_id); + pkt.abort_code = htonl(call->abort_code); + + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt); + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + ret = kernel_sendmsg(conn->params.local->socket, + &msg, iov, 1, sizeof(pkt)); + + rxrpc_put_connection(conn); + return ret; +} + +/* * send a packet through the transport endpoint */ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f05ea0a..11723bc 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -143,7 +143,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); } write_lock_bh(&call->state_lock); @@ -212,7 +212,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) true, false, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason) - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); } } diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 627abed..4374e7b 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -381,7 +381,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO"); return -EPROTO; @@ -471,7 +471,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO"); return -EPROTO; @@ -523,7 +523,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, if (cksum != expected_cksum) { rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 3322543..901b28c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -197,7 +197,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call); if (!skb) { size_t size, chunk, max, space; @@ -514,8 +514,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else if (cmd == RXRPC_CMD_SEND_ABORT) { ret = 0; if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) - ret = rxrpc_send_call_packet(call, - RXRPC_PACKET_TYPE_ABORT); + ret = rxrpc_send_abort_packet(call); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && @@ -597,7 +596,7 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, lock_sock(sock->sk); if (rxrpc_abort_call(why, call, 0, abort_code, error)) - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); release_sock(sock->sk); _leave(""); -- cgit v1.1 From a5af7e1fc69a46f29b977fd4b570e0ac414c2338 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: rxrpc: Fix loss of PING RESPONSE ACK production due to PING ACKs Separate the output of PING ACKs from the output of other sorts of ACK so that if we receive a PING ACK and schedule transmission of a PING RESPONSE ACK, the response doesn't get cancelled by a PING ACK we happen to be scheduling transmission of at the same time. If a PING RESPONSE gets lost, the other side might just sit there waiting for it and refuse to proceed otherwise. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 12 +++++++++--- net/rxrpc/call_event.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---- net/rxrpc/call_object.c | 1 + net/rxrpc/input.c | 4 ++-- net/rxrpc/misc.c | 2 +- net/rxrpc/output.c | 38 ++++++++++++++++++++++---------------- net/rxrpc/recvmsg.c | 4 ++-- net/rxrpc/sendmsg.c | 2 +- 8 files changed, 82 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ef849a1..b56676b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -398,6 +398,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ }; @@ -410,6 +411,7 @@ enum rxrpc_call_event { RXRPC_CALL_EV_ABORT, /* need to generate abort */ RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ + RXRPC_CALL_EV_PING, /* Ping send required */ }; /* @@ -466,6 +468,7 @@ struct rxrpc_call { struct rxrpc_sock __rcu *socket; /* socket responsible */ ktime_t ack_at; /* When deferred ACK needs to happen */ ktime_t resend_at; /* When next resend needs to happen */ + ktime_t ping_at; /* When next to send a ping */ ktime_t expire_at; /* When the call times out */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ @@ -558,8 +561,10 @@ struct rxrpc_call { rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ - rxrpc_serial_t ackr_ping; /* Last ping sent */ - ktime_t ackr_ping_time; /* Time last ping sent */ + + /* ping management */ + rxrpc_serial_t ping_serial; /* Last ping sent */ + ktime_t ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ @@ -730,6 +735,7 @@ enum rxrpc_timer_trace { rxrpc_timer_init_for_reply, rxrpc_timer_expired, rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, rxrpc_timer_set_for_send, rxrpc_timer__nr_trace @@ -1068,7 +1074,7 @@ extern const s8 rxrpc_ack_priority[]; /* * output.c */ -int rxrpc_send_ack_packet(struct rxrpc_call *); +int rxrpc_send_ack_packet(struct rxrpc_call *, bool); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e3130998..eeea960 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -54,6 +54,14 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, t = call->ack_at; } + if (!ktime_after(call->ping_at, now)) { + call->ping_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) + queue = true; + } else if (ktime_before(call->ping_at, t)) { + t = call->ping_at; + } + t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); t_j += jiffies; @@ -78,6 +86,27 @@ out: } /* + * Propose a PING ACK be sent. + */ +static void rxrpc_propose_ping(struct rxrpc_call *call, + bool immediate, bool background) +{ + if (immediate) { + if (background && + !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) + rxrpc_queue_call(call); + } else { + ktime_t now = ktime_get_real(); + ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay); + + if (ktime_before(ping_at, call->ping_at)) { + call->ping_at = ping_at; + rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now); + } + } +} + +/* * propose an ACK be sent */ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, @@ -90,6 +119,14 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, ktime_t now, ack_at; s8 prior = rxrpc_ack_priority[ack_reason]; + /* Pings are handled specially because we don't want to accidentally + * lose a ping response by subsuming it into a ping. + */ + if (ack_reason == RXRPC_ACK_PING) { + rxrpc_propose_ping(call, immediate, background); + goto trace; + } + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers, but we don't alter the timeout. */ @@ -125,7 +162,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, expiry = rxrpc_soft_ack_delay; break; - case RXRPC_ACK_PING: case RXRPC_ACK_IDLE: if (rxrpc_idle_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; @@ -253,7 +289,7 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, true); goto out; } @@ -345,13 +381,17 @@ recheck_state: } if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { - call->ack_at = call->expire_at; if (call->ackr_reason) { - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, false); goto recheck_state; } } + if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { + rxrpc_send_ack_packet(call, true); + goto recheck_state; + } + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { rxrpc_resend(call, now); goto recheck_state; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0709401..4353a29 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -205,6 +205,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); call->expire_at = expire_at; call->ack_at = expire_at; + call->ping_at = expire_at; call->resend_at = expire_at; call->timer.expires = jiffies + LONG_MAX / 2; rxrpc_set_timer(call, rxrpc_timer_begin, now); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 103d2b0..a6da83f0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -625,9 +625,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, rxrpc_serial_t ping_serial; ktime_t ping_time; - ping_time = call->ackr_ping_time; + ping_time = call->ping_time; smp_rmb(); - ping_serial = call->ackr_ping; + ping_serial = call->ping_serial; if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || before(orig_serial, ping_serial)) diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 804a88e..1cdcba5 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -93,7 +93,6 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_EXCEEDS_WINDOW] = 6, [RXRPC_ACK_NOSPACE] = 7, [RXRPC_ACK_PING_RESPONSE] = 8, - [RXRPC_ACK_PING] = 9, }; const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { @@ -197,6 +196,7 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_expired] = "*EXPR*", [rxrpc_timer_init_for_reply] = "IniRpl", [rxrpc_timer_set_for_ack] = "SetAck", + [rxrpc_timer_set_for_ping] = "SetPng", [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 2dae877..a12cea0 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -38,7 +38,8 @@ struct rxrpc_abort_buffer { static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_ack_buffer *pkt, rxrpc_seq_t *_hard_ack, - rxrpc_seq_t *_top) + rxrpc_seq_t *_top, + u8 reason) { rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; @@ -58,10 +59,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); pkt->ack.serial = htonl(serial); - pkt->ack.reason = call->ackr_reason; + pkt->ack.reason = reason; pkt->ack.nAcks = top - hard_ack; - if (pkt->ack.reason == RXRPC_ACK_PING) + if (reason == RXRPC_ACK_PING) pkt->whdr.flags |= RXRPC_REQUEST_ACK; if (after(top, hard_ack)) { @@ -93,7 +94,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, /* * Send an ACK call packet. */ -int rxrpc_send_ack_packet(struct rxrpc_call *call) +int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) { struct rxrpc_connection *conn = NULL; struct rxrpc_ack_buffer *pkt; @@ -102,8 +103,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call) rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; size_t len, n; - bool ping = false; int ret; + u8 reason; spin_lock_bh(&call->lock); if (call->conn) @@ -136,14 +137,18 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call) pkt->whdr.serviceId = htons(call->service_id); spin_lock_bh(&call->lock); - if (!call->ackr_reason) { - spin_unlock_bh(&call->lock); - ret = 0; - goto out; + if (ping) { + reason = RXRPC_ACK_PING; + } else { + reason = call->ackr_reason; + if (!call->ackr_reason) { + spin_unlock_bh(&call->lock); + ret = 0; + goto out; + } + call->ackr_reason = 0; } - ping = (call->ackr_reason == RXRPC_ACK_PING); - n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); - call->ackr_reason = 0; + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason); spin_unlock_bh(&call->lock); @@ -161,7 +166,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call) pkt->ack.reason, pkt->ack.nAcks); if (ping) { - call->ackr_ping = serial; + call->ping_serial = serial; smp_wmb(); /* We need to stick a time in before we send the packet in case * the reply gets back before kernel_sendmsg() completes - but @@ -170,18 +175,19 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call) * the packet transmission is more likely to happen towards the * end of the kernel_sendmsg() call. */ - call->ackr_ping_time = ktime_get_real(); + call->ping_time = ktime_get_real(); set_bit(RXRPC_CALL_PINGING, &call->flags); trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ping) - call->ackr_ping_time = ktime_get_real(); + call->ping_time = ktime_get_real(); if (call->state < RXRPC_CALL_COMPLETE) { if (ret < 0) { - clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (ping) + clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 11723bc..3fa7771 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -143,7 +143,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, false); } write_lock_bh(&call->state_lock); @@ -212,7 +212,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) true, false, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason) - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, false); } } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 901b28c..55a2fb2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -197,7 +197,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_ack_packet(call); + rxrpc_send_ack_packet(call, false); if (!skb) { size_t size, chunk, max, space; -- cgit v1.1 From b3156274ca01297b861e912175820e78c9ac4d7c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:49 +0100 Subject: rxrpc: Partially handle OpenAFS's improper termination of calls OpenAFS doesn't always correctly terminate client calls that it makes - this includes calls the OpenAFS servers make to the cache manager service. It should end the client call with either: (1) An ACK that has firstPacket set to one greater than the seq number of the reply DATA packet with the LAST_PACKET flag set (thereby hard-ACK'ing all packets). nAcks should be 0 and acks[] should be empty (ie. no soft-ACKs). (2) An ACKALL packet. OpenAFS, though, may send an ACK packet with firstPacket set to the last seq number or less and soft-ACKs listed for all packets up to and including the last DATA packet. The transmitter, however, is obliged to keep the call live and the soft-ACK'd DATA packets around until they're hard-ACK'd as the receiver is permitted to drop any merely soft-ACK'd packet and request retransmission by sending an ACK packet with a NACK in it. Further, OpenAFS will also terminate a client call by beginning the next client call on the same connection channel. This implicitly completes the previous call. This patch handles implicit ACK of a call on a channel by the reception of the first packet of the next call on that channel. If another call doesn't come along to implicitly ACK a call, then we have to time the call out. There are some bugs there that will be addressed in subsequent patches. Signed-off-by: David Howells --- net/rxrpc/input.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'net') diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a6da83f0..44fb8d8 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -939,6 +939,33 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, } /* + * Handle a new call on a channel implicitly completing the preceding call on + * that channel. + * + * TODO: If callNumber > call_id + 1, renegotiate security. + */ +static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, + struct rxrpc_call *call) +{ + switch (call->state) { + case RXRPC_CALL_SERVER_AWAIT_ACK: + rxrpc_call_completed(call); + break; + case RXRPC_CALL_COMPLETE: + break; + default: + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); + } + break; + } + + __rxrpc_disconnect_call(conn, call); + rxrpc_notify_socket(call); +} + +/* * post connection-level events to the connection * - this includes challenges, responses, some aborts and call terminal packet * retransmission. @@ -1146,6 +1173,16 @@ void rxrpc_data_ready(struct sock *udp_sk) } call = rcu_dereference(chan->call); + + if (sp->hdr.callNumber > chan->call_id) { + if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) { + rcu_read_unlock(); + goto reject_packet; + } + if (call) + rxrpc_input_implicit_end_call(conn, call); + call = NULL; + } } else { skew = 0; call = NULL; -- cgit v1.1 From d7833d00915e1fb5743e94d3c207810b30e9fc38 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: rxrpc: Queue the call on expiry When a call expires, it must be queued for the background processor to deal with otherwise a service call that is improperly terminated will just sit there awaiting an ACK and won't expire. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index eeea960..e2a987f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -35,8 +35,11 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, if (call->state < RXRPC_CALL_COMPLETE) { t = call->expire_at; - if (!ktime_after(t, now)) + if (!ktime_after(t, now)) { + trace_rxrpc_timer(call, why, now, now_j); + queue = true; goto out; + } if (!ktime_after(call->resend_at, now)) { call->resend_at = call->expire_at; @@ -76,12 +79,11 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, mod_timer(&call->timer, t_j); trace_rxrpc_timer(call, why, now, now_j); } - - if (queue) - rxrpc_queue_call(call); } out: + if (queue) + rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); } -- cgit v1.1 From 94bc669efa3beb1f6b171f5a3225079bc457d4a2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: rxrpc: Add missing notification The call's background processor work item needs to notify the socket when it completes a call so that recvmsg() or the AFS fs can deal with it. Without this, call expiry isn't handled. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e2a987f..0f91d32 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -372,6 +372,7 @@ recheck_state: if (call->state == RXRPC_CALL_COMPLETE) { del_timer_sync(&call->timer); + rxrpc_notify_socket(call); goto out_put; } -- cgit v1.1 From cf69207afa2a750ba78782bb4ff4d72c1efb8e6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: rxrpc: Return negative error code to kernel service In rxrpc_kernel_recv_data(), when we return the error number incurred by a failed call, we must negate it before returning it as it's stored as positive (that's what we have to pass back to userspace). Signed-off-by: David Howells --- net/rxrpc/recvmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 3fa7771..db5b02a 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -652,7 +652,7 @@ excess_data: goto out; call_complete: *_abort = call->abort_code; - ret = call->error; + ret = -call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { ret = 1; if (size > 0) -- cgit v1.1 From 9749fd2beac42e32cb3e3d85489b52b9cc71a9ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:50 +0100 Subject: rxrpc: Need to produce an ACK for service op if op takes a long time We need to generate a DELAY ACK from the service end of an operation if we start doing the actual operation work and it takes longer than expected. This will hard-ACK the request data and allow the client to release its resources. To make this work: (1) We have to set the ack timer and propose an ACK when the call moves to the RXRPC_CALL_SERVER_ACK_REQUEST and clear the pending ACK and cancel the timer when we start transmitting the reply (the first DATA packet of the reply implicitly ACKs the request phase). (2) It must be possible to set the timer when the caller is holding call->state_lock, so split the lock-getting part of the timer function out. (3) Add trace notes for the ACK we're requesting and the timer we clear. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/call_event.c | 16 ++++++++++++---- net/rxrpc/misc.c | 2 ++ net/rxrpc/recvmsg.c | 8 ++++++-- net/rxrpc/sendmsg.c | 5 +++++ 5 files changed, 28 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b56676b..f60e355 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -733,6 +733,7 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; enum rxrpc_timer_trace { rxrpc_timer_begin, rxrpc_timer_init_for_reply, + rxrpc_timer_init_for_send_reply, rxrpc_timer_expired, rxrpc_timer_set_for_ack, rxrpc_timer_set_for_ping, @@ -749,6 +750,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, + rxrpc_propose_ack_processing_op, rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, rxrpc_propose_ack_retry_tx, @@ -811,6 +813,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ +void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, enum rxrpc_propose_ack_trace); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 0f91d32..97a17ad 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,15 +24,13 @@ /* * Set the timer */ -void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now) +void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) { unsigned long t_j, now_j = jiffies; ktime_t t; bool queue = false; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { t = call->expire_at; if (!ktime_after(t, now)) { @@ -84,6 +82,16 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, out: if (queue) rxrpc_queue_call(call); +} + +/* + * Set the timer + */ +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) +{ + read_lock_bh(&call->state_lock); + __rxrpc_set_timer(call, why, now); read_unlock_bh(&call->state_lock); } diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1cdcba5..6dee55f 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -195,6 +195,7 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_begin] = "Begin ", [rxrpc_timer_expired] = "*EXPR*", [rxrpc_timer_init_for_reply] = "IniRpl", + [rxrpc_timer_init_for_send_reply] = "SndRpl", [rxrpc_timer_set_for_ack] = "SetAck", [rxrpc_timer_set_for_ping] = "SetPng", [rxrpc_timer_set_for_send] = "SetTx ", @@ -207,6 +208,7 @@ const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck", [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", + [rxrpc_propose_ack_processing_op] = "ProcOp ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", [rxrpc_propose_ack_retry_tx] = "RetryTx", diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index db5b02a..c29362d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -151,17 +151,21 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) switch (call->state) { case RXRPC_CALL_CLIENT_RECV_REPLY: __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); break; case RXRPC_CALL_SERVER_RECV_REQUEST: call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + call->ack_at = call->expire_at; + write_unlock_bh(&call->state_lock); + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, + rxrpc_propose_ack_processing_op); break; default: + write_unlock_bh(&call->state_lock); break; } - - write_unlock_bh(&call->state_lock); } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 55a2fb2..b214a4d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -130,6 +130,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, break; case RXRPC_CALL_SERVER_ACK_REQUEST: call->state = RXRPC_CALL_SERVER_SEND_REPLY; + call->ack_at = call->expire_at; + if (call->ackr_reason == RXRPC_ACK_DELAY) + call->ackr_reason = 0; + __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply, + ktime_get_real()); if (!last) break; case RXRPC_CALL_SERVER_SEND_REPLY: -- cgit v1.1 From bf7d620abf22c321208a4da4f435e7af52551a21 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2016 08:11:51 +0100 Subject: rxrpc: Don't request an ACK on the last DATA packet of a call's Tx phase Don't request an ACK on the last DATA packet of a call's Tx phase as for a client there will be a reply packet or some sort of ACK to shift phase. If the ACK is requested, OpenAFS sends a REQUESTED-ACK ACK with soft-ACKs in it and doesn't follow up with a hard-ACK. If we don't set the flag, OpenAFS will send a DELAY ACK that hard-ACKs the reply data, thereby allowing the call to terminate cleanly. Signed-off-by: David Howells --- net/rxrpc/output.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a12cea0..5dab1ff 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -307,11 +307,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. */ - if (retrans || - call->cong_mode == RXRPC_CALL_SLOW_START || - (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), - ktime_get_real())) + if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && + (retrans || + call->cong_mode == RXRPC_CALL_SLOW_START || + (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real()))) whdr.flags |= RXRPC_REQUEST_ACK; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { -- cgit v1.1 From cecbf3e932c1fa6df45fd6cc4fc8081a4cb45bcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Wed, 5 Oct 2016 12:28:25 +0200 Subject: Bluetooth: Fix local name in scan rsp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use complete name if it fits. If not and there is short name check if it fits. If not then use shortened name as prefix of complete name. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 47 +++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c813568..fd6406d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -973,25 +973,48 @@ void __hci_req_enable_advertising(struct hci_request *req) static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - size_t name_len; + size_t complete_len; + size_t short_len; int max_len; max_len = HCI_MAX_AD_LENGTH - ad_len - 2; - name_len = strlen(hdev->dev_name); - if (name_len > 0 && max_len > 0) { + complete_len = strlen(hdev->dev_name); + short_len = strlen(hdev->short_name); - if (name_len > max_len) { - name_len = max_len; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; + /* no space left for name */ + if (max_len < 1) + return ad_len; - ptr[0] = name_len + 1; + /* no name set */ + if (!complete_len) + return ad_len; - memcpy(ptr + 2, hdev->dev_name, name_len); + /* complete name fits and is eq to max short name len or smaller */ + if (complete_len <= max_len && + complete_len <= HCI_MAX_SHORT_NAME_LENGTH) { + ptr[0] = complete_len + 1; + ptr[1] = EIR_NAME_COMPLETE; + memcpy(ptr + 2, hdev->dev_name, complete_len); - ad_len += (name_len + 2); - ptr += (name_len + 2); + return ad_len + complete_len + 2; + } + + /* short name set and fits */ + if (short_len && short_len <= max_len) { + ptr[0] = short_len + 1; + ptr[1] = EIR_NAME_SHORT; + memcpy(ptr + 2, hdev->short_name, short_len); + + return ad_len + short_len + 2; + } + + /* no short name set so shorten complete name */ + if (!short_len) { + ptr[0] = max_len + 1; + ptr[1] = EIR_NAME_SHORT; + memcpy(ptr + 2, hdev->dev_name, max_len); + + return ad_len + max_len + 2; } return ad_len; -- cgit v1.1 From 7ddb30c7471ed69b75ae4c2601d45cbda5d390ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Wed, 5 Oct 2016 12:28:26 +0200 Subject: Bluetooth: Add appearance to default scan rsp data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add appearance value to beginning of scan rsp data for default advertising instance if the value is not 0. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index fd6406d..3c44c54 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1022,7 +1022,16 @@ static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { - return append_local_name(hdev, ptr, 0); + u8 scan_rsp_len = 0; + + if (hdev->appearance) { + ptr[0] = 3; + ptr[1] = EIR_APPEARANCE; + put_unaligned_le16(hdev->appearance, ptr + 2); + scan_rsp_len += 4; + } + + return append_local_name(hdev, ptr + scan_rsp_len, scan_rsp_len); } static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, -- cgit v1.1 From 1b422066658b7cc985fa020066b72d28159d858f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Wed, 5 Oct 2016 12:28:27 +0200 Subject: Bluetooth: Refactor append name and appearance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use eir_append_data to remove code duplication. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 44 +++++++++++++++----------------------------- net/bluetooth/hci_request.h | 23 +++++++++++++++++++++++ net/bluetooth/mgmt.c | 21 --------------------- 3 files changed, 38 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 3c44c54..e228842 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,8 +21,6 @@ SOFTWARE IS DISCLAIMED. */ -#include - #include #include #include @@ -992,46 +990,39 @@ static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) /* complete name fits and is eq to max short name len or smaller */ if (complete_len <= max_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) { - ptr[0] = complete_len + 1; - ptr[1] = EIR_NAME_COMPLETE; - memcpy(ptr + 2, hdev->dev_name, complete_len); - - return ad_len + complete_len + 2; + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len); } /* short name set and fits */ if (short_len && short_len <= max_len) { - ptr[0] = short_len + 1; - ptr[1] = EIR_NAME_SHORT; - memcpy(ptr + 2, hdev->short_name, short_len); - - return ad_len + short_len + 2; + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + hdev->short_name, short_len); } /* no short name set so shorten complete name */ if (!short_len) { - ptr[0] = max_len + 1; - ptr[1] = EIR_NAME_SHORT; - memcpy(ptr + 2, hdev->dev_name, max_len); - - return ad_len + max_len + 2; + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + hdev->dev_name, max_len); } return ad_len; } +static u8 append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +{ + return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance); +} + static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { u8 scan_rsp_len = 0; if (hdev->appearance) { - ptr[0] = 3; - ptr[1] = EIR_APPEARANCE; - put_unaligned_le16(hdev->appearance, ptr + 2); - scan_rsp_len += 4; + scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); } - return append_local_name(hdev, ptr + scan_rsp_len, scan_rsp_len); + return append_local_name(hdev, ptr, scan_rsp_len); } static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, @@ -1048,18 +1039,13 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, instance_flags = adv_instance->flags; if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { - ptr[0] = 3; - ptr[1] = EIR_APPEARANCE; - put_unaligned_le16(hdev->appearance, ptr + 2); - scan_rsp_len += 4; - ptr += 4; + scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); } - memcpy(ptr, adv_instance->scan_rsp_data, + memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); scan_rsp_len += adv_instance->scan_rsp_len; - ptr += adv_instance->scan_rsp_len; if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index ac1e110..6b06629 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -20,6 +20,8 @@ SOFTWARE IS DISCLAIMED. */ +#include + #define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) #define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) @@ -103,3 +105,24 @@ static inline void hci_update_background_scan(struct hci_dev *hdev) void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); + +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, + u8 *data, u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 19b8a5e..7360380 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -867,27 +867,6 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - -static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) -{ - eir[eir_len++] = sizeof(type) + sizeof(data); - eir[eir_len++] = type; - put_unaligned_le16(data, &eir[eir_len]); - eir_len += sizeof(data); - - return eir_len; -} - static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) { u16 eir_len = 0; -- cgit v1.1 From 6664498280cf17a59c3e7cf1a931444c02633ed1 Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Wed, 5 Oct 2016 15:12:54 +0100 Subject: packet: call fanout_release, while UNREGISTERING a netdev If a socket has FANOUT sockopt set, a new proto_hook is registered as part of fanout_add(). When processing a NETDEV_UNREGISTER event in af_packet, __fanout_unlink is called for all sockets, but prot_hook which was registered as part of fanout_add is not removed. Call fanout_release, on a NETDEV_UNREGISTER, which removes prot_hook and removes fanout from the fanout_list. This fixes BUG_ON(!list_empty(&dev->ptype_specific)) in netdev_run_todo() Signed-off-by: Anoob Soman Signed-off-by: David S. Miller --- net/packet/af_packet.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 33a4697..11db0d6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3952,6 +3952,7 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); + fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); -- cgit v1.1 From d35c99ff77ecb2eb239731b799386f3b3637a31e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Oct 2016 04:13:18 +0900 Subject: netlink: do not enter direct reclaim from netlink_dump() Since linux-3.15, netlink_dump() can use up to 16384 bytes skb allocations. Due to struct skb_shared_info ~320 bytes overhead, we end up using order-3 (on x86) page allocations, that might trigger direct reclaim and add stress. The intent was really to attempt a large allocation but immediately fallback to a smaller one (order-1 on x86) in case of memory stress. On recent kernels (linux-4.4), we can remove __GFP_DIRECT_RECLAIM to meet the goal. Old kernels would need to remove __GFP_WAIT While we are at it, since we do an order-3 allocation, allow to use all the allocated bytes instead of 16384 to reduce syscalls during large dumps. iproute2 already uses 32KB recvmsg() buffer sizes. Alexei provided an initial patch downsizing to SKB_WITH_OVERHEAD(16384) Fixes: 9063e21fb026 ("netlink: autosize skb lengthes") Signed-off-by: Eric Dumazet Reported-by: Alexei Starovoitov Cc: Greg Thelen Reviewed-by: Greg Rose Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 627f898..62bea45 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1832,7 +1832,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* Record the max length of recvmsg() calls for future allocations */ nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - 16384); + SKB_WITH_OVERHEAD(32768)); copied = data_skb->len; if (len < copied) { @@ -2083,8 +2083,9 @@ static int netlink_dump(struct sock *sk) if (alloc_min_size < nlk->max_recvmsg_len) { alloc_size = nlk->max_recvmsg_len; - skb = alloc_skb(alloc_size, GFP_KERNEL | - __GFP_NOWARN | __GFP_NORETRY); + skb = alloc_skb(alloc_size, + (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | + __GFP_NOWARN | __GFP_NORETRY); } if (!skb) { alloc_size = alloc_min_size; -- cgit v1.1 From cb4a4c691e8631089759fc5d1faf8d6ccf581497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Fri, 7 Oct 2016 01:00:49 -0700 Subject: ipv6 addrconf: disallow rtr_solicits < -1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This disallows setting /proc/sys/net/ipv6/conf/*/router_solicitations to values below -1. -1 continues to mean an unlimited number of retransmits. Note: this depends on 'ipv6 addrconf: remove addrconf_sysctl_hop_limit()' Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cbd9343..d8983e1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5729,6 +5729,7 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static int minus_one = -1; static const int one = 1; static const int two_five_five = 255; @@ -5789,7 +5790,8 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &minus_one, }, { .procname = "router_solicitation_interval", -- cgit v1.1 From bd3769bfedb2b65af61744e9b40b1863e0870e2b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 10 Oct 2016 22:39:04 -0700 Subject: netfilter: Fix slab corruption. Use the correct pattern for singly linked list insertion and deletion. We can also calculate the list head outside of the mutex. Fixes: e3b37f11e6e4 ("netfilter: replace list_head with single linked list") Signed-off-by: Linus Torvalds Reviewed-by: Aaron Conole Signed-off-by: David S. Miller net/netfilter/core.c | 108 ++++++++++++++++----------------------------------- 1 file changed, 33 insertions(+), 75 deletions(-) --- net/netfilter/core.c | 108 ++++++++++++++++----------------------------------- 1 file changed, 33 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index c9d90eb..fcb5d1d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -65,49 +65,24 @@ static DEFINE_MUTEX(nf_hook_mutex); #define nf_entry_dereference(e) \ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct nf_hook_entry *nf_hook_entry_head(struct net *net, - const struct nf_hook_ops *reg) +static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *hook_head = NULL; - if (reg->pf != NFPROTO_NETDEV) - hook_head = nf_entry_dereference(net->nf.hooks[reg->pf] - [reg->hooknum]); - else if (reg->hooknum == NF_NETDEV_INGRESS) { + return net->nf.hooks[reg->pf]+reg->hooknum; + #ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { if (reg->dev && dev_net(reg->dev) == net) - hook_head = - nf_entry_dereference( - reg->dev->nf_hooks_ingress); -#endif + return ®->dev->nf_hooks_ingress; } - return hook_head; -} - -/* must hold nf_hook_mutex */ -static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg, - struct nf_hook_entry *entry) -{ - switch (reg->pf) { - case NFPROTO_NETDEV: -#ifdef CONFIG_NETFILTER_INGRESS - /* We already checked in nf_register_net_hook() that this is - * used from ingress. - */ - rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry); #endif - break; - default: - rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum], - entry); - break; - } + return NULL; } int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *hooks_entry; - struct nf_hook_entry *entry; + struct nf_hook_entry __rcu **pp; + struct nf_hook_entry *entry, *p; if (reg->pf == NFPROTO_NETDEV) { #ifndef CONFIG_NETFILTER_INGRESS @@ -119,6 +94,10 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) return -EINVAL; } + pp = nf_hook_entry_head(net, reg); + if (!pp) + return -EINVAL; + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; @@ -128,26 +107,15 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) entry->next = NULL; mutex_lock(&nf_hook_mutex); - hooks_entry = nf_hook_entry_head(net, reg); - - if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) { - /* This is the case where we need to insert at the head */ - entry->next = hooks_entry; - hooks_entry = NULL; - } - - while (hooks_entry && - reg->priority >= hooks_entry->orig_ops->priority && - nf_entry_dereference(hooks_entry->next)) { - hooks_entry = nf_entry_dereference(hooks_entry->next); - } - if (hooks_entry) { - entry->next = nf_entry_dereference(hooks_entry->next); - rcu_assign_pointer(hooks_entry->next, entry); - } else { - nf_set_hooks_head(net, reg, entry); + /* Find the spot in the list */ + while ((p = nf_entry_dereference(*pp)) != NULL) { + if (reg->priority < p->orig_ops->priority) + break; + pp = &p->next; } + rcu_assign_pointer(entry->next, p); + rcu_assign_pointer(*pp, entry); mutex_unlock(&nf_hook_mutex); #ifdef CONFIG_NETFILTER_INGRESS @@ -163,33 +131,23 @@ EXPORT_SYMBOL(nf_register_net_hook); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *hooks_entry; + struct nf_hook_entry __rcu **pp; + struct nf_hook_entry *p; - mutex_lock(&nf_hook_mutex); - hooks_entry = nf_hook_entry_head(net, reg); - if (hooks_entry && hooks_entry->orig_ops == reg) { - nf_set_hooks_head(net, reg, - nf_entry_dereference(hooks_entry->next)); - goto unlock; - } - while (hooks_entry && nf_entry_dereference(hooks_entry->next)) { - struct nf_hook_entry *next = - nf_entry_dereference(hooks_entry->next); - struct nf_hook_entry *nnext; + pp = nf_hook_entry_head(net, reg); + if (WARN_ON_ONCE(!pp)) + return; - if (next->orig_ops != reg) { - hooks_entry = next; - continue; + mutex_lock(&nf_hook_mutex); + while ((p = nf_entry_dereference(*pp)) != NULL) { + if (p->orig_ops == reg) { + rcu_assign_pointer(*pp, p->next); + break; } - nnext = nf_entry_dereference(next->next); - rcu_assign_pointer(hooks_entry->next, nnext); - hooks_entry = next; - break; + pp = &p->next; } - -unlock: mutex_unlock(&nf_hook_mutex); - if (!hooks_entry) { + if (!p) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); return; } @@ -201,10 +159,10 @@ unlock: static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(net, hooks_entry); + nf_queue_nf_hook_drop(net, p); /* other cpu might still process nfqueue verdict that used reg */ synchronize_net(); - kfree(hooks_entry); + kfree(p); } EXPORT_SYMBOL(nf_unregister_net_hook); -- cgit v1.1