diff options
author | Pavel Emelyanov <xemul@parallels.com> | 2012-04-19 03:40:39 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-04-21 15:52:25 -0400 |
commit | ee9952831cfd0bbe834f4a26489d7dce74582e37 (patch) | |
tree | 64c195fa45e1a200f38d68751161d8e06dfb5a6c /net/ipv4/tcp.c | |
parent | 370816aef0c5436c2adbec3966038f36ca326933 (diff) | |
download | op-kernel-dev-ee9952831cfd0bbe834f4a26489d7dce74582e37.zip op-kernel-dev-ee9952831cfd0bbe834f4a26489d7dce74582e37.tar.gz |
tcp: Initial repair mode
This includes (according to the previous description):
* TCP_REPAIR sockoption
This one just puts the socket in/out of the repair mode.
Allowed for CAP_NET_ADMIN and for closed/established sockets only.
When repair mode is turned off and the socket happens to be in
the established state the window probe is sent to the peer to
'unlock' the connection.
* TCP_REPAIR_QUEUE sockoption
This one sets the queue which we're about to repair. The
'no-queue' is set by default.
* TCP_QUEUE_SEQ sockoption
Sets the write_seq/rcv_nxt of a selected repaired queue.
Allowed for TCP_CLOSE-d sockets only. When the socket changes
its state the other seq-s are changed by the kernel according
to the protocol rules (most of the existing code is actually
reused).
* Ability to forcibly bind a socket to a port
The sk->sk_reuse is set to SK_FORCE_REUSE.
* Immediate connect modification
The connect syscall initializes the connection, then directly jumps
to the code which finalizes it.
* Silent close modification
The close just aborts the connection (similar to SO_LINGER with 0
time) but without sending any FIN/RST-s to peer.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 68 |
1 files changed, 67 insertions, 1 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bb4200f..e38d6f2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1935,7 +1935,9 @@ void tcp_close(struct sock *sk, long timeout) * advertise a zero window, then kill -9 the FTP client, wheee... * Note: timeout is always zero in such a case. */ - if (data_was_unread) { + if (unlikely(tcp_sk(sk)->repair)) { + sk->sk_prot->disconnect(sk, 0); + } else if (data_was_unread) { /* Unread data was tossed, zap the connection. */ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); @@ -2074,6 +2076,8 @@ int tcp_disconnect(struct sock *sk, int flags) /* ABORT function of RFC793 */ if (old_state == TCP_LISTEN) { inet_csk_listen_stop(sk); + } else if (unlikely(tp->repair)) { + sk->sk_err = ECONNABORTED; } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -2125,6 +2129,12 @@ int tcp_disconnect(struct sock *sk, int flags) } EXPORT_SYMBOL(tcp_disconnect); +static inline int tcp_can_repair_sock(struct sock *sk) +{ + return capable(CAP_NET_ADMIN) && + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); +} + /* * Socket option code for TCP. 
*/ @@ -2297,6 +2307,42 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->thin_dupack = val; break; + case TCP_REPAIR: + if (!tcp_can_repair_sock(sk)) + err = -EPERM; + else if (val == 1) { + tp->repair = 1; + sk->sk_reuse = SK_FORCE_REUSE; + tp->repair_queue = TCP_NO_QUEUE; + } else if (val == 0) { + tp->repair = 0; + sk->sk_reuse = SK_NO_REUSE; + tcp_send_window_probe(sk); + } else + err = -EINVAL; + + break; + + case TCP_REPAIR_QUEUE: + if (!tp->repair) + err = -EPERM; + else if (val < TCP_QUEUES_NR) + tp->repair_queue = val; + else + err = -EINVAL; + break; + + case TCP_QUEUE_SEQ: + if (sk->sk_state != TCP_CLOSE) + err = -EPERM; + else if (tp->repair_queue == TCP_SEND_QUEUE) + tp->write_seq = val; + else if (tp->repair_queue == TCP_RECV_QUEUE) + tp->rcv_nxt = val; + else + err = -EINVAL; + break; + case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit @@ -2632,6 +2678,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->thin_dupack; break; + case TCP_REPAIR: + val = tp->repair; + break; + + case TCP_REPAIR_QUEUE: + if (tp->repair) + val = tp->repair_queue; + else + return -EINVAL; + break; + + case TCP_QUEUE_SEQ: + if (tp->repair_queue == TCP_SEND_QUEUE) + val = tp->write_seq; + else if (tp->repair_queue == TCP_RECV_QUEUE) + val = tp->rcv_nxt; + else + return -EINVAL; + break; + case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; |