summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@ghostprotocols.net>2005-08-09 20:15:09 -0700
committerDavid S. Miller <davem@sunset.davemloft.net>2005-08-29 15:49:50 -0700
commita019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 (patch)
treef82f0523c313228d64998fac30790edcfd0785c3
parent7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c (diff)
downloadop-kernel-dev-a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3.zip
op-kernel-dev-a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3.tar.gz
[ICSK]: Move generalised functions from tcp to inet_connection_sock
This also improves reqsk_queue_prune and renames it to inet_csk_reqsk_queue_prune, as it deals with both inet_connection_sock and inet_request_sock objects, not just with request_sock ones thus belonging to inet_request_sock. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inet_connection_sock.h7
-rw-r--r--include/net/request_sock.h4
-rw-r--r--include/net/tcp.h6
-rw-r--r--net/dccp/timer.c6
-rw-r--r--net/ipv4/inet_connection_sock.c214
-rw-r--r--net/ipv4/tcp.c120
-rw-r--r--net/ipv4/tcp_timer.c93
7 files changed, 224 insertions, 226 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 692825f..bec19d5 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -239,6 +239,13 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
reqsk_free(req);
}
+extern void inet_csk_reqsk_queue_prune(struct sock *parent,
+ const unsigned long interval,
+ const unsigned long timeout,
+ const unsigned long max_rto);
+
+extern void inet_csk_destroy_sock(struct sock *sk);
+extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
extern void inet_csk_listen_stop(struct sock *sk);
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 447d287..b52cc52 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -258,8 +258,4 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
write_unlock(&queue->syn_wait_lock);
}
-extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
- const unsigned long interval, const unsigned long timeout,
- const unsigned long max_rto, int max_retries);
-
#endif /* _REQUEST_SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2423f05..077db85 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -423,9 +423,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
size_t len, int nonblock,
int flags, int *addr_len);
-extern int inet_csk_listen_start(struct sock *sk,
- const int nr_table_entries);
-
extern void tcp_parse_options(struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab);
@@ -861,9 +858,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
tp->snd_wl1 = seq;
}
-extern void inet_csk_destroy_sock(struct sock *sk);
-
-
/*
* Calculate(/check) TCP checksum
*/
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 8c396ee..9f1f1ab 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -220,11 +220,7 @@ out:
*/
static void dccp_response_timer(struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
- const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */;
-
- reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
- DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
+ inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
}
static void dccp_keepalive_timer(unsigned long data)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 136ada0..026630a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -23,6 +23,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp_states.h>
+#include <net/xfrm.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -398,8 +399,100 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
inet_csk_reqsk_queue_added(sk, timeout);
}
+/* Only thing we need from tcp.h */
+extern int sysctl_tcp_synack_retries;
+
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
+void inet_csk_reqsk_queue_prune(struct sock *parent,
+ const unsigned long interval,
+ const unsigned long timeout,
+ const unsigned long max_rto)
+{
+ struct inet_connection_sock *icsk = inet_csk(parent);
+ struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+ struct listen_sock *lopt = queue->listen_opt;
+ int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+ int thresh = max_retries;
+ unsigned long now = jiffies;
+ struct request_sock **reqp, *req;
+ int i, budget;
+
+ if (lopt == NULL || lopt->qlen == 0)
+ return;
+
+ /* Normally all the openreqs are young and become mature
+ * (i.e. converted to established socket) for first timeout.
+ * If synack was not acknowledged for 3 seconds, it means
+ * one of the following things: synack was lost, ack was lost,
+ * rtt is high or nobody planned to ack (i.e. synflood).
+ * When server is a bit loaded, queue is populated with old
+ * open requests, reducing effective size of queue.
+ * When server is well loaded, queue size reduces to zero
+ * after several minutes of work. It is not synflood,
+ * it is normal operation. The solution is pruning
+ * too old entries overriding normal timeout, when
+ * situation becomes dangerous.
+ *
+ * Essentially, we reserve half of room for young
+ * embrions; and abort old ones without pity, if old
+ * ones are about to clog our table.
+ */
+ if (lopt->qlen>>(lopt->max_qlen_log-1)) {
+ int young = (lopt->qlen_young<<1);
+
+ while (thresh > 2) {
+ if (lopt->qlen < young)
+ break;
+ thresh--;
+ young <<= 1;
+ }
+ }
+
+ if (queue->rskq_defer_accept)
+ max_retries = queue->rskq_defer_accept;
+
+ budget = 2 * (lopt->nr_table_entries / (timeout / interval));
+ i = lopt->clock_hand;
+
+ do {
+ reqp=&lopt->syn_table[i];
+ while ((req = *reqp) != NULL) {
+ if (time_after_eq(now, req->expires)) {
+ if ((req->retrans < thresh ||
+ (inet_rsk(req)->acked && req->retrans < max_retries))
+ && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
+ unsigned long timeo;
+
+ if (req->retrans++ == 0)
+ lopt->qlen_young--;
+ timeo = min((timeout << req->retrans), max_rto);
+ req->expires = now + timeo;
+ reqp = &req->dl_next;
+ continue;
+ }
+
+ /* Drop this request */
+ inet_csk_reqsk_queue_unlink(parent, req, reqp);
+ reqsk_queue_removed(queue, req);
+ reqsk_free(req);
+ continue;
+ }
+ reqp = &req->dl_next;
+ }
+
+ i = (i + 1) & (lopt->nr_table_entries - 1);
+
+ } while (--budget > 0);
+
+ lopt->clock_hand = i;
+
+ if (lopt->qlen)
+ inet_csk_reset_keepalive_timer(parent, interval);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
+
struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
const unsigned int __nocast priority)
{
@@ -424,3 +517,124 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
}
EXPORT_SYMBOL_GPL(inet_csk_clone);
+
+/*
+ * At this point, there should be no process reference to this
+ * socket, and thus no user references at all. Therefore we
+ * can assume the socket waitqueue is inactive and nobody will
+ * try to jump onto it.
+ */
+void inet_csk_destroy_sock(struct sock *sk)
+{
+ BUG_TRAP(sk->sk_state == TCP_CLOSE);
+ BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+
+ /* It cannot be in hash table! */
+ BUG_TRAP(sk_unhashed(sk));
+
+ /* If it has not 0 inet_sk(sk)->num, it must be bound */
+ BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
+
+ sk->sk_prot->destroy(sk);
+
+ sk_stream_kill_queues(sk);
+
+ xfrm_sk_free_policy(sk);
+
+ sk_refcnt_debug_release(sk);
+
+ atomic_dec(sk->sk_prot->orphan_count);
+ sock_put(sk);
+}
+
+EXPORT_SYMBOL(inet_csk_destroy_sock);
+
+int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+
+ if (rc != 0)
+ return rc;
+
+ sk->sk_max_ack_backlog = 0;
+ sk->sk_ack_backlog = 0;
+ inet_csk_delack_init(sk);
+
+ /* There is race window here: we announce ourselves listening,
+ * but this transition is still not validated by get_port().
+ * It is OK, because this socket enters to hash table only
+ * after validation is complete.
+ */
+ sk->sk_state = TCP_LISTEN;
+ if (!sk->sk_prot->get_port(sk, inet->num)) {
+ inet->sport = htons(inet->num);
+
+ sk_dst_reset(sk);
+ sk->sk_prot->hash(sk);
+
+ return 0;
+ }
+
+ sk->sk_state = TCP_CLOSE;
+ __reqsk_queue_destroy(&icsk->icsk_accept_queue);
+ return -EADDRINUSE;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+
+/*
+ * This routine closes sockets which have been at least partially
+ * opened, but not yet accepted.
+ */
+void inet_csk_listen_stop(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct request_sock *acc_req;
+ struct request_sock *req;
+
+ inet_csk_delete_keepalive_timer(sk);
+
+ /* make all the listen_opt local to us */
+ acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+
+ /* Following specs, it would be better either to send FIN
+ * (and enter FIN-WAIT-1, it is normal close)
+ * or to send active reset (abort).
+ * Certainly, it is pretty dangerous while synflood, but it is
+ * bad justification for our negligence 8)
+ * To be honest, we are not able to make either
+ * of the variants now. --ANK
+ */
+ reqsk_queue_destroy(&icsk->icsk_accept_queue);
+
+ while ((req = acc_req) != NULL) {
+ struct sock *child = req->sk;
+
+ acc_req = req->dl_next;
+
+ local_bh_disable();
+ bh_lock_sock(child);
+ BUG_TRAP(!sock_owned_by_user(child));
+ sock_hold(child);
+
+ sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+ sock_orphan(child);
+
+ atomic_inc(sk->sk_prot->orphan_count);
+
+ inet_csk_destroy_sock(child);
+
+ bh_unlock_sock(child);
+ local_bh_enable();
+ sock_put(child);
+
+ sk_acceptq_removed(sk);
+ __reqsk_free(req);
+ }
+ BUG_TRAP(!sk->sk_ack_backlog);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a4e9eec..4bda522 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -456,96 +456,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return put_user(answ, (int __user *)arg);
}
-int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
- int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
-
- if (rc != 0)
- return rc;
-
- sk->sk_max_ack_backlog = 0;
- sk->sk_ack_backlog = 0;
- inet_csk_delack_init(sk);
-
- /* There is race window here: we announce ourselves listening,
- * but this transition is still not validated by get_port().
- * It is OK, because this socket enters to hash table only
- * after validation is complete.
- */
- sk->sk_state = TCP_LISTEN;
- if (!sk->sk_prot->get_port(sk, inet->num)) {
- inet->sport = htons(inet->num);
-
- sk_dst_reset(sk);
- sk->sk_prot->hash(sk);
-
- return 0;
- }
-
- sk->sk_state = TCP_CLOSE;
- __reqsk_queue_destroy(&icsk->icsk_accept_queue);
- return -EADDRINUSE;
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_start);
-
-/*
- * This routine closes sockets which have been at least partially
- * opened, but not yet accepted.
- */
-void inet_csk_listen_stop(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct request_sock *acc_req;
- struct request_sock *req;
-
- inet_csk_delete_keepalive_timer(sk);
-
- /* make all the listen_opt local to us */
- acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
-
- /* Following specs, it would be better either to send FIN
- * (and enter FIN-WAIT-1, it is normal close)
- * or to send active reset (abort).
- * Certainly, it is pretty dangerous while synflood, but it is
- * bad justification for our negligence 8)
- * To be honest, we are not able to make either
- * of the variants now. --ANK
- */
- reqsk_queue_destroy(&icsk->icsk_accept_queue);
-
- while ((req = acc_req) != NULL) {
- struct sock *child = req->sk;
-
- acc_req = req->dl_next;
-
- local_bh_disable();
- bh_lock_sock(child);
- BUG_TRAP(!sock_owned_by_user(child));
- sock_hold(child);
-
- sk->sk_prot->disconnect(child, O_NONBLOCK);
-
- sock_orphan(child);
-
- atomic_inc(sk->sk_prot->orphan_count);
-
- inet_csk_destroy_sock(child);
-
- bh_unlock_sock(child);
- local_bh_enable();
- sock_put(child);
-
- sk_acceptq_removed(sk);
- __reqsk_free(req);
- }
- BUG_TRAP(!sk->sk_ack_backlog);
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
-
static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -1559,35 +1469,6 @@ void tcp_shutdown(struct sock *sk, int how)
}
}
-/*
- * At this point, there should be no process reference to this
- * socket, and thus no user references at all. Therefore we
- * can assume the socket waitqueue is inactive and nobody will
- * try to jump onto it.
- */
-void inet_csk_destroy_sock(struct sock *sk)
-{
- BUG_TRAP(sk->sk_state == TCP_CLOSE);
- BUG_TRAP(sock_flag(sk, SOCK_DEAD));
-
- /* It cannot be in hash table! */
- BUG_TRAP(sk_unhashed(sk));
-
- /* If it has not 0 inet_sk(sk)->num, it must be bound */
- BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
-
- sk->sk_prot->destroy(sk);
-
- sk_stream_kill_queues(sk);
-
- xfrm_sk_free_policy(sk);
-
- sk_refcnt_debug_release(sk);
-
- atomic_dec(sk->sk_prot->orphan_count);
- sock_put(sk);
-}
-
void tcp_close(struct sock *sk, long timeout)
{
struct sk_buff *skb;
@@ -2258,7 +2139,6 @@ void __init tcp_init(void)
}
EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(inet_csk_destroy_sock);
EXPORT_SYMBOL(tcp_disconnect);
EXPORT_SYMBOL(tcp_getsockopt);
EXPORT_SYMBOL(tcp_ioctl);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b614ad4..72cec69 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -424,103 +424,14 @@ out_unlock:
sock_put(sk);
}
-void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
- const unsigned long interval, const unsigned long timeout,
- const unsigned long max_rto, int max_retries)
-{
- struct inet_connection_sock *icsk = inet_csk(parent);
- struct listen_sock *lopt = queue->listen_opt;
- int thresh = max_retries;
- unsigned long now = jiffies;
- struct request_sock **reqp, *req;
- int i, budget;
-
- if (lopt == NULL || lopt->qlen == 0)
- return;
-
- /* Normally all the openreqs are young and become mature
- * (i.e. converted to established socket) for first timeout.
- * If synack was not acknowledged for 3 seconds, it means
- * one of the following things: synack was lost, ack was lost,
- * rtt is high or nobody planned to ack (i.e. synflood).
- * When server is a bit loaded, queue is populated with old
- * open requests, reducing effective size of queue.
- * When server is well loaded, queue size reduces to zero
- * after several minutes of work. It is not synflood,
- * it is normal operation. The solution is pruning
- * too old entries overriding normal timeout, when
- * situation becomes dangerous.
- *
- * Essentially, we reserve half of room for young
- * embrions; and abort old ones without pity, if old
- * ones are about to clog our table.
- */
- if (lopt->qlen>>(lopt->max_qlen_log-1)) {
- int young = (lopt->qlen_young<<1);
-
- while (thresh > 2) {
- if (lopt->qlen < young)
- break;
- thresh--;
- young <<= 1;
- }
- }
-
- if (queue->rskq_defer_accept)
- max_retries = queue->rskq_defer_accept;
-
- budget = 2 * (lopt->nr_table_entries / (timeout / interval));
- i = lopt->clock_hand;
-
- do {
- reqp=&lopt->syn_table[i];
- while ((req = *reqp) != NULL) {
- if (time_after_eq(now, req->expires)) {
- if ((req->retrans < thresh ||
- (inet_rsk(req)->acked && req->retrans < max_retries))
- && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
- unsigned long timeo;
-
- if (req->retrans++ == 0)
- lopt->qlen_young--;
- timeo = min((timeout << req->retrans), max_rto);
- req->expires = now + timeo;
- reqp = &req->dl_next;
- continue;
- }
-
- /* Drop this request */
- inet_csk_reqsk_queue_unlink(parent, req, reqp);
- reqsk_queue_removed(&icsk->icsk_accept_queue, req);
- reqsk_free(req);
- continue;
- }
- reqp = &req->dl_next;
- }
-
- i = (i + 1) & (lopt->nr_table_entries - 1);
-
- } while (--budget > 0);
-
- lopt->clock_hand = i;
-
- if (lopt->qlen)
- inet_csk_reset_keepalive_timer(parent, interval);
-}
-
-EXPORT_SYMBOL_GPL(reqsk_queue_prune);
-
/*
* Timer for listening sockets
*/
static void tcp_synack_timer(struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
- const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
-
- reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
- TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
+ inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
+ TCP_TIMEOUT_INIT, TCP_RTO_MAX);
}
void tcp_set_keepalive(struct sock *sk, int val)
OpenPOWER on IntegriCloud