From 69336bd2d3ddce315dc57be894e4b57a91036a14 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 22 Sep 2013 10:32:26 -0700 Subject: sock.h: Remove extern from function prototypes There are a mix of function prototypes with and without extern in the kernel sources. Standardize on not using extern for function prototypes. Function prototypes don't need to be written with extern. extern is assumed by the compiler. Its use is as unnecessary as using auto to declare automatic/local variables in a block. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/sock.h | 223 ++++++++++++++++++++++++----------------------------- 1 file changed, 99 insertions(+), 124 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 6ba2e7b..4625d2e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -746,7 +746,7 @@ static inline int sk_stream_wspace(const struct sock *sk) return sk->sk_sndbuf - sk->sk_wmem_queued; } -extern void sk_stream_write_space(struct sock *sk); +void sk_stream_write_space(struct sock *sk); /* OOB backlog add */ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) @@ -788,7 +788,7 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s return 0; } -extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { @@ -853,15 +853,15 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) __rc; \ }) -extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p); -extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p); -extern void sk_stream_wait_close(struct sock *sk, long timeo_p); -extern int sk_stream_error(struct sock *sk, int flags, int err); -extern void sk_stream_kill_queues(struct sock *sk); -extern void sk_set_memalloc(struct sock *sk); -extern void sk_clear_memalloc(struct sock *sk); +int sk_stream_wait_connect(struct sock *sk, long *timeo_p); +int sk_stream_wait_memory(struct sock *sk, long *timeo_p); +void sk_stream_wait_close(struct sock *sk, long timeo_p); +int sk_stream_error(struct sock *sk, int flags, int err); +void sk_stream_kill_queues(struct sock *sk); +void sk_set_memalloc(struct sock *sk); +void sk_clear_memalloc(struct sock *sk); -extern int sk_wait_data(struct sock *sk, long *timeo); +int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; struct timewait_sock_ops; @@ -1031,8 +1031,8 @@ struct cg_proto { struct mem_cgroup *memcg; }; -extern int proto_register(struct proto *prot, int alloc_slab); -extern void proto_unregister(struct proto *prot); +int proto_register(struct proto *prot, int alloc_slab); +void proto_unregister(struct proto *prot); static inline bool memcg_proto_active(struct cg_proto *cg_proto) { @@ -1287,8 +1287,8 @@ proto_memory_pressure(struct proto *prot) #ifdef CONFIG_PROC_FS /* Called with local bh disabled */ -extern void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); -extern int sock_prot_inuse_get(struct net *net, struct proto *proto); +void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); +int sock_prot_inuse_get(struct net *net, struct proto *proto); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) @@ -1364,8 +1364,8 @@ static inline struct inode *SOCK_INODE(struct socket *socket) /* * Functions for memory accounting */ -extern int __sk_mem_schedule(struct sock *sk, int size, int kind); -extern void __sk_mem_reclaim(struct sock *sk); +int __sk_mem_schedule(struct sock *sk, int size, int kind); +void __sk_mem_reclaim(struct sock *sk); #define SK_MEM_QUANTUM ((int)PAGE_SIZE) #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) @@ -1473,14 +1473,14 @@ do { \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) -extern void lock_sock_nested(struct sock *sk, int subclass); +void lock_sock_nested(struct sock *sk, int subclass); static inline void lock_sock(struct sock *sk) { lock_sock_nested(sk, 0); } -extern void release_sock(struct sock *sk); +void release_sock(struct sock *sk); /* BH context may only use the following locking interface. */ #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) @@ -1489,7 +1489,7 @@ extern void release_sock(struct sock *sk); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -extern bool lock_sock_fast(struct sock *sk); +bool lock_sock_fast(struct sock *sk); /** * unlock_sock_fast - complement of lock_sock_fast * @sk: socket @@ -1507,108 +1507,84 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) } -extern struct sock *sk_alloc(struct net *net, int family, - gfp_t priority, - struct proto *prot); -extern void sk_free(struct sock *sk); -extern void sk_release_kernel(struct sock *sk); -extern struct sock *sk_clone_lock(const struct sock *sk, - const gfp_t priority); - -extern struct sk_buff *sock_wmalloc(struct sock *sk, - unsigned long size, int force, - gfp_t priority); -extern struct sk_buff *sock_rmalloc(struct sock *sk, - unsigned long size, int force, - gfp_t priority); -extern void sock_wfree(struct sk_buff *skb); -extern void skb_orphan_partial(struct sk_buff *skb); -extern void sock_rfree(struct sk_buff *skb); -extern void sock_edemux(struct sk_buff *skb); - -extern int sock_setsockopt(struct socket *sock, int level, - int op, char __user *optval, - unsigned int optlen); - -extern int sock_getsockopt(struct socket *sock, int level, - int op, char __user *optval, - int __user *optlen); -extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, - unsigned long size, - int noblock, - int *errcode); -extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, - unsigned long header_len, - unsigned long data_len, - int noblock, - int *errcode, - int max_page_order); -extern void *sock_kmalloc(struct sock *sk, int size, - gfp_t priority); -extern void sock_kfree_s(struct sock *sk, void *mem, int size); -extern void sk_send_sigurg(struct sock *sk); +struct sock *sk_alloc(struct net *net, int family, gfp_t priority, + struct proto *prot); +void sk_free(struct sock *sk); +void sk_release_kernel(struct sock *sk); +struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); + +struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, + gfp_t priority); +struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, + gfp_t priority); +void sock_wfree(struct sk_buff *skb); +void skb_orphan_partial(struct sk_buff *skb); +void sock_rfree(struct sk_buff *skb); +void sock_edemux(struct sk_buff *skb); + +int sock_setsockopt(struct socket *sock, int level, int op, + char __user *optval, unsigned int optlen); + +int sock_getsockopt(struct socket *sock, int level, int op, + char __user *optval, int __user *optlen); +struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, + int noblock, int *errcode); +struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, + unsigned long data_len, int noblock, + int *errcode, int max_page_order); +void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); +void sock_kfree_s(struct sock *sk, void *mem, int size); +void sk_send_sigurg(struct sock *sk); /* * Functions to fill in entries in struct proto_ops when a protocol * does not implement a particular function. */ -extern int sock_no_bind(struct socket *, - struct sockaddr *, int); -extern int sock_no_connect(struct socket *, - struct sockaddr *, int, int); -extern int sock_no_socketpair(struct socket *, - struct socket *); -extern int sock_no_accept(struct socket *, - struct socket *, int); -extern int sock_no_getname(struct socket *, - struct sockaddr *, int *, int); -extern unsigned int sock_no_poll(struct file *, struct socket *, - struct poll_table_struct *); -extern int sock_no_ioctl(struct socket *, unsigned int, - unsigned long); -extern int sock_no_listen(struct socket *, int); -extern int sock_no_shutdown(struct socket *, int); -extern int sock_no_getsockopt(struct socket *, int , int, - char __user *, int __user *); -extern int sock_no_setsockopt(struct socket *, int, int, - char __user *, unsigned int); -extern int sock_no_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -extern int sock_no_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); -extern int sock_no_mmap(struct file *file, - struct socket *sock, - struct vm_area_struct *vma); -extern ssize_t sock_no_sendpage(struct socket *sock, - struct page *page, - int offset, size_t size, - int flags); +int sock_no_bind(struct socket *, struct sockaddr *, int); +int sock_no_connect(struct socket *, struct sockaddr *, int, int); +int sock_no_socketpair(struct socket *, struct socket *); +int sock_no_accept(struct socket *, struct socket *, int); +int sock_no_getname(struct socket *, struct sockaddr *, int *, int); +unsigned int sock_no_poll(struct file *, struct socket *, + struct poll_table_struct *); +int sock_no_ioctl(struct socket *, unsigned int, unsigned long); +int sock_no_listen(struct socket *, int); +int sock_no_shutdown(struct socket *, int); +int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); +int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); +int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); +int sock_no_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, + int); +int sock_no_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma); +ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags); /* * Functions to fill in entries in struct proto_ops when a protocol * uses the inet style. */ -extern int sock_common_getsockopt(struct socket *sock, int level, int optname, +int sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); -extern int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, +int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags); -extern int sock_common_setsockopt(struct socket *sock, int level, int optname, +int sock_common_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); -extern int compat_sock_common_getsockopt(struct socket *sock, int level, +int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); -extern int compat_sock_common_setsockopt(struct socket *sock, int level, +int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); -extern void sk_common_release(struct sock *sk); +void sk_common_release(struct sock *sk); /* * Default socket callbacks and setup code */ /* Initialise core socket variables */ -extern void sock_init_data(struct socket *sock, struct sock *sk); +void sock_init_data(struct socket *sock, struct sock *sk); -extern void sk_filter_release_rcu(struct rcu_head *rcu); +void sk_filter_release_rcu(struct rcu_head *rcu); /** * sk_filter_release - release a socket filter @@ -1669,8 +1645,7 @@ static inline void sock_put(struct sock *sk) sk_free(sk); } -extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested); +int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) { @@ -1724,8 +1699,8 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } -extern kuid_t sock_i_uid(struct sock *sk); -extern unsigned long sock_i_ino(struct sock *sk); +kuid_t sock_i_uid(struct sock *sk); +unsigned long sock_i_ino(struct sock *sk); static inline struct dst_entry * __sk_dst_get(struct sock *sk) @@ -1747,7 +1722,7 @@ sk_dst_get(struct sock *sk) return dst; } -extern void sk_reset_txq(struct sock *sk); +void sk_reset_txq(struct sock *sk); static inline void dst_negative_advice(struct sock *sk) { @@ -1800,16 +1775,16 @@ sk_dst_reset(struct sock *sk) spin_unlock(&sk->sk_dst_lock); } -extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); +struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); -extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); static inline bool sk_can_gso(const struct sock *sk) { return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); } -extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst); +void sk_setup_caps(struct sock *sk, struct dst_entry *dst); static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) { @@ -2022,14 +1997,14 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -extern void sk_reset_timer(struct sock *sk, struct timer_list *timer, - unsigned long expires); +void sk_reset_timer(struct sock *sk, struct timer_list *timer, + unsigned long expires); -extern void sk_stop_timer(struct sock *sk, struct timer_list *timer); +void sk_stop_timer(struct sock *sk, struct timer_list *timer); -extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); +int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); /* * Recover an error report and clear atomically @@ -2097,7 +2072,7 @@ static inline struct page_frag *sk_page_frag(struct sock *sk) return &sk->sk_frag; } -extern bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); +bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); /* * Default write policy as shown to user space via poll/select/SIGIO @@ -2135,10 +2110,10 @@ static inline int sock_intr_errno(long timeo) return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR; } -extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); -extern void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); +void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); +void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); static inline void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -2171,8 +2146,8 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) __sock_recv_wifi_status(msg, sk, skb); } -extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); +void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -2197,7 +2172,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, * * Currently only depends on SOCK_TIMESTAMPING* flags. */ -extern void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed @@ -2261,11 +2236,11 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb) return NULL; } -extern void sock_enable_timestamp(struct sock *sk, int flag); -extern int sock_get_timestamp(struct sock *, struct timeval __user *); -extern int sock_get_timestampns(struct sock *, struct timespec __user *); -extern int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, - int level, int type); +void sock_enable_timestamp(struct sock *sk, int flag); +int sock_get_timestamp(struct sock *, struct timeval __user *); +int sock_get_timestampns(struct sock *, struct timespec __user *); +int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, + int type); /* * Enable debug/info messages -- cgit v1.1 From 62748f32d501f5d3712a7c372bbb92abc7c62bc7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2013 08:20:52 -0700 Subject: net: introduce SO_MAX_PACING_RATE As mentioned in commit afe4fd062416b ("pkt_sched: fq: Fair Queue packet scheduler"), this patch adds a new socket option. SO_MAX_PACING_RATE offers the application the ability to cap the rate computed by transport layer. Value is in bytes per second. u32 val = 1000000; setsockopt(sockfd, SOL_SOCKET, SO_MAX_PACING_RATE, &val, sizeof(val)); To be effectively paced, a flow must use FQ packet scheduler. Note that a packet scheduler takes into account the headers for its computations. The effective payload rate depends on MSS and retransmits if any. I chose to make this pacing rate a SOL_SOCKET option instead of a TCP one because this can be used by other protocols. Signed-off-by: Eric Dumazet Cc: Steinar H. Gunderson Cc: Michael Kerrisk Signed-off-by: David S. Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 4625d2e..240aa3f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -363,6 +363,7 @@ struct sock { int sk_wmem_queued; gfp_t sk_allocation; u32 sk_pacing_rate; /* bytes per second */ + u32 sk_max_pacing_rate; netdev_features_t sk_route_caps; netdev_features_t sk_route_nocaps; int sk_gso_type; -- cgit v1.1 From c3f40d7c04152c6f168db2f9b43438015cf092c4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Sep 2013 01:12:40 -0700 Subject: net: add missing sk_max_pacing_rate doc Warning(include/net/sock.h:411): No description found for parameter 'sk_max_pacing_rate' Lets please "make htmldocs" and kbuild bot. Reported-by: Wu Fengguang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 240aa3f..cf91c8e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -233,6 +233,7 @@ struct cg_proto; * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) + * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings -- cgit v1.1 From 5080546682bae3d32734b18e281091684f0ebbe4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Oct 2013 04:29:50 -0700 Subject: inet: consolidate INET_TW_MATCH TCP listener refactoring, part 2 : We can use a generic lookup, sockets being in whatever state, if we are sure all relevant fields are at the same place in all socket types (ESTABLISH, TIME_WAIT, SYN_RECV) This patch removes these macros : inet_addrpair, inet_addrpair, tw_addrpair, tw_portpair And adds : sk_portpair, sk_addrpair, sk_daddr, sk_rcv_saddr Then, INET_TW_MATCH() is really the same than INET_MATCH() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index f0a44cc..e3bf213 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -300,6 +300,10 @@ struct sock { #define sk_dontcopy_begin __sk_common.skc_dontcopy_begin #define sk_dontcopy_end __sk_common.skc_dontcopy_end #define sk_hash __sk_common.skc_hash +#define sk_portpair __sk_common.skc_portpair +#define sk_addrpair __sk_common.skc_addrpair +#define sk_daddr __sk_common.skc_daddr +#define sk_rcv_saddr __sk_common.skc_rcv_saddr #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse -- cgit v1.1 From 421b3885bf6d56391297844f43fb7154a6396e12 Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Mon, 7 Oct 2013 11:01:39 -0500 Subject: udp: ipv4: Add udp early demux The removal of the routing cache introduced a performance regression for some UDP workloads since a dst lookup must be done for each packet. This change caches the dst per socket in a similar manner to what we do for TCP by implementing early_demux. For UDP multicast we can only cache the dst if there is only one receiving socket on the host. Since caching only works when there is one receiving socket we do the multicast socket lookup using RCU. For UDP unicast we only demux sockets with an exact match in order to not break forwarding setups. Additionally since the hash chains may be long we only check the first socket to see if it is a match and not waste extra time searching the whole chain when we might not find an exact match. Benchmark results from a netperf UDP_RR test: Before 87961.22 transactions/s After 89789.68 transactions/s Benchmark results from a fio 1 byte UDP multicast pingpong test (Multicast one way unicast response): Before 12.97us RTT After 12.63us RTT Signed-off-by: Shawn Bohrer Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index e3bf213..7953254 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -218,7 +218,7 @@ struct cg_proto; * @sk_lock: synchronizer * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head - * @sk_rx_dst: receive input route used by early tcp demux + * @sk_rx_dst: receive input route used by early demux * @sk_dst_cache: destination cache * @sk_dst_lock: destination cache lock * @sk_policy: flow policy -- cgit v1.1 From 05dbc7b59481ca891bbcfe6799a562d48159fbf7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Oct 2013 00:22:02 -0700 Subject: tcp/dccp: remove twchain TCP listener refactoring, part 3 : Our goal is to hash SYN_RECV sockets into main ehash for fast lookup, and parallel SYN processing. Current inet_ehash_bucket contains two chains, one for ESTABLISH (and friend states) sockets, another for TIME_WAIT sockets only. As the hash table is sized to get at most one socket per bucket, it makes little sense to have separate twchain, as it makes the lookup slightly more complicated, and doubles hash table memory usage. If we make sure all socket types have the lookup keys at the same offsets, we can use a generic and faster lookup. It turns out TIME_WAIT and ESTABLISHED sockets already have common lookup fields for IPv4. [ INET_TW_MATCH() is no longer needed ] I'll provide a follow-up to factorize IPv6 lookup as well, to remove INET6_TW_MATCH() This way, SYN_RECV pseudo sockets will be supported the same. A new sock_gen_put() helper is added, doing either a sock_put() or inet_twsk_put() [ and will support SYN_RECV later ]. Note this helper should only be called in real slow path, when rcu lookup found a socket that was moved to another identity (freed/reused immediately), but could eventually be used in other contexts, like sock_edemux() Before patch : dmesg | grep "TCP established" TCP established hash table entries: 524288 (order: 11, 8388608 bytes) After patch : TCP established hash table entries: 524288 (order: 10, 4194304 bytes) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 7cf8d23..3f3e48c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -156,7 +156,7 @@ typedef __u64 __bitwise __addrpair; */ struct sock_common { /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned - * address on 64bit arches : cf INET_MATCH() and INET_TW_MATCH() + * address on 64bit arches : cf INET_MATCH() */ union { __addrpair skc_addrpair; @@ -301,6 +301,8 @@ struct sock { #define sk_dontcopy_end __sk_common.skc_dontcopy_end #define sk_hash __sk_common.skc_hash #define sk_portpair __sk_common.skc_portpair +#define sk_num __sk_common.skc_num +#define sk_dport __sk_common.skc_dport #define sk_addrpair __sk_common.skc_addrpair #define sk_daddr __sk_common.skc_daddr #define sk_rcv_saddr __sk_common.skc_rcv_saddr @@ -1653,6 +1655,10 @@ static inline void sock_put(struct sock *sk) if (atomic_dec_and_test(&sk->sk_refcnt)) sk_free(sk); } +/* Generic version of sock_put(), dealing with all sockets + * (TCP_TIMEWAIT, ESTABLISHED...) + */ +void sock_gen_put(struct sock *sk); int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); -- cgit v1.1 From efe4208f47f907b86f528788da711e8ab9dea44d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Oct 2013 15:42:29 -0700 Subject: ipv6: make lookups simpler and faster TCP listener refactoring, part 4 : To speed up inet lookups, we moved IPv4 addresses from inet to struct sock_common Now is time to do the same for IPv6, because it permits us to have fast lookups for all kind of sockets, including upcoming SYN_RECV. Getting IPv6 addresses in TCP lookups currently requires two extra cache lines, plus a dereference (and memory stall). inet6_sk(sk) does the dereference of inet_sk(__sk)->pinet6 This patch is way bigger than its IPv4 counter part, because for IPv4, we could add aliases (inet_daddr, inet_rcv_saddr), while on IPv6, it's not doable easily. inet6_sk(sk)->daddr becomes sk->sk_v6_daddr inet6_sk(sk)->rcv_saddr becomes sk->sk_v6_rcv_saddr And timewait socket also have tw->tw_v6_daddr & tw->tw_v6_rcv_saddr at the same offset. We get rid of INET6_TW_MATCH() as INET6_MATCH() is now the generic macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 3f3e48c..7e50df5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -191,6 +191,12 @@ struct sock_common { #ifdef CONFIG_NET_NS struct net *skc_net; #endif + +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr skc_v6_daddr; + struct in6_addr skc_v6_rcv_saddr; +#endif + /* * fields between dontcopy_begin/dontcopy_end * are not copied in sock_copy() @@ -314,6 +320,9 @@ struct sock { #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr + socket_lock_t sk_lock; struct sk_buff_head sk_receive_queue; /* -- cgit v1.1 From 2e685cad57906e19add7189b5ff49dfb6aaa21d3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 19 Oct 2013 16:26:19 -0700 Subject: tcp_memcontrol: Kill struct tcp_memcontrol Replace the pointers in struct cg_proto with actual data fields and kill struct tcp_memcontrol as it is not fully redundant. This removes a confusing, unnecessary layer of abstraction. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/sock.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 7e50df5..86bb066 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1036,10 +1036,10 @@ enum cg_proto_flags { struct cg_proto { void (*enter_memory_pressure)(struct sock *sk); - struct res_counter *memory_allocated; /* Current allocated memory. */ - struct percpu_counter *sockets_allocated; /* Current number of sockets. */ - int *memory_pressure; - long *sysctl_mem; + struct res_counter memory_allocated; /* Current allocated memory. */ + struct percpu_counter sockets_allocated; /* Current number of sockets. */ + int memory_pressure; + long sysctl_mem[3]; unsigned long flags; /* * memcg field is used to find which memcg we belong directly @@ -1135,9 +1135,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) return false; if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return !!*sk->sk_cgrp->memory_pressure; + return !!sk->sk_cgrp->memory_pressure; - return !!*sk->sk_prot->memory_pressure; + return !!sk->sk_prot->memory_pressure; } static inline void sk_leave_memory_pressure(struct sock *sk) @@ -1155,8 +1155,8 @@ static inline void sk_leave_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - if (*cg_proto->memory_pressure) - *cg_proto->memory_pressure = 0; + if (cg_proto->memory_pressure) + cg_proto->memory_pressure = 0; } } @@ -1192,7 +1192,7 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot, struct res_counter *fail; int ret; - ret = res_counter_charge_nofail(prot->memory_allocated, + ret = res_counter_charge_nofail(&prot->memory_allocated, amt << PAGE_SHIFT, &fail); if (ret < 0) *parent_status = OVER_LIMIT; @@ -1201,13 +1201,13 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot, static inline void memcg_memory_allocated_sub(struct cg_proto *prot, unsigned long amt) { - res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT); + res_counter_uncharge(&prot->memory_allocated, amt << PAGE_SHIFT); } static inline u64 memcg_memory_allocated_read(struct cg_proto *prot) { u64 ret; - ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE); + ret = res_counter_read_u64(&prot->memory_allocated, RES_USAGE); return ret >> PAGE_SHIFT; } @@ -1255,7 +1255,7 @@ static inline void sk_sockets_allocated_dec(struct sock *sk) struct cg_proto *cg_proto = sk->sk_cgrp; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - percpu_counter_dec(cg_proto->sockets_allocated); + percpu_counter_dec(&cg_proto->sockets_allocated); } percpu_counter_dec(prot->sockets_allocated); @@ -1269,7 +1269,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) struct cg_proto *cg_proto = sk->sk_cgrp; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - percpu_counter_inc(cg_proto->sockets_allocated); + percpu_counter_inc(&cg_proto->sockets_allocated); } percpu_counter_inc(prot->sockets_allocated); @@ -1281,7 +1281,7 @@ sk_sockets_allocated_read_positive(struct sock *sk) struct proto *prot = sk->sk_prot; if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return percpu_counter_read_positive(sk->sk_cgrp->sockets_allocated); + return percpu_counter_read_positive(&sk->sk_cgrp->sockets_allocated); return percpu_counter_read_positive(prot->sockets_allocated); } -- cgit v1.1 From 0a6957e7d47096bbeedda4e1d926359eb487dcfc Mon Sep 17 00:00:00 2001 From: ZHAO Gang Date: Tue, 22 Oct 2013 16:23:38 +0800 Subject: net: remove function sk_reset_txq() What sk_reset_txq() does is just calls function sk_tx_queue_reset(), and sk_reset_txq() is used only in sock.h, by dst_negative_advice(). Let dst_negative_advice() calls sk_tx_queue_reset() directly so we can remove unneeded sk_reset_txq(). Signed-off-by: ZHAO Gang Signed-off-by: David S. Miller --- include/net/sock.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 86bb066..c93542f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1746,8 +1746,6 @@ sk_dst_get(struct sock *sk) return dst; } -void sk_reset_txq(struct sock *sk); - static inline void dst_negative_advice(struct sock *sk) { struct dst_entry *ndst, *dst = __sk_dst_get(sk); @@ -1757,7 +1755,7 @@ static inline void dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); - sk_reset_txq(sk); + sk_tx_queue_clear(sk); } } } -- cgit v1.1 From 35b87f6c135b7ded8ab1a44e46d792f7688f3608 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 23 Oct 2013 12:49:21 -0700 Subject: net: Dereference pointer-value of sk_prot->memory_pressure 2e685cad57 (tcp_memcontrol: Kill struct tcp_memcontrol) falsly modified the access to memory_pressure of sk->sk_prot->memory_pressure. The patch did modify the memory_pressure-field of struct cg_proto, but not the one of struct proto. So, the access to sk_prot->memory_pressure should not be changed. Acked-by: Eric Dumazet Reported-by: Fengguang Wu Signed-off-by: Christoph Paasch Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index c93542f..e3a18ff 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1137,7 +1137,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) if (mem_cgroup_sockets_enabled && sk->sk_cgrp) return !!sk->sk_cgrp->memory_pressure; - return !!sk->sk_prot->memory_pressure; + return !!*sk->sk_prot->memory_pressure; } static inline void sk_leave_memory_pressure(struct sock *sk) -- cgit v1.1