diff options
Diffstat (limited to 'include/net')
58 files changed, 3232 insertions, 1263 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index ed00a99..b55eb7c 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -63,7 +63,7 @@ struct tc_action_ops __u32 type; /* TBD to match kind */ __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; - int (*act)(struct sk_buff **, struct tc_action *); + int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *); int (*get_stats)(struct sk_buff *, struct tc_action *); int (*dump)(struct sk_buff *, struct tc_action *,int , int); int (*cleanup)(struct tc_action *, int bind); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index a0ed936..750e250 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -45,6 +45,7 @@ struct prefix_info { #ifdef __KERNEL__ +#include <linux/config.h> #include <linux/netdevice.h> #include <net/if_inet6.h> #include <net/ipv6.h> @@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) addr->s6_addr32[3] == htonl(0x00000002)); } +#ifdef CONFIG_PROC_FS +extern int if6_proc_init(void); +extern void if6_proc_exit(void); +#endif + #endif #endif diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b60b384..b5d785a 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -1,5 +1,11 @@ #ifndef __LINUX_NET_AFUNIX_H #define __LINUX_NET_AFUNIX_H + +#include <linux/config.h> +#include <linux/socket.h> +#include <linux/un.h> +#include <net/sock.h> + extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); @@ -74,5 +80,14 @@ struct unix_sock { wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) + +#ifdef CONFIG_SYSCTL +extern int sysctl_unix_max_dgram_qlen; +extern void unix_sysctl_register(void); +extern void unix_sysctl_unregister(void); +#else +static inline void unix_sysctl_register(void) {} +static inline void unix_sysctl_unregister(void) {} +#endif #endif #endif diff --git a/include/net/arp.h b/include/net/arp.h index a1f09fa..a13e30c 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl; extern void arp_init(void); extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int arp_find(unsigned char *haddr, struct sk_buff *skb); extern int arp_ioctl(unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, u32 dest_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index 9e6368a..364b046 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -139,11 +139,25 @@ enum { #define AX25_DEF_DS_TIMEOUT (3 * 60 * HZ) /* DAMA timeout 3 minutes */ typedef struct ax25_uid_assoc { - struct ax25_uid_assoc *next; + struct hlist_node uid_node; + atomic_t refcount; uid_t uid; ax25_address call; } ax25_uid_assoc; +#define ax25_uid_for_each(__ax25, node, list) \ + hlist_for_each_entry(__ax25, node, list, uid_node) + +#define ax25_uid_hold(ax25) \ + atomic_inc(&((ax25)->refcount)) + +static inline void ax25_uid_put(ax25_uid_assoc *assoc) +{ + if (atomic_dec_and_test(&assoc->refcount)) { + kfree(assoc); + } +} + typedef struct { ax25_address calls[AX25_MAX_DIGIS]; unsigned char repeated[AX25_MAX_DIGIS]; @@ -220,7 +234,7 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } -static inline unsigned short ax25_type_trans(struct sk_buff *skb, struct net_device *dev) +static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; skb->pkt_type = PACKET_HOST; @@ -243,7 +257,7 @@ extern struct sock *ax25_make_new(struct sock *, struct ax25_dev *); /* ax25_addr.c */ extern ax25_address null_ax25_address; -extern char *ax2asc(ax25_address *); +extern char *ax2asc(char *buf, ax25_address *); extern ax25_address *asc2ax(char *); extern int ax25cmp(ax25_address *, ax25_address *); extern int ax25digicmp(ax25_digi *, ax25_digi *); @@ -302,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int); /* ax25_in.c */ extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); -extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); /* ax25_ip.c */ extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); @@ -376,7 +390,7 @@ extern unsigned long ax25_display_timer(struct timer_list *); /* ax25_uid.c */ extern int ax25_uid_policy; -extern ax25_address *ax25_findbyuid(uid_t); +extern ax25_uid_assoc *ax25_findbyuid(uid_t); extern int ax25_uid_ioctl(int, struct sockaddr_ax25 *); extern struct file_operations ax25_uid_fops; extern void ax25_uid_free(void); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 42a84c5..6dfa4a6 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -57,12 +57,6 @@ #define BT_DBG(fmt, arg...) printk(KERN_INFO "%s: " fmt "\n" , __FUNCTION__ , ## arg) #define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __FUNCTION__ , ## arg) -#ifdef HCI_DATA_DUMP -#define BT_DMP(buf, len) bt_dump(__FUNCTION__, buf, len) -#else -#define BT_DMP(D...) -#endif - extern struct proc_dir_entry *proc_bt; /* Connection and socket states */ @@ -137,11 +131,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct bt_skb_cb { - int incoming; + __u8 pkt_type; + __u8 incoming; }; #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) -static inline struct sk_buff *bt_skb_alloc(unsigned int len, int how) +static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how) { struct sk_buff *skb; @@ -174,8 +169,6 @@ static inline int skb_frags_no(struct sk_buff *skb) return n; } -void bt_dump(char *pref, __u8 *buf, int count); - int bt_err(__u16 code); #endif /* __BLUETOOTH_H */ diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6f0706f..371e7d3 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -453,6 +453,15 @@ struct inquiry_info_with_rssi { __u16 clock_offset; __s8 rssi; } __attribute__ ((packed)); +struct inquiry_info_with_rssi_and_pscan_mode { + bdaddr_t bdaddr; + __u8 pscan_rep_mode; + __u8 pscan_period_mode; + __u8 pscan_mode; + __u8 dev_class[3]; + __u16 clock_offset; + __s8 rssi; +} __attribute__ ((packed)); #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { @@ -584,6 +593,12 @@ struct hci_ev_clock_offset { __u16 clock_offset; } __attribute__ ((packed)); +#define HCI_EV_PSCAN_REP_MODE 0x20 +struct hci_ev_pscan_rep_mode { + bdaddr_t bdaddr; + __u8 pscan_rep_mode; +} __attribute__ ((packed)); + /* Internal events generated by Bluetooth stack */ #define HCI_EV_STACK_INTERNAL 0xFD struct hci_ev_stack_internal { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6d63a47..7f933f3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb) bt_cb(skb)->incoming = 1; /* Time stamp */ - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); /* Queue frame for rx task */ skb_queue_tail(&hdev->rx_q, skb); diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 13669ba..ffea9d5 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -80,9 +80,9 @@ #define RFCOMM_RPN_STOP_15 1 #define RFCOMM_RPN_PARITY_NONE 0x0 -#define RFCOMM_RPN_PARITY_ODD 0x4 -#define RFCOMM_RPN_PARITY_EVEN 0x5 -#define RFCOMM_RPN_PARITY_MARK 0x6 +#define RFCOMM_RPN_PARITY_ODD 0x1 +#define RFCOMM_RPN_PARITY_EVEN 0x3 +#define RFCOMM_RPN_PARITY_MARK 0x5 #define RFCOMM_RPN_PARITY_SPACE 0x7 #define RFCOMM_RPN_FLOW_NONE 0x00 @@ -223,8 +223,14 @@ struct rfcomm_dlc { #define RFCOMM_CFC_DISABLED 0 #define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS +/* ---- RFCOMM SEND RPN ---- */ +int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, + u8 bit_rate, u8 data_bits, u8 stop_bits, + u8 parity, u8 flow_ctrl_settings, + u8 xon_char, u8 xoff_char, u16 param_mask); + /* ---- RFCOMM DLCs (channels) ---- */ -struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); +struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio); void rfcomm_dlc_free(struct rfcomm_dlc *d); int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); diff --git a/include/net/compat.h b/include/net/compat.h index 9983fd8..482eb82 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -33,7 +33,7 @@ extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsi extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); -extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, unsigned char *, +extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsigned char *, int); #endif /* NET_COMPAT_H */ diff --git a/include/net/datalink.h b/include/net/datalink.h index 5797ba3..deb7ca7 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -9,7 +9,7 @@ struct datalink_proto { unsigned short header_length; int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, unsigned char *); struct list_head node; diff --git a/include/net/dn.h b/include/net/dn.h index 5551c46..c1dbbd2 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -3,6 +3,7 @@ #include <linux/dn.h> #include <net/sock.h> +#include <net/tcp.h> #include <asm/byteorder.h> typedef unsigned short dn_address; diff --git a/include/net/icmp.h b/include/net/icmp.h index e5ef0d15..6cdebee 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) return (struct raw_sock *)sk; } +extern int sysctl_icmp_echo_ignore_all; +extern int sysctl_icmp_echo_ignore_broadcasts; +extern int sysctl_icmp_ignore_bogus_error_responses; +extern int sysctl_icmp_errors_use_inbound_ifaddr; +extern int sysctl_icmp_ratelimit; +extern int sysctl_icmp_ratemask; + #endif /* _ICMP_H */ diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h new file mode 100644 index 0000000..dc36b1b --- /dev/null +++ b/include/net/ieee80211.h @@ -0,0 +1,855 @@ +/* + * Merged with mainline ieee80211.h in Aug 2004. Original ieee802_11 + * remains copyright by the original authors + * + * Portions of the merged code are based on Host AP (software wireless + * LAN access point) driver for Intersil Prism2/2.5/3. + * + * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen + * <jkmaline@cc.hut.fi> + * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi> + * + * Adaption to a generic IEEE 802.11 stack by James Ketrenos + * <jketreno@linux.intel.com> + * Copyright (c) 2004, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. See README and COPYING for + * more details. + */ +#ifndef IEEE80211_H +#define IEEE80211_H +#include <linux/if_ether.h> /* ETH_ALEN */ +#include <linux/kernel.h> /* ARRAY_SIZE */ +#include <linux/wireless.h> + +#define IEEE80211_DATA_LEN 2304 +/* Maximum size for the MA-UNITDATA primitive, 802.11 standard section + 6.2.1.1.2. + + The figure in section 7.1.2 suggests a body size of up to 2312 + bytes is allowed, which is a bit confusing, I suspect this + represents the 2304 bytes of real data, plus a possible 8 bytes of + WEP IV and ICV. (this interpretation suggested by Ramiro Barreiro) */ + + +#define IEEE80211_HLEN 30 +#define IEEE80211_FRAME_LEN (IEEE80211_DATA_LEN + IEEE80211_HLEN) + +struct ieee80211_hdr { + __le16 frame_ctl; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + __le16 seq_ctl; + u8 addr4[ETH_ALEN]; +} __attribute__ ((packed)); + +struct ieee80211_hdr_3addr { + __le16 frame_ctl; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + __le16 seq_ctl; +} __attribute__ ((packed)); + +#define IEEE80211_1ADDR_LEN 10 +#define IEEE80211_2ADDR_LEN 16 +#define IEEE80211_3ADDR_LEN 24 +#define IEEE80211_4ADDR_LEN 30 +#define IEEE80211_FCS_LEN 4 + +#define MIN_FRAG_THRESHOLD 256U +#define MAX_FRAG_THRESHOLD 2346U + +/* Frame control field constants */ +#define IEEE80211_FCTL_VERS 0x0003 +#define IEEE80211_FCTL_FTYPE 0x000c +#define IEEE80211_FCTL_STYPE 0x00f0 +#define IEEE80211_FCTL_TODS 0x0100 +#define IEEE80211_FCTL_FROMDS 0x0200 +#define IEEE80211_FCTL_MOREFRAGS 0x0400 +#define IEEE80211_FCTL_RETRY 0x0800 +#define IEEE80211_FCTL_PM 0x1000 +#define IEEE80211_FCTL_MOREDATA 0x2000 +#define IEEE80211_FCTL_PROTECTED 0x4000 +#define IEEE80211_FCTL_ORDER 0x8000 + +#define IEEE80211_FTYPE_MGMT 0x0000 +#define IEEE80211_FTYPE_CTL 0x0004 +#define IEEE80211_FTYPE_DATA 0x0008 + +/* management */ +#define IEEE80211_STYPE_ASSOC_REQ 0x0000 +#define IEEE80211_STYPE_ASSOC_RESP 0x0010 +#define IEEE80211_STYPE_REASSOC_REQ 0x0020 +#define IEEE80211_STYPE_REASSOC_RESP 0x0030 +#define IEEE80211_STYPE_PROBE_REQ 0x0040 +#define IEEE80211_STYPE_PROBE_RESP 0x0050 +#define IEEE80211_STYPE_BEACON 0x0080 +#define IEEE80211_STYPE_ATIM 0x0090 +#define IEEE80211_STYPE_DISASSOC 0x00A0 +#define IEEE80211_STYPE_AUTH 0x00B0 +#define IEEE80211_STYPE_DEAUTH 0x00C0 +#define IEEE80211_STYPE_ACTION 0x00D0 + +/* control */ +#define IEEE80211_STYPE_PSPOLL 0x00A0 +#define IEEE80211_STYPE_RTS 0x00B0 +#define IEEE80211_STYPE_CTS 0x00C0 +#define IEEE80211_STYPE_ACK 0x00D0 +#define IEEE80211_STYPE_CFEND 0x00E0 +#define IEEE80211_STYPE_CFENDACK 0x00F0 + +/* data */ +#define IEEE80211_STYPE_DATA 0x0000 +#define IEEE80211_STYPE_DATA_CFACK 0x0010 +#define IEEE80211_STYPE_DATA_CFPOLL 0x0020 +#define IEEE80211_STYPE_DATA_CFACKPOLL 0x0030 +#define IEEE80211_STYPE_NULLFUNC 0x0040 +#define IEEE80211_STYPE_CFACK 0x0050 +#define IEEE80211_STYPE_CFPOLL 0x0060 +#define IEEE80211_STYPE_CFACKPOLL 0x0070 + +#define IEEE80211_SCTL_FRAG 0x000F +#define IEEE80211_SCTL_SEQ 0xFFF0 + + +/* debug macros */ + +#ifdef CONFIG_IEEE80211_DEBUG +extern u32 ieee80211_debug_level; +#define IEEE80211_DEBUG(level, fmt, args...) \ +do { if (ieee80211_debug_level & (level)) \ + printk(KERN_DEBUG "ieee80211: %c %s " fmt, \ + in_interrupt() ? 'I' : 'U', __FUNCTION__ , ## args); } while (0) +#else +#define IEEE80211_DEBUG(level, fmt, args...) do {} while (0) +#endif /* CONFIG_IEEE80211_DEBUG */ + + +/* debug macros not dependent on CONFIG_IEEE80211_DEBUG */ + +#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" +#define MAC_ARG(x) ((u8*)(x))[0],((u8*)(x))[1],((u8*)(x))[2],((u8*)(x))[3],((u8*)(x))[4],((u8*)(x))[5] + +/* escape_essid() is intended to be used in debug (and possibly error) + * messages. It should never be used for passing essid to user space. */ +const char *escape_essid(const char *essid, u8 essid_len); + + +/* + * To use the debug system: + * + * If you are defining a new debug classification, simply add it to the #define + * list here in the form of: + * + * #define IEEE80211_DL_xxxx VALUE + * + * shifting value to the left one bit from the previous entry. xxxx should be + * the name of the classification (for example, WEP) + * + * You then need to either add a IEEE80211_xxxx_DEBUG() macro definition for your + * classification, or use IEEE80211_DEBUG(IEEE80211_DL_xxxx, ...) whenever you want + * to send output to that classification. + * + * To add your debug level to the list of levels seen when you perform + * + * % cat /proc/net/ieee80211/debug_level + * + * you simply need to add your entry to the ieee80211_debug_level array. + * + * If you do not see debug_level in /proc/net/ieee80211 then you do not have + * CONFIG_IEEE80211_DEBUG defined in your kernel configuration + * + */ + +#define IEEE80211_DL_INFO (1<<0) +#define IEEE80211_DL_WX (1<<1) +#define IEEE80211_DL_SCAN (1<<2) +#define IEEE80211_DL_STATE (1<<3) +#define IEEE80211_DL_MGMT (1<<4) +#define IEEE80211_DL_FRAG (1<<5) +#define IEEE80211_DL_DROP (1<<7) + +#define IEEE80211_DL_TX (1<<8) +#define IEEE80211_DL_RX (1<<9) + +#define IEEE80211_ERROR(f, a...) printk(KERN_ERR "ieee80211: " f, ## a) +#define IEEE80211_WARNING(f, a...) printk(KERN_WARNING "ieee80211: " f, ## a) +#define IEEE80211_DEBUG_INFO(f, a...) IEEE80211_DEBUG(IEEE80211_DL_INFO, f, ## a) + +#define IEEE80211_DEBUG_WX(f, a...) IEEE80211_DEBUG(IEEE80211_DL_WX, f, ## a) +#define IEEE80211_DEBUG_SCAN(f, a...) IEEE80211_DEBUG(IEEE80211_DL_SCAN, f, ## a) +#define IEEE80211_DEBUG_STATE(f, a...) IEEE80211_DEBUG(IEEE80211_DL_STATE, f, ## a) +#define IEEE80211_DEBUG_MGMT(f, a...) IEEE80211_DEBUG(IEEE80211_DL_MGMT, f, ## a) +#define IEEE80211_DEBUG_FRAG(f, a...) IEEE80211_DEBUG(IEEE80211_DL_FRAG, f, ## a) +#define IEEE80211_DEBUG_DROP(f, a...) IEEE80211_DEBUG(IEEE80211_DL_DROP, f, ## a) +#define IEEE80211_DEBUG_TX(f, a...) IEEE80211_DEBUG(IEEE80211_DL_TX, f, ## a) +#define IEEE80211_DEBUG_RX(f, a...) IEEE80211_DEBUG(IEEE80211_DL_RX, f, ## a) +#include <linux/netdevice.h> +#include <linux/wireless.h> +#include <linux/if_arp.h> /* ARPHRD_ETHER */ + +#ifndef WIRELESS_SPY +#define WIRELESS_SPY /* enable iwspy support */ +#endif +#include <net/iw_handler.h> /* new driver API */ + +#ifndef ETH_P_PAE +#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ +#endif /* ETH_P_PAE */ + +#define ETH_P_PREAUTH 0x88C7 /* IEEE 802.11i pre-authentication */ + +#ifndef ETH_P_80211_RAW +#define ETH_P_80211_RAW (ETH_P_ECONET + 1) +#endif + +/* IEEE 802.11 defines */ + +#define P80211_OUI_LEN 3 + +struct ieee80211_snap_hdr { + + u8 dsap; /* always 0xAA */ + u8 ssap; /* always 0xAA */ + u8 ctrl; /* always 0x03 */ + u8 oui[P80211_OUI_LEN]; /* organizational universal id */ + +} __attribute__ ((packed)); + +#define SNAP_SIZE sizeof(struct ieee80211_snap_hdr) + +#define WLAN_FC_GET_VERS(fc) ((fc) & IEEE80211_FCTL_VERS) +#define WLAN_FC_GET_TYPE(fc) ((fc) & IEEE80211_FCTL_FTYPE) +#define WLAN_FC_GET_STYPE(fc) ((fc) & IEEE80211_FCTL_STYPE) + +#define WLAN_GET_SEQ_FRAG(seq) ((seq) & IEEE80211_SCTL_FRAG) +#define WLAN_GET_SEQ_SEQ(seq) ((seq) & IEEE80211_SCTL_SEQ) + +/* Authentication algorithms */ +#define WLAN_AUTH_OPEN 0 +#define WLAN_AUTH_SHARED_KEY 1 + +#define WLAN_AUTH_CHALLENGE_LEN 128 + +#define WLAN_CAPABILITY_ESS (1<<0) +#define WLAN_CAPABILITY_IBSS (1<<1) +#define WLAN_CAPABILITY_CF_POLLABLE (1<<2) +#define WLAN_CAPABILITY_CF_POLL_REQUEST (1<<3) +#define WLAN_CAPABILITY_PRIVACY (1<<4) +#define WLAN_CAPABILITY_SHORT_PREAMBLE (1<<5) +#define WLAN_CAPABILITY_PBCC (1<<6) +#define WLAN_CAPABILITY_CHANNEL_AGILITY (1<<7) +#define WLAN_CAPABILITY_SPECTRUM_MGMT (1<<8) +#define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) +#define WLAN_CAPABILITY_OSSS_OFDM (1<<13) + +/* Status codes */ +enum ieee80211_statuscode { + WLAN_STATUS_SUCCESS = 0, + WLAN_STATUS_UNSPECIFIED_FAILURE = 1, + WLAN_STATUS_CAPS_UNSUPPORTED = 10, + WLAN_STATUS_REASSOC_NO_ASSOC = 11, + WLAN_STATUS_ASSOC_DENIED_UNSPEC = 12, + WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG = 13, + WLAN_STATUS_UNKNOWN_AUTH_TRANSACTION = 14, + WLAN_STATUS_CHALLENGE_FAIL = 15, + WLAN_STATUS_AUTH_TIMEOUT = 16, + WLAN_STATUS_AP_UNABLE_TO_HANDLE_NEW_STA = 17, + WLAN_STATUS_ASSOC_DENIED_RATES = 18, + /* 802.11b */ + WLAN_STATUS_ASSOC_DENIED_NOSHORTPREAMBLE = 19, + WLAN_STATUS_ASSOC_DENIED_NOPBCC = 20, + WLAN_STATUS_ASSOC_DENIED_NOAGILITY = 21, + /* 802.11h */ + WLAN_STATUS_ASSOC_DENIED_NOSPECTRUM = 22, + WLAN_STATUS_ASSOC_REJECTED_BAD_POWER = 23, + WLAN_STATUS_ASSOC_REJECTED_BAD_SUPP_CHAN = 24, + /* 802.11g */ + WLAN_STATUS_ASSOC_DENIED_NOSHORTTIME = 25, + WLAN_STATUS_ASSOC_DENIED_NODSSSOFDM = 26, + /* 802.11i */ + WLAN_STATUS_INVALID_IE = 40, + WLAN_STATUS_INVALID_GROUP_CIPHER = 41, + WLAN_STATUS_INVALID_PAIRWISE_CIPHER = 42, + WLAN_STATUS_INVALID_AKMP = 43, + WLAN_STATUS_UNSUPP_RSN_VERSION = 44, + WLAN_STATUS_INVALID_RSN_IE_CAP = 45, + WLAN_STATUS_CIPHER_SUITE_REJECTED = 46, +}; + +/* Reason codes */ +enum ieee80211_reasoncode { + WLAN_REASON_UNSPECIFIED = 1, + WLAN_REASON_PREV_AUTH_NOT_VALID = 2, + WLAN_REASON_DEAUTH_LEAVING = 3, + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY = 4, + WLAN_REASON_DISASSOC_AP_BUSY = 5, + WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA = 6, + WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA = 7, + WLAN_REASON_DISASSOC_STA_HAS_LEFT = 8, + WLAN_REASON_STA_REQ_ASSOC_WITHOUT_AUTH = 9, + /* 802.11h */ + WLAN_REASON_DISASSOC_BAD_POWER = 10, + WLAN_REASON_DISASSOC_BAD_SUPP_CHAN = 11, + /* 802.11i */ + WLAN_REASON_INVALID_IE = 13, + WLAN_REASON_MIC_FAILURE = 14, + WLAN_REASON_4WAY_HANDSHAKE_TIMEOUT = 15, + WLAN_REASON_GROUP_KEY_HANDSHAKE_TIMEOUT = 16, + WLAN_REASON_IE_DIFFERENT = 17, + WLAN_REASON_INVALID_GROUP_CIPHER = 18, + WLAN_REASON_INVALID_PAIRWISE_CIPHER = 19, + WLAN_REASON_INVALID_AKMP = 20, + WLAN_REASON_UNSUPP_RSN_VERSION = 21, + WLAN_REASON_INVALID_RSN_IE_CAP = 22, + WLAN_REASON_IEEE8021X_FAILED = 23, + WLAN_REASON_CIPHER_SUITE_REJECTED = 24, +}; + + +#define IEEE80211_STATMASK_SIGNAL (1<<0) +#define IEEE80211_STATMASK_RSSI (1<<1) +#define IEEE80211_STATMASK_NOISE (1<<2) +#define IEEE80211_STATMASK_RATE (1<<3) +#define IEEE80211_STATMASK_WEMASK 0x7 + + +#define IEEE80211_CCK_MODULATION (1<<0) +#define IEEE80211_OFDM_MODULATION (1<<1) + +#define IEEE80211_24GHZ_BAND (1<<0) +#define IEEE80211_52GHZ_BAND (1<<1) + +#define IEEE80211_CCK_RATE_1MB 0x02 +#define IEEE80211_CCK_RATE_2MB 0x04 +#define IEEE80211_CCK_RATE_5MB 0x0B +#define IEEE80211_CCK_RATE_11MB 0x16 +#define IEEE80211_OFDM_RATE_6MB 0x0C +#define IEEE80211_OFDM_RATE_9MB 0x12 +#define IEEE80211_OFDM_RATE_12MB 0x18 +#define IEEE80211_OFDM_RATE_18MB 0x24 +#define IEEE80211_OFDM_RATE_24MB 0x30 +#define IEEE80211_OFDM_RATE_36MB 0x48 +#define IEEE80211_OFDM_RATE_48MB 0x60 +#define IEEE80211_OFDM_RATE_54MB 0x6C +#define IEEE80211_BASIC_RATE_MASK 0x80 + +#define IEEE80211_CCK_RATE_1MB_MASK (1<<0) +#define IEEE80211_CCK_RATE_2MB_MASK (1<<1) +#define IEEE80211_CCK_RATE_5MB_MASK (1<<2) +#define IEEE80211_CCK_RATE_11MB_MASK (1<<3) +#define IEEE80211_OFDM_RATE_6MB_MASK (1<<4) +#define IEEE80211_OFDM_RATE_9MB_MASK (1<<5) +#define IEEE80211_OFDM_RATE_12MB_MASK (1<<6) +#define IEEE80211_OFDM_RATE_18MB_MASK (1<<7) +#define IEEE80211_OFDM_RATE_24MB_MASK (1<<8) +#define IEEE80211_OFDM_RATE_36MB_MASK (1<<9) +#define IEEE80211_OFDM_RATE_48MB_MASK (1<<10) +#define IEEE80211_OFDM_RATE_54MB_MASK (1<<11) + +#define IEEE80211_CCK_RATES_MASK 0x0000000F +#define IEEE80211_CCK_BASIC_RATES_MASK (IEEE80211_CCK_RATE_1MB_MASK | \ + IEEE80211_CCK_RATE_2MB_MASK) +#define IEEE80211_CCK_DEFAULT_RATES_MASK (IEEE80211_CCK_BASIC_RATES_MASK | \ + IEEE80211_CCK_RATE_5MB_MASK | \ + IEEE80211_CCK_RATE_11MB_MASK) + +#define IEEE80211_OFDM_RATES_MASK 0x00000FF0 +#define IEEE80211_OFDM_BASIC_RATES_MASK (IEEE80211_OFDM_RATE_6MB_MASK | \ + IEEE80211_OFDM_RATE_12MB_MASK | \ + IEEE80211_OFDM_RATE_24MB_MASK) +#define IEEE80211_OFDM_DEFAULT_RATES_MASK (IEEE80211_OFDM_BASIC_RATES_MASK | \ + IEEE80211_OFDM_RATE_9MB_MASK | \ + IEEE80211_OFDM_RATE_18MB_MASK | \ + IEEE80211_OFDM_RATE_36MB_MASK | \ + IEEE80211_OFDM_RATE_48MB_MASK | \ + IEEE80211_OFDM_RATE_54MB_MASK) +#define IEEE80211_DEFAULT_RATES_MASK (IEEE80211_OFDM_DEFAULT_RATES_MASK | \ + IEEE80211_CCK_DEFAULT_RATES_MASK) + +#define IEEE80211_NUM_OFDM_RATES 8 +#define IEEE80211_NUM_CCK_RATES 4 +#define IEEE80211_OFDM_SHIFT_MASK_A 4 + + + + +/* NOTE: This data is for statistical purposes; not all hardware provides this + * information for frames received. Not setting these will not cause + * any adverse affects. */ +struct ieee80211_rx_stats { + u32 mac_time; + s8 rssi; + u8 signal; + u8 noise; + u16 rate; /* in 100 kbps */ + u8 received_channel; + u8 control; + u8 mask; + u8 freq; + u16 len; +}; + +/* IEEE 802.11 requires that STA supports concurrent reception of at least + * three fragmented frames. This define can be increased to support more + * concurrent frames, but it should be noted that each entry can consume about + * 2 kB of RAM and increasing cache size will slow down frame reassembly. */ +#define IEEE80211_FRAG_CACHE_LEN 4 + +struct ieee80211_frag_entry { + unsigned long first_frag_time; + unsigned int seq; + unsigned int last_frag; + struct sk_buff *skb; + u8 src_addr[ETH_ALEN]; + u8 dst_addr[ETH_ALEN]; +}; + +struct ieee80211_stats { + unsigned int tx_unicast_frames; + unsigned int tx_multicast_frames; + unsigned int tx_fragments; + unsigned int tx_unicast_octets; + unsigned int tx_multicast_octets; + unsigned int tx_deferred_transmissions; + unsigned int tx_single_retry_frames; + unsigned int tx_multiple_retry_frames; + unsigned int tx_retry_limit_exceeded; + unsigned int tx_discards; + unsigned int rx_unicast_frames; + unsigned int rx_multicast_frames; + unsigned int rx_fragments; + unsigned int rx_unicast_octets; + unsigned int rx_multicast_octets; + unsigned int rx_fcs_errors; + unsigned int rx_discards_no_buffer; + unsigned int tx_discards_wrong_sa; + unsigned int rx_discards_undecryptable; + unsigned int rx_message_in_msg_fragments; + unsigned int rx_message_in_bad_msg_fragments; +}; + +struct ieee80211_device; + +#include "ieee80211_crypt.h" + +#define SEC_KEY_1 (1<<0) +#define SEC_KEY_2 (1<<1) +#define SEC_KEY_3 (1<<2) +#define SEC_KEY_4 (1<<3) +#define SEC_ACTIVE_KEY (1<<4) +#define SEC_AUTH_MODE (1<<5) +#define SEC_UNICAST_GROUP (1<<6) +#define SEC_LEVEL (1<<7) +#define SEC_ENABLED (1<<8) + +#define SEC_LEVEL_0 0 /* None */ +#define SEC_LEVEL_1 1 /* WEP 40 and 104 bit */ +#define SEC_LEVEL_2 2 /* Level 1 + TKIP */ +#define SEC_LEVEL_2_CKIP 3 /* Level 1 + CKIP */ +#define SEC_LEVEL_3 4 /* Level 2 + CCMP */ + +#define WEP_KEYS 4 +#define WEP_KEY_LEN 13 + +struct ieee80211_security { + u16 active_key:2, + enabled:1, + auth_mode:2, + auth_algo:4, + unicast_uses_group:1; + u8 key_sizes[WEP_KEYS]; + u8 keys[WEP_KEYS][WEP_KEY_LEN]; + u8 level; + u16 flags; +} __attribute__ ((packed)); + + +/* + + 802.11 data frame from AP + + ,-------------------------------------------------------------------. +Bytes | 2 | 2 | 6 | 6 | 6 | 2 | 0..2312 | 4 | + |------|------|---------|---------|---------|------|---------|------| +Desc. | ctrl | dura | DA/RA | TA | SA | Sequ | frame | fcs | + | | tion | (BSSID) | | | ence | data | | + `-------------------------------------------------------------------' + +Total: 28-2340 bytes + +*/ + +#define BEACON_PROBE_SSID_ID_POSITION 12 + +/* Management Frame Information Element Types */ +enum ieee80211_mfie { + MFIE_TYPE_SSID = 0, + MFIE_TYPE_RATES = 1, + MFIE_TYPE_FH_SET = 2, + MFIE_TYPE_DS_SET = 3, + MFIE_TYPE_CF_SET = 4, + MFIE_TYPE_TIM = 5, + MFIE_TYPE_IBSS_SET = 6, + MFIE_TYPE_COUNTRY = 7, + MFIE_TYPE_HOP_PARAMS = 8, + MFIE_TYPE_HOP_TABLE = 9, + MFIE_TYPE_REQUEST = 10, + MFIE_TYPE_CHALLENGE = 16, + MFIE_TYPE_POWER_CONSTRAINT = 32, + MFIE_TYPE_POWER_CAPABILITY = 33, + MFIE_TYPE_TPC_REQUEST = 34, + MFIE_TYPE_TPC_REPORT = 35, + MFIE_TYPE_SUPP_CHANNELS = 36, + MFIE_TYPE_CSA = 37, + MFIE_TYPE_MEASURE_REQUEST = 38, + MFIE_TYPE_MEASURE_REPORT = 39, + MFIE_TYPE_QUIET = 40, + MFIE_TYPE_IBSS_DFS = 41, + MFIE_TYPE_ERP_INFO = 42, + MFIE_TYPE_RSN = 48, + MFIE_TYPE_RATES_EX = 50, + MFIE_TYPE_GENERIC = 221, +}; + +struct ieee80211_info_element_hdr { + u8 id; + u8 len; +} __attribute__ ((packed)); + +struct ieee80211_info_element { + u8 id; + u8 len; + u8 data[0]; +} __attribute__ ((packed)); + +/* + * These are the data types that can make up management packets + * + u16 auth_algorithm; + u16 auth_sequence; + u16 beacon_interval; + u16 capability; + u8 current_ap[ETH_ALEN]; + u16 listen_interval; + struct { + u16 association_id:14, reserved:2; + } __attribute__ ((packed)); + u32 time_stamp[2]; + u16 reason; + u16 status; +*/ + +struct ieee80211_authentication { + struct ieee80211_hdr_3addr header; + __le16 algorithm; + __le16 transaction; + __le16 status; + struct ieee80211_info_element info_element; +} __attribute__ ((packed)); + + +struct ieee80211_probe_response { + struct ieee80211_hdr_3addr header; + u32 time_stamp[2]; + __le16 beacon_interval; + __le16 capability; + struct ieee80211_info_element info_element; +} __attribute__ ((packed)); + +struct ieee80211_assoc_request_frame { + __le16 capability; + __le16 listen_interval; + u8 current_ap[ETH_ALEN]; + struct ieee80211_info_element info_element; +} __attribute__ ((packed)); + +struct ieee80211_assoc_response_frame { + struct ieee80211_hdr_3addr header; + __le16 capability; + __le16 status; + __le16 aid; + struct ieee80211_info_element info_element; /* supported rates */ +} __attribute__ ((packed)); + + +struct ieee80211_txb { + u8 nr_frags; + u8 encrypted; + u16 reserved; + u16 frag_size; + u16 payload_size; + struct sk_buff *fragments[0]; +}; + + +/* SWEEP TABLE ENTRIES NUMBER */ +#define MAX_SWEEP_TAB_ENTRIES 42 +#define MAX_SWEEP_TAB_ENTRIES_PER_PACKET 7 +/* MAX_RATES_LENGTH needs to be 12. The spec says 8, and many APs + * only use 8, and then use extended rates for the remaining supported + * rates. Other APs, however, stick all of their supported rates on the + * main rates information element... */ +#define MAX_RATES_LENGTH ((u8)12) +#define MAX_RATES_EX_LENGTH ((u8)16) +#define MAX_NETWORK_COUNT 128 + +#define CRC_LENGTH 4U + +#define MAX_WPA_IE_LEN 64 + +#define NETWORK_EMPTY_ESSID (1<<0) +#define NETWORK_HAS_OFDM (1<<1) +#define NETWORK_HAS_CCK (1<<2) + +struct ieee80211_network { + /* These entries are used to identify a unique network */ + u8 bssid[ETH_ALEN]; + u8 channel; + /* Ensure null-terminated for any debug msgs */ + u8 ssid[IW_ESSID_MAX_SIZE + 1]; + u8 ssid_len; + + /* These are network statistics */ + struct ieee80211_rx_stats stats; + u16 capability; + u8 rates[MAX_RATES_LENGTH]; + u8 rates_len; + u8 rates_ex[MAX_RATES_EX_LENGTH]; + u8 rates_ex_len; + unsigned long last_scanned; + u8 mode; + u8 flags; + u32 last_associate; + u32 time_stamp[2]; + u16 beacon_interval; + u16 listen_interval; + u16 atim_window; + u8 wpa_ie[MAX_WPA_IE_LEN]; + size_t wpa_ie_len; + u8 rsn_ie[MAX_WPA_IE_LEN]; + size_t rsn_ie_len; + struct list_head list; +}; + +enum ieee80211_state { + IEEE80211_UNINITIALIZED = 0, + IEEE80211_INITIALIZED, + IEEE80211_ASSOCIATING, + IEEE80211_ASSOCIATED, + IEEE80211_AUTHENTICATING, + IEEE80211_AUTHENTICATED, + IEEE80211_SHUTDOWN +}; + +#define DEFAULT_MAX_SCAN_AGE (15 * HZ) +#define DEFAULT_FTS 2346 + + +#define CFG_IEEE80211_RESERVE_FCS (1<<0) +#define CFG_IEEE80211_COMPUTE_FCS (1<<1) + +struct ieee80211_device { + struct net_device *dev; + + /* Bookkeeping structures */ + struct net_device_stats stats; + struct ieee80211_stats ieee_stats; + + /* Probe / Beacon management */ + struct list_head network_free_list; + struct list_head network_list; + struct ieee80211_network *networks; + int scans; + int scan_age; + + int iw_mode; /* operating mode (IW_MODE_*) */ + + spinlock_t lock; + + int tx_headroom; /* Set to size of any additional room needed at front + * of allocated Tx SKBs */ + u32 config; + + /* WEP and other encryption related settings at the device level */ + int open_wep; /* Set to 1 to allow unencrypted frames */ + + int reset_on_keychange; /* Set to 1 if the HW needs to be reset on + * WEP key changes */ + + /* If the host performs {en,de}cryption, then set to 1 */ + int host_encrypt; + int host_decrypt; + int ieee802_1x; /* is IEEE 802.1X used */ + + /* WPA data */ + int wpa_enabled; + int drop_unencrypted; + int tkip_countermeasures; + int privacy_invoked; + size_t wpa_ie_len; + u8 *wpa_ie; + + struct list_head crypt_deinit_list; + struct ieee80211_crypt_data *crypt[WEP_KEYS]; + int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ + struct timer_list crypt_deinit_timer; + + int bcrx_sta_key; /* use individual keys to override default keys even + * with RX of broad/multicast frames */ + + /* Fragmentation structures */ + struct ieee80211_frag_entry frag_cache[IEEE80211_FRAG_CACHE_LEN]; + unsigned int frag_next_idx; + u16 fts; /* Fragmentation Threshold */ + + /* Association info */ + u8 bssid[ETH_ALEN]; + + enum ieee80211_state state; + + int mode; /* A, B, G */ + int modulation; /* CCK, OFDM */ + int freq_band; /* 2.4Ghz, 5.2Ghz, Mixed */ + int abg_ture; /* ABG flag */ + + /* Callback functions */ + void (*set_security)(struct net_device *dev, + struct ieee80211_security *sec); + int (*hard_start_xmit)(struct ieee80211_txb *txb, + struct net_device *dev); + int (*reset_port)(struct net_device *dev); + + /* This must be the last item so that it points to the data + * allocated beyond this structure by alloc_ieee80211 */ + u8 priv[0]; +}; + +#define IEEE_A (1<<0) +#define IEEE_B (1<<1) +#define IEEE_G (1<<2) +#define IEEE_MODE_MASK (IEEE_A|IEEE_B|IEEE_G) + +extern inline void *ieee80211_priv(struct net_device *dev) +{ + return ((struct ieee80211_device *)netdev_priv(dev))->priv; +} + +extern inline int ieee80211_is_empty_essid(const char *essid, int essid_len) +{ + /* Single white space is for Linksys APs */ + if (essid_len == 1 && essid[0] == ' ') + return 1; + + /* Otherwise, if the entire essid is 0, we assume it is hidden */ + while (essid_len) { + essid_len--; + if (essid[essid_len] != '\0') + return 0; + } + + return 1; +} + +extern inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee, int mode) +{ + /* + * It is possible for both access points and our device to support + * combinations of modes, so as long as there is one valid combination + * of ap/device supported modes, then return success + * + */ + if ((mode & IEEE_A) && + (ieee->modulation & IEEE80211_OFDM_MODULATION) && + (ieee->freq_band & IEEE80211_52GHZ_BAND)) + return 1; + + if ((mode & IEEE_G) && + (ieee->modulation & IEEE80211_OFDM_MODULATION) && + (ieee->freq_band & IEEE80211_24GHZ_BAND)) + return 1; + + if ((mode & IEEE_B) && + (ieee->modulation & IEEE80211_CCK_MODULATION) && + (ieee->freq_band & IEEE80211_24GHZ_BAND)) + return 1; + + return 0; +} + +extern inline int ieee80211_get_hdrlen(u16 fc) +{ + int hdrlen = IEEE80211_3ADDR_LEN; + + switch (WLAN_FC_GET_TYPE(fc)) { + case IEEE80211_FTYPE_DATA: + if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) + hdrlen = IEEE80211_4ADDR_LEN; + break; + case IEEE80211_FTYPE_CTL: + switch (WLAN_FC_GET_STYPE(fc)) { + case IEEE80211_STYPE_CTS: + case IEEE80211_STYPE_ACK: + hdrlen = IEEE80211_1ADDR_LEN; + break; + default: + hdrlen = IEEE80211_2ADDR_LEN; + break; + } + break; + } + + return hdrlen; +} + + + +/* ieee80211.c */ +extern void free_ieee80211(struct net_device *dev); +extern struct net_device *alloc_ieee80211(int sizeof_priv); + +extern int ieee80211_set_encryption(struct ieee80211_device *ieee); + +/* ieee80211_tx.c */ +extern int ieee80211_xmit(struct sk_buff *skb, + struct net_device *dev); +extern void ieee80211_txb_free(struct ieee80211_txb *); + + +/* ieee80211_rx.c */ +extern int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, + struct ieee80211_rx_stats *rx_stats); +extern void ieee80211_rx_mgt(struct ieee80211_device *ieee, + struct ieee80211_hdr *header, + struct ieee80211_rx_stats *stats); + +/* ieee80211_wx.c */ +extern int ieee80211_wx_get_scan(struct ieee80211_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *key); +extern int ieee80211_wx_set_encode(struct ieee80211_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *key); +extern int ieee80211_wx_get_encode(struct ieee80211_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *key); + + +extern inline void ieee80211_increment_scans(struct ieee80211_device *ieee) +{ + ieee->scans++; +} + +extern inline int ieee80211_get_scans(struct ieee80211_device *ieee) +{ + return ieee->scans; +} + + +#endif /* IEEE80211_H */ diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h new file mode 100644 index 0000000..b58a3bc --- /dev/null +++ b/include/net/ieee80211_crypt.h @@ -0,0 +1,86 @@ +/* + * Original code based on Host AP (software wireless LAN access point) driver + * for Intersil Prism2/2.5/3. + * + * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen + * <jkmaline@cc.hut.fi> + * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi> + * + * Adaption to a generic IEEE 802.11 stack by James Ketrenos + * <jketreno@linux.intel.com> + * + * Copyright (c) 2004, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. See README and COPYING for + * more details. + */ + +/* + * This file defines the interface to the ieee80211 crypto module. + */ +#ifndef IEEE80211_CRYPT_H +#define IEEE80211_CRYPT_H + +#include <linux/skbuff.h> + +struct ieee80211_crypto_ops { + const char *name; + + /* init new crypto context (e.g., allocate private data space, + * select IV, etc.); returns NULL on failure or pointer to allocated + * private data on success */ + void * (*init)(int keyidx); + + /* deinitialize crypto context and free allocated private data */ + void (*deinit)(void *priv); + + /* encrypt/decrypt return < 0 on error or >= 0 on success. The return + * value from decrypt_mpdu is passed as the keyidx value for + * decrypt_msdu. skb must have enough head and tail room for the + * encryption; if not, error will be returned; these functions are + * called for all MPDUs (i.e., fragments). + */ + int (*encrypt_mpdu)(struct sk_buff *skb, int hdr_len, void *priv); + int (*decrypt_mpdu)(struct sk_buff *skb, int hdr_len, void *priv); + + /* These functions are called for full MSDUs, i.e. full frames. + * These can be NULL if full MSDU operations are not needed. */ + int (*encrypt_msdu)(struct sk_buff *skb, int hdr_len, void *priv); + int (*decrypt_msdu)(struct sk_buff *skb, int keyidx, int hdr_len, + void *priv); + + int (*set_key)(void *key, int len, u8 *seq, void *priv); + int (*get_key)(void *key, int len, u8 *seq, void *priv); + + /* procfs handler for printing out key information and possible + * statistics */ + char * (*print_stats)(char *p, void *priv); + + /* maximum number of bytes added by encryption; encrypt buf is + * allocated with extra_prefix_len bytes, copy of in_buf, and + * extra_postfix_len; encrypt need not use all this space, but + * the result must start at the beginning of the buffer and correct + * length must be returned */ + int extra_prefix_len, extra_postfix_len; + + struct module *owner; +}; + +struct ieee80211_crypt_data { + struct list_head list; /* delayed deletion list */ + struct ieee80211_crypto_ops *ops; + void *priv; + atomic_t refcnt; +}; + +int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops); +int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops); +struct ieee80211_crypto_ops * ieee80211_get_crypto_ops(const char *name); +void ieee80211_crypt_deinit_entries(struct ieee80211_device *, int); +void ieee80211_crypt_deinit_handler(unsigned long); +void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee, + struct ieee80211_crypt_data **crypt); + +#endif diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h new file mode 100644 index 0000000..03df3b1 --- /dev/null +++ b/include/net/inet6_hashtables.h @@ -0,0 +1,130 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _INET6_HASHTABLES_H +#define _INET6_HASHTABLES_H + +#include <linux/config.h> + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include <linux/in6.h> +#include <linux/ipv6.h> +#include <linux/types.h> + +#include <net/ipv6.h> + +struct inet_hashinfo; + +/* I have no idea if this is a good hash for v6 or not. -DaveM */ +static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport, + const int ehash_size) +{ + int hashent = (lport ^ fport); + + hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); + hashent ^= hashent >> 16; + hashent ^= hashent >> 8; + return (hashent & (ehash_size - 1)); +} + +static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *laddr = &np->rcv_saddr; + const struct in6_addr *faddr = &np->daddr; + const __u16 lport = inet->num; + const __u16 fport = inet->dport; + return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); +} + +/* + * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so + * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM + * + * The sockhash lock must be held as a reader here. + */ +static inline struct sock * + __inet6_lookup_established(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk; + const struct hlist_node *node; + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, + hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { + /* For IPV6 do the cheaper port and family tests first. */ + if (INET6_MATCH(sk, saddr, daddr, ports, dif)) + goto hit; /* You sunk my battleship! */ + } + /* Must check for a TIME_WAIT'er before going to listener hash. */ + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + const struct inet_timewait_sock *tw = inet_twsk(sk); + + if(*((__u32 *)&(tw->tw_dport)) == ports && + sk->sk_family == PF_INET6) { + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + + if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) + goto hit; + } + } + read_unlock(&head->lock); + return NULL; + +hit: + sock_hold(sk); + read_unlock(&head->lock); + return sk; +} + +extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, + const int dif); + +static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, + daddr, hnum, dif); + if (sk) + return sk; + + return inet6_lookup_listener(hashinfo, daddr, hnum, dif); +} + +extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif); +#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ +#endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index fbc1f4d..f943306 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -8,6 +8,11 @@ extern struct proto_ops inet_dgram_ops; * INET4 prototypes used by INET6 */ +struct msghdr; +struct sock; +struct sockaddr; +struct socket; + extern void inet_remove_sock(struct sock *sk1); extern void inet_put_sock(unsigned short num, struct sock *sk); @@ -29,7 +34,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p extern int inet_listen(struct socket *sock, int backlog); extern void inet_sock_destruct(struct sock *sk); -extern atomic_t inet_sock_nr; extern int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h new file mode 100644 index 0000000..651f824 --- /dev/null +++ b/include/net/inet_connection_sock.h @@ -0,0 +1,276 @@ +/* + * NET Generic infrastructure for INET connection oriented protocols. + * + * Definitions for inet_connection_sock + * + * Authors: Many people, see the TCP sources + * + * From code originally in TCP + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_CONNECTION_SOCK_H +#define _INET_CONNECTION_SOCK_H + +#include <linux/ip.h> +#include <linux/string.h> +#include <linux/timer.h> +#include <net/request_sock.h> + +#define INET_CSK_DEBUG 1 + +/* Cancel timers, when they are not required. */ +#undef INET_CSK_CLEAR_TIMERS + +struct inet_bind_bucket; +struct inet_hashinfo; +struct tcp_congestion_ops; + +/** inet_connection_sock - INET connection oriented sock + * + * @icsk_accept_queue: FIFO of established children + * @icsk_bind_hash: Bind node + * @icsk_timeout: Timeout + * @icsk_retransmit_timer: Resend (no ack) + * @icsk_rto: Retransmit timeout + * @icsk_ca_ops Pluggable congestion control hook + * @icsk_ca_state: Congestion control state + * @icsk_retransmits: Number of unrecovered [RTO] timeouts + * @icsk_pending: Scheduled timer event + * @icsk_backoff: Backoff + * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_probes_out: unanswered 0 window probes + * @icsk_ack: Delayed ACK control data + */ +struct inet_connection_sock { + /* inet_sock has to be the first member! */ + struct inet_sock icsk_inet; + struct request_sock_queue icsk_accept_queue; + struct inet_bind_bucket *icsk_bind_hash; + unsigned long icsk_timeout; + struct timer_list icsk_retransmit_timer; + struct timer_list icsk_delack_timer; + __u32 icsk_rto; + struct tcp_congestion_ops *icsk_ca_ops; + __u8 icsk_ca_state; + __u8 icsk_retransmits; + __u8 icsk_pending; + __u8 icsk_backoff; + __u8 icsk_syn_retries; + __u8 icsk_probes_out; + /* 2 BYTES HOLE, TRY TO PACK! */ + struct { + __u8 pending; /* ACK is pending */ + __u8 quick; /* Scheduled number of quick acks */ + __u8 pingpong; /* The session is interactive */ + __u8 blocked; /* Delayed ACK was blocked by socket lock */ + __u32 ato; /* Predicted tick of soft clock */ + unsigned long timeout; /* Currently scheduled timeout */ + __u32 lrcvtime; /* timestamp of last received data packet */ + __u16 last_seg_size; /* Size of last incoming segment */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + } icsk_ack; + u32 icsk_ca_priv[16]; +#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) +}; + +#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ +#define ICSK_TIME_DACK 2 /* Delayed ack timer */ +#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ + +static inline struct inet_connection_sock *inet_csk(const struct sock *sk) +{ + return (struct inet_connection_sock *)sk; +} + +static inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + +extern struct sock *inet_csk_clone(struct sock *sk, + const struct request_sock *req, + const unsigned int __nocast priority); + +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4 +}; + +extern void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)); +extern void inet_csk_clear_xmit_timers(struct sock *sk); + +static inline void inet_csk_schedule_ack(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; +} + +static inline int inet_csk_ack_scheduled(const struct sock *sk) +{ + return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; +} + +static inline void inet_csk_delack_init(struct sock *sk) +{ + memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); +} + +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); + +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; +#endif + +static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); +#endif + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_delack_timer); +#endif + } +#ifdef INET_CSK_DEBUG + else { + pr_debug("%s", inet_csk_timer_bug_msg); + } +#endif +} + +/* + * Reset the retransmission timer + */ +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, + unsigned long when, + const unsigned long max_when) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (when > max_when) { +#ifdef INET_CSK_DEBUG + pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + sk, what, when, current_text_addr()); +#endif + when = max_when; + } + + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = what; + icsk->icsk_timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug("%s", inet_csk_timer_bug_msg); + } +#endif +} + +extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); + +extern struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const __u32 raddr, + const __u32 laddr); +extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum); + +extern struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req); + +static inline void inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) +{ + reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); +} + +extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned timeout); + +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, + struct request_sock *req) +{ + if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) + inet_csk_delete_keepalive_timer(sk); +} + +static inline void inet_csk_reqsk_queue_added(struct sock *sk, + const unsigned long timeout) +{ + if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) + inet_csk_reset_keepalive_timer(sk, timeout); +} + +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) +{ + return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) +{ + return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) +{ + return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); +} + +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) +{ + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); +} + +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) +{ + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); + reqsk_free(req); +} + +extern void inet_csk_reqsk_queue_prune(struct sock *parent, + const unsigned long interval, + const unsigned long timeout, + const unsigned long max_rto); + +extern void inet_csk_destroy_sock(struct sock *sk); + +/* + * LISTEN is a special case for poll.. + */ +static inline unsigned int inet_csk_listen_poll(const struct sock *sk) +{ + return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? + (POLLIN | POLLRDNORM) : 0; +} + +extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); +extern void inet_csk_listen_stop(struct sock *sk); + +#endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h new file mode 100644 index 0000000..646b6ea --- /dev/null +++ b/include/net/inet_hashtables.h @@ -0,0 +1,427 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _INET_HASHTABLES_H +#define _INET_HASHTABLES_H + +#include <linux/config.h> + +#include <linux/interrupt.h> +#include <linux/ipv6.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/socket.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/wait.h> + +#include <net/inet_connection_sock.h> +#include <net/route.h> +#include <net/sock.h> +#include <net/tcp_states.h> + +#include <asm/atomic.h> +#include <asm/byteorder.h> + +/* This is for all connections with a full identity, no wildcards. + * New scheme, half the table is for TIME_WAIT, the other half is + * for the rest. I'll experiment with dynamic table growth later. + */ +struct inet_ehash_bucket { + rwlock_t lock; + struct hlist_head chain; +} __attribute__((__aligned__(8))); + +/* There are a few simple rules, which allow for local port reuse by + * an application. In essence: + * + * 1) Sockets bound to different interfaces may share a local port. + * Failing that, goto test 2. + * 2) If all sockets have sk->sk_reuse set, and none of them are in + * TCP_LISTEN state, the port may be shared. + * Failing that, goto test 3. + * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local + * address, and none of them are the same, the port may be + * shared. + * Failing this, the port cannot be shared. + * + * The interesting point, is test #2. This is what an FTP server does + * all day. To optimize this case we use a specific flag bit defined + * below. As we add sockets to a bind bucket list, we perform a + * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) + * As long as all sockets added to a bind bucket pass this test, + * the flag bit will be set. + * The resulting situation is that tcp_v[46]_verify_bind() can just check + * for this flag bit, if it is set and the socket trying to bind has + * sk->sk_reuse set, we don't even have to walk the owners list at all, + * we return that it is ok to bind this socket to the requested local port. + * + * Sounds like a lot of work, but it is worth it. In a more naive + * implementation (ie. current FreeBSD etc.) the entire list of ports + * must be walked for each data port opened by an ftp server. Needless + * to say, this does not scale at all. With a couple thousand FTP + * users logged onto your box, isn't it nice to know that new data + * ports are created in O(1) time? I thought so. ;-) -DaveM + */ +struct inet_bind_bucket { + unsigned short port; + signed short fastreuse; + struct hlist_node node; + struct hlist_head owners; +}; + +#define inet_bind_bucket_for_each(tb, node, head) \ + hlist_for_each_entry(tb, node, head, node) + +struct inet_bind_hashbucket { + spinlock_t lock; + struct hlist_head chain; +}; + +/* This is for listening sockets, thus all sockets which possess wildcards. */ +#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ + +struct inet_hashinfo { + /* This is for sockets with full identity only. Sockets here will + * always be without wildcards and will have the following invariant: + * + * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE + * + * First half of the table is for sockets not in TIME_WAIT, second half + * is for TIME_WAIT sockets only. + */ + struct inet_ehash_bucket *ehash; + + /* Ok, let's try this, I give up, we do need a local binding + * TCP hash as well as the others for fast bind/connect. + */ + struct inet_bind_hashbucket *bhash; + + int bhash_size; + int ehash_size; + + /* All sockets in TCP_LISTEN state will be in here. This is the only + * table where wildcard'd TCP sockets can exist. Hash function here + * is just local port number. + */ + struct hlist_head listening_hash[INET_LHTABLE_SIZE]; + + /* All the above members are written once at bootup and + * never written again _or_ are predominantly read-access. + * + * Now align to a new cache line as all the following members + * are often dirty. + */ + rwlock_t lhash_lock ____cacheline_aligned; + atomic_t lhash_users; + wait_queue_head_t lhash_wait; + spinlock_t portalloc_lock; + kmem_cache_t *bind_bucket_cachep; + int port_rover; +}; + +static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport, + const int ehash_size) +{ + int h = (laddr ^ lport) ^ (faddr ^ fport); + h ^= h >> 16; + h ^= h >> 8; + return h & (ehash_size - 1); +} + +static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) +{ + const struct inet_sock *inet = inet_sk(sk); + const __u32 laddr = inet->rcv_saddr; + const __u16 lport = inet->num; + const __u32 faddr = inet->daddr; + const __u16 fport = inet->dport; + + return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); +} + +extern struct inet_bind_bucket * + inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum); +extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, + struct inet_bind_bucket *tb); + +static inline int inet_bhashfn(const __u16 lport, const int bhash_size) +{ + return lport & (bhash_size - 1); +} + +extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum); + +/* These can have wildcards, don't try too hard. */ +static inline int inet_lhashfn(const unsigned short num) +{ + return num & (INET_LHTABLE_SIZE - 1); +} + +static inline int inet_sk_listen_hashfn(const struct sock *sk) +{ + return inet_lhashfn(inet_sk(sk)->num); +} + +/* Caller must disable local BH processing. */ +static inline void __inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) +{ + const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + struct inet_bind_hashbucket *head = &table->bhash[bhash]; + struct inet_bind_bucket *tb; + + spin_lock(&head->lock); + tb = inet_csk(sk)->icsk_bind_hash; + sk_add_bind_node(child, &tb->owners); + inet_csk(child)->icsk_bind_hash = tb; + spin_unlock(&head->lock); +} + +static inline void inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) +{ + local_bh_disable(); + __inet_inherit_port(table, sk, child); + local_bh_enable(); +} + +extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); + +extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); + +/* + * - We may sleep inside this lock. + * - If sleeping is not required (or called from BH), + * use plain read_(un)lock(&inet_hashinfo.lhash_lock). + */ +static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) +{ + /* read_lock synchronizes to candidates to writers */ + read_lock(&hashinfo->lhash_lock); + atomic_inc(&hashinfo->lhash_users); + read_unlock(&hashinfo->lhash_lock); +} + +static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) +{ + if (atomic_dec_and_test(&hashinfo->lhash_users)) + wake_up(&hashinfo->lhash_wait); +} + +static inline void __inet_hash(struct inet_hashinfo *hashinfo, + struct sock *sk, const int listen_possible) +{ + struct hlist_head *list; + rwlock_t *lock; + + BUG_TRAP(sk_unhashed(sk)); + if (listen_possible && sk->sk_state == TCP_LISTEN) { + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &hashinfo->lhash_lock; + inet_listen_wlock(hashinfo); + } else { + sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); + list = &hashinfo->ehash[sk->sk_hashent].chain; + lock = &hashinfo->ehash[sk->sk_hashent].lock; + write_lock(lock); + } + __sk_add_node(sk, list); + sock_prot_inc_use(sk->sk_prot); + write_unlock(lock); + if (listen_possible && sk->sk_state == TCP_LISTEN) + wake_up(&hashinfo->lhash_wait); +} + +static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + if (sk->sk_state != TCP_CLOSE) { + local_bh_disable(); + __inet_hash(hashinfo, sk, 1); + local_bh_enable(); + } +} + +static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + rwlock_t *lock; + + if (sk_unhashed(sk)) + goto out; + + if (sk->sk_state == TCP_LISTEN) { + local_bh_disable(); + inet_listen_wlock(hashinfo); + lock = &hashinfo->lhash_lock; + } else { + struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent]; + lock = &head->lock; + write_lock_bh(&head->lock); + } + + if (__sk_del_node_init(sk)) + sock_prot_dec_use(sk->sk_prot); + write_unlock_bh(lock); +out: + if (sk->sk_state == TCP_LISTEN) + wake_up(&hashinfo->lhash_wait); +} + +static inline int inet_iif(const struct sk_buff *skb) +{ + return ((struct rtable *)skb->dst)->rt_iif; +} + +extern struct sock *__inet_lookup_listener(const struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif); + +/* Optimize the common listener case. */ +static inline struct sock * + inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, + const unsigned short hnum, const int dif) +{ + struct sock *sk = NULL; + const struct hlist_head *head; + + read_lock(&hashinfo->lhash_lock); + head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; + sk = __inet_lookup_listener(head, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&hashinfo->lhash_lock); + return sk; +} + +/* Socket demux engine toys. */ +#ifdef __BIG_ENDIAN +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__sport) << 16) | (__u32)(__dport)) +#else /* __LITTLE_ENDIAN */ +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__dport) << 16) | (__u32)(__sport)) +#endif + +#if (BITS_PER_LONG == 64) +#ifdef __BIG_ENDIAN +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); +#else /* __LITTLE_ENDIAN */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); +#endif /* __BIG_ENDIAN */ +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#else /* 32-bit arch */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ + (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#endif /* 64-bit arch */ + +/* + * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need + * not check it for lookups anymore, thanks Alexey. -DaveM + * + * Local BH must be disabled here. + */ +static inline struct sock * + __inet_lookup_established(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 hnum, + const int dif) +{ + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + struct sock *sk; + const struct hlist_node *node; + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { + if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; /* You sunk my battleship! */ + } + + /* Must check for a TIME_WAIT'er before going to listener hash. */ + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; + } + sk = NULL; +out: + read_unlock(&head->lock); + return sk; +hit: + sock_hold(sk); + goto out; +} + +static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 hnum, + const int dif) +{ + struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, + hnum, dif); + return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif); +} + +static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 dport, + const int dif) +{ + struct sock *sk; + + local_bh_disable(); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + local_bh_enable(); + + return sk; +} +#endif /* _INET_HASHTABLES_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h new file mode 100644 index 0000000..3b07035 --- /dev/null +++ b/include/net/inet_timewait_sock.h @@ -0,0 +1,219 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for a generic INET TIMEWAIT sock + * + * From code originally in net/tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_TIMEWAIT_SOCK_ +#define _INET_TIMEWAIT_SOCK_ + +#include <linux/config.h> + +#include <linux/ip.h> +#include <linux/list.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include <net/sock.h> +#include <net/tcp_states.h> + +#include <asm/atomic.h> + +struct inet_hashinfo; + +#define INET_TWDR_RECYCLE_SLOTS_LOG 5 +#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) + +/* + * If time > 4sec, it is "slow" path, no recycling is required, + * so that we select tick to get range about 4 seconds. + */ +#if HZ <= 16 || HZ > 4096 +# error Unsupported: HZ <= 16 or HZ > 4096 +#elif HZ <= 32 +# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 64 +# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 128 +# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 256 +# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 512 +# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 1024 +# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 2048 +# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#else +# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#endif + +/* TIME_WAIT reaping mechanism. */ +#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ + +#define INET_TWDR_TWKILL_QUOTA 100 + +struct inet_timewait_death_row { + /* Short-time timewait calendar */ + int twcal_hand; + int twcal_jiffie; + struct timer_list twcal_timer; + struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; + + spinlock_t death_lock; + int tw_count; + int period; + u32 thread_slots; + struct work_struct twkill_work; + struct timer_list tw_timer; + int slot; + struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; + struct inet_hashinfo *hashinfo; + int sysctl_tw_recycle; + int sysctl_max_tw_buckets; +}; + +extern void inet_twdr_hangman(unsigned long data); +extern void inet_twdr_twkill_work(void *data); +extern void inet_twdr_twcal_tick(unsigned long data); + +#if (BITS_PER_LONG == 64) +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 +#else +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 +#endif + +struct inet_bind_bucket; + +/* + * This is a TIME_WAIT sock. It works around the memory consumption + * problems of sockets in such a state on heavily loaded servers, but + * without violating the protocol specification. + */ +struct inet_timewait_sock { + /* + * Now struct sock also uses sock_common, so please just + * don't add nothing before this first member (__tw_common) --acme + */ + struct sock_common __tw_common; +#define tw_family __tw_common.skc_family +#define tw_state __tw_common.skc_state +#define tw_reuse __tw_common.skc_reuse +#define tw_bound_dev_if __tw_common.skc_bound_dev_if +#define tw_node __tw_common.skc_node +#define tw_bind_node __tw_common.skc_bind_node +#define tw_refcnt __tw_common.skc_refcnt +#define tw_prot __tw_common.skc_prot + volatile unsigned char tw_substate; + /* 3 bits hole, try to pack */ + unsigned char tw_rcv_wscale; + /* Socket demultiplex comparisons on incoming packets. */ + /* these five are in inet_sock */ + __u16 tw_sport; + __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); + __u32 tw_rcv_saddr; + __u16 tw_dport; + __u16 tw_num; + /* And these are ours. */ + __u8 tw_ipv6only:1; + /* 31 bits hole, try to pack */ + int tw_hashent; + int tw_timeout; + unsigned long tw_ttd; + struct inet_bind_bucket *tw_tb; + struct hlist_node tw_death_node; +}; + +static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_node, list); +} + +static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind_node, list); +} + +static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) +{ + return tw->tw_death_node.pprev != NULL; +} + +static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) +{ + tw->tw_death_node.pprev = NULL; +} + +static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + __hlist_del(&tw->tw_death_node); + inet_twsk_dead_node_init(tw); +} + +static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + if (inet_twsk_dead_hashed(tw)) { + __inet_twsk_del_dead_node(tw); + return 1; + } + return 0; +} + +#define inet_twsk_for_each(tw, node, head) \ + hlist_for_each_entry(tw, node, head, tw_node) + +#define inet_twsk_for_each_inmate(tw, node, jail) \ + hlist_for_each_entry(tw, node, jail, tw_death_node) + +#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ + hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) + +static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) +{ + return (struct inet_timewait_sock *)sk; +} + +static inline u32 inet_rcv_saddr(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; +} + +static inline void inet_twsk_put(struct inet_timewait_sock *tw) +{ + if (atomic_dec_and_test(&tw->tw_refcnt)) { +#ifdef SOCK_REFCNT_DEBUG + printk(KERN_DEBUG "%s timewait_sock %p released\n", + tw->tw_prot->name, tw); +#endif + kmem_cache_free(tw->tw_prot->twsk_slab, tw); + } +} + +extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, + const int state); + +extern void __inet_twsk_kill(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo); + +extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo); + +extern void inet_twsk_schedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr, + const int timeo, const int timewait_len); +extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr); +#endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/ip.h b/include/net/ip.h index 3f63992..e4563bb 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, struct ip_options *opt); extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); extern int ip_output(struct sk_buff *skb); @@ -140,8 +140,6 @@ struct ip_reply_arg { void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -extern int ip_finish_output(struct sk_buff *skb); - struct ipv4_config { int log_martians; @@ -163,6 +161,25 @@ DECLARE_SNMP_STAT(struct linux_mib, net_statistics); extern int sysctl_local_port_range[2]; extern int sysctl_ip_default_ttl; +extern int sysctl_ip_nonlocal_bind; + +/* From ip_fragment.c */ +extern int sysctl_ipfrag_high_thresh; +extern int sysctl_ipfrag_low_thresh; +extern int sysctl_ipfrag_time; +extern int sysctl_ipfrag_secret_interval; + +/* From inetpeer.c */ +extern int inet_peer_threshold; +extern int inet_peer_minttl; +extern int inet_peer_maxttl; +extern int inet_peer_gc_mintime; +extern int inet_peer_gc_maxtime; + +/* From ip_output.c */ +extern int sysctl_ip_dynaddr; + +extern void ipfrag_init(void); #ifdef CONFIG_INET /* The function in 2.2 was invalid, producing wrong result for @@ -318,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); extern void ip_options_fragment(struct sk_buff *skb); extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); -extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user); +extern int ip_options_get(struct ip_options **optp, + unsigned char *data, int optlen); +extern int ip_options_get_from_user(struct ip_options **optp, + unsigned char __user *data, int optlen); extern void ip_options_undo(struct ip_options * opt); extern void ip_forward_options(struct sk_buff *skb); extern int ip_options_rcv_srr(struct sk_buff *skb); @@ -349,5 +369,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, void **context); +#ifdef CONFIG_PROC_FS +extern int ip_misc_proc_init(void); +#endif + +extern struct ctl_table ipv4_table[]; #endif /* _IP_H */ diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3199045..a66e9de 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -167,14 +167,17 @@ extern int fib6_walk_continue(struct fib6_walker_t *w); extern int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern void inet6_rt_notify(int event, struct rt6_info *rt, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + struct netlink_skb_parms *req); extern void fib6_run_gc(unsigned long dummy); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d5d1dd1..1f2e428 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -12,7 +12,6 @@ #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> -#include <linux/tcp.h> #include <linux/ip.h> #include <linux/ipv6.h> @@ -41,13 +40,16 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); extern int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern int ip6_ins_rt(struct rt6_info *, struct nlmsghdr *, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern int ip6_del_rt(struct rt6_info *, struct nlmsghdr *, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e5a5f6b..14de4eb 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -109,6 +109,20 @@ struct fib_result { #endif }; +struct fib_result_nl { + u32 fl_addr; /* To be looked up*/ + u32 fl_fwmark; + unsigned char fl_tos; + unsigned char fl_scope; + unsigned char tb_id_in; + + unsigned char tb_id; /* Results */ + unsigned char prefixlen; + unsigned char nh_sel; + unsigned char type; + unsigned char scope; + int err; +}; #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -281,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res) #endif } +#ifdef CONFIG_PROC_FS +extern int fib_proc_init(void); +extern void fib_proc_exit(void); +#endif + #endif /* _NET_FIB_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52da5d2..e426641 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -255,7 +255,6 @@ struct ip_vs_daemon_user { #include <asm/atomic.h> /* for struct atomic_t */ #include <linux/netdevice.h> /* for struct neighbour */ #include <net/dst.h> /* for struct dst_entry */ -#include <net/tcp.h> #include <net/udp.h> #include <linux/compiler.h> @@ -959,7 +958,7 @@ static __inline__ int ip_vs_todrop(void) */ #define IP_VS_FWD_METHOD(cp) (cp->flags & IP_VS_CONN_F_FWD_MASK) -extern __inline__ char ip_vs_fwd_tag(struct ip_vs_conn *cp) +static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) { char fwd; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 771b47e..3203eaf 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -104,6 +104,7 @@ struct frag_hdr { #ifdef __KERNEL__ +#include <linux/config.h> #include <net/sock.h> /* sysctls */ @@ -145,7 +146,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) #define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) -extern atomic_t inet6_sock_nr; int snmp6_register_dev(struct inet6_dev *idev); int snmp6_unregister_dev(struct inet6_dev *idev); @@ -183,7 +183,6 @@ struct ipv6_txoptions struct ipv6_opt_hdr *hopopt; struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ - struct ipv6_opt_hdr *auth; struct ipv6_opt_hdr *dst1opt; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ @@ -347,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a) extern int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); /* * upper-layer output functions @@ -465,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh; extern int sysctl_ip6frag_time; extern int sysctl_ip6frag_secret_interval; -#endif /* __KERNEL__ */ -#endif /* _NET_IPV6_H */ +extern struct proto_ops inet6_stream_ops; +extern struct proto_ops inet6_dgram_ops; + +extern int ip6_mc_source(int add, int omode, struct sock *sk, + struct group_source_req *pgsr); +extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); +extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, + struct group_filter __user *optval, + int __user *optlen); + +#ifdef CONFIG_PROC_FS +extern int ac6_proc_init(void); +extern void ac6_proc_exit(void); +extern int raw6_proc_init(void); +extern void raw6_proc_exit(void); +extern int tcp6_proc_init(void); +extern void tcp6_proc_exit(void); +extern int udp6_proc_init(void); +extern void udp6_proc_exit(void); +extern int ipv6_misc_proc_init(void); +extern void ipv6_misc_proc_exit(void); + +extern struct rt6_statistics rt6_stats; +#endif +#ifdef CONFIG_SYSCTL +extern ctl_table ipv6_route_table[]; +extern ctl_table ipv6_icmp_table[]; +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif +#endif /* __KERNEL__ */ +#endif /* _NET_IPV6_H */ diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h index 71d6af8..92c8280 100644 --- a/include/net/irda/irda_device.h +++ b/include/net/irda/irda_device.h @@ -224,7 +224,7 @@ int irda_device_is_receiving(struct net_device *dev); /* Interface for internal use */ static inline int irda_device_txqueue_empty(const struct net_device *dev) { - return (skb_queue_len(&dev->qdisc->q) == 0); + return skb_queue_empty(&dev->qdisc->q); } int irda_device_set_raw_mode(struct net_device* self, int status); struct net_device *alloc_irdadev(int sizeof_priv); diff --git a/include/net/irda/irlan_filter.h b/include/net/irda/irlan_filter.h index 3afeb6c..492deda 100644 --- a/include/net/irda/irlan_filter.h +++ b/include/net/irda/irlan_filter.h @@ -28,6 +28,6 @@ void irlan_check_command_param(struct irlan_cb *self, char *param, char *value); void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb); -int irlan_print_filter(struct seq_file *seq, int filter_type); +void irlan_print_filter(struct seq_file *seq, int filter_type); #endif /* IRLAN_FILTER_H */ diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 44edd48..d67c839 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -1,10 +1,10 @@ /* * This file define the new driver API for Wireless Extensions * - * Version : 6 21.6.04 + * Version : 7 18.3.05 * * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> - * Copyright (c) 2001-2004 Jean Tourrilhes, All Rights Reserved. + * Copyright (c) 2001-2005 Jean Tourrilhes, All Rights Reserved. */ #ifndef _IW_HANDLER_H @@ -207,7 +207,7 @@ * will be needed... * I just plan to increment with each new version. */ -#define IW_HANDLER_VERSION 6 +#define IW_HANDLER_VERSION 7 /* * Changes : @@ -232,6 +232,13 @@ * - Remove spy #ifdef, they are always on -> cleaner code * - Add IW_DESCR_FLAG_NOMAX flag for very large requests * - Start migrating get_wireless_stats to struct iw_handler_def + * + * V6 to V7 + * -------- + * - Add struct ieee80211_device pointer in struct iw_public_data + * - Remove (struct iw_point *)->pointer from events and streams + * - Remove spy_offset from struct iw_handler_def + * - Add "check" version of event macros for ieee802.11 stack */ /**************************** CONSTANTS ****************************/ @@ -334,9 +341,6 @@ struct iw_handler_def * We will automatically export that to user space... */ const struct iw_priv_args * private_args; - /* This field will be *removed* in the next version of WE */ - long spy_offset; /* DO NOT USE */ - /* New location of get_wireless_stats, to de-bloat struct net_device. * The old pointer in struct net_device will be gradually phased * out, and drivers are encouraged to use this one... */ @@ -400,16 +404,21 @@ struct iw_spy_data /* --------------------- DEVICE WIRELESS DATA --------------------- */ /* * This is all the wireless data specific to a device instance that - * is managed by the core of Wireless Extensions. + * is managed by the core of Wireless Extensions or the 802.11 layer. * We only keep pointer to those structures, so that a driver is free * to share them between instances. * This structure should be initialised before registering the device. * Access to this data follow the same rules as any other struct net_device * data (i.e. valid as long as struct net_device exist, same locking rules). */ +/* Forward declaration */ +struct ieee80211_device; +/* The struct */ struct iw_public_data { /* Driver enhanced spy support */ - struct iw_spy_data * spy_data; + struct iw_spy_data * spy_data; + /* Structure managed by the in-kernel IEEE 802.11 layer */ + struct ieee80211_device * ieee80211; }; /**************************** PROTOTYPES ****************************/ @@ -424,7 +433,7 @@ struct iw_public_data { extern int dev_get_wireless_info(char * buffer, char **start, off_t offset, int length); -/* Handle IOCTLs, called in net/code/dev.c */ +/* Handle IOCTLs, called in net/core/dev.c */ extern int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd); /* Second : functions that may be called by driver modules */ @@ -479,7 +488,7 @@ iwe_stream_add_event(char * stream, /* Stream of events */ int event_len) /* Real size of payload */ { /* Check if it's possible */ - if((stream + event_len) < ends) { + if(likely((stream + event_len) < ends)) { iwe->len = event_len; memcpy(stream, (char *) iwe, event_len); stream += event_len; @@ -495,14 +504,17 @@ iwe_stream_add_event(char * stream, /* Stream of events */ static inline char * iwe_stream_add_point(char * stream, /* Stream of events */ char * ends, /* End of stream */ - struct iw_event *iwe, /* Payload */ - char * extra) + struct iw_event *iwe, /* Payload length + flags */ + char * extra) /* More payload */ { int event_len = IW_EV_POINT_LEN + iwe->u.data.length; /* Check if it's possible */ - if((stream + event_len) < ends) { + if(likely((stream + event_len) < ends)) { iwe->len = event_len; - memcpy(stream, (char *) iwe, IW_EV_POINT_LEN); + memcpy(stream, (char *) iwe, IW_EV_LCP_LEN); + memcpy(stream + IW_EV_LCP_LEN, + ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF, + IW_EV_POINT_LEN - IW_EV_LCP_LEN); memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length); stream += event_len; } @@ -526,7 +538,7 @@ iwe_stream_add_value(char * event, /* Event in the stream */ event_len -= IW_EV_LCP_LEN; /* Check if it's possible */ - if((value + event_len) < ends) { + if(likely((value + event_len) < ends)) { /* Add new value */ memcpy(value, (char *) iwe + IW_EV_LCP_LEN, event_len); value += event_len; @@ -537,4 +549,85 @@ iwe_stream_add_value(char * event, /* Event in the stream */ return value; } +/*------------------------------------------------------------------*/ +/* + * Wrapper to add an Wireless Event to a stream of events. + * Same as above, with explicit error check... + */ +static inline char * +iwe_stream_check_add_event(char * stream, /* Stream of events */ + char * ends, /* End of stream */ + struct iw_event *iwe, /* Payload */ + int event_len, /* Size of payload */ + int * perr) /* Error report */ +{ + /* Check if it's possible, set error if not */ + if(likely((stream + event_len) < ends)) { + iwe->len = event_len; + memcpy(stream, (char *) iwe, event_len); + stream += event_len; + } else + *perr = -E2BIG; + return stream; +} + +/*------------------------------------------------------------------*/ +/* + * Wrapper to add an short Wireless Event containing a pointer to a + * stream of events. + * Same as above, with explicit error check... + */ +static inline char * +iwe_stream_check_add_point(char * stream, /* Stream of events */ + char * ends, /* End of stream */ + struct iw_event *iwe, /* Payload length + flags */ + char * extra, /* More payload */ + int * perr) /* Error report */ +{ + int event_len = IW_EV_POINT_LEN + iwe->u.data.length; + /* Check if it's possible */ + if(likely((stream + event_len) < ends)) { + iwe->len = event_len; + memcpy(stream, (char *) iwe, IW_EV_LCP_LEN); + memcpy(stream + IW_EV_LCP_LEN, + ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF, + IW_EV_POINT_LEN - IW_EV_LCP_LEN); + memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length); + stream += event_len; + } else + *perr = -E2BIG; + return stream; +} + +/*------------------------------------------------------------------*/ +/* + * Wrapper to add a value to a Wireless Event in a stream of events. + * Be careful, this one is tricky to use properly : + * At the first run, you need to have (value = event + IW_EV_LCP_LEN). + * Same as above, with explicit error check... + */ +static inline char * +iwe_stream_check_add_value(char * event, /* Event in the stream */ + char * value, /* Value in event */ + char * ends, /* End of stream */ + struct iw_event *iwe, /* Payload */ + int event_len, /* Size of payload */ + int * perr) /* Error report */ +{ + /* Don't duplicate LCP */ + event_len -= IW_EV_LCP_LEN; + + /* Check if it's possible */ + if(likely((value + event_len) < ends)) { + /* Add new value */ + memcpy(value, (char *) iwe + IW_EV_LCP_LEN, event_len); + value += event_len; + /* Patch LCP */ + iwe->len = value - event; + memcpy(event, (char *) iwe, IW_EV_LCP_LEN); + } else + *perr = -E2BIG; + return value; +} + #endif /* _IW_HANDLER_H */ diff --git a/include/net/llc.h b/include/net/llc.h index c9aed2a..71769a5 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -46,7 +46,8 @@ struct llc_sap { unsigned char f_bit; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); struct llc_addr laddr; struct list_head node; struct { @@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock; extern unsigned char llc_station_mac_sa[ETH_ALEN]; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int llc_mac_hdr_init(struct sk_buff *skb, unsigned char *sa, unsigned char *da); @@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb)); extern struct llc_sap *llc_sap_open(unsigned char lsap, int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void llc_sap_close(struct llc_sap *sap); extern struct llc_sap *llc_sap_find(unsigned char sap_value); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4f33bbc..34c0773 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -65,11 +65,10 @@ struct neighbour; struct neigh_parms { + struct net_device *dev; struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; - int entries; - void *priv; void *sysctl_table; @@ -192,7 +191,6 @@ struct neigh_table atomic_t entries; rwlock_t lock; unsigned long last_rand; - struct neigh_parms *parms_list; kmem_cache_t *kmem_cachep; struct neigh_statistics *stats; struct neighbour **hash_buckets; @@ -252,6 +250,9 @@ extern int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); extern int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); extern void neigh_app_ns(struct neighbour *n); +extern int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb); +extern int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); + extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); @@ -362,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, return neigh_create(tbl, pkey, dev); } -#define LOCALLY_ENQUEUED -2 +struct neighbour_cb { + unsigned long sched_next; + unsigned int flags; +}; + +#define LOCALLY_ENQUEUED 0x1 + +#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif #endif diff --git a/include/net/p8022.h b/include/net/p8022.h index 3c99a86..42e9fac 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -4,7 +4,10 @@ extern struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); +extern struct datalink_proto *make_8023_client(void); +extern void destroy_8023_client(struct datalink_proto *dl); #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4abda6a..b902d24 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv) static inline int tcf_match_indev(struct sk_buff *skb, char *indev) { - if (0 != indev[0]) { - if (NULL == skb->input_dev) + if (indev[0]) { + if (!skb->input_dev) return 0; - else if (0 != strcmp(indev, skb->input_dev->name)) + if (strcmp(indev, skb->input_dev->name)) return 0; } diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index fcb05a3..6492e73 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -13,13 +13,12 @@ struct qdisc_walker extern rwlock_t qdisc_tree_lock; -#define QDISC_ALIGN 32 -#define QDISC_ALIGN_CONST (QDISC_ALIGN - 1) +#define QDISC_ALIGNTO 32 +#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1)) static inline void *qdisc_priv(struct Qdisc *q) { - return (char *)q + ((sizeof(struct Qdisc) + QDISC_ALIGN_CONST) - & ~QDISC_ALIGN_CONST); + return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc)); } /* @@ -207,8 +206,6 @@ psched_tod_diff(int delta_sec, int bound) #endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ -extern struct Qdisc noop_qdisc; -extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_qdisc_ops; extern struct Qdisc_ops bfifo_qdisc_ops; @@ -216,14 +213,6 @@ extern int register_qdisc(struct Qdisc_ops *qops); extern int unregister_qdisc(struct Qdisc_ops *qops); extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); -extern void dev_init_scheduler(struct net_device *dev); -extern void dev_shutdown(struct net_device *dev); -extern void dev_activate(struct net_device *dev); -extern void dev_deactivate(struct net_device *dev); -extern void qdisc_reset(struct Qdisc *qdisc); -extern void qdisc_destroy(struct Qdisc *qdisc); -extern struct Qdisc * qdisc_create_dflt(struct net_device *dev, - struct Qdisc_ops *ops); extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab); extern void qdisc_put_rtab(struct qdisc_rate_table *tab); diff --git a/include/net/psnap.h b/include/net/psnap.h index 9c94e8f..b2e01cc 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -1,7 +1,7 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H -extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); +extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); extern void unregister_snap_client(struct datalink_proto *proto); #endif diff --git a/include/net/raw.h b/include/net/raw.h index 1c411c4..f479174 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -17,10 +17,10 @@ #ifndef _RAW_H #define _RAW_H +#include <linux/config.h> extern struct proto raw_prot; - extern void raw_err(struct sock *, struct sk_buff *, u32 info); extern int raw_rcv(struct sock *, struct sk_buff *); @@ -37,6 +37,11 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif); -extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); + +#ifdef CONFIG_PROC_FS +extern int raw_proc_init(void); +extern void raw_proc_exit(void); +#endif #endif /* _RAW_H */ diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 23fd9a6..14476a7 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -7,10 +7,11 @@ extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; extern rwlock_t raw_v6_lock; -extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); +extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr); + struct in6_addr *loc_addr, struct in6_addr *rmt_addr, + int dif); extern int rawv6_rcv(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/request_sock.h b/include/net/request_sock.h new file mode 100644 index 0000000..b52cc52 --- /dev/null +++ b/include/net/request_sock.h @@ -0,0 +1,261 @@ +/* + * NET Generic infrastructure for Network protocols. + * + * Definitions for request_sock + * + * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> + * + * From code originally in include/net/tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _REQUEST_SOCK_H +#define _REQUEST_SOCK_H + +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <net/sock.h> + +struct request_sock; +struct sk_buff; +struct dst_entry; +struct proto; + +struct request_sock_ops { + int family; + kmem_cache_t *slab; + int obj_size; + int (*rtx_syn_ack)(struct sock *sk, + struct request_sock *req, + struct dst_entry *dst); + void (*send_ack)(struct sk_buff *skb, + struct request_sock *req); + void (*send_reset)(struct sk_buff *skb); + void (*destructor)(struct request_sock *req); +}; + +/* struct request_sock - mini sock to represent a connection request + */ +struct request_sock { + struct request_sock *dl_next; /* Must be first member! */ + u16 mss; + u8 retrans; + u8 __pad; + /* The following two fields can be easily recomputed I think -AK */ + u32 window_clamp; /* window clamp at creation time */ + u32 rcv_wnd; /* rcv_wnd offered first time */ + u32 ts_recent; + unsigned long expires; + struct request_sock_ops *rsk_ops; + struct sock *sk; +}; + +static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops) +{ + struct request_sock *req = kmem_cache_alloc(ops->slab, SLAB_ATOMIC); + + if (req != NULL) + req->rsk_ops = ops; + + return req; +} + +static inline void __reqsk_free(struct request_sock *req) +{ + kmem_cache_free(req->rsk_ops->slab, req); +} + +static inline void reqsk_free(struct request_sock *req) +{ + req->rsk_ops->destructor(req); + __reqsk_free(req); +} + +extern int sysctl_max_syn_backlog; + +/** struct listen_sock - listen state + * + * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs + */ +struct listen_sock { + u8 max_qlen_log; + /* 3 bytes hole, try to use */ + int qlen; + int qlen_young; + int clock_hand; + u32 hash_rnd; + u32 nr_table_entries; + struct request_sock *syn_table[0]; +}; + +/** struct request_sock_queue - queue of request_socks + * + * @rskq_accept_head - FIFO head of established children + * @rskq_accept_tail - FIFO tail of established children + * @rskq_defer_accept - User waits for some data after accept() + * @syn_wait_lock - serializer + * + * %syn_wait_lock is necessary only to avoid proc interface having to grab the main + * lock sock while browsing the listening hash (otherwise it's deadlock prone). + * + * This lock is acquired in read mode only from listening_get_next() seq_file + * op and it's acquired in write mode _only_ from code that is actively + * changing rskq_accept_head. All readers that are holding the master sock lock + * don't need to grab this lock in read mode too as rskq_accept_head. writes + * are always protected from the main sock lock. + */ +struct request_sock_queue { + struct request_sock *rskq_accept_head; + struct request_sock *rskq_accept_tail; + rwlock_t syn_wait_lock; + u8 rskq_defer_accept; + /* 3 bytes hole, try to pack */ + struct listen_sock *listen_opt; +}; + +extern int reqsk_queue_alloc(struct request_sock_queue *queue, + const int nr_table_entries); + +static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) +{ + struct listen_sock *lopt; + + write_lock_bh(&queue->syn_wait_lock); + lopt = queue->listen_opt; + queue->listen_opt = NULL; + write_unlock_bh(&queue->syn_wait_lock); + + return lopt; +} + +static inline void __reqsk_queue_destroy(struct request_sock_queue *queue) +{ + kfree(reqsk_queue_yank_listen_sk(queue)); +} + +extern void reqsk_queue_destroy(struct request_sock_queue *queue); + +static inline struct request_sock * + reqsk_queue_yank_acceptq(struct request_sock_queue *queue) +{ + struct request_sock *req = queue->rskq_accept_head; + + queue->rskq_accept_head = queue->rskq_accept_head = NULL; + return req; +} + +static inline int reqsk_queue_empty(struct request_sock_queue *queue) +{ + return queue->rskq_accept_head == NULL; +} + +static inline void reqsk_queue_unlink(struct request_sock_queue *queue, + struct request_sock *req, + struct request_sock **prev_req) +{ + write_lock(&queue->syn_wait_lock); + *prev_req = req->dl_next; + write_unlock(&queue->syn_wait_lock); +} + +static inline void reqsk_queue_add(struct request_sock_queue *queue, + struct request_sock *req, + struct sock *parent, + struct sock *child) +{ + req->sk = child; + sk_acceptq_added(parent); + + if (queue->rskq_accept_head == NULL) + queue->rskq_accept_head = req; + else + queue->rskq_accept_tail->dl_next = req; + + queue->rskq_accept_tail = req; + req->dl_next = NULL; +} + +static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue) +{ + struct request_sock *req = queue->rskq_accept_head; + + BUG_TRAP(req != NULL); + + queue->rskq_accept_head = req->dl_next; + if (queue->rskq_accept_head == NULL) + queue->rskq_accept_tail = NULL; + + return req; +} + +static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue, + struct sock *parent) +{ + struct request_sock *req = reqsk_queue_remove(queue); + struct sock *child = req->sk; + + BUG_TRAP(child != NULL); + + sk_acceptq_removed(parent); + __reqsk_free(req); + return child; +} + +static inline int reqsk_queue_removed(struct request_sock_queue *queue, + struct request_sock *req) +{ + struct listen_sock *lopt = queue->listen_opt; + + if (req->retrans == 0) + --lopt->qlen_young; + + return --lopt->qlen; +} + +static inline int reqsk_queue_added(struct request_sock_queue *queue) +{ + struct listen_sock *lopt = queue->listen_opt; + const int prev_qlen = lopt->qlen; + + lopt->qlen_young++; + lopt->qlen++; + return prev_qlen; +} + +static inline int reqsk_queue_len(const struct request_sock_queue *queue) +{ + return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; +} + +static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) +{ + return queue->listen_opt->qlen_young; +} + +static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) +{ + return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; +} + +static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, + u32 hash, struct request_sock *req, + unsigned timeout) +{ + struct listen_sock *lopt = queue->listen_opt; + + req->expires = jiffies + timeout; + req->retrans = 0; + req->sk = NULL; + req->dl_next = lopt->syn_table[hash]; + + write_lock(&queue->syn_wait_lock); + lopt->syn_table[hash] = req; + write_unlock(&queue->syn_wait_lock); +} + +#endif /* _REQUEST_SOCK_H */ diff --git a/include/net/route.h b/include/net/route.h index d34ca8f..dbe79ca 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -105,10 +105,6 @@ struct rt_cache_stat unsigned int out_hlist_search; }; -extern struct rt_cache_stat *rt_cache_stat; -#define RT_CACHE_STAT_INC(field) \ - (per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++) - extern struct ip_rt_acct *ip_rt_acct; struct in_device; @@ -199,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } +extern ctl_table ipv4_route_table[]; + #endif /* _ROUTE_H */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c57504b..7b6ec99 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -164,6 +164,19 @@ extern void qdisc_unlock_tree(struct net_device *dev); #define tcf_tree_lock(tp) qdisc_lock_tree((tp)->q->dev) #define tcf_tree_unlock(tp) qdisc_unlock_tree((tp)->q->dev) +extern struct Qdisc noop_qdisc; +extern struct Qdisc_ops noop_qdisc_ops; + +extern void dev_init_scheduler(struct net_device *dev); +extern void dev_shutdown(struct net_device *dev); +extern void dev_activate(struct net_device *dev); +extern void dev_deactivate(struct net_device *dev); +extern void qdisc_reset(struct Qdisc *qdisc); +extern void qdisc_destroy(struct Qdisc *qdisc); +extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops); +extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, + struct Qdisc_ops *ops); + static inline void tcf_destroy(struct tcf_proto *tp) { @@ -172,4 +185,126 @@ tcf_destroy(struct tcf_proto *tp) kfree(tp); } +static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff_head *list) +{ + __skb_queue_tail(list, skb); + sch->qstats.backlog += skb->len; + sch->bstats.bytes += skb->len; + sch->bstats.packets++; + + return NET_XMIT_SUCCESS; +} + +static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) +{ + return __qdisc_enqueue_tail(skb, sch, &sch->q); +} + +static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __skb_dequeue(list); + + if (likely(skb != NULL)) + sch->qstats.backlog -= skb->len; + + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) +{ + return __qdisc_dequeue_head(sch, &sch->q); +} + +static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __skb_dequeue_tail(list); + + if (likely(skb != NULL)) + sch->qstats.backlog -= skb->len; + + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch) +{ + return __qdisc_dequeue_tail(sch, &sch->q); +} + +static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff_head *list) +{ + __skb_queue_head(list, skb); + sch->qstats.backlog += skb->len; + sch->qstats.requeues++; + + return NET_XMIT_SUCCESS; +} + +static inline int qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch) +{ + return __qdisc_requeue(skb, sch, &sch->q); +} + +static inline void __qdisc_reset_queue(struct Qdisc *sch, + struct sk_buff_head *list) +{ + /* + * We do not know the backlog in bytes of this list, it + * is up to the caller to correct it + */ + skb_queue_purge(list); +} + +static inline void qdisc_reset_queue(struct Qdisc *sch) +{ + __qdisc_reset_queue(sch, &sch->q); + sch->qstats.backlog = 0; +} + +static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __qdisc_dequeue_tail(sch, list); + + if (likely(skb != NULL)) { + unsigned int len = skb->len; + kfree_skb(skb); + return len; + } + + return 0; +} + +static inline unsigned int qdisc_queue_drop(struct Qdisc *sch) +{ + return __qdisc_queue_drop(sch, &sch->q); +} + +static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) +{ + kfree_skb(skb); + sch->qstats.drops++; + + return NET_XMIT_DROP; +} + +static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) +{ + sch->qstats.drops++; + +#ifdef CONFIG_NET_CLS_POLICE + if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) + goto drop; + + return NET_XMIT_SUCCESS; + +drop: +#endif + kfree_skb(skb); + return NET_XMIT_DROP; +} + #endif diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index ebc5282..dc107ff 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -65,9 +65,11 @@ typedef enum { SCTP_CMD_TIMER_START, /* Start a timer. */ SCTP_CMD_TIMER_RESTART, /* Restart a timer. */ SCTP_CMD_TIMER_STOP, /* Stop a timer. */ - SCTP_CMD_COUNTER_RESET, /* Reset a counter. */ - SCTP_CMD_COUNTER_INC, /* Increment a counter. */ + SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */ + SCTP_CMD_INIT_COUNTER_RESET, /* Reset init counter. */ + SCTP_CMD_INIT_COUNTER_INC, /* Increment init counter. */ SCTP_CMD_INIT_RESTART, /* High level, do init timer work. */ + SCTP_CMD_COOKIEECHO_RESTART, /* High level, do cookie-echo timer work. */ SCTP_CMD_INIT_FAILED, /* High level, do init failure work. */ SCTP_CMD_REPORT_DUP, /* Report a duplicate TSN. */ SCTP_CMD_STRIKE, /* Mark a strike against a transport. */ @@ -118,7 +120,6 @@ typedef union { int error; sctp_state_t state; sctp_event_timeout_t to; - sctp_counter_t counter; void *ptr; struct sctp_chunk *chunk; struct sctp_association *asoc; @@ -165,7 +166,6 @@ SCTP_ARG_CONSTRUCTOR(U16, __u16, u16) SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(STATE, sctp_state_t, state) -SCTP_ARG_CONSTRUCTOR(COUNTER, sctp_counter_t, counter) SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) SCTP_ARG_CONSTRUCTOR(PTR, void *, ptr) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 2b76c0f..c51541e 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -47,10 +47,10 @@ #ifndef __sctp_constants_h__ #define __sctp_constants_h__ -#include <linux/tcp.h> /* For TCP states used in sctp_sock_state_t */ #include <linux/sctp.h> #include <linux/ipv6.h> /* For ipv6hdr. */ #include <net/sctp/user.h> +#include <net/tcp_states.h> /* For TCP states used in sctp_sock_state_t */ /* Value used for stream negotiation. */ enum { SCTP_MAX_STREAM = 0xffff }; @@ -263,30 +263,11 @@ enum { SCTP_MIN_PMTU = 576 }; enum { SCTP_MAX_DUP_TSNS = 16 }; enum { SCTP_MAX_GABS = 16 }; -typedef enum { - SCTP_COUNTER_INIT_ERROR, -} sctp_counter_t; - -/* How many counters does an association need? */ -#define SCTP_NUMBER_COUNTERS 5 - -/* Here we define the default timers. */ - -/* cookie timer def = ? seconds */ -#define SCTP_DEFAULT_TIMEOUT_T1_COOKIE (3 * HZ) - -/* init timer def = 3 seconds */ -#define SCTP_DEFAULT_TIMEOUT_T1_INIT (3 * HZ) - -/* shutdown timer def = 300 ms */ -#define SCTP_DEFAULT_TIMEOUT_T2_SHUTDOWN ((300 * HZ) / 1000) - -/* 0 seconds + RTO */ -#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT (10 * HZ) +/* Heartbeat interval - 30 secs */ +#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT (30 * HZ) -/* recv timer def = 200ms (in usec) */ +/* Delayed sack timer - 200ms */ #define SCTP_DEFAULT_TIMEOUT_SACK ((200 * HZ) / 1000) -#define SCTP_DEFAULT_TIMEOUT_SACK_MAX ((500 * HZ) / 1000) /* 500 ms */ /* RTO.Initial - 3 seconds * RTO.Min - 1 second diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 960abfa..e1d5ec1 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -125,7 +125,8 @@ */ extern struct sock *sctp_get_ctl_sock(void); extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, - sctp_scope_t, int gfp, int flags); + sctp_scope_t, unsigned int __nocast gfp, + int flags); extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family); extern int sctp_register_pf(struct sctp_pf *, sa_family_t); @@ -166,15 +167,12 @@ void sctp_unhash_established(struct sctp_association *); void sctp_hash_endpoint(struct sctp_endpoint *); void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(int family, struct sk_buff *, - struct sctphdr *, struct sctp_endpoint **, - struct sctp_association **, + struct sctphdr *, struct sctp_association **, struct sctp_transport **); -void sctp_err_finish(struct sock *, struct sctp_endpoint *, - struct sctp_association *); +void sctp_err_finish(struct sock *, struct sctp_association *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); void sctp_icmp_proto_unreachable(struct sock *sk, - struct sctp_endpoint *ep, struct sctp_association *asoc, struct sctp_transport *t); @@ -223,6 +221,22 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); extern int sctp_debug_flag; #define SCTP_DEBUG_PRINTK(whatever...) \ ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) +#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + lead "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x" trail, \ + leadparm, \ + NIP6(saddr->v6.sin6_addr), \ + otherparms); \ + } else { \ + printk(KERN_DEBUG \ + lead "%u.%u.%u.%u" trail, \ + leadparm, \ + NIPQUAD(saddr->v4.sin_addr.s_addr), \ + otherparms); \ + } \ + } #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -236,6 +250,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG #define SCTP_ASSERT(expr, str, func) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index f4fcee1..58462164 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -116,7 +116,8 @@ sctp_state_fn_t sctp_sf_eat_data_fast_4_4; sctp_state_fn_t sctp_sf_eat_sack_6_2; sctp_state_fn_t sctp_sf_tabort_8_4_8; sctp_state_fn_t sctp_sf_operr_notify; -sctp_state_fn_t sctp_sf_t1_timer_expire; +sctp_state_fn_t sctp_sf_t1_init_timer_expire; +sctp_state_fn_t sctp_sf_t1_cookie_timer_expire; sctp_state_fn_t sctp_sf_t2_timer_expire; sctp_state_fn_t sctp_sf_t4_timer_expire; sctp_state_fn_t sctp_sf_t5_timer_expire; @@ -130,7 +131,6 @@ sctp_state_fn_t sctp_sf_do_ecne; sctp_state_fn_t sctp_sf_ootb; sctp_state_fn_t sctp_sf_pdiscard; sctp_state_fn_t sctp_sf_violation; -sctp_state_fn_t sctp_sf_violation_chunklen; sctp_state_fn_t sctp_sf_discard_chunk; sctp_state_fn_t sctp_sf_do_5_2_1_siminit; sctp_state_fn_t sctp_sf_do_5_2_2_dupinit; @@ -181,17 +181,17 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t, int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, - int gfp); + unsigned int __nocast gfp); __u32 sctp_generate_verification_tag(void); void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag); /* Prototypes for chunk-building functions. */ struct sctp_chunk *sctp_make_init(const struct sctp_association *, const struct sctp_bind_addr *, - int gfp, int vparam_len); + unsigned int __nocast gfp, int vparam_len); struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *, const struct sctp_chunk *, - const int gfp, + const unsigned int __nocast gfp, const int unkparam_len); struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *, const struct sctp_chunk *); @@ -258,8 +258,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, void sctp_chunk_assign_tsn(struct sctp_chunk *); void sctp_chunk_assign_ssn(struct sctp_chunk *); -void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error); - /* Prototypes for statetable processing. */ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, @@ -267,7 +265,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, struct sctp_endpoint *, struct sctp_association *asoc, void *event_arg, - int gfp); + unsigned int __nocast gfp); /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); @@ -277,7 +275,8 @@ void sctp_ootb_pkt_free(struct sctp_packet *); struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *, const struct sctp_association *, - struct sctp_chunk *, int gfp, int *err, + struct sctp_chunk *, + unsigned int __nocast gfp, int *err, struct sctp_chunk **err_chk_p); int sctp_addip_addr_config(struct sctp_association *, sctp_param_t, struct sockaddr_storage*, int); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6c24d9c..994009b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -161,6 +161,9 @@ extern struct sctp_globals { */ int sndbuf_policy; + /* Delayed SACK timeout 200ms default*/ + int sack_timeout; + /* HB.interval - 30 seconds */ int hb_interval; @@ -217,6 +220,7 @@ extern struct sctp_globals { #define sctp_sndbuf_policy (sctp_globals.sndbuf_policy) #define sctp_max_retrans_path (sctp_globals.max_retrans_path) #define sctp_max_retrans_init (sctp_globals.max_retrans_init) +#define sctp_sack_timeout (sctp_globals.sack_timeout) #define sctp_hb_interval (sctp_globals.hb_interval) #define sctp_max_instreams (sctp_globals.max_instreams) #define sctp_max_outstreams (sctp_globals.max_outstreams) @@ -441,7 +445,8 @@ struct sctp_ssnmap { int malloced; }; -struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, int gfp); +struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, + unsigned int __nocast gfp); void sctp_ssnmap_free(struct sctp_ssnmap *map); void sctp_ssnmap_clear(struct sctp_ssnmap *map); @@ -578,7 +583,6 @@ void sctp_datamsg_track(struct sctp_chunk *); void sctp_chunk_fail(struct sctp_chunk *, int error); int sctp_chunk_abandoned(struct sctp_chunk *); - /* RFC2960 1.4 Key Terms * * o Chunk: A unit of information within an SCTP packet, consisting of @@ -588,13 +592,8 @@ int sctp_chunk_abandoned(struct sctp_chunk *); * each chunk as well as a few other header pointers... */ struct sctp_chunk { - /* These first three elements MUST PRECISELY match the first - * three elements of struct sk_buff. This allows us to reuse - * all the skb_* queue management functions. - */ - struct sctp_chunk *next; - struct sctp_chunk *prev; - struct sk_buff_head *list; + struct list_head list; + atomic_t refcnt; /* This is our link to the per-transport transmitted list. */ @@ -713,7 +712,7 @@ struct sctp_packet { __u32 vtag; /* This contains the payload chunks. */ - struct sk_buff_head chunks; + struct list_head chunk_list; /* This is the overhead of the sctp and ip headers. */ size_t overhead; @@ -867,10 +866,13 @@ struct sctp_transport { */ unsigned long last_time_ecne_reduced; - /* active : The current active state of this destination, - * : i.e. DOWN, UP, etc. + /* The number of times INIT has been sent on this transport. */ + int init_sent_count; + + /* state : The current state of this destination, + * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKOWN. */ - int active; + int state; /* hb_allowed : The current heartbeat state of this destination, * : i.e. ALLOW-HB, NO-HEARTBEAT, etc. @@ -944,7 +946,8 @@ struct sctp_transport { } cacc; }; -struct sctp_transport *sctp_transport_new(const union sctp_addr *, int); +struct sctp_transport *sctp_transport_new(const union sctp_addr *, + unsigned int __nocast); void sctp_transport_set_owner(struct sctp_transport *, struct sctp_association *); void sctp_transport_route(struct sctp_transport *, union sctp_addr *, @@ -967,7 +970,7 @@ struct sctp_inq { /* This is actually a queue of sctp_chunk each * containing a partially decoded packet. */ - struct sk_buff_head in; + struct list_head in_chunk_list; /* This is the packet which is currently off the in queue and is * being worked on through the inbound chunk processing. */ @@ -1010,7 +1013,7 @@ struct sctp_outq { struct sctp_association *asoc; /* Data pending that has never been transmitted. */ - struct sk_buff_head out; + struct list_head out_chunk_list; unsigned out_qlen; /* Total length of queued data chunks. */ @@ -1018,7 +1021,7 @@ struct sctp_outq { unsigned error; /* These are control chunks we want to send. */ - struct sk_buff_head control; + struct list_head control_chunk_list; /* These are chunks that have been sacked but are above the * CTSN, or cumulative tsn ack point. @@ -1092,9 +1095,10 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); void sctp_bind_addr_free(struct sctp_bind_addr *); int sctp_bind_addr_copy(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, int gfp,int flags); + sctp_scope_t scope, unsigned int __nocast gfp, + int flags); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - int gfp); + unsigned int __nocast gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); @@ -1103,9 +1107,10 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, int addrcnt, struct sctp_sock *opt); union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, - int *addrs_len, int gfp); + int *addrs_len, + unsigned int __nocast gfp); int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, - __u16 port, int gfp); + __u16 port, unsigned int __nocast gfp); sctp_scope_t sctp_scope(const union sctp_addr *); int sctp_in_scope(const union sctp_addr *addr, const sctp_scope_t scope); @@ -1222,9 +1227,6 @@ struct sctp_endpoint { /* sendbuf acct. policy. */ __u32 sndbuf_policy; - - /* Name for debugging output... */ - char *debug_name; }; /* Recover the outter endpoint structure. */ @@ -1237,7 +1239,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) } /* These are function signatures for manipulating endpoints. */ -struct sctp_endpoint *sctp_endpoint_new(struct sock *, int); +struct sctp_endpoint *sctp_endpoint_new(struct sock *, unsigned int __nocast); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); void sctp_endpoint_hold(struct sctp_endpoint *); @@ -1258,7 +1260,7 @@ int sctp_verify_init(const struct sctp_association *asoc, sctp_cid_t, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, sctp_cid_t cid, const union sctp_addr *peer, - sctp_init_chunk_t *init, int gfp); + sctp_init_chunk_t *init, unsigned int __nocast gfp); __u32 sctp_generate_tag(const struct sctp_endpoint *); __u32 sctp_generate_tsn(const struct sctp_endpoint *); @@ -1314,11 +1316,23 @@ struct sctp_association { * : association. Normally this information is * : hashed or keyed for quick lookup and access * : of the TCB. + * : The list is also initialized with the list + * : of addresses passed with the sctp_connectx() + * : call. * * It is a list of SCTP_transport's. */ struct list_head transport_addr_list; + /* transport_count + * + * Peer : A count of the number of peer addresses + * Transport : in the Peer Transport Address List. + * Address : + * Count : + */ + __u16 transport_count; + /* port * The transport layer port number. */ @@ -1486,6 +1500,9 @@ struct sctp_association { /* Transport to which SHUTDOWN chunk was last sent. */ struct sctp_transport *shutdown_last_sent_to; + /* Transport to which INIT chunk was last sent. */ + struct sctp_transport *init_last_sent_to; + /* Next TSN : The next TSN number to be assigned to a new * : DATA chunk. This is sent in the INIT or INIT * : ACK chunk to the peer and incremented each @@ -1549,8 +1566,11 @@ struct sctp_association { /* The message size at which SCTP fragmentation will occur. */ __u32 frag_point; - /* Currently only one counter is used to count INIT errors. */ - int counters[SCTP_NUMBER_COUNTERS]; + /* Counter used to count INIT errors. */ + int init_err_counter; + + /* Count the number of INIT cycles (for doubling timeout). */ + int init_cycle; /* Default send parameters. */ __u16 default_stream; @@ -1650,7 +1670,7 @@ struct sctp_association { * which already resides in sctp_outq. Please move this * queue and its supporting logic down there. --piggy] */ - struct sk_buff_head addip_chunks; + struct list_head addip_chunk_list; /* ADDIP Section 4.1 ASCONF Chunk Procedures * @@ -1703,11 +1723,13 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base) struct sctp_association * sctp_association_new(const struct sctp_endpoint *, const struct sock *, - sctp_scope_t scope, int gfp); + sctp_scope_t scope, unsigned int __nocast gfp); void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); +struct sctp_transport *sctp_assoc_choose_init_transport( + struct sctp_association *); struct sctp_transport *sctp_assoc_choose_shutdown_transport( struct sctp_association *); void sctp_assoc_update_retran_path(struct sctp_association *); @@ -1717,9 +1739,12 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, const union sctp_addr *laddr); struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *, const union sctp_addr *address, - const int gfp); + const unsigned int __nocast gfp, + const int peer_state); void sctp_assoc_del_peer(struct sctp_association *asoc, const union sctp_addr *addr); +void sctp_assoc_rm_peer(struct sctp_association *asoc, + struct sctp_transport *peer); void sctp_assoc_control_transport(struct sctp_association *, struct sctp_transport *, sctp_transport_cmd_t, sctp_sn_error_t); @@ -1738,9 +1763,11 @@ void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned); void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); -int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *, int); +int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *, + unsigned int __nocast); int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *, - struct sctp_cookie*, int gfp); + struct sctp_cookie*, + unsigned int __nocast gfp); int sctp_cmp_addr_exact(const union sctp_addr *ss1, const union sctp_addr *ss2); diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 1019d83..90fe4bf 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -88,7 +88,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( __u16 error, __u16 outbound, __u16 inbound, - int gfp); + unsigned int __nocast gfp); struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( const struct sctp_association *asoc, @@ -96,35 +96,35 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( int flags, int state, int error, - int gfp); + unsigned int __nocast gfp); struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, struct sctp_chunk *chunk, __u16 flags, - int gfp); + unsigned int __nocast gfp); struct sctp_ulpevent *sctp_ulpevent_make_send_failed( const struct sctp_association *asoc, struct sctp_chunk *chunk, __u16 flags, __u32 error, - int gfp); + unsigned int __nocast gfp); struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, __u16 flags, - int gfp); + unsigned int __nocast gfp); struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, - __u32 indication, int gfp); + __u32 indication, unsigned int __nocast gfp); struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( - const struct sctp_association *asoc, int gfp); + const struct sctp_association *asoc, unsigned int __nocast gfp); struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, struct sctp_chunk *chunk, - int gfp); + unsigned int __nocast gfp); void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index 961736d..1a60c6d 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -62,19 +62,22 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *, void sctp_ulpq_free(struct sctp_ulpq *); /* Add a new DATA chunk for processing. */ -int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, int); +int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, + unsigned int __nocast); /* Add a new event for propagation to the ULP. */ int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev); /* Renege previously received chunks. */ -void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, int); +void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, + unsigned int __nocast); /* Perform partial delivery. */ -void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, int); +void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, + unsigned int __nocast); /* Abort the partial delivery. */ -void sctp_ulpq_abort_pd(struct sctp_ulpq *, int); +void sctp_ulpq_abort_pd(struct sctp_ulpq *, unsigned int __nocast); /* Clear the partial data delivery condition on this socket. */ int sctp_clear_pd(struct sock *sk); diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 2758e8c..f6328ae 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -111,6 +111,8 @@ enum sctp_optname { #define SCTP_GET_LOCAL_ADDRS_NUM SCTP_GET_LOCAL_ADDRS_NUM SCTP_GET_LOCAL_ADDRS, /* Get all local addresss. */ #define SCTP_GET_LOCAL_ADDRS SCTP_GET_LOCAL_ADDRS + SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */ +#define SCTP_SOCKOPT_CONNECTX SCTP_SOCKOPT_CONNECTX }; /* @@ -527,6 +529,7 @@ struct sctp_paddrinfo { enum sctp_spinfo_state { SCTP_INACTIVE, SCTP_ACTIVE, + SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ }; /* diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h index 0b2c278..8716d59 100644 --- a/include/net/slhc_vj.h +++ b/include/net/slhc_vj.h @@ -170,19 +170,14 @@ struct slcompress { }; #define NULLSLCOMPR (struct slcompress *)0 -#define __ARGS(x) x - /* In slhc.c: */ -struct slcompress *slhc_init __ARGS((int rslots, int tslots)); -void slhc_free __ARGS((struct slcompress *comp)); - -int slhc_compress __ARGS((struct slcompress *comp, unsigned char *icp, - int isize, unsigned char *ocp, unsigned char **cpp, - int compress_cid)); -int slhc_uncompress __ARGS((struct slcompress *comp, unsigned char *icp, - int isize)); -int slhc_remember __ARGS((struct slcompress *comp, unsigned char *icp, - int isize)); -int slhc_toss __ARGS((struct slcompress *comp)); +struct slcompress *slhc_init(int rslots, int tslots); +void slhc_free(struct slcompress *comp); + +int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, + unsigned char *ocp, unsigned char **cpp, int compress_cid); +int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize); +int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize); +int slhc_toss(struct slcompress *comp); #endif /* _SLHC_H */ diff --git a/include/net/snmp.h b/include/net/snmp.h index a15ab25..a36bed8 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -128,18 +128,18 @@ struct linux_mib { #define SNMP_STAT_USRPTR(name) (name[1]) #define SNMP_INC_STATS_BH(mib, field) \ - (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++) + (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field]++) #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset) \ - (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++) + (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field + (offset)]++) #define SNMP_INC_STATS_USER(mib, field) \ - (per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++) + (per_cpu_ptr(mib[1], raw_smp_processor_id())->mibs[field]++) #define SNMP_INC_STATS(mib, field) \ - (per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++) + (per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id())->mibs[field]++) #define SNMP_DEC_STATS(mib, field) \ - (per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--) + (per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id())->mibs[field]--) #define SNMP_ADD_STATS_BH(mib, field, addend) \ - (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend) + (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field] += addend) #define SNMP_ADD_STATS_USER(mib, field, addend) \ - (per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend) + (per_cpu_ptr(mib[1], raw_smp_processor_id())->mibs[field] += addend) #endif diff --git a/include/net/sock.h b/include/net/sock.h index a9ef3a6..8c48fbe 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ } while(0) struct sock; +struct proto; /** * struct sock_common - minimal network layer representation of sockets @@ -98,10 +99,11 @@ struct sock; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header - * for struct sock and struct tcp_tw_bucket. - */ + * for struct sock and struct inet_timewait_sock. + */ struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; @@ -110,11 +112,12 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + struct proto *skc_prot; }; /** * struct sock - network layer representation of sockets - * @__sk_common: shared layout with tcp_tw_bucket + * @__sk_common: shared layout with inet_timewait_sock * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer @@ -136,11 +139,10 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_lingertime: %SO_LINGER l_linger setting - * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) + * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used - * @sk_prot: protocol handlers inside a network family * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) * @sk_err: last error * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' @@ -173,7 +175,7 @@ struct sock_common { */ struct sock { /* - * Now struct tcp_tw_bucket also uses sock_common, so please just + * Now struct inet_timewait_sock also uses sock_common, so please just * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; @@ -184,6 +186,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; @@ -218,7 +221,6 @@ struct sock { struct sk_buff *tail; } sk_backlog; struct sk_buff_head sk_error_queue; - struct proto *sk_prot; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, @@ -253,28 +255,28 @@ struct sock { /* * Hashed lists helper routines */ -static inline struct sock *__sk_head(struct hlist_head *head) +static inline struct sock *__sk_head(const struct hlist_head *head) { return hlist_entry(head->first, struct sock, sk_node); } -static inline struct sock *sk_head(struct hlist_head *head) +static inline struct sock *sk_head(const struct hlist_head *head) { return hlist_empty(head) ? NULL : __sk_head(head); } -static inline struct sock *sk_next(struct sock *sk) +static inline struct sock *sk_next(const struct sock *sk) { return sk->sk_node.next ? hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; } -static inline int sk_unhashed(struct sock *sk) +static inline int sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); } -static inline int sk_hashed(struct sock *sk) +static inline int sk_hashed(const struct sock *sk) { return sk->sk_node.pprev != NULL; } @@ -384,6 +386,11 @@ enum sock_flags { SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ }; +static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) +{ + nsk->sk_flags = osk->sk_flags; +} + static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) { __set_bit(flag, &sk->sk_flags); @@ -484,6 +491,8 @@ extern void sk_stream_kill_queues(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); +struct request_sock_ops; + /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto @@ -547,12 +556,20 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; + kmem_cache_t *twsk_slab; + unsigned int twsk_obj_size; + atomic_t *orphan_count; + + struct request_sock_ops *rsk_prot; + struct module *owner; char name[32]; struct list_head node; - +#ifdef SOCK_REFCNT_DEBUG + atomic_t socks; +#endif struct { int inuse; u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; @@ -562,6 +579,31 @@ struct proto { extern int proto_register(struct proto *prot, int alloc_slab); extern void proto_unregister(struct proto *prot); +#ifdef SOCK_REFCNT_DEBUG +static inline void sk_refcnt_debug_inc(struct sock *sk) +{ + atomic_inc(&sk->sk_prot->socks); +} + +static inline void sk_refcnt_debug_dec(struct sock *sk) +{ + atomic_dec(&sk->sk_prot->socks); + printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", + sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); +} + +static inline void sk_refcnt_debug_release(const struct sock *sk) +{ + if (atomic_read(&sk->sk_refcnt) != 1) + printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", + sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); +} +#else /* SOCK_REFCNT_DEBUG */ +#define sk_refcnt_debug_inc(sk) do { } while (0) +#define sk_refcnt_debug_dec(sk) do { } while (0) +#define sk_refcnt_debug_release(sk) do { } while (0) +#endif /* SOCK_REFCNT_DEBUG */ + /* Called with local bh disabled */ static __inline__ void sock_prot_inc_use(struct proto *prot) { @@ -573,6 +615,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot) prot->stats[smp_processor_id()].inuse--; } +/* With per-bucket locks this operation is not-atomic, so that + * this version is not worse. + */ +static inline void __sk_prot_rehash(struct sock *sk) +{ + sk->sk_prot->unhash(sk); + sk->sk_prot->hash(sk); +} + /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) @@ -658,6 +709,12 @@ static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb) sk_stream_mem_schedule(sk, skb->truesize, 1); } +static inline int sk_stream_wmem_schedule(struct sock *sk, int size) +{ + return size <= sk->sk_forward_alloc || + sk_stream_mem_schedule(sk, size, 0); +} + /* Used by processes to "lock" a socket state, so that * interrupts and bottom half handlers won't change it * from under us. It essentially blocks any incoming @@ -680,16 +737,19 @@ extern void FASTCALL(release_sock(struct sock *sk)); #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -extern struct sock *sk_alloc(int family, int priority, +extern struct sock *sk_alloc(int family, + unsigned int __nocast priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); +extern struct sock *sk_clone(const struct sock *sk, + const unsigned int __nocast priority); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - int priority); + unsigned int __nocast priority); extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - int priority); + unsigned int __nocast priority); extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); @@ -704,7 +764,8 @@ extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode); -extern void *sock_kmalloc(struct sock *sk, int size, int priority); +extern void *sock_kmalloc(struct sock *sk, int size, + unsigned int __nocast priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); extern void sk_send_sigurg(struct sock *sk); @@ -975,6 +1036,16 @@ sk_dst_check(struct sock *sk, u32 cookie) return dst; } +static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) +{ + __sk_dst_set(sk, dst); + sk->sk_route_caps = dst->dev->features; + if (sk->sk_route_caps & NETIF_F_TSO) { + if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) + sk->sk_route_caps &= ~NETIF_F_TSO; + } +} + static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) { sk->sk_wmem_queued += skb->truesize; @@ -1128,15 +1199,18 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) } static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, - int size, int mem, int gfp) + int size, int mem, + unsigned int __nocast gfp) { - struct sk_buff *skb = alloc_skb(size + sk->sk_prot->max_header, gfp); + struct sk_buff *skb; + int hdr_len; + hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); + skb = alloc_skb_fclone(size + hdr_len, gfp); if (skb) { skb->truesize += mem; - if (sk->sk_forward_alloc >= (int)skb->truesize || - sk_stream_mem_schedule(sk, skb->truesize, 0)) { - skb_reserve(skb, sk->sk_prot->max_header); + if (sk_stream_wmem_schedule(sk, skb->truesize)) { + skb_reserve(skb, hdr_len); return skb; } __kfree_skb(skb); @@ -1148,7 +1222,8 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, } static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk, - int size, int gfp) + int size, + unsigned int __nocast gfp) { return sk_stream_alloc_pskb(sk, size, 0, gfp); } @@ -1157,10 +1232,8 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk) { struct page *page = NULL; - if (sk->sk_forward_alloc >= (int)PAGE_SIZE || - sk_stream_mem_schedule(sk, PAGE_SIZE, 0)) - page = alloc_pages(sk->sk_allocation, 0); - else { + page = alloc_pages(sk->sk_allocation, 0); + if (!page) { sk->sk_prot->enter_memory_pressure(); sk_stream_moderate_sndbuf(sk); } @@ -1181,7 +1254,7 @@ static inline int sock_writeable(const struct sock *sk) return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2); } -static inline int gfp_any(void) +static inline unsigned int __nocast gfp_any(void) { return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } @@ -1212,16 +1285,19 @@ static inline int sock_intr_errno(long timeo) static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - struct timeval *stamp = &skb->stamp; + struct timeval stamp; + + skb_get_timestamp(skb, &stamp); if (sock_flag(sk, SOCK_RCVTSTAMP)) { /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. */ - if (stamp->tv_sec == 0) - do_gettimeofday(stamp); + if (stamp.tv_sec == 0) + do_gettimeofday(&stamp); + skb_set_timestamp(skb, &stamp); put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), - stamp); + &stamp); } else - sk->sk_stamp = *stamp; + sk->sk_stamp = stamp; } /** @@ -1246,11 +1322,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *); */ #if 0 -#define NETDEBUG(x) do { } while (0) -#define LIMIT_NETDEBUG(x) do {} while(0) +#define NETDEBUG(fmt, args...) do { } while (0) +#define LIMIT_NETDEBUG(fmt, args...) do { } while(0) #else -#define NETDEBUG(x) do { x; } while (0) -#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) +#define NETDEBUG(fmt, args...) printk(fmt,##args) +#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0) #endif /* @@ -1297,4 +1373,15 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign } #endif +extern void sk_init(void); + +#ifdef CONFIG_SYSCTL +extern struct ctl_table core_table[]; +#endif + +extern int sysctl_optmem_max; + +extern __u32 sysctl_wmem_default; +extern __u32 sysctl_rmem_default; + #endif /* _SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index e71f8ba..97af77c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -21,359 +21,29 @@ #define TCP_DEBUG 1 #define FASTRETRANS_DEBUG 1 -/* Cancel timers, when they are not required. */ -#undef TCP_CLEAR_TIMERS - #include <linux/config.h> #include <linux/list.h> #include <linux/tcp.h> #include <linux/slab.h> #include <linux/cache.h> #include <linux/percpu.h> + +#include <net/inet_connection_sock.h> +#include <net/inet_timewait_sock.h> +#include <net/inet_hashtables.h> #include <net/checksum.h> +#include <net/request_sock.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ip.h> -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -#include <linux/ipv6.h> -#endif -#include <linux/seq_file.h> - -/* This is for all connections with a full identity, no wildcards. - * New scheme, half the table is for TIME_WAIT, the other half is - * for the rest. I'll experiment with dynamic table growth later. - */ -struct tcp_ehash_bucket { - rwlock_t lock; - struct hlist_head chain; -} __attribute__((__aligned__(8))); - -/* This is for listening sockets, thus all sockets which possess wildcards. */ -#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ - -/* There are a few simple rules, which allow for local port reuse by - * an application. In essence: - * - * 1) Sockets bound to different interfaces may share a local port. - * Failing that, goto test 2. - * 2) If all sockets have sk->sk_reuse set, and none of them are in - * TCP_LISTEN state, the port may be shared. - * Failing that, goto test 3. - * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local - * address, and none of them are the same, the port may be - * shared. - * Failing this, the port cannot be shared. - * - * The interesting point, is test #2. This is what an FTP server does - * all day. To optimize this case we use a specific flag bit defined - * below. As we add sockets to a bind bucket list, we perform a - * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) - * As long as all sockets added to a bind bucket pass this test, - * the flag bit will be set. - * The resulting situation is that tcp_v[46]_verify_bind() can just check - * for this flag bit, if it is set and the socket trying to bind has - * sk->sk_reuse set, we don't even have to walk the owners list at all, - * we return that it is ok to bind this socket to the requested local port. - * - * Sounds like a lot of work, but it is worth it. In a more naive - * implementation (ie. current FreeBSD etc.) the entire list of ports - * must be walked for each data port opened by an ftp server. Needless - * to say, this does not scale at all. With a couple thousand FTP - * users logged onto your box, isn't it nice to know that new data - * ports are created in O(1) time? I thought so. ;-) -DaveM - */ -struct tcp_bind_bucket { - unsigned short port; - signed short fastreuse; - struct hlist_node node; - struct hlist_head owners; -}; - -#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) - -struct tcp_bind_hashbucket { - spinlock_t lock; - struct hlist_head chain; -}; - -static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_entry(head->chain.first, struct tcp_bind_bucket, node); -} - -static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_empty(&head->chain) ? NULL : __tb_head(head); -} - -extern struct tcp_hashinfo { - /* This is for sockets with full identity only. Sockets here will - * always be without wildcards and will have the following invariant: - * - * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE - * - * First half of the table is for sockets not in TIME_WAIT, second half - * is for TIME_WAIT sockets only. - */ - struct tcp_ehash_bucket *__tcp_ehash; - - /* Ok, let's try this, I give up, we do need a local binding - * TCP hash as well as the others for fast bind/connect. - */ - struct tcp_bind_hashbucket *__tcp_bhash; - - int __tcp_bhash_size; - int __tcp_ehash_size; - - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. - */ - struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; - - /* All the above members are written once at bootup and - * never written again _or_ are predominantly read-access. - * - * Now align to a new cache line as all the following members - * are often dirty. - */ - rwlock_t __tcp_lhash_lock ____cacheline_aligned; - atomic_t __tcp_lhash_users; - wait_queue_head_t __tcp_lhash_wait; - spinlock_t __tcp_portalloc_lock; -} tcp_hashinfo; - -#define tcp_ehash (tcp_hashinfo.__tcp_ehash) -#define tcp_bhash (tcp_hashinfo.__tcp_bhash) -#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size) -#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size) -#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash) -#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock) -#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users) -#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait) -#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock) - -extern kmem_cache_t *tcp_bucket_cachep; -extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum); -extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); -extern void tcp_bucket_unlock(struct sock *sk); -extern int tcp_port_rover; - -/* These are AF independent. */ -static __inline__ int tcp_bhashfn(__u16 lport) -{ - return (lport & (tcp_bhash_size - 1)); -} - -extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, - unsigned short snum); - -#if (BITS_PER_LONG == 64) -#define TCP_ADDRCMP_ALIGN_BYTES 8 -#else -#define TCP_ADDRCMP_ALIGN_BYTES 4 -#endif - -/* This is a TIME_WAIT bucket. It works around the memory consumption - * problems of sockets in such a state on heavily loaded servers, but - * without violating the protocol specification. - */ -struct tcp_tw_bucket { - /* - * Now struct sock also uses sock_common, so please just - * don't add nothing before this first member (__tw_common) --acme - */ - struct sock_common __tw_common; -#define tw_family __tw_common.skc_family -#define tw_state __tw_common.skc_state -#define tw_reuse __tw_common.skc_reuse -#define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node -#define tw_bind_node __tw_common.skc_bind_node -#define tw_refcnt __tw_common.skc_refcnt - volatile unsigned char tw_substate; - unsigned char tw_rcv_wscale; - __u16 tw_sport; - /* Socket demultiplex comparisons on incoming packets. */ - /* these five are in inet_sock */ - __u32 tw_daddr - __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); - __u32 tw_rcv_saddr; - __u16 tw_dport; - __u16 tw_num; - /* And these are ours. */ - int tw_hashent; - int tw_timeout; - __u32 tw_rcv_nxt; - __u32 tw_snd_nxt; - __u32 tw_rcv_wnd; - __u32 tw_ts_recent; - long tw_ts_recent_stamp; - unsigned long tw_ttd; - struct tcp_bind_bucket *tw_tb; - struct hlist_node tw_death_node; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; - int tw_v6_ipv6only; -#endif -}; +#include <net/tcp_states.h> -static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_node, list); -} - -static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind_node, list); -} - -static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) -{ - return tw->tw_death_node.pprev != NULL; -} - -static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) -{ - tw->tw_death_node.pprev = NULL; -} - -static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - __hlist_del(&tw->tw_death_node); - tw_dead_node_init(tw); -} - -static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - if (tw_dead_hashed(tw)) { - __tw_del_dead_node(tw); - return 1; - } - return 0; -} - -#define tw_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) - -#define tw_for_each_inmate(tw, node, jail) \ - hlist_for_each_entry(tw, node, jail, tw_death_node) - -#define tw_for_each_inmate_safe(tw, node, safe, jail) \ - hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) - -#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) - -static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; -} - -static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) -{ - return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; -} - -#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) - -static inline int tcp_v6_ipv6only(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); -} -#else -# define __tcp_v6_rcv_saddr(__sk) NULL -# define tcp_v6_rcv_saddr(__sk) NULL -# define tcptw_sk_ipv6only(__sk) 0 -# define tcp_v6_ipv6only(__sk) 0 -#endif +#include <linux/seq_file.h> -extern kmem_cache_t *tcp_timewait_cachep; - -static inline void tcp_tw_put(struct tcp_tw_bucket *tw) -{ - if (atomic_dec_and_test(&tw->tw_refcnt)) { -#ifdef INET_REFCNT_DEBUG - printk(KERN_DEBUG "tw_bucket %p released\n", tw); -#endif - kmem_cache_free(tcp_timewait_cachep, tw); - } -} +extern struct inet_hashinfo tcp_hashinfo; extern atomic_t tcp_orphan_count; -extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); - - -/* Socket demux engine toys. */ -#ifdef __BIG_ENDIAN -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__sport)<<16) | (__u32)(__dport)) -#else /* __LITTLE_ENDIAN */ -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__dport)<<16) | (__u32)(__sport)) -#endif - -#if (BITS_PER_LONG == 64) -#ifdef __BIG_ENDIAN -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); -#else /* __LITTLE_ENDIAN */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); -#endif /* __BIG_ENDIAN */ -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#else /* 32-bit arch */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((inet_sk(__sk)->daddr == (__saddr)) && \ - (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ - (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif /* 64-bit arch */ - -#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) - -/* These can have wildcards, don't try too hard. */ -static __inline__ int tcp_lhashfn(unsigned short num) -{ - return num & (TCP_LHTABLE_SIZE - 1); -} - -static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) -{ - return tcp_lhashfn(inet_sk(sk)->num); -} #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -477,52 +147,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) * timestamps. It must be less than * minimal timewait lifetime. */ - -#define TCP_TW_RECYCLE_SLOTS_LOG 5 -#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG) - -/* If time > 4sec, it is "slow" path, no recycling is required, - so that we select tick to get range about 4 seconds. - */ - -#if HZ <= 16 || HZ > 4096 -# error Unsupported: HZ <= 16 or HZ > 4096 -#elif HZ <= 32 -# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 64 -# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 128 -# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 256 -# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 512 -# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 1024 -# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 2048 -# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG) -#else -# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) -#endif - -#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation - * max_cwnd = snd_cwnd * beta - */ -#define BICTCP_MAX_INCREMENT 32 /* - * Limit on the amount of - * increment allowed during - * binary search. - */ -#define BICTCP_FUNC_OF_MIN_INCR 11 /* - * log(B/Smin)/log(B/(B-1))+1, - * Smin:min increment - * B:log factor - */ -#define BICTCP_B 4 /* - * In binary search, - * go to point (max+min)/N - */ - /* * TCP option */ @@ -552,23 +176,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -#define TCP_TIME_RETRANS 1 /* Retransmit timer */ -#define TCP_TIME_DACK 2 /* Delayed ack timer */ -#define TCP_TIME_PROBE0 3 /* Zero window probe timer */ -#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ - /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ #define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ +extern struct inet_timewait_death_row tcp_death_row; + /* sysctl variables for tcp */ -extern int sysctl_max_syn_backlog; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_tw_recycle; extern int sysctl_tcp_keepalive_time; extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_intvl; @@ -583,7 +202,6 @@ extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_max_tw_buckets; extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; @@ -596,16 +214,7 @@ extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; -extern int sysctl_tcp_westwood; -extern int sysctl_tcp_vegas_cong_avoid; -extern int sysctl_tcp_vegas_alpha; -extern int sysctl_tcp_vegas_beta; -extern int sysctl_tcp_vegas_gamma; extern int sysctl_tcp_nometrics_save; -extern int sysctl_tcp_bic; -extern int sysctl_tcp_bic_fast_convergence; -extern int sysctl_tcp_bic_low_window; -extern int sysctl_tcp_bic_beta; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; @@ -613,80 +222,6 @@ extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; -struct open_request; - -struct or_calltable { - int family; - int (*rtx_syn_ack) (struct sock *sk, struct open_request *req, struct dst_entry*); - void (*send_ack) (struct sk_buff *skb, struct open_request *req); - void (*destructor) (struct open_request *req); - void (*send_reset) (struct sk_buff *skb); -}; - -struct tcp_v4_open_req { - __u32 loc_addr; - __u32 rmt_addr; - struct ip_options *opt; -}; - -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -struct tcp_v6_open_req { - struct in6_addr loc_addr; - struct in6_addr rmt_addr; - struct sk_buff *pktopts; - int iif; -}; -#endif - -/* this structure is too big */ -struct open_request { - struct open_request *dl_next; /* Must be first member! */ - __u32 rcv_isn; - __u32 snt_isn; - __u16 rmt_port; - __u16 mss; - __u8 retrans; - __u8 __pad; - __u16 snd_wscale : 4, - rcv_wscale : 4, - tstamp_ok : 1, - sack_ok : 1, - wscale_ok : 1, - ecn_ok : 1, - acked : 1; - /* The following two fields can be easily recomputed I think -AK */ - __u32 window_clamp; /* window clamp at creation time */ - __u32 rcv_wnd; /* rcv_wnd offered first time */ - __u32 ts_recent; - unsigned long expires; - struct or_calltable *class; - struct sock *sk; - union { - struct tcp_v4_open_req v4_req; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - struct tcp_v6_open_req v6_req; -#endif - } af; -}; - -/* SLAB cache for open requests. */ -extern kmem_cache_t *tcp_openreq_cachep; - -#define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC) -#define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req) - -static inline void tcp_openreq_free(struct open_request *req) -{ - req->class->destructor(req); - tcp_openreq_fastfree(req); -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define TCP_INET_FAMILY(fam) ((fam) == AF_INET) -#else -#define TCP_INET_FAMILY(fam) 1 -#endif - /* * Pointers to address related TCP functions * (i.e. things that depend on the address family) @@ -708,7 +243,7 @@ struct tcp_func { struct sock * (*syn_recv_sock) (struct sock *sk, struct sk_buff *skb, - struct open_request *req, + struct request_sock *req, struct dst_entry *dst); int (*remember_stamp) (struct sock *sk); @@ -767,9 +302,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); #define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) -extern void tcp_put_port(struct sock *sk); -extern void tcp_inherit_port(struct sock *sk, struct sock *child); - extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); @@ -778,7 +310,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); extern int tcp_v4_remember_stamp(struct sock *sk); -extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); +extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); @@ -800,37 +332,22 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); -enum tcp_ack_state_t +static inline void tcp_dec_quickack_mode(struct sock *sk, + const unsigned int pkts) { - TCP_ACK_SCHED = 1, - TCP_ACK_TIMER = 2, - TCP_ACK_PUSHED= 4 -}; + struct inet_connection_sock *icsk = inet_csk(sk); -static inline void tcp_schedule_ack(struct tcp_sock *tp) -{ - tp->ack.pending |= TCP_ACK_SCHED; -} - -static inline int tcp_ack_scheduled(struct tcp_sock *tp) -{ - return tp->ack.pending&TCP_ACK_SCHED; -} - -static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp) -{ - if (tp->ack.quick && --tp->ack.quick == 0) { - /* Leaving quickack mode we deflate ATO. */ - tp->ack.ato = TCP_ATO_MIN; + if (icsk->icsk_ack.quick) { + if (pkts >= icsk->icsk_ack.quick) { + icsk->icsk_ack.quick = 0; + /* Leaving quickack mode we deflate ATO. */ + icsk->icsk_ack.ato = TCP_ATO_MIN; + } else + icsk->icsk_ack.quick -= pkts; } } -extern void tcp_enter_quickack_mode(struct tcp_sock *tp); - -static __inline__ void tcp_delack_init(struct tcp_sock *tp) -{ - memset(&tp->ack, 0, sizeof(tp->ack)); -} +extern void tcp_enter_quickack_mode(struct sock *sk); static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { @@ -846,14 +363,13 @@ enum tcp_tw_status }; -extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, +extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct tcphdr *th, - unsigned len); + const struct tcphdr *th); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, - struct open_request *req, - struct open_request **prev); + struct request_sock *req, + struct request_sock **prev); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); @@ -864,7 +380,6 @@ extern void tcp_update_metrics(struct sock *sk); extern void tcp_close(struct sock *sk, long timeout); -extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, @@ -880,8 +395,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, size_t len, int nonblock, int flags, int *addr_len); -extern int tcp_listen_start(struct sock *sk); - extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab); @@ -890,11 +403,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_rebuild_header(struct sock *sk); - -extern int tcp_v4_build_header(struct sock *sk, - struct sk_buff *skb); - extern void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); @@ -903,12 +411,12 @@ extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); extern struct sock * tcp_create_openreq_child(struct sock *sk, - struct open_request *req, + struct request_sock *req, struct sk_buff *skb); extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct open_request *req, + struct request_sock *req, struct dst_entry *dst); extern int tcp_v4_do_rcv(struct sock *sk, @@ -922,7 +430,7 @@ extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct open_request *req); + struct request_sock *req); extern int tcp_disconnect(struct sock *sk, int flags); @@ -939,36 +447,40 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, /* tcp_output.c */ -extern int tcp_write_xmit(struct sock *, int nonagle); +extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, + unsigned int cur_mss, int nonagle); +extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp); extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); extern int tcp_trim_head(struct sock *, struct sk_buff *, u32); +extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int); extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); -extern void tcp_send_active_reset(struct sock *sk, int priority); +extern void tcp_send_active_reset(struct sock *sk, + unsigned int __nocast priority); extern int tcp_send_synack(struct sock *); -extern void tcp_push_one(struct sock *, unsigned mss_now); +extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); +/* tcp_input.c */ +extern void tcp_cwnd_application_limited(struct sock *sk); + /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); -extern void tcp_clear_xmit_timers(struct sock *); +static inline void tcp_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} -extern void tcp_delete_keepalive_timer(struct sock *); -extern void tcp_reset_keepalive_timer(struct sock *, unsigned long); extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef TCP_DEBUG -extern const char tcp_timer_bug_msg[]; -#endif - -/* tcp_diag.c */ +/* tcp.c */ extern void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ @@ -977,72 +489,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -static inline void tcp_clear_xmit_timer(struct sock *sk, int what) -{ - struct tcp_sock *tp = tcp_sk(sk); - - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->retransmit_timer); -#endif - break; - case TCP_TIME_DACK: - tp->ack.blocked = 0; - tp->ack.pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->delack_timer); -#endif - break; - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); -#endif - return; - }; - -} - -/* - * Reset the retransmission timer - */ -static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (when > TCP_RTO_MAX) { -#ifdef TCP_DEBUG - printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); -#endif - when = TCP_RTO_MAX; - } - - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = what; - tp->timeout = jiffies+when; - sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); - break; - - case TCP_TIME_DACK: - tp->ack.pending |= TCP_ACK_TIMER; - tp->ack.timeout = jiffies+when; - sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); - break; - - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); -#endif - return; - }; -} - /* Initialize RCV_MSS value. * RCV_MSS is an our guess about MSS used by the peer. * We haven't any direct information about the MSS. @@ -1054,13 +500,13 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long static inline void tcp_initialize_rcv_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - unsigned int hint = min(tp->advmss, tp->mss_cache_std); + unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd/2); hint = min(hint, TCP_MIN_RCVMSS); hint = max(hint, TCP_MIN_MSS); - tp->ack.rcv_mss = hint; + inet_csk(sk)->icsk_ack.rcv_mss = hint; } static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) @@ -1077,7 +523,7 @@ static __inline__ void tcp_fast_path_on(struct tcp_sock *tp) static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp) { - if (skb_queue_len(&tp->out_of_order_queue) == 0 && + if (skb_queue_empty(&tp->out_of_order_queue) && tp->rcv_wnd && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && !tp->urg_data) @@ -1195,7 +641,8 @@ static inline void tcp_packets_out_inc(struct sock *sk, tp->packets_out += tcp_skb_pcount(skb); if (!orig) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } static inline void tcp_packets_out_dec(struct tcp_sock *tp, @@ -1204,6 +651,86 @@ static inline void tcp_packets_out_dec(struct tcp_sock *tp, tp->packets_out -= tcp_skb_pcount(skb); } +/* Events passed to congestion control interface */ +enum tcp_ca_event { + CA_EVENT_TX_START, /* first transmit when no packets in flight */ + CA_EVENT_CWND_RESTART, /* congestion window restart */ + CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ + CA_EVENT_FRTO, /* fast recovery timeout */ + CA_EVENT_LOSS, /* loss timeout */ + CA_EVENT_FAST_ACK, /* in sequence ack */ + CA_EVENT_SLOW_ACK, /* other ack */ +}; + +/* + * Interface for adding new TCP congestion control handlers + */ +#define TCP_CA_NAME_MAX 16 +struct tcp_congestion_ops { + struct list_head list; + + /* initialize private data (optional) */ + void (*init)(struct sock *sk); + /* cleanup private data (optional) */ + void (*release)(struct sock *sk); + + /* return slow start threshold (required) */ + u32 (*ssthresh)(struct sock *sk); + /* lower bound for congestion window (optional) */ + u32 (*min_cwnd)(struct sock *sk); + /* do new cwnd calculation (required) */ + void (*cong_avoid)(struct sock *sk, u32 ack, + u32 rtt, u32 in_flight, int good_ack); + /* round trip time sample per acked packet (optional) */ + void (*rtt_sample)(struct sock *sk, u32 usrtt); + /* call before changing ca_state (optional) */ + void (*set_state)(struct sock *sk, u8 new_state); + /* call when cwnd event occurs (optional) */ + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); + /* new value of cwnd after loss (optional) */ + u32 (*undo_cwnd)(struct sock *sk); + /* hook for packet ack accounting (optional) */ + void (*pkts_acked)(struct sock *sk, u32 num_acked); + /* get info for inet_diag (optional) */ + void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); + + char name[TCP_CA_NAME_MAX]; + struct module *owner; +}; + +extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); +extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); + +extern void tcp_init_congestion_control(struct sock *sk); +extern void tcp_cleanup_congestion_control(struct sock *sk); +extern int tcp_set_default_congestion_control(const char *name); +extern void tcp_get_default_congestion_control(char *name); +extern int tcp_set_congestion_control(struct sock *sk, const char *name); + +extern struct tcp_congestion_ops tcp_init_congestion_ops; +extern u32 tcp_reno_ssthresh(struct sock *sk); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, + u32 rtt, u32 in_flight, int flag); +extern u32 tcp_reno_min_cwnd(struct sock *sk); +extern struct tcp_congestion_ops tcp_reno; + +static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->set_state) + icsk->icsk_ca_ops->set_state(sk, ca_state); + icsk->icsk_ca_state = ca_state; +} + +static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cwnd_event) + icsk->icsk_ca_ops->cwnd_event(sk, event); +} + /* This determines how many packets are "in the network" to the best * of our knowledge. In many cases it is conservative, but where * detailed information is available from the receiver (via SACK @@ -1223,98 +750,14 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) return (tp->packets_out - tp->left_out + tp->retrans_out); } -/* - * Which congestion algorithim is in use on the connection. - */ -#define tcp_is_vegas(__tp) ((__tp)->adv_cong == TCP_VEGAS) -#define tcp_is_westwood(__tp) ((__tp)->adv_cong == TCP_WESTWOOD) -#define tcp_is_bic(__tp) ((__tp)->adv_cong == TCP_BIC) - -/* Recalculate snd_ssthresh, we want to set it to: - * - * Reno: - * one half the current congestion window, but no - * less than two segments - * - * BIC: - * behave like Reno until low_window is reached, - * then increase congestion window slowly - */ -static inline __u32 tcp_recalc_ssthresh(struct tcp_sock *tp) -{ - if (tcp_is_bic(tp)) { - if (sysctl_tcp_bic_fast_convergence && - tp->snd_cwnd < tp->bictcp.last_max_cwnd) - tp->bictcp.last_max_cwnd = (tp->snd_cwnd * - (BICTCP_BETA_SCALE - + sysctl_tcp_bic_beta)) - / (2 * BICTCP_BETA_SCALE); - else - tp->bictcp.last_max_cwnd = tp->snd_cwnd; - - if (tp->snd_cwnd > sysctl_tcp_bic_low_window) - return max((tp->snd_cwnd * sysctl_tcp_bic_beta) - / BICTCP_BETA_SCALE, 2U); - } - - return max(tp->snd_cwnd >> 1U, 2U); -} - -/* Stop taking Vegas samples for now. */ -#define tcp_vegas_disable(__tp) ((__tp)->vegas.doing_vegas_now = 0) - -static inline void tcp_vegas_enable(struct tcp_sock *tp) -{ - /* There are several situations when we must "re-start" Vegas: - * - * o when a connection is established - * o after an RTO - * o after fast recovery - * o when we send a packet and there is no outstanding - * unacknowledged data (restarting an idle connection) - * - * In these circumstances we cannot do a Vegas calculation at the - * end of the first RTT, because any calculation we do is using - * stale info -- both the saved cwnd and congestion feedback are - * stale. - * - * Instead we must wait until the completion of an RTT during - * which we actually receive ACKs. - */ - - /* Begin taking Vegas samples next time we send something. */ - tp->vegas.doing_vegas_now = 1; - - /* Set the beginning of the next send window. */ - tp->vegas.beg_snd_nxt = tp->snd_nxt; - - tp->vegas.cntRTT = 0; - tp->vegas.minRTT = 0x7fffffff; -} - -/* Should we be taking Vegas samples right now? */ -#define tcp_vegas_enabled(__tp) ((__tp)->vegas.doing_vegas_now) - -extern void tcp_ca_init(struct tcp_sock *tp); - -static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) -{ - if (tcp_is_vegas(tp)) { - if (ca_state == TCP_CA_Open) - tcp_vegas_enable(tp); - else - tcp_vegas_disable(tp); - } - tp->ca_state = ca_state; -} - /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. */ -static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) +static inline __u32 tcp_current_ssthresh(const struct sock *sk) { - if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) + const struct tcp_sock *tp = tcp_sk(sk); + if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, @@ -1330,33 +773,14 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) tp->left_out = tp->sacked_out + tp->lost_out; } -extern void tcp_cwnd_application_limited(struct sock *sk); - -/* Congestion window validation. (RFC2861) */ - -static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) +/* Set slow start threshold and cwnd not falling to slow start */ +static inline void __tcp_enter_cwr(struct sock *sk) { - __u32 packets_out = tp->packets_out; - - if (packets_out >= tp->snd_cwnd) { - /* Network is feed fully. */ - tp->snd_cwnd_used = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - } else { - /* Network starves. */ - if (tp->packets_out > tp->snd_cwnd_used) - tp->snd_cwnd_used = tp->packets_out; - - if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) - tcp_cwnd_application_limited(sk); - } -} + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); -/* Set slow start threshould and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct tcp_sock *tp) -{ tp->undo_marker = 0; - tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; @@ -1365,12 +789,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) TCP_ECN_queue_cwr(tp); } -static inline void tcp_enter_cwr(struct tcp_sock *tp) +static inline void tcp_enter_cwr(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + tp->prior_ssthresh = 0; - if (tp->ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_CWR); + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + __tcp_enter_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); } } @@ -1384,12 +810,6 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp) return 3; } -static __inline__ int tcp_minshall_check(const struct tcp_sock *tp) -{ - return after(tp->snd_sml,tp->snd_una) && - !after(tp->snd_sml, tp->snd_nxt); -} - static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, const struct sk_buff *skb) { @@ -1397,105 +817,12 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, tp->snd_sml = TCP_SKB_CB(skb)->end_seq; } -/* Return 0, if packet can be sent now without violation Nagle's rules: - 1. It is full sized. - 2. Or it contains FIN. - 3. Or TCP_NODELAY was set. - 4. Or TCP_CORK is not set, and all sent packets are ACKed. - With Minshall's modification: all sent small packets are ACKed. - */ - -static __inline__ int -tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb, - unsigned mss_now, int nonagle) -{ - return (skb->len < mss_now && - !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && - ((nonagle&TCP_NAGLE_CORK) || - (!nonagle && - tp->packets_out && - tcp_minshall_check(tp)))); -} - -extern void tcp_set_skb_tso_segs(struct sock *, struct sk_buff *); - -/* This checks if the data bearing packet SKB (usually sk->sk_send_head) - * should be put on the wire right now. - */ -static __inline__ int tcp_snd_test(struct sock *sk, - struct sk_buff *skb, - unsigned cur_mss, int nonagle) -{ - struct tcp_sock *tp = tcp_sk(sk); - int pkts = tcp_skb_pcount(skb); - - if (!pkts) { - tcp_set_skb_tso_segs(sk, skb); - pkts = tcp_skb_pcount(skb); - } - - /* RFC 1122 - section 4.2.3.4 - * - * We must queue if - * - * a) The right edge of this frame exceeds the window - * b) There are packets in flight and we have a small segment - * [SWS avoidance and Nagle algorithm] - * (part of SWS is done on packetization) - * Minshall version sounds: there are no _small_ - * segments in flight. (tcp_nagle_check) - * c) We have too many packets 'in flight' - * - * Don't use the nagle rule for urgent data (or - * for the final FIN -DaveM). - * - * Also, Nagle rule does not apply to frames, which - * sit in the middle of queue (they have no chances - * to get new data) and if room at tail of skb is - * not enough to save something seriously (<32 for now). - */ - - /* Don't be strict about the congestion window for the - * final FIN frame. -DaveM - */ - return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode - || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && - (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) || - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && - !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); -} - static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { - if (!tp->packets_out && !tp->pending) - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); -} - -static __inline__ int tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb->next == (struct sk_buff *)&sk->sk_write_queue; -} - -/* Push out any pending frames which were held back due to - * TCP_CORK or attempt at coalescing tiny packets. - * The socket must be locked by the caller. - */ -static __inline__ void __tcp_push_pending_frames(struct sock *sk, - struct tcp_sock *tp, - unsigned cur_mss, - int nonagle) -{ - struct sk_buff *skb = sk->sk_send_head; - - if (skb) { - if (!tcp_skb_is_last(sk, skb)) - nonagle = TCP_NAGLE_PUSH; - if (!tcp_snd_test(sk, skb, cur_mss, nonagle) || - tcp_write_xmit(sk, nonagle)) - tcp_check_probe_timer(sk, tp); - } - tcp_cwnd_validate(sk, tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + if (!tp->packets_out && !icsk->icsk_pending) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + icsk->icsk_rto, TCP_RTO_MAX); } static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1504,15 +831,6 @@ static __inline__ void tcp_push_pending_frames(struct sock *sk, __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); } -static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) -{ - struct sk_buff *skb = sk->sk_send_head; - - return (skb && - tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), - tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle)); -} - static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) { tp->snd_wl1 = seq; @@ -1523,9 +841,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) tp->snd_wl1 = seq; } -extern void tcp_destroy_sock(struct sock *sk); - - /* * Calculate(/check) TCP checksum */ @@ -1585,8 +900,10 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); - if (!tcp_ack_scheduled(tp)) - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * TCP_RTO_MIN) / 4, + TCP_RTO_MAX); } return 1; } @@ -1619,9 +936,9 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (tcp_sk(sk)->bind_hash && + if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) - tcp_put_port(sk); + inet_put_port(&tcp_hashinfo, sk); /* fall through */ default: if (oldstate==TCP_ESTABLISHED) @@ -1648,7 +965,7 @@ static __inline__ void tcp_done(struct sock *sk) if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); else - tcp_destroy_sock(sk); + inet_csk_destroy_sock(sk); } static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) @@ -1750,124 +1067,27 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, - struct sock *child) -{ - struct tcp_sock *tp = tcp_sk(sk); - - req->sk = child; - sk_acceptq_added(sk); - - if (!tp->accept_queue_tail) { - tp->accept_queue = req; - } else { - tp->accept_queue_tail->dl_next = req; - } - tp->accept_queue_tail = req; - req->dl_next = NULL; -} - -struct tcp_listen_opt -{ - u8 max_qlen_log; /* log_2 of maximal queued SYNs */ - int qlen; - int qlen_young; - int clock_hand; - u32 hash_rnd; - struct open_request *syn_table[TCP_SYNQ_HSIZE]; -}; - -static inline void -tcp_synq_removed(struct sock *sk, struct open_request *req) -{ - struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; - - if (--lopt->qlen == 0) - tcp_delete_keepalive_timer(sk); - if (req->retrans == 0) - lopt->qlen_young--; -} - -static inline void tcp_synq_added(struct sock *sk) -{ - struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; - - if (lopt->qlen++ == 0) - tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); - lopt->qlen_young++; -} - -static inline int tcp_synq_len(struct sock *sk) -{ - return tcp_sk(sk)->listen_opt->qlen; -} - -static inline int tcp_synq_young(struct sock *sk) -{ - return tcp_sk(sk)->listen_opt->qlen_young; -} - -static inline int tcp_synq_is_full(struct sock *sk) -{ - return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log; -} - -static inline void tcp_synq_unlink(struct tcp_sock *tp, struct open_request *req, - struct open_request **prev) -{ - write_lock(&tp->syn_wait_lock); - *prev = req->dl_next; - write_unlock(&tp->syn_wait_lock); -} - -static inline void tcp_synq_drop(struct sock *sk, struct open_request *req, - struct open_request **prev) -{ - tcp_synq_unlink(tcp_sk(sk), req, prev); - tcp_synq_removed(sk, req); - tcp_openreq_free(req); -} - -static __inline__ void tcp_openreq_init(struct open_request *req, +static __inline__ void tcp_openreq_init(struct request_sock *req, struct tcp_options_received *rx_opt, struct sk_buff *skb) { + struct inet_request_sock *ireq = inet_rsk(req); + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ - req->rcv_isn = TCP_SKB_CB(skb)->seq; + tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; - req->tstamp_ok = rx_opt->tstamp_ok; - req->sack_ok = rx_opt->sack_ok; - req->snd_wscale = rx_opt->snd_wscale; - req->wscale_ok = rx_opt->wscale_ok; - req->acked = 0; - req->ecn_ok = 0; - req->rmt_port = skb->h.th->source; + ireq->tstamp_ok = rx_opt->tstamp_ok; + ireq->sack_ok = rx_opt->sack_ok; + ireq->snd_wscale = rx_opt->snd_wscale; + ireq->wscale_ok = rx_opt->wscale_ok; + ireq->acked = 0; + ireq->ecn_ok = 0; + ireq->rmt_port = skb->h.th->source; } extern void tcp_enter_memory_pressure(void); -extern void tcp_listen_wlock(void); - -/* - We may sleep inside this lock. - * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&tcp_lhash_lock). - */ - -static inline void tcp_listen_lock(void) -{ - /* read_lock synchronizes to candidates to writers */ - read_lock(&tcp_lhash_lock); - atomic_inc(&tcp_lhash_users); - read_unlock(&tcp_lhash_lock); -} - -static inline void tcp_listen_unlock(void) -{ - if (atomic_dec_and_test(&tcp_lhash_users)) - wake_up(&tcp_lhash_wait); -} - static inline int keepalive_intvl_when(const struct tcp_sock *tp) { return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; @@ -1878,12 +1098,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) return tp->keepalive_time ? : sysctl_tcp_keepalive_time; } -static inline int tcp_fin_time(const struct tcp_sock *tp) +static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + const int rto = inet_csk(sk)->icsk_rto; - if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) - fin_timeout = (tp->rto<<2) - (tp->rto>>1); + if (fin_timeout < (rto << 2) - (rto >> 1)) + fin_timeout = (rto << 2) - (rto >> 1); return fin_timeout; } @@ -1912,15 +1133,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int return 1; } -static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst) -{ - sk->sk_route_caps = dst->dev->features; - if (sk->sk_route_caps & NETIF_F_TSO) { - if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) - sk->sk_route_caps &= ~NETIF_F_TSO; - } -} - #define TCP_CHECK_TIMER(sk) do { } while (0) static inline int tcp_use_frto(const struct sock *sk) @@ -1972,52 +1184,16 @@ struct tcp_iter_state { extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); -/* TCP Westwood functions and constants */ - -#define TCP_WESTWOOD_INIT_RTT (20*HZ) /* maybe too conservative?! */ -#define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms */ - -static inline void tcp_westwood_update_rtt(struct tcp_sock *tp, __u32 rtt_seq) -{ - if (tcp_is_westwood(tp)) - tp->westwood.rtt = rtt_seq; -} +extern struct request_sock_ops tcp_request_sock_ops; -static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_sock *tp) -{ - return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) / - (__u32) (tp->mss_cache_std), - 2U); -} - -static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_sock *tp) -{ - return tcp_is_westwood(tp) ? __tcp_westwood_bw_rttmin(tp) : 0; -} +extern int tcp_v4_destroy_sock(struct sock *sk); -static inline int tcp_westwood_ssthresh(struct tcp_sock *tp) -{ - __u32 ssthresh = 0; - - if (tcp_is_westwood(tp)) { - ssthresh = __tcp_westwood_bw_rttmin(tp); - if (ssthresh) - tp->snd_ssthresh = ssthresh; - } - - return (ssthresh != 0); -} +#ifdef CONFIG_PROC_FS +extern int tcp4_proc_init(void); +extern void tcp4_proc_exit(void); +#endif -static inline int tcp_westwood_cwnd(struct tcp_sock *tp) -{ - __u32 cwnd = 0; +extern void tcp_v4_init(struct net_proto_family *ops); +extern void tcp_init(void); - if (tcp_is_westwood(tp)) { - cwnd = __tcp_westwood_bw_rttmin(tp); - if (cwnd) - tp->snd_cwnd = cwnd; - } - - return (cwnd != 0); -} #endif /* _TCP_H */ diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index dc14563..c6b8439 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -2,6 +2,7 @@ #define _NET_TCP_ECN_H_ 1 #include <net/inet_ecn.h> +#include <net/request_sock.h> #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -38,9 +39,9 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, } static __inline__ void -TCP_ECN_make_synack(struct open_request *req, struct tcphdr *th) +TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) { - if (req->ecn_ok) + if (inet_rsk(req)->ecn_ok) th->ece = 1; } @@ -87,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) * it is surely retransmit. It is not in ECN RFC, * but Linux follows this rule. */ else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode((struct sock *)tp); } } @@ -111,16 +112,16 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) } static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, - struct open_request *req) + struct request_sock *req) { - tp->ecn_flags = req->ecn_ok ? TCP_ECN_OK : 0; + tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } static __inline__ void -TCP_ECN_create_request(struct open_request *req, struct tcphdr *th) +TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th) { if (sysctl_tcp_ecn && th->ece && th->cwr) - req->ecn_ok = 1; + inet_rsk(req)->ecn_ok = 1; } #endif diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h new file mode 100644 index 0000000..b9d4176 --- /dev/null +++ b/include/net/tcp_states.h @@ -0,0 +1,34 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol sk_state field. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_TCP_STATES_H +#define _LINUX_TCP_STATES_H + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + + TCP_MAX_STATES /* Leave at the end! */ +}; + +#define TCP_STATE_MASK 0xF + +#endif /* _LINUX_TCP_STATES_H */ diff --git a/include/net/udp.h b/include/net/udp.h index ac229b7..107b9d7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -94,6 +94,11 @@ struct udp_iter_state { struct seq_operations seq_ops; }; +#ifdef CONFIG_PROC_FS extern int udp_proc_register(struct udp_seq_afinfo *afinfo); extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); + +extern int udp4_proc_init(void); +extern void udp4_proc_exit(void); +#endif #endif /* _UDP_H */ diff --git a/include/net/x25.h b/include/net/x25.h index 7a1ba5b..fee62ff 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -79,6 +79,8 @@ enum { #define X25_DEFAULT_PACKET_SIZE X25_PS128 /* Default Packet Size */ #define X25_DEFAULT_THROUGHPUT 0x0A /* Deafult Throughput */ #define X25_DEFAULT_REVERSE 0x00 /* Default Reverse Charging */ +#define X25_DENY_ACCPT_APPRV 0x01 /* Default value */ +#define X25_ALLOW_ACCPT_APPRV 0x00 /* Control enabled */ #define X25_SMODULUS 8 #define X25_EMODULUS 128 @@ -94,7 +96,7 @@ enum { #define X25_FAC_CLASS_C 0x80 #define X25_FAC_CLASS_D 0xC0 -#define X25_FAC_REVERSE 0x01 +#define X25_FAC_REVERSE 0x01 /* also fast select */ #define X25_FAC_THROUGHPUT 0x02 #define X25_FAC_PACKET_SIZE 0x42 #define X25_FAC_WINDOW_SIZE 0x43 @@ -134,8 +136,8 @@ struct x25_sock { struct sock sk; struct x25_address source_addr, dest_addr; struct x25_neigh *neighbour; - unsigned int lci; - unsigned char state, condition, qbitincl, intflag; + unsigned int lci, cudmatchlength; + unsigned char state, condition, qbitincl, intflag, accptapprv; unsigned short vs, vr, va, vl; unsigned long t2, t21, t22, t23; unsigned short fraglen; @@ -173,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *); /* x25_dev.c */ extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); -extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); +extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); extern void x25_establish_link(struct x25_neigh *); extern void x25_terminate_link(struct x25_neigh *); @@ -242,7 +244,6 @@ extern int x25_validate_nr(struct sock *, unsigned short); extern void x25_write_internal(struct sock *, int); extern int x25_decode(struct sock *, struct sk_buff *, int *, int *, int *, int *, int *); extern void x25_disconnect(struct sock *, int, unsigned char, unsigned char); -extern int x25_check_calluserdata(struct x25_calluserdata *,struct x25_calluserdata *); /* x25_timer.c */ extern void x25_start_heartbeat(struct sock *); diff --git a/include/net/x25device.h b/include/net/x25device.h index cf36a20..1a31837 100644 --- a/include/net/x25device.h +++ b/include/net/x25device.h @@ -5,11 +5,9 @@ #include <linux/if_packet.h> #include <linux/skbuff.h> -static inline unsigned short x25_type_trans(struct sk_buff *skb, - struct net_device *dev) +static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->mac.raw = skb->data; - skb->input_dev = skb->dev = dev; skb->pkt_type = PACKET_HOST; return htons(ETH_P_X25); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d675836..a9d0d8c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -158,6 +158,20 @@ enum { XFRM_STATE_DEAD }; +/* callback structure passed from either netlink or pfkey */ +struct km_event +{ + union { + u32 hard; + u32 proto; + u32 byid; + } data; + + u32 seq; + u32 pid; + u32 event; +}; + struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { @@ -179,6 +193,8 @@ struct xfrm_policy_afinfo { extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); extern int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo); +extern void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c); +extern void km_state_notify(struct xfrm_state *x, struct km_event *c); #define XFRM_ACQ_EXPIRES 30 @@ -188,6 +204,7 @@ struct xfrm_state_afinfo { rwlock_t lock; struct list_head *state_bydst; struct list_head *state_byspi; + int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); @@ -209,7 +226,7 @@ struct xfrm_type struct module *owner; __u8 proto; - int (*init_state)(struct xfrm_state *x, void *args); + int (*init_state)(struct xfrm_state *x); void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); int (*post_input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); @@ -290,11 +307,11 @@ struct xfrm_mgr { struct list_head list; char *id; - int (*notify)(struct xfrm_state *x, int event); + int (*notify)(struct xfrm_state *x, struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir); struct xfrm_policy *(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); - int (*notify_policy)(struct xfrm_policy *x, int dir, int event); + int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -656,7 +673,7 @@ static inline int xfrm_sk_clone_policy(struct sock *sk) return 0; } -extern void xfrm_policy_delete(struct xfrm_policy *pol, int dir); +extern int xfrm_policy_delete(struct xfrm_policy *pol, int dir); static inline void xfrm_sk_free_policy(struct sock *sk) { @@ -786,7 +803,7 @@ struct xfrm_algo_desc { /* XFRM tunnel handlers. */ struct xfrm_tunnel { int (*handler)(struct sk_buff *skb); - void (*err_handler)(struct sk_buff *skb, void *info); + void (*err_handler)(struct sk_buff *skb, __u32 info); }; struct xfrm6_tunnel { @@ -801,7 +818,6 @@ extern void xfrm6_init(void); extern void xfrm6_fini(void); extern void xfrm_state_init(void); extern void xfrm4_state_init(void); -extern void xfrm4_state_fini(void); extern void xfrm6_state_init(void); extern void xfrm6_state_fini(void); @@ -817,12 +833,13 @@ extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); -extern void xfrm_state_delete(struct xfrm_state *x); +extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); extern int xfrm_replay_check(struct xfrm_state *x, u32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq); extern int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); +extern int xfrm_init_state(struct xfrm_state *x); extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); |